source: golgotha/src/render/software/amd3d/affine_map_lit_asm_amd3d.cc

Last change on this file was 80, checked in by Sam Hocevar, 15 years ago
  • Adding the Golgotha source code. Not sure what's going to be interesting in there, but since it's all public domain, there's certainly stuff to pick up.
File size: 7.2 KB
Line 
/********************************************************************** <BR>
  This file is part of Crack dot Com's free source code release of
  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for
  information about compiling & licensing issues visit this URL</a>
  <PRE> If that doesn't help, contact Jonathan Clark at
  golgotha_source@usa.net (Subject should have "GOLG" in it)
***********************************************************************/
8
9#include "software/r1_software_globals.hh"
10#include "software/inline_fpu.hh"
11
12w32 *texture_affine_lit_starter_amd3d()
13{
14  w32 returnval;
15  _asm
16  {
17    mov eax,OFFSET dumb_addr
18dumb_addr:
19    mov returnval,eax
20  }
21  return (w32 *)returnval;
22}
23
24extern w8 last_bh;
25
26void texture_scanline_affine_lit_amd3d(w16 *start_pixel,
27                                       sw32 start_x,
28                                       void *left,//perspective_span *left,
29                                       sw32 width)
30{
31  start_pixel = (w16 *)((w8 *)start_pixel + start_x);
32  last_bh = 0;
33
34  _asm
35  {
36    //num_subdivisions = width >> 4;
37    //num_leftover     = width & 15;
38       
39    mov edi,dword ptr [left]
40
41    mov eax,dword ptr [width]
42    mov ebx,dword ptr [width]
43   
44    shr ebx,4
45    and eax,15
46
47    mov dword ptr [num_leftover],eax
48    mov dword ptr [num_subdivisions],ebx
49   
50    //esi = starting_s_coordinate >> 16 + starting_t_coordinate >> 16 << r1_software_twidth_log2
51    //ecx = starting_s_coordinate << 16
52    //edx = starting_t_coordinate << 16
53    //dx  = starting_light_value
54   
55    mov eax,dword ptr [edi]affine_span.s
56    mov ebx,dword ptr [edi]affine_span.t
57
58    sar eax,16
59    mov esi,dword ptr [r1_software_texture_ptr]
60
61    sar ebx,16
62    mov ecx,dword ptr [edi]affine_span.s
63   
64    shr esi,1
65    mov cl,byte ptr [r1_software_twidth_log2]
66
67    shl ebx,cl
68
69    sal ecx,16
70    mov edx,dword ptr [edi]affine_span.t
71
72    sal edx,16
73    add eax,ebx
74
75    mov dx,word ptr [edi+AFFINE_SPAN_L]
76    mov edi,dword ptr [start_pixel]
77   
78    add esi,eax
79    push ebp
80
81    mov ch,dl //copy the initial error from the 1st lighting value
82
83    mov ebx,0
84    mov eax,0 //must make sure the high bits of these are zeroed out
85  } 
86 
87  while (num_subdivisions)
88  {
89    _asm
90    {
91      mov cl,4
92      mov bh,byte ptr [last_bh] //necessary? bh should be preserved from the bottom of the loop..
93
94      add ch,0 //this is used to clear the carry flag (the actual clc instruction takes 2 cycles. stupid.)
95
96      ALIGN 16
97
98      //high 16 bits of ecx is the fractional s component
99      //high 16 bits of edx is the fractional t component
100
101      //eax is used to lookup the texel as well as the low 8-bits of the lit texel
102      //ebx is used to lookup the high 8-bits of the lit texel
103      //ebp is used to detect a t-carry as well as lookup the lit texel
104      //cl  is the loop count variable
105      //dx  is the lighting value (8 bits integer, 8 bits fraction)
106      //ch  is the lighting error
107      //bh  is used to dither the lighting (mov bh,0 then add ch,dl then adc bh,dh)
108
109    looper1:
110      adc bh,dh
111      add edi,8 //the only convenient place for the stepping of edi was way up here
112
113      movzx eax,word ptr [esi*2]
114      add edx,dword ptr [dtdx_frac]
115 
116      sbb ebp,ebp
117      mov bl,ah
118
119      add ecx,dword ptr [dsdx_frac]
120      mov ah,bh
121     
122      adc esi,dword ptr [4+s_t_carry+ebp*4]
123      mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
124       
125      add ebp,dword ptr [0xDEADBEEF+eax*4]
126      add edx,dword ptr [dldx_fixed]
127
128      mov bh,0
129      add ch,dl
130
131      mov word ptr [edi-8],bp
132      adc bh,dh
133
134      movzx eax,word ptr [esi*2]  //first pixel is finished aroundhere
135      add edx,dword ptr [dtdx_frac]
136
137      sbb ebp,ebp
138      mov bl,ah
139               
140      add ecx,dword ptr [dsdx_frac]
141      mov ah,bh
142
143      adc esi,dword ptr [4+s_t_carry+ebp*4]
144      mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
145       
146      add ebp,dword ptr [0xDEADBEEF+eax*4]
147      add edx,dword ptr [dldx_fixed]
148             
149      mov bh,0
150      add ch,dl
151
152      mov word ptr [edi-6],bp
153      adc bh,dh
154
155      movzx eax,word ptr [esi*2]
156      add edx,dword ptr [dtdx_frac]
157
158      sbb ebp,ebp
159      mov bl,ah
160
161      add ecx,dword ptr [dsdx_frac]
162      mov ah,bh
163
164      adc esi,dword ptr [4+s_t_carry+ebp*4]
165      mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
166       
167      add ebp,dword ptr [0xDEADBEEF+eax*4]
168      add edx,dword ptr [dldx_fixed]
169
170      mov bh,0
171      add ch,dl
172     
173      mov word ptr [edi-4],bp
174      adc bh,dh
175
176      movzx eax,word ptr [esi*2]  //first pixel is finished aroundhere
177      add edx,dword ptr [dtdx_frac]
178
179      sbb ebp,ebp
180      mov bl,ah
181
182      add ecx,dword ptr [dsdx_frac]
183      mov ah,bh
184
185      adc esi,dword ptr [4+s_t_carry+ebp*4]
186      mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
187       
188      add ebp,dword ptr [0xDEADBEEF+eax*4]
189      add edx,dword ptr [dldx_fixed]
190       
191      mov bh,0
192      add ch,dl
193
194      mov word ptr [edi-2],bp
195      dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to effect the adc bh,dh at the top of the loop)
196     
197      jnz looper1
198     
199      adc bh,0 //if we're done looping, save the last carry information here
200      mov byte ptr [last_bh],bh
201    }
202
203    _asm dec dword ptr [num_subdivisions]
204  }
205
206  if (num_leftover)
207  {
208    _asm
209    {
210      mov cl, byte ptr [num_leftover]
211      mov bh, byte ptr [last_bh] //necessary?
212
213      add ch,0 //this is used to clear the carry flag (the actual clc instruction takes 2 cycles. stupid.)
214
215      ALIGN 16
216
217    looper3:
218      adc bh,dh
219      add edi,2
220
221      movzx eax,word ptr [esi*2]
222      add edx,dword ptr [dtdx_frac]
223
224      sbb ebp,ebp
225      mov bl,ah
226
227      add ecx,dword ptr [dsdx_frac]
228      mov ah,bh
229
230      adc esi,dword ptr [4+s_t_carry+ebp*4]
231      mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
232
233      add ebp,dword ptr [0xDEADBEEF+eax*4]
234      add edx,dword ptr [dldx_fixed]
235     
236      mov bh,0
237      add ch,dl
238     
239      mov word ptr [edi-2],bp
240      dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to effect the adc bh,dh at the top of the loop)
241     
242      jnz looper3
243    }
244  } 
245  _asm pop ebp
246}
247
248w32 *texture_affine_lit_sentinel_amd3d()
249{
250  w32 returnval;
251  _asm
252  {
253    mov eax,OFFSET dumb_addr
254dumb_addr:
255    mov returnval,eax
256  }
257  return (w32 *)returnval;
258}
259
260void insert_color_modify_address_low(w32 *address);
261void insert_color_modify_address_high(w32 *address);
262extern w32 color_modify_list[];
263extern sw32 num_color_modifies;
264
265void setup_color_modify_affine_lit_amd3d()
266{
267  w32 *stop = texture_affine_lit_sentinel_amd3d();
268
269  w32 *search = texture_affine_lit_starter_amd3d();
270  //start searching for 0xDEADBEEF
271  while (search < stop)
272  {
273    //casting craziness
274    search = (w32 *)((w8 *)search + 1);
275    if (*search==0xDEADBEEF)
276    {
277      insert_color_modify_address_low(search);
278    }
279    else
280    if (*search==(0xDEADBEEF + ctable_size_bytes))
281    {
282      insert_color_modify_address_high(search);
283    }
284  }
285}
Note: See TracBrowser for help on using the repository browser.