source: golgotha/src/render/software/amd3d/perspective_map_lit_asm_amd3d.cc @ 80

Last change on this file since 80 was 80, checked in by Sam Hocevar, 11 years ago
  • Adding the Golgotha source code. Not sure what's going to be interesting in there, but since it's all public domain, there's certainly stuff to pick up.
File size: 21.2 KB
Line 
1/********************************************************************** <BR>
2  This file is part of Crack dot Com's free source code release of
3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for
4  information about compiling & licensing issues visit this URL</a>
5  <PRE> If that doesn't help, contact Jonathan Clark at
6  golgotha_source@usa.net (Subject should have "GOLG" in it)
7***********************************************************************/
8
9#include "software/r1_software_globals.hh"
10#include "software/inline_fpu.hh"
11#include "software/amd3d/amd3d.h"
12
13w32 *texture_perspective_lit_starter_amd3d()
14{
15  bogus_label:
16
17  w32 returnval;
18  _asm
19  {
20    lea eax,bogus_label
21    mov dword ptr [returnval],eax
22  }
23  return (w32 *)returnval;
24}
25
//written by texture_scanline_perspective_lit_amd3d with (width & ~15); non-zero means
//at least one full 16 pixel subdivision was drawn for the current span
26extern sw32 had_subdivisions;
//lighting dither carry: reset to 0 at the top of texture_scanline_perspective_lit_amd3d,
//loaded into bh before each pixel loop and saved again after every 16 pixel subdivision
27extern w8 last_bh2;
28
29//instead of using left_s, left_t, right_s, and right_t,
30//the divides and multiplies are nicely vectorized by the amd3d,
31//and storing them is a single quad store to a 2 element array ([0] = s, [1] = t),
32//rather than two dword stores to two separate variables
33
//[0] = left_s, [1] = left_t (the code shifts these right by 16 to get the integral part)
34sw32 left_s_t[2];
//[0] = right_s, [1] = right_t, same layout as left_s_t
35sw32 right_s_t[2];
36
//debug storage: the dumpmmxregs block at the end of texture_scanline_perspective_lit_amd3d
//stores mm0..mm7 into mmx0..mmx7 with one quad store each
37float mmx0[2];
38float mmx1[2];
39float mmx2[2];
40float mmx3[2];
41float mmx4[2];
42float mmx5[2];
43float mmx6[2];
44float mmx7[2];
45
45
//Draws one horizontal span of a perspective correct, lit, 16 bit texture using 3DNow! code.
//
//  start_pixel - destination base pointer
//  start_x     - added to start_pixel as a BYTE offset (see the first statement)
//  _left       - really a perspective_span *; ooz, soz and l are read from it.
//                the quad loads from .soz assume toz is stored directly after soz
//  width       - span length in pixels: width>>4 runs of 16 pixels are drawn by looper1,
//                the remaining width&15 pixels by looper3 (or by the C code when exactly 1 is left)
//
//s and t are interpolated linearly between perspective correct endpoints that are
//recomputed every 16 pixels. Every 0xDEADBEEF displacement below is a placeholder that
//setup_color_modify_perspective_lit_amd3d() searches for and registers for patching.
//
//NOTE(review): nothing in this function executes femms/emms after the MMX/3DNow! work;
//presumably a caller restores the FPU state - confirm.
46void texture_scanline_perspective_lit_amd3d(w16 *start_pixel,
47                                            sw32 start_x,
48                                            void *_left,//perspective_span *left,
49                                            sw32 width)
50{
51  start_pixel = (w16 *)((w8 *)start_pixel + start_x);
52
53  perspective_span *left = (perspective_span *)_left;
54 
  //start every span with no pending lighting dither carry
55  last_bh2 = 0;
56
57  _asm
58  {
59    //left_z = 1.f / left->ooz;
60    //left_s = qftoi(left->soz * left_z) + cur_grads.s_adjust;
61    //left_t = qftoi(left->toz * left_z) + cur_grads.t_adjust;
62   
63    //sw32 had_subdivisions = width & (~15);
64    //num_subdivisions = width >> 4;
65    //num_leftover     = width & 15;
66   
67    mov edi,dword ptr [left]
68    mov eax,dword ptr [width]
69
70    movd mm0, dword ptr [edi]perspective_span.ooz
71    mov ebx,eax
72   
    //pfrcp / pfrcpit1 / pfrcpit2 together form the reciprocal 1/ooz (integer ops are interleaved)
73    pfrcp (m1, m0)
74    and eax,15
75
76    shr ebx,4
77    punpckldq mm0, mm0 //duplicate low 32bits of m0 into high 32 bits of m0
78       
79    pfrcpit1 (m0, m1)
80    mov ecx,dword ptr [width]
81   
82    movq mm2, qword ptr [edi]perspective_span.soz
83    mov dword ptr [num_leftover],eax
84   
85    pfrcpit2 (m0, m1)
86    and ecx,(~15)
87   
88    mov eax,dword ptr [edi]perspective_span.l
89    mov dword ptr [num_subdivisions],ebx
90
91    pfmul (m2, m0)
92    mov dword ptr [had_subdivisions],ecx
93   
94    mov dword ptr [left_l],eax
95    //clear these out
    //(only the upper 16 bits of dsdx_frac / dtdx_frac are written later, so the low words stay 0)
96    mov dword ptr [dsdx_frac],0
97
98    //high 32 bits of mm2 - toz / ooz (aka t)
99    //low  32 bits of mm2 - soz / ooz (aka s)
100
101    pf2id (m3, m2)
102    mov dword ptr [dtdx_frac],0
103
104    //high 32 bits of mm3 - toz / ooz (aka t) - truncated ints
105    //low  32 bits of mm3 - soz / ooz (aka s) - truncated ints
106
    //the quad add assumes t_adjust is stored directly after s_adjust in tri_gradients
107    paddd mm3, qword ptr [cur_grads]tri_gradients.s_adjust
108
109    //high 32 bits of mm3 - t + t_adjust
110    //low  32 bits of mm3 - s + s_adjust
111
112    movq qword ptr [left_s_t], mm3
113  }
114
115  if (num_subdivisions)
116  {
117    _asm
118    {
119      //ooz_right = left->ooz + (cur_grads.doozdxspan);
120      //soz_right = left->soz + (cur_grads.dsozdxspan);
121      //toz_right = left->toz + (cur_grads.dtozdxspan);
122
123      //edi still has dword ptr [left]
124      lea ebx,dword ptr [cur_grads]
125      nop
126
127      movd mm1, dword ptr [edi]perspective_span.ooz
128      mov esi,dword ptr [r1_software_texture_ptr]
129     
130      movd mm3, dword ptr [ebx]tri_gradients.doozdxspan
131      mov eax,dword ptr [left_s_t] //left_s
132     
      //esi becomes a texel (word) index; the pixel loop reads texels through [esi*2]
133      shr esi,1
134      movq mm0, qword ptr [edi]perspective_span.soz
135     
136      pfadd (m1, m3)
137      movq mm2, qword ptr [ebx]tri_gradients.dsozdxspan
138     
139      sar eax,16   //get integral left_s into eax
140      mov edi,dword ptr [start_pixel]
141     
142      pfrcp (m6, m1)
143      movq mm7,mm1
144     
145      pfadd (m0, m2)
146      mov ebx,dword ptr [left_s_t+4] //left_t     
147     
148      //calculate the 1st right_z in mm7
149      sar ebx,16 //get integral left_t into ebx
150      punpckldq mm7, mm7 //duplicate low 32bits of mm7 into high 32 bits of mm7
151     
152      pfrcpit1 (m7, m6)
153      mov edx,dword ptr [left_s_t+4] //left_t
154     
155      mov cl,byte ptr [r1_software_twidth_log2]
156      add esi,eax
157     
158      pfrcpit2 (m7, m6)
159     
160     
161
162      //calculate starting fractional and integral values for s and t
163      //esi = starting_s_coordinate >> 16 + starting_t_coordinate >> 16 << r1_software_twidth_log2
164      //ecx = starting_s_coordinate << 16
165      //edx = starting_t_coordinate << 16
166      //dx  = starting_light_value
167
168      //some stuff has been moved up, interleaved w/the mmx code above
169     
170      shl ebx,cl //multiply integral left_t by texture width
171     
172      sal edx,16 //get fractional left_t into edx
173      mov ecx,dword ptr [left_s_t] //left_s
174   
175      sal ecx,16
176      add esi,ebx
177
178      mov dx,word ptr [left_l]
179      mov ch,dl //store the initial lighting error from the 1st lighting value
180                //CH MUST not be touched between here and the actual rasterization loop
181    }
182
    //one iteration per 16 pixel subdivision; the registers set up above (esi, edi, ecx, edx
    //and mm0, mm1, mm2, mm3, mm7) are carried from one iteration to the next
183    while (num_subdivisions)
184    {
185      _asm
186      {
187        //right_s = qftoi(soz_right * right_z);
188        //right_t = qftoi(toz_right * right_z);
189       
190        //soz_right and toz_right are in mm0
191        //right_z is in mm7
192        pfmul (m7, m0)
193       
194        pf2id (m7, m7)
195
196        movq qword ptr [right_s_t],mm7
197
198      //calculate ooz_right, soz_right, toz_right, and right_z for the end of the next span. if there are
199      //more subdivisions, calculate the end of the next span. if there are no more and there is > 1 leftover
200      //in the leftover span, calculate the end of that.
201
202      //if (num_subdivisions!=1)
203      //{
204          cmp dword ptr [num_subdivisions],1
205          je  last_subdivision
206       
207          //ooz_right += (cur_grads.doozdxspan);
208          //soz_right += (cur_grads.dsozdxspan);
209          //toz_right += (cur_grads.dtozdxspan);
210         
211          pfadd (m0, m2)
212          pfadd (m1, m3)
213
214          jmp proceed_with_mapping
215      //}
216      //else
217      //if (num_leftover > 1)
218      //{
219
220      last_subdivision:
221          cmp dword ptr [num_leftover],1
222          jle proceed_with_mapping
223       
224          //calculate the right_z for the end of the leftover span
225          //ooz_right += (cur_grads.doozdx * num_leftover);
226          //soz_right += (cur_grads.dsozdx * num_leftover);
227          //toz_right += (cur_grads.dtozdx * num_leftover);
228         
          //mm2 and mm3 (the per-span gradients) are overwritten here; this branch only runs
          //on the last subdivision
229          movd mm2,dword ptr [num_leftover]
230          movd mm3, dword ptr [cur_grads]tri_gradients.dsozdx
231         
232          pi2fd (m2, m2)
233          movd mm4, dword ptr [cur_grads]tri_gradients.dtozdx
234
235          pfmul (m3, m2)
236          movd mm5, dword ptr [cur_grads]tri_gradients.doozdx
237         
238          pfmul (m4, m2)
239          pfmul (m5, m2)
240
241          pfacc (m3, m4) //gets dtozdx*num_leftover into high 32 bits of m3
242
243          pfadd (m0, m3)
244          pfadd (m1, m5)
245      //}
246           
247      proceed_with_mapping:
248        //cap the right_s and right_t's so that they're valid
249
250        mov eax,dword ptr [right_s_t] //right_s
251        mov ebx,dword ptr [right_s_t+4] //right_t
252       
253        add eax,dword ptr [cur_grads]tri_gradients.s_adjust
254        add ebx,dword ptr [cur_grads]tri_gradients.t_adjust
255 
256        //cap the right s and t
        //(eax is clamped to [0, s_mask], ebx to [0, t_mask])
257        cmp eax,0
258        jge cmp_eax_high
259
260        mov eax,0
261        jmp cmp_ebx_low
262
263      cmp_eax_high:
264        cmp eax,dword ptr [s_mask]
265        jle cmp_ebx_low
266
267        mov eax,dword ptr [s_mask]
268
269      cmp_ebx_low:
270        cmp ebx,0
271        jge cmp_ebx_high
272
273        mov ebx,0
274        jmp done_compare
275     
276      cmp_ebx_high:
277        cmp ebx,dword ptr [t_mask]
278        jle done_compare
279
280        mov ebx,dword ptr [t_mask]
281
282      done_compare:
283
284        //store the right_s and right_t
285        //so they can be copied into left_s and left_t at the end of the 16-pixel span
286        //(they can't be copied now because we have to calculate (right_s-left_s)>>4 and (right_t-left_t)>>4)
287       
288        //calculate the next right_z in mm7
289        //unfortunately, if the span is a multiple of 16, and this is the last set of 16, it will
290        //calculate an unnecessary z. but its best to have the code here mixed in w/integer ops so
291        //that the amd3d code has something for its execution latencies to sit through
292        movq mm7, mm1
293        pfrcp (m6, m1)
294
295        mov dword ptr [right_s_t],eax //right_s
296        mov dword ptr [right_s_t+4],ebx //right_t
297
298        punpckldq mm7, mm7 //duplicate low 32bits of mm7 into high 32 bits of mm7
299        sub eax,dword ptr [left_s_t] //left_s
300
301        sar eax,4
        //ebp is used as a scratch register inside the pixel loop and restored by the pop after it.
        //NOTE(review): nothing frame relative may be addressed until then; the memory operands
        //used in between are presumably globals - confirm
302        push ebp
303
304        pfrcpit1 (m7, m6)
305        sub ebx,dword ptr [left_s_t+4] //left_t
306
307        sar ebx,4
308        mov word ptr [dsdx_frac+2],ax //this sets the upper 16 bits of dword ptr [dsdx_frac] to ((right_s-left_s)>>4)<<16
309       
310        pfrcpit2 (m7, m6)
311        nop
312       
313        sar eax,16
314        mov word ptr [dtdx_frac+2],bx //this sets the upper 16 bits of dword ptr [dtdx_frac] to ((right_t-left_t)>>4)<<16
315       
316        sar ebx,16
317        mov cl,byte ptr [r1_software_twidth_log2]
318       
319        shl ebx,cl
320
321        add eax,ebx
322        mov ebx,0 //clear high bits of ebx
323
324        //s_t_carry[1] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2
325        //s_t_carry[0] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2 + r1_software_texture_width
326
327        mov dword ptr [s_t_carry+4],eax
328        add eax,dword ptr [r1_software_texture_width]
329
330        mov dword ptr [s_t_carry],eax
331        mov cl,4 //loop is unrolled to 4 pixels - we want to draw 16, so loop 4 times
332
333        mov bh,byte ptr [last_bh2] //setup the initial dither
334        clc //clear the carry bit
335
336        ALIGN 16
337
338        //high 16 bits of ecx is the fractional s component
339        //high 16 bits of edx is the fractional t component
340
341        //eax is used to lookup the texel as well as the low 8-bits of the lit texel
342        //ebx is used to lookup the high 8-bits of the lit texel
343        //ebp is used to detect a t-carry as well as lookup the lit texel
344        //cl  is the loop count variable
345        //dx  is the lighting value (8 bits integer, 8 bits fraction)
346        //ch  is the lighting error
347        //bh  is used to dither the lighting (mov bh,0 then add ch,dl then adc bh,dh)
348
        //how the texel index is stepped for every pixel:
        //  add edx,[dtdx_frac] sets the carry flag when the t fraction overflows
        //  sbb ebp,ebp         turns that carry into ebp = -1 (carry) or 0 (no carry)
        //  add ecx,[dsdx_frac] sets the carry flag when the s fraction overflows
        //  adc esi,[4+s_t_carry+ebp*4] adds s_t_carry[0] (t carried) or s_t_carry[1] (no t carry),
        //                              plus 1 when the s fraction carried
        //the two 0xDEADBEEF displacements are the patched lookup table bases (low byte / high byte)
349    looper1:
350        adc bh,dh
351        add edi,8 //the only convenient place for the stepping of edi was way up here
352
353        movzx eax,word ptr [esi*2]
354        add edx,dword ptr [dtdx_frac]
355 
356        sbb ebp,ebp
357        mov bl,ah
358
359        add ecx,dword ptr [dsdx_frac]
360        mov ah,bh
361
362        adc esi,dword ptr [4+s_t_carry+ebp*4]
363        mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
364       
365        add ebp,dword ptr [0xDEADBEEF+eax*4]
366        add edx,dword ptr [dldx_fixed]
367
368        mov bh,0
369        add ch,dl
370
371        mov word ptr [edi-8],bp
372        adc bh,dh
373
374        movzx eax,word ptr [esi*2]  //first pixel is finished around here
375        add edx,dword ptr [dtdx_frac]
376
377        sbb ebp,ebp
378        mov bl,ah
379               
380        add ecx,dword ptr [dsdx_frac]
381        mov ah,bh
382
383        adc esi,dword ptr [4+s_t_carry+ebp*4]
384        mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
385       
386        add ebp,dword ptr [0xDEADBEEF+eax*4]
387        add edx,dword ptr [dldx_fixed]
388             
389        mov bh,0
390        add ch,dl
391
392        mov word ptr [edi-6],bp
393        adc bh,dh
394
395        movzx eax,word ptr [esi*2]
396        add edx,dword ptr [dtdx_frac]
397
398        sbb ebp,ebp
399        mov bl,ah
400
401        add ecx,dword ptr [dsdx_frac]
402        mov ah,bh
403
404        adc esi,dword ptr [4+s_t_carry+ebp*4]
405        mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
406       
407        add ebp,dword ptr [0xDEADBEEF+eax*4]
408        add edx,dword ptr [dldx_fixed]
409
410        mov bh,0
411        add ch,dl
412     
413        mov word ptr [edi-4],bp
414        adc bh,dh
415
416        movzx eax,word ptr [esi*2]  //third pixel is finished around here
417        add edx,dword ptr [dtdx_frac]
418
419        sbb ebp,ebp
420        mov bl,ah
421
422        add ecx,dword ptr [dsdx_frac]
423        mov ah,bh
424
425        adc esi,dword ptr [4+s_t_carry+ebp*4]
426        mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
427       
428        add ebp,dword ptr [0xDEADBEEF+eax*4]
429        add edx,dword ptr [dldx_fixed]
430       
431        mov bh,0
432        add ch,dl
433
434        mov word ptr [edi-2],bp
435        dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to affect the adc bh,dh at the top of the loop)
436     
437        jnz looper1
438        adc bh,0 //if we're done looping, save the last carry information here
439       
440        pop ebp
441        mov byte ptr [last_bh2],bh //save it
442
443        //store right_s and right_t in left_s and left_t
444        //right_s is what left_s starts at on the next 16 pixel span
445        //right_t is what left_t starts at on the next 16 pixel span
446
447        mov eax,dword ptr [right_s_t] //right_s
448        mov ebx,dword ptr [right_s_t+4] //right_t
449
450        mov dword ptr [left_s_t],eax //left_s
451        mov dword ptr [left_s_t+4],ebx //left_t
452      }
453         
454      _asm dec dword ptr [num_subdivisions]
455    }
456   
457    //store these so that the C code below actually works
458    _asm mov word ptr [left_l],dx
459    _asm mov dword ptr [start_pixel],edi
460  }
461   
462  if (num_leftover)
463  {       
464    if (num_leftover > 1)
465    {     
466      if (had_subdivisions==0)
467      {
468        //calculate the right_z for the end of span
469        //ooz_right = left->ooz + (cur_grads.doozdx * num_leftover);
470        //soz_right = left->soz + (cur_grads.dsozdx * num_leftover);
471        //toz_right = left->toz + (cur_grads.dtozdx * num_leftover);
472
473        _asm
474        {
475          movd mm2,dword ptr [num_leftover]
476          lea ebx,dword ptr [cur_grads]
477         
478          movd mm3, dword ptr [ebx]tri_gradients.dsozdx
479          mov edi,dword ptr [left]
480
481          movd mm4, dword ptr [ebx]tri_gradients.dtozdx
482          pi2fd (m2, m2)
483         
484          movd mm5, dword ptr [ebx]tri_gradients.doozdx
485          pfmul (m3, m2)
486         
487          movq mm0, qword ptr [edi]perspective_span.soz
488          pfmul (m4, m2)
489
490          movd mm1, dword ptr [edi]perspective_span.ooz
491          pfmul (m5, m2)         
492         
493          pfacc (m3, m4) //gets dtozdx*num_leftover into high 32 bits of m3
494         
495          pfadd (m1, m5) //ooz += doozdx*num_leftover
496          pfadd (m0, m3) //soz += dsozdx*num_leftover AND toz += dtozdx*num_leftover
497
498          //calculate the z at the right endpoint in mm7
499          movq mm7, mm1
500          pfrcp (m6, m1)
501
502          punpckldq mm7, mm7 //duplicate low 32bits of mm7 into high 32 bits of mm7
503
504          pfrcpit1 (m7, m6) //terrible stalls. oh well
505       
506          pfrcpit2 (m7, m6)
507        }
508      }
509      else
510      {
511        //the correct ending right_z is already being calculated
512        //(see the last_subdivision branch of the subdivision loop above); mm0 and mm7 still hold it
513      }
514
515      _asm
516      {
517        //calculate starting fractional and integral values for s and t           
518       
519        //calculate the right endpoint
520        //right_s = qftoi(soz_right * right_z) + cur_grads.s_adjust;
521        //right_t = qftoi(toz_right * right_z) + cur_grads.t_adjust;
522       
523        //soz_right and toz_right are in mm0
524        //right_z is in mm7
525        pfmul (m7, m0) //calculate right_s and right_t
526        mov edi,dword ptr [start_pixel]
527
528        mov esi,dword ptr [r1_software_texture_ptr]
529        mov eax,dword ptr [left_s_t] //left_s
530
531        shr esi,1
532        pf2id (m7, m7) //truncate right_s and right_t
533       
534        sar eax,16
535        mov ebx,dword ptr [left_s_t+4] //left_t
536           
537        sar ebx,16
538        movq qword ptr [right_s_t],mm7
539
540        mov edx,dword ptr [left_s_t+4] //left_t
541        add esi,eax
542       
543        mov cl,byte ptr [r1_software_twidth_log2]
544        shl ebx,cl
545     
546        sal edx,16
547        mov ecx,dword ptr [left_s_t] //left_s
548     
549        sal ecx,16
550        add esi,ebx
551
552        mov eax,dword ptr [right_s_t] //right_s
553        mov ebx,dword ptr [right_s_t+4] //right_t
554       
555        add eax,dword ptr [cur_grads]tri_gradients.s_adjust
556        add ebx,dword ptr [cur_grads]tri_gradients.t_adjust
557 
558        //cap the right s and t
559        cmp eax,0
560        jge cmp_eax_high_2
561
562        mov eax,0
563        jmp cmp_ebx_low_2
564
565      cmp_eax_high_2:
566        cmp eax,dword ptr [s_mask]
567        jle cmp_ebx_low_2
568
569        mov eax,dword ptr [s_mask]
570
571      cmp_ebx_low_2:
572        cmp ebx,0
573        jge cmp_ebx_high_2
574
575        mov ebx,0
576        jmp done_compare_2
577     
578      cmp_ebx_high_2:
579        cmp ebx,dword ptr [t_mask]
580        jle done_compare_2
581
582        mov ebx,dword ptr [t_mask]
583
584      done_compare_2:
585           
586        //calculate the deltas (left to right)
587        //temp_dsdx = qftoi((float)(right_s - left_s) * inverse_leftover_lookup[num_leftover]);
588        //temp_dtdx = qftoi((float)(right_t - left_t) * inverse_leftover_lookup[num_leftover]);
589
590        sub eax,dword ptr [left_s_t] //left_s
591        sub ebx,dword ptr [left_s_t+4] //left_t
592
593        movd mm0,eax //temp_dsdx
        //ebp is used as scratch from here until the pop after looper3
594        push ebp
595       
596        movd mm1,ebx //temp_dtdx
597        mov ebp, dword ptr [num_leftover]
598       
599        pi2fd (m0, m0)
600        movd mm2, dword ptr [inverse_leftover_lookup + ebp*4]
601       
602        pi2fd (m1, m1)       
603        pfmul (m0, m2)
604
605        pfmul (m1, m2) //bad stalls here
606        pf2id (m0, m0)
607
608        pf2id (m1, m1)
609
610        movd eax, mm0 //temp_dsdx
611        movd ebx, mm1 //temp_dtdx
612
613        //calculate the fractional and integral delta vars
614        //s_t_carry[0] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2) + r1_software_texture_width;
615        //s_t_carry[1] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2);
616        //dsdx_frac    = (temp_dsdx<<16);
617        //dtdx_frac    = (temp_dtdx<<16);
618
619        mov word ptr [dsdx_frac+2],ax
620        mov word ptr [dtdx_frac+2],bx
621
622        sar eax,16
623        mov dx,word ptr [left_l]
624
625        sar ebx,16
626        mov cl,byte ptr [r1_software_twidth_log2]
627       
628        shl ebx,cl
629
630        add eax,ebx
631        mov ebx,0 //clear high bits
632
633        mov dword ptr [s_t_carry+4],eax
634        add eax,dword ptr [r1_software_texture_width]
635       
636        mov dword ptr [s_t_carry],eax
637        mov cl, byte ptr [num_leftover]
638       
639        mov ch,dl //setup the initial lighting error
640        mov bh,byte ptr [last_bh2] //setup the initial dither
641
642        clc //clear the carry bit
643
644        ALIGN 16
645
        //same per pixel sequence as looper1, but one pixel per iteration (cl = num_leftover)
646      looper3:
647        adc bh,dh
648        add edi,2
649
650        movzx eax,word ptr [esi*2]
651        add edx,dword ptr [dtdx_frac]
652
653        sbb ebp,ebp
654        mov bl,ah
655
656        add ecx,dword ptr [dsdx_frac]
657        mov ah,bh
658
659        adc esi,dword ptr [4+s_t_carry+ebp*4]
660        mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
661
662        add ebp,dword ptr [0xDEADBEEF+eax*4]
663        add edx,dword ptr [dldx_fixed]
664       
665        mov bh,0
666        add ch,dl
667
668        mov word ptr [edi-2],bp
669        dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to affect the adc bh,dh at the top of the loop)
670
671        jnz looper3
672
673        pop ebp
674      }
675    }
676    else
677    {
678      register w16 texel;
679      register w32 l_lookup;
680
681      //highly unoptimized single pixel drawer   //left_s         //left_t
682      texel = *(r1_software_texture_ptr + (left_s_t[0]>>16) + ((left_s_t[1]>>16)<<r1_software_twidth_log2));
683         
684      l_lookup = left_l & (NUM_LIGHT_SHADES<<8);
685 
686      //                                        low bits                                  high bits
687      *start_pixel = (w16)(((w32 *)(0xDEADBEEF))[l_lookup + (texel & 0xFF)] + ((w32 *)(0xDEADBEEF)+ctable_size)[l_lookup + (texel>>8)]);
688    }
689  }
690 
691  return;
692
  //never reached by falling through (it follows the return above): a debug routine that
  //stores mm0..mm7 into the mmx0..mmx7 arrays and ends with ret.
  //no call to dumpmmxregs is visible in this function
693  _asm
694  {
695  dumpmmxregs:
696    movq qword ptr [mmx0],mm0
697    movq qword ptr [mmx1],mm1
698    movq qword ptr [mmx2],mm2
699    movq qword ptr [mmx3],mm3
700    movq qword ptr [mmx4],mm4
701    movq qword ptr [mmx5],mm5
702    movq qword ptr [mmx6],mm6
703    movq qword ptr [mmx7],mm7
704    ret
705  }
706
707}
708
709w32 *texture_perspective_lit_sentinel_amd3d()
710{
711  bogus_label:
712 
713  w32 returnval;
714  _asm
715  {
716    lea eax,bogus_label
717    mov dword ptr [returnval],eax
718  }
719  return (w32 *)returnval;
720}
721
//defined elsewhere. setup_color_modify_perspective_lit_amd3d() passes the address of every
//dword equal to 0xDEADBEEF to the _low variant and the address of every dword equal to
//0xDEADBEEF + ctable_size_bytes to the _high variant
722void insert_color_modify_address_low(w32 *address);
723void insert_color_modify_address_high(w32 *address);
//not referenced in the code visible here; presumably maintained by the insert_* functions - confirm
724extern w32 color_modify_list[];
725extern sw32 num_color_modifies;
726
727void setup_color_modify_perspective_lit_amd3d()
728{
729  w32 *stop = texture_perspective_lit_sentinel_amd3d();
730
731  w32 *search = texture_perspective_lit_starter_amd3d();
732  //start searching for 0xDEADBEEF
733  while (search < stop)
734  {
735    //casting craziness
736    search = (w32 *)((w8 *)search + 1);
737    if (*search==0xDEADBEEF)
738    {
739      insert_color_modify_address_low(search);
740    }
741    else
742    if (*search==(0xDEADBEEF + ctable_size_bytes))
743    {
744      insert_color_modify_address_high(search);
745    }
746  }
747}
Note: See TracBrowser for help on using the repository browser.