/********************************************************************** <BR>
  This file is part of Crack dot Com's free source code release of
  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for
  information about compiling & licensing issues visit this URL</a>
  <PRE> If that doesn't help, contact Jonathan Clark at
  golgotha_source@usa.net (Subject should have "GOLG" in it)
***********************************************************************/


8 


9  #include "software/r1_software_globals.hh"


10  #include "software/inline_fpu.hh"


11 


12  extern sw32 had_subdivisions;


13 


14  void texture_scanline_perspective_unlit_holy(w16 *start_pixel,


15  sw32 start_x,


16  void *_left,//perspective_span *left,


17  sw32 width)


18  {


19  start_pixel = (w16 *)((w8 *)start_pixel + start_x);


20 


21  perspective_span *left = (perspective_span *)_left;


22 


23  _asm


24  {


25  //left_z = 1.f / left>ooz;


26  //left_s = qftoi(left>soz * left_z) + cur_grads.s_adjust;


27  //left_t = qftoi(left>toz * left_z) + cur_grads.t_adjust;


28 


29  //sw32 had_subdivisions = width & (~15);


30  //num_subdivisions = width >> 4;


31  //num_leftover = width & 15;


32 


33  mov esi,dword ptr [left]


34  mov eax,dword ptr [width]


35 


36  fld1


37  fdiv qword ptr [esi]perspective_span.ooz


38 


39  mov ebx,eax


40  and eax,15


41 


42  shr ebx,4


43  mov ecx,width


44 


45  and ecx,(~15)


46  mov dword ptr [num_leftover],eax


47 


48  mov dword ptr [num_subdivisions],ebx


49  mov dword ptr [had_subdivisions],ecx


50 


51  fld st(0)


52 


53  fmul dword ptr [esi]perspective_span.soz


54  fxch st(1)


55 


56  fmul dword ptr [esi]perspective_span.toz


57  fxch st(1)


58 


59  fistp dword ptr [left_s]


60  fistp dword ptr [left_t]


61 


62  mov eax,dword ptr [cur_grads].s_adjust


63  mov ebx,dword ptr [cur_grads].t_adjust


64 


65  add eax,dword ptr [left_s]


66  add ebx,dword ptr [left_t]


67 


68  mov dword ptr [left_s],eax


69  mov dword ptr [left_t],ebx


70 


71  //clear these out


72  mov dword ptr [dsdx_frac],0


73  mov dword ptr [dtdx_frac],0


74  }


75 


76  if (num_subdivisions)


77  {


78  _asm


79  {


80  //ooz_right = left>ooz + (cur_grads.doozdxspan);


81  //soz_right = left>soz + (cur_grads.dsozdxspan);


82  //toz_right = left>toz + (cur_grads.dtozdxspan);


83 


84  mov esi,dword ptr [left]


85  mov edi,dword ptr [start_pixel]


86 


87  fld qword ptr [esi]perspective_span.ooz


88  fld dword ptr [esi]perspective_span.soz


89  fld dword ptr [esi]perspective_span.toz


90 


91  //t s o


92  fadd dword ptr [cur_grads]tri_gradients.dtozdxspan


93  fxch st(2)


94 


95  //o s t


96 


97  fadd qword ptr [cur_grads]tri_gradients.doozdxspan


98  fxch st(1)


99 


100  //s o t


101 


102  fadd dword ptr [cur_grads]tri_gradients.dsozdxspan


103  fxch st(2)


104 


105  //t o s


106 


107  fstp dword ptr [toz_right]


108  fxch st(1)


109 


110  //s o


111 


112  fstp dword ptr [soz_right]


113 


114  fstp dword ptr [ooz_right]


115 


116  //calculate the 1st right_z


117  fld1


118  fdiv dword ptr [ooz_right]


119 


120  //calculate starting fractional and integral values for s and t


121  //esi = starting_s_coordinate >> 16 + starting_t_coordinate >> 16 << r1_software_twidth_log2


122  //ecx = starting_s_coordinate << 16


123  //edx = starting_t_coordinate << 16


124  //dx = starting_light_value


125 


126  mov esi,dword ptr [r1_software_texture_ptr]


127  mov eax,dword ptr [left_s]


128 


129  shr esi,1


130  mov ebx,dword ptr [left_t]


131 


132  sar eax,16


133  mov edx,dword ptr [left_t]


134 


135  sar ebx,16


136  add esi,eax


137 


138  mov cl,byte ptr [r1_software_twidth_log2]


139  shl ebx,cl


140 


141  sal edx,16


142  mov ecx,dword ptr [left_s]


143 


144  sal ecx,16


145  add esi,ebx


146  }


147 


148  while (num_subdivisions)


149  {


150  _asm


151  {


152  //right_s = qftoi(soz_right * right_z);


153  //right_t = qftoi(toz_right * right_z);


154 


155  //right_z is in st0


156  fld st(0)


157 


158  fmul dword ptr [soz_right]


159  fxch st(1)


160 


161  fmul dword ptr [toz_right]


162  fxch st(1)


163 


164  fistp dword ptr [right_s]


165  fistp dword ptr [right_t]


166 


167  //calculate ooz_right, soz_right, toz_right, and right_z for the end of the next span. if there are


168  //more subdivisions, calculate the end of the next span. if there are no more and there is > 1 leftover


169  //in the leftover span, calculate the end of that.


170 


171  //if (num_subdivisions!=1)


172  //{


173  cmp dword ptr [num_subdivisions],1


174  je last_subdivision


175 


176  //ooz_right += (cur_grads.doozdxspan);


177  //soz_right += (cur_grads.dsozdxspan);


178  //toz_right += (cur_grads.dtozdxspan);


179 


180  fld dword ptr [ooz_right]


181  fadd qword ptr [cur_grads]tri_gradients.doozdxspan


182 


183  fld dword ptr [soz_right]


184  fadd dword ptr [cur_grads]tri_gradients.dsozdxspan


185 


186  fld dword ptr [toz_right]


187  fadd dword ptr [cur_grads]tri_gradients.dtozdxspan


188 


189  fxch st(2)


190  fstp dword ptr [ooz_right]


191 


192  fstp dword ptr [soz_right]


193 


194  fstp dword ptr [toz_right]


195 


196  fld1


197  fdiv dword ptr [ooz_right]


198 


199  jmp not_last_subdivision


200  //}


201  //else


202  //if (num_leftover > 1)


203  //{


204 


205  last_subdivision:


206  cmp dword ptr [num_leftover],1


207  jle not_last_subdivision


208 


209  //calculate the right_z for the end of the leftover span


210  //ooz_right += (cur_grads.doozdx * num_leftover);


211  //soz_right += (cur_grads.dsozdx * num_leftover);


212  //toz_right += (cur_grads.dtozdx * num_leftover);


213 


214  fild dword ptr [num_leftover]


215 


216  //todo: pipeline these fpu ops


217  fld qword ptr [cur_grads]tri_gradients.doozdx


218  fmul st(0),st(1)


219  fadd dword ptr [ooz_right]


220  fstp dword ptr [ooz_right]


221 


222  fld dword ptr [cur_grads]tri_gradients.dsozdx


223  fmul st(0),st(1)


224  fadd dword ptr [soz_right]


225  fstp dword ptr [soz_right]


226 


227  fld dword ptr [cur_grads]tri_gradients.dtozdx


228  fmul st(0),st(1)


229  fadd dword ptr [toz_right]


230  fstp dword ptr [toz_right]


231 


232  fstp st(0) //nifty thing i found, a 1 cycle fpu pop


233 


234  fld1


235  fdiv dword ptr [ooz_right]


236  //}


237 


238  not_last_subdivision:


239  //cap the right_s and right_t's so that they're valid


240 


241  mov eax,dword ptr [right_s]


242  mov ebx,dword ptr [right_t]


243 


244  add eax,dword ptr [cur_grads]tri_gradients.s_adjust


245  add ebx,dword ptr [cur_grads]tri_gradients.t_adjust


246 


247  //cap the right s and t


248  cmp eax,0


249  jge cmp_eax_high


250 


251  mov eax,0


252  jmp cmp_ebx_low


253 


254  cmp_eax_high:


255  cmp eax,dword ptr [s_mask]


256  jle cmp_ebx_low


257 


258  mov eax,dword ptr [s_mask]


259 


260  cmp_ebx_low:


261  cmp ebx,0


262  jge cmp_ebx_high


263 


264  mov ebx,0


265  jmp done_compare


266 


267  cmp_ebx_high:


268  cmp ebx,dword ptr [t_mask]


269  jle done_compare


270 


271  mov ebx,dword ptr [t_mask]


272 


273  done_compare:


274 


275  //store the right_s and right_t


276  //so they can be copied into left_s and left_t at the end of the 16pixel span


277  //(the cant be copied now because we have to calculate (right_sleft_s)>>4 and (right_tleft_t)>>4


278 


279  mov dword ptr [right_s],eax


280  mov dword ptr [right_t],ebx


281 


282  sub eax,dword ptr [left_s]


283  push ebp


284 


285  sar eax,4


286  sub ebx,dword ptr [left_t]


287 


288  sar ebx,4


289  mov word ptr [dsdx_frac+2],ax //this sets the upper 16 bits of dword ptr [dsdx_frac] to ((right_sleft_s)>>4)<<16


290 


291  sar eax,16


292  mov word ptr [dtdx_frac+2],bx //this sets the upper 16 bits of dword ptr [dtdx_frac] to ((right_tleft_t)>>4)<<16


293 


294  sar ebx,16


295  mov cl,byte ptr [r1_software_twidth_log2]


296 


297  shl ebx,cl


298 


299  add eax,ebx


300  mov ebx,0


301 


302  //s_t_carry[1] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2


303  //s_t_carry[0] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2 + r1_software_texture_width


304 


305  mov dword ptr [s_t_carry+4],eax


306  add eax,dword ptr [r1_software_texture_width]


307 


308  mov dword ptr [s_t_carry],eax


309  mov eax,0 //must make sure the high bits of these are zeroed out


310 


311  mov cl,4 //loop is unrolled to 4 pixels  we want to draw 16, so loop 4 times


312  ALIGN 16


313 


314  //high 16 bits of ecx is the fractional s component


315  //high 16 bits of edx is the fractional t component


316 


317  //eax is used to lookup the texel as well as the low 8bits of the lit texel


318  //ebx is used to lookup the high 8bits of the lit texel


319  //ebp is used to detect a tcarry as well as lookup the lit texel


320  //cl is the loop count variable


321 


322  looper1:


323  add edi,8


324  add edx,dword ptr [dtdx_frac]


325 


326  sbb ebx,ebx


327  add ecx,dword ptr [dsdx_frac]


328 


329  mov ax,word ptr [esi*2]


330  nop


331 


332  adc esi,dword ptr [4+s_t_carry+ebx*4]


333  add edx,dword ptr [dtdx_frac]


334 


335  sbb ebx,ebx


336  and eax,eax //test to see if its zero


337 


338  jz skipped_1_pixel


339  mov word ptr [edi8],ax //store 1 pixel


340 


341  skipped_1_pixel:


342  mov ax,word ptr [esi*2]


343  add ecx,dword ptr [dsdx_frac]


344 


345  adc esi,dword ptr [4+s_t_carry+ebx*4]


346  add edx,dword ptr [dtdx_frac]


347 


348  sbb ebx,ebx


349  and eax,eax //test to see if its zero


350 


351  jz skipped_2_pixel


352  mov word ptr [edi6],ax


353 


354  skipped_2_pixel:


355  mov ax,word ptr [esi*2]


356  add ecx,dword ptr [dsdx_frac]


357 


358  adc esi,dword ptr [4+s_t_carry+ebx*4]


359  add edx,dword ptr [dtdx_frac]


360 


361  sbb ebx,ebx


362  and eax,eax //test to see if its zero


363 


364  jz skipped_3_pixel


365  mov word ptr [edi4],ax


366 


367  skipped_3_pixel:


368  mov ax,word ptr [esi*2]


369  add ecx,dword ptr [dsdx_frac]


370 


371  adc esi,dword ptr [4+s_t_carry+ebx*4]


372  and eax,eax //test to see if its zero


373 


374  jz skipped_4_pixel


375  mov word ptr [edi2],ax


376 


377  skipped_4_pixel:


378  dec cl


379  jnz looper1


380 


381  pop ebp


382 


383  //store right_s and right_s in left_s and left_t


384  //right_s is what left_s starts at on the next 16 pixel span


385  //right_t is what left_t starts at on the next 16 pixel span


386 


387  mov eax,dword ptr [right_s]


388  mov ebx,dword ptr [right_t]


389 


390  mov dword ptr [left_s],eax


391  mov dword ptr [left_t],ebx


392  }


393 


394  _asm dec dword ptr [num_subdivisions]


395  }


396 


397  //store these so that the C code below actually works


398  _asm mov dword ptr [start_pixel],edi


399  }


400 


401  if (num_leftover)


402  {


403  if (num_leftover > 1)


404  {


405  if (had_subdivisions==0)


406  {


407  //calculate the right_z for the end of span


408  ooz_right = left>ooz + (cur_grads.doozdx * num_leftover);


409  soz_right = left>soz + (cur_grads.dsozdx * num_leftover);


410  toz_right = left>toz + (cur_grads.dtozdx * num_leftover);


411 


412  //calculate the z at the right endpoint


413  _asm fld1


414  _asm fdiv dword ptr [ooz_right]


415  }


416  else


417  {


418  //the correct ending right_z is already being calculated


419  //(see the if (num_subdivisions!=1) case above


420  }


421 


422  _asm


423  {


424  //calculate starting fractional and integral values for s and t


425 


426  mov esi,dword ptr [r1_software_texture_ptr]


427  mov eax,dword ptr [left_s]


428 


429  shr esi,1


430  mov ebx,dword ptr [left_t]


431 


432  sar eax,16


433  mov edx,dword ptr [left_t]


434 


435  sar ebx,16


436  add esi,eax


437 


438  mov cl,byte ptr [r1_software_twidth_log2]


439  shl ebx,cl


440 


441  sal edx,16


442  mov ecx,dword ptr [left_s]


443 


444  sal ecx,16


445  add esi,ebx


446 


447  mov edi,dword ptr [start_pixel]


448 


449  //calculate the right endpoint


450  //right_s = qftoi(soz_right * right_z) + cur_grads.s_adjust;


451  //right_t = qftoi(toz_right * right_z) + cur_grads.t_adjust;


452 


453  //right_z is in st0


454  fld st(0)


455 


456  fmul dword ptr [soz_right]


457  fxch st(1)


458 


459  fmul dword ptr [toz_right]


460  fxch st(1)


461 


462  fistp dword ptr [right_s]


463  fistp dword ptr [right_t]


464 


465  mov eax,dword ptr [right_s]


466  mov ebx,dword ptr [right_t]


467 


468  add eax,dword ptr [cur_grads]tri_gradients.s_adjust


469  add ebx,dword ptr [cur_grads]tri_gradients.t_adjust


470 


471  //cap the right s and t


472  cmp eax,0


473  jge cmp_eax_high_2


474 


475  mov eax,0


476  jmp cmp_ebx_low_2


477 


478  cmp_eax_high_2:


479  cmp eax,dword ptr [s_mask]


480  jle cmp_ebx_low_2


481 


482  mov eax,dword ptr [s_mask]


483 


484  cmp_ebx_low_2:


485  cmp ebx,0


486  jge cmp_ebx_high_2


487 


488  mov ebx,0


489  jmp done_compare_2


490 


491  cmp_ebx_high_2:


492  cmp ebx,dword ptr [t_mask]


493  jle done_compare_2


494 


495  mov ebx,dword ptr [t_mask]


496 


497  done_compare_2:


498 


499  //calculate the deltas (left to right)


500  //temp_dsdx = qftoi((float)(right_s  left_s) * inverse_leftover_lookup[num_leftover]);


501  //temp_dtdx = qftoi((float)(right_t  left_t) * inverse_leftover_lookup[num_leftover]);


502 


503  push ebp


504  mov ebp,num_leftover


505 


506  sub eax,dword ptr [left_s]


507  sub ebx,dword ptr [left_t]


508 


509  mov dword ptr [temp_dsdx],eax


510  mov dword ptr [temp_dtdx],ebx


511 


512  fild dword ptr [temp_dsdx]


513  fild dword ptr [temp_dtdx]


514 


515  fmul dword ptr [inverse_leftover_lookup + ebp*4]


516  fxch st(1)


517 


518  fmul dword ptr [inverse_leftover_lookup + ebp*4]


519  fxch st(1)


520 


521  fistp dword ptr [temp_dtdx]


522  fistp dword ptr [temp_dsdx]


523 


524  //calculate the fractional and integral delta vars


525  //s_t_carry[0] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2) + r1_software_texture_width;


526  //s_t_carry[1] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2);


527  //dsdx_frac = (temp_dsdx<<16);


528  //dtdx_frac = (temp_dtdx<<16);


529 


530  mov eax,dword ptr [temp_dsdx]


531  mov ebx,dword ptr [temp_dtdx]


532 


533  mov word ptr [dsdx_frac+2],ax


534  mov word ptr [dtdx_frac+2],bx


535 


536  sar eax,16


537  nop //mov dx,word ptr [left_l]


538 


539  sar ebx,16


540  mov cl,byte ptr [r1_software_twidth_log2]


541 


542  shl ebx,cl


543 


544  add eax,ebx


545  nop //mov ebx,0


546 


547  mov dword ptr [s_t_carry+4],eax


548  add eax,dword ptr [r1_software_texture_width]


549 


550  mov dword ptr [s_t_carry],eax


551  mov cl, byte ptr [num_leftover]


552 


553  mov eax,0 //make sure these high bits are clear


554 


555  ALIGN 16


556 


557  looper3:


558  mov ax,word ptr [esi*2]


559  add edx,dword ptr [dtdx_frac]


560 


561  sbb ebp,ebp


562  add edi,2 //the only convenient place for the stepping of edi was way up here


563 


564  add ecx,dword ptr [dsdx_frac]


565  nop


566 


567  adc esi,dword ptr [4+s_t_carry+ebp*4]


568  and eax,eax


569 


570  jz skip_a_pixel


571  mov word ptr [edi2],ax


572 


573  skip_a_pixel:


574  dec cl


575  jnz looper3


576 


577  pop ebp


578  }


579  }


580  else


581  {


582  //highly unoptimized single pixel drawer


583  register w16 texel = *(r1_software_texture_ptr + (left_s>>16) + ((left_t>>16)<<r1_software_twidth_log2));


584 


585  if (texel)


586  *start_pixel = texel;


587  }


588  }


589  }

