1  /********************************************************************** <BR>


2  This file is part of Crack dot Com's free source code release of


3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for


4  information about compiling & licensing issues visit this URL</a>


5  <PRE> If that doesn't help, contact Jonathan Clark at


6  golgotha_source@usa.net (Subject should have "GOLG" in it)


7  ***********************************************************************/


8 


9  #include "software/r1_software_globals.hh"


10  #include "software/inline_fpu.hh"


11 


12  extern sw32 had_subdivisions;


13  static w8 last_alpha_accumulated;


14 


15  void texture_scanline_perspective_unlit_alpha(w16 *start_pixel,


16  sw32 start_x,


17  void *_left,//perspective_span *left,


18  sw32 width)


19  {


20  start_pixel = (w16 *)((w8 *)start_pixel + start_x);


21 


22  perspective_span *left = (perspective_span *)_left;


23 


24  last_alpha_accumulated = 16;


25 


26  _asm


27  {


28  //left_z = 1.f / left>ooz;


29  //left_s = qftoi(left>soz * left_z) + cur_grads.s_adjust;


30  //left_t = qftoi(left>toz * left_z) + cur_grads.t_adjust;


31 


32  //sw32 had_subdivisions = width & (~15);


33  //num_subdivisions = width >> 4;


34  //num_leftover = width & 15;


35 


36  mov esi,dword ptr [left]


37  mov eax,dword ptr [width]


38 


39  fld1


40  fdiv qword ptr [esi]perspective_span.ooz


41 


42  mov ebx,eax


43  and eax,15


44 


45  shr ebx,4


46  mov ecx,width


47 


48  and ecx,(~15)


49  mov dword ptr [num_leftover],eax


50 


51  mov dword ptr [num_subdivisions],ebx


52  mov dword ptr [had_subdivisions],ecx


53 


54  fld st(0)


55 


56  fmul dword ptr [esi]perspective_span.soz


57  fxch st(1)


58 


59  fmul dword ptr [esi]perspective_span.toz


60  fxch st(1)


61 


62  fistp dword ptr [left_s]


63  fistp dword ptr [left_t]


64 


65  mov eax,dword ptr [cur_grads].s_adjust


66  mov ebx,dword ptr [cur_grads].t_adjust


67 


68  add eax,dword ptr [left_s]


69  add ebx,dword ptr [left_t]


70 


71  mov dword ptr [left_s],eax


72  mov dword ptr [left_t],ebx


73 


74  //clear these out


75  mov dword ptr [dsdx_frac],0


76  mov dword ptr [dtdx_frac],0


77  }


78 


79  if (num_subdivisions)


80  {


81  _asm


82  {


83  //ooz_right = left>ooz + (cur_grads.doozdxspan);


84  //soz_right = left>soz + (cur_grads.dsozdxspan);


85  //toz_right = left>toz + (cur_grads.dtozdxspan);


86 


87  mov esi,dword ptr [left]


88  mov edi,dword ptr [start_pixel]


89 


90  fld qword ptr [esi]perspective_span.ooz


91  fld dword ptr [esi]perspective_span.soz


92  fld dword ptr [esi]perspective_span.toz


93 


94  //t s o


95  fadd dword ptr [cur_grads]tri_gradients.dtozdxspan


96  fxch st(2)


97 


98  //o s t


99 


100  fadd qword ptr [cur_grads]tri_gradients.doozdxspan


101  fxch st(1)


102 


103  //s o t


104 


105  fadd dword ptr [cur_grads]tri_gradients.dsozdxspan


106  fxch st(2)


107 


108  //t o s


109 


110  fstp dword ptr [toz_right]


111  fxch st(1)


112 


113  //s o


114 


115  fstp dword ptr [soz_right]


116 


117  fstp dword ptr [ooz_right]


118 


119  //calculate the 1st right_z


120  fld1


121  fdiv dword ptr [ooz_right]


122 


123  //calculate starting fractional and integral values for s and t


124  //esi = starting_s_coordinate >> 16 + starting_t_coordinate >> 16 << r1_software_twidth_log2


125  //ecx = starting_s_coordinate << 16


126  //edx = starting_t_coordinate << 16


127  //dx = starting_light_value


128 


129  mov esi,dword ptr [r1_software_texture_ptr]


130  mov eax,dword ptr [left_s]


131 


132  shr esi,1


133  mov ebx,dword ptr [left_t]


134 


135  sar eax,16


136  mov edx,dword ptr [left_t]


137 


138  sar ebx,16


139  add esi,eax


140 


141  mov cl,byte ptr [r1_software_twidth_log2]


142  shl ebx,cl


143 


144  sal edx,16


145  mov ecx,dword ptr [left_s]


146 


147  sal ecx,16


148  add esi,ebx


149  }


150 


151  while (num_subdivisions)


152  {


153  _asm


154  {


155  //right_s = qftoi(soz_right * right_z);


156  //right_t = qftoi(toz_right * right_z);


157 


158  //right_z is in st0


159  fld st(0)


160 


161  fmul dword ptr [soz_right]


162  fxch st(1)


163 


164  fmul dword ptr [toz_right]


165  fxch st(1)


166 


167  fistp dword ptr [right_s]


168  fistp dword ptr [right_t]


169 


170  //calculate ooz_right, soz_right, toz_right, and right_z for the end of the next span. if there are


171  //more subdivisions, calculate the end of the next span. if there are no more and there is > 1 leftover


172  //in the leftover span, calculate the end of that.


173 


174  //if (num_subdivisions!=1)


175  //{


176  cmp dword ptr [num_subdivisions],1


177  je last_subdivision


178 


179  //ooz_right += (cur_grads.doozdxspan);


180  //soz_right += (cur_grads.dsozdxspan);


181  //toz_right += (cur_grads.dtozdxspan);


182 


183  fld dword ptr [ooz_right]


184  fadd qword ptr [cur_grads]tri_gradients.doozdxspan


185 


186  fld dword ptr [soz_right]


187  fadd dword ptr [cur_grads]tri_gradients.dsozdxspan


188 


189  fld dword ptr [toz_right]


190  fadd dword ptr [cur_grads]tri_gradients.dtozdxspan


191 


192  fxch st(2)


193  fstp dword ptr [ooz_right]


194 


195  fstp dword ptr [soz_right]


196 


197  fstp dword ptr [toz_right]


198 


199  fld1


200  fdiv dword ptr [ooz_right]


201 


202  jmp not_last_subdivision


203  //}


204  //else


205  //if (num_leftover > 1)


206  //{


207 


208  last_subdivision:


209  cmp dword ptr [num_leftover],1


210  jle not_last_subdivision


211 


212  //calculate the right_z for the end of the leftover span


213  //ooz_right += (cur_grads.doozdx * num_leftover);


214  //soz_right += (cur_grads.dsozdx * num_leftover);


215  //toz_right += (cur_grads.dtozdx * num_leftover);


216 


217  fild dword ptr [num_leftover]


218 


219  //todo: pipeline these fpu ops


220  fld qword ptr [cur_grads]tri_gradients.doozdx


221  fmul st(0),st(1)


222  fadd dword ptr [ooz_right]


223  fstp dword ptr [ooz_right]


224 


225  fld dword ptr [cur_grads]tri_gradients.dsozdx


226  fmul st(0),st(1)


227  fadd dword ptr [soz_right]


228  fstp dword ptr [soz_right]


229 


230  fld dword ptr [cur_grads]tri_gradients.dtozdx


231  fmul st(0),st(1)


232  fadd dword ptr [toz_right]


233  fstp dword ptr [toz_right]


234 


235  fstp st(0) //nifty thing i found, a 1 cycle fpu pop


236 


237  fld1


238  fdiv dword ptr [ooz_right]


239  //}


240 


241  not_last_subdivision:


242  //cap the right_s and right_t's so that they're valid


243 


244  mov eax,dword ptr [right_s]


245  mov ebx,dword ptr [right_t]


246 


247  add eax,dword ptr [cur_grads]tri_gradients.s_adjust


248  add ebx,dword ptr [cur_grads]tri_gradients.t_adjust


249 


250  //cap the right s and t


251  cmp eax,0


252  jge cmp_eax_high


253 


254  mov eax,0


255  jmp cmp_ebx_low


256 


257  cmp_eax_high:


258  cmp eax,dword ptr [s_mask]


259  jle cmp_ebx_low


260 


261  mov eax,dword ptr [s_mask]


262 


263  cmp_ebx_low:


264  cmp ebx,0


265  jge cmp_ebx_high


266 


267  mov ebx,0


268  jmp done_compare


269 


270  cmp_ebx_high:


271  cmp ebx,dword ptr [t_mask]


272  jle done_compare


273 


274  mov ebx,dword ptr [t_mask]


275 


276  done_compare:


277 


278  //store the right_s and right_t


279  //so they can be copied into left_s and left_t at the end of the 16pixel span


280  //(the cant be copied now because we have to calculate (right_sleft_s)>>4 and (right_tleft_t)>>4


281 


282  mov dword ptr [right_s],eax


283  mov dword ptr [right_t],ebx


284 


285  sub eax,dword ptr [left_s]


286  push ebp


287 


288  sar eax,4


289  sub ebx,dword ptr [left_t]


290 


291  sar ebx,4


292  mov word ptr [dsdx_frac+2],ax //this sets the upper 16 bits of dword ptr [dsdx_frac] to ((right_sleft_s)>>4)<<16


293 


294  sar eax,16


295  mov word ptr [dtdx_frac+2],bx //this sets the upper 16 bits of dword ptr [dtdx_frac] to ((right_tleft_t)>>4)<<16


296 


297  sar ebx,16


298  mov cl,byte ptr [r1_software_twidth_log2]


299 


300  shl ebx,cl


301 


302  add eax,ebx


303  mov ebx,0


304 


305  //s_t_carry[1] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2


306  //s_t_carry[0] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2 + r1_software_texture_width


307 


308  mov dword ptr [s_t_carry+4],eax


309  add eax,dword ptr [r1_software_texture_width]


310 


311  mov dword ptr [s_t_carry],eax


312  mov eax,0 //must make sure the high bits of these are zeroed out


313 


314  mov cl,4 //loop is unrolled to 4 pixels  we want to draw 16, so loop 4 times


315  mov bh,byte ptr [last_alpha_accumulated]


316 


317  ALIGN 16


318 


319  //high 16 bits of ecx is the fractional s component


320  //high 16 bits of edx is the fractional t component


321 


322  //eax is used to lookup the texel as well as the low 8bits of the lit texel


323  //ebx is used to lookup the high 8bits of the lit texel


324  //ebp is used to detect a tcarry as well as lookup the lit texel


325  //cl is the loop count variable


326 


327  looper1:


328  mov ax,word ptr [esi*2]


329  add edx,dword ptr [dtdx_frac]


330 


331  sbb ebp,ebp


332  mov bl,ah


333 


334  and eax,4095


335  add ecx,dword ptr [dsdx_frac]


336 


337  adc esi,dword ptr [4+s_t_carry+ebp*4]


338  and bl,240


339 


340  mov ax,word ptr [alpha_table+eax*2]


341  add bh,bl


342 


343  jnc skip_pixel_1


344 


345  mov word ptr [edi],ax


346  add bh,16


347 


348  skip_pixel_1:


349  mov ax,word ptr [esi*2]


350  add edx,dword ptr [dtdx_frac]


351 


352  sbb ebp,ebp


353  mov bl,ah


354 


355  and eax,4095


356  add ecx,dword ptr [dsdx_frac]


357 


358  adc esi,dword ptr [4+s_t_carry+ebp*4]


359  and bl,240


360 


361  mov ax,word ptr [alpha_table+eax*2]


362  add bh,bl


363 


364  jnc skip_pixel_2


365 


366  mov word ptr [edi+2],ax


367  add bh,16


368 


369  skip_pixel_2:


370  mov ax,word ptr [esi*2]


371  add edx,dword ptr [dtdx_frac]


372 


373  sbb ebp,ebp


374  mov bl,ah


375 


376  and eax,4095


377  add ecx,dword ptr [dsdx_frac]


378 


379  adc esi,dword ptr [4+s_t_carry+ebp*4]


380  and bl,240


381 


382  mov ax,word ptr [alpha_table+eax*2]


383  add bh,bl


384 


385  jnc skip_pixel_3


386 


387  mov word ptr [edi+4],ax


388  add bh,16


389 


390  skip_pixel_3:


391  mov ax,word ptr [esi*2]


392  add edx,dword ptr [dtdx_frac]


393 


394  sbb ebp,ebp


395  mov bl,ah


396 


397  and eax,4095


398  add ecx,dword ptr [dsdx_frac]


399 


400  adc esi,dword ptr [4+s_t_carry+ebp*4]


401  and bl,240


402 


403  mov ax,word ptr [alpha_table+eax*2]


404  add bh,bl


405 


406  jnc skip_pixel_4


407 


408  mov word ptr [edi+6],ax


409  add bh,16


410 


411  skip_pixel_4:


412  add edi,8


413  dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to effect the adc bh,dh at the top of the loop)


414 


415  jnz looper1


416  pop ebp


417 


418  mov byte ptr [last_alpha_accumulated],bh


419 


420  //store right_s and right_s in left_s and left_t


421  //right_s is what left_s starts at on the next 16 pixel span


422  //right_t is what left_t starts at on the next 16 pixel span


423 


424  mov eax,dword ptr [right_s]


425  mov ebx,dword ptr [right_t]


426 


427  mov dword ptr [left_s],eax


428  mov dword ptr [left_t],ebx


429  }


430 


431  _asm dec dword ptr [num_subdivisions]


432  }


433 


434  //store these so that the C code below actually works


435  _asm mov dword ptr [start_pixel],edi


436  }


437 


438  if (num_leftover)


439  {


440  if (num_leftover > 1)


441  {


442  if (had_subdivisions==0)


443  {


444  //calculate the right_z for the end of span


445  ooz_right = left>ooz + (cur_grads.doozdx * num_leftover);


446  soz_right = left>soz + (cur_grads.dsozdx * num_leftover);


447  toz_right = left>toz + (cur_grads.dtozdx * num_leftover);


448 


449  //calculate the z at the right endpoint


450  _asm fld1


451  _asm fdiv dword ptr [ooz_right]


452  }


453  else


454  {


455  //the correct ending right_z is already being calculated


456  //(see the if (num_subdivisions!=1) case above


457  }


458 


459  _asm


460  {


461  //calculate starting fractional and integral values for s and t


462 


463  mov esi,dword ptr [r1_software_texture_ptr]


464  mov eax,dword ptr [left_s]


465 


466  shr esi,1


467  mov ebx,dword ptr [left_t]


468 


469  sar eax,16


470  mov edx,dword ptr [left_t]


471 


472  sar ebx,16


473  add esi,eax


474 


475  mov cl,byte ptr [r1_software_twidth_log2]


476  shl ebx,cl


477 


478  sal edx,16


479  mov ecx,dword ptr [left_s]


480 


481  sal ecx,16


482  add esi,ebx


483 


484  mov edi,dword ptr [start_pixel]


485 


486  //calculate the right endpoint


487  //right_s = qftoi(soz_right * right_z) + cur_grads.s_adjust;


488  //right_t = qftoi(toz_right * right_z) + cur_grads.t_adjust;


489 


490  //right_z is in st0


491  fld st(0)


492 


493  fmul dword ptr [soz_right]


494  fxch st(1)


495 


496  fmul dword ptr [toz_right]


497  fxch st(1)


498 


499  fistp dword ptr [right_s]


500  fistp dword ptr [right_t]


501 


502  mov eax,dword ptr [right_s]


503  mov ebx,dword ptr [right_t]


504 


505  add eax,dword ptr [cur_grads]tri_gradients.s_adjust


506  add ebx,dword ptr [cur_grads]tri_gradients.t_adjust


507 


508  //cap the right s and t


509  cmp eax,0


510  jge cmp_eax_high_2


511 


512  mov eax,0


513  jmp cmp_ebx_low_2


514 


515  cmp_eax_high_2:


516  cmp eax,dword ptr [s_mask]


517  jle cmp_ebx_low_2


518 


519  mov eax,dword ptr [s_mask]


520 


521  cmp_ebx_low_2:


522  cmp ebx,0


523  jge cmp_ebx_high_2


524 


525  mov ebx,0


526  jmp done_compare_2


527 


528  cmp_ebx_high_2:


529  cmp ebx,dword ptr [t_mask]


530  jle done_compare_2


531 


532  mov ebx,dword ptr [t_mask]


533 


534  done_compare_2:


535 


536  //calculate the deltas (left to right)


537  //temp_dsdx = qftoi((float)(right_s  left_s) * inverse_leftover_lookup[num_leftover]);


538  //temp_dtdx = qftoi((float)(right_t  left_t) * inverse_leftover_lookup[num_leftover]);


539 


540  push ebp


541  mov ebp,num_leftover


542 


543  sub eax,dword ptr [left_s]


544  sub ebx,dword ptr [left_t]


545 


546  mov dword ptr [temp_dsdx],eax


547  mov dword ptr [temp_dtdx],ebx


548 


549  fild dword ptr [temp_dsdx]


550  fild dword ptr [temp_dtdx]


551 


552  fmul dword ptr [inverse_leftover_lookup + ebp*4]


553  fxch st(1)


554 


555  fmul dword ptr [inverse_leftover_lookup + ebp*4]


556  fxch st(1)


557 


558  fistp dword ptr [temp_dtdx]


559  fistp dword ptr [temp_dsdx]


560 


561  //calculate the fractional and integral delta vars


562  //s_t_carry[0] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2) + r1_software_texture_width;


563  //s_t_carry[1] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2);


564  //dsdx_frac = (temp_dsdx<<16);


565  //dtdx_frac = (temp_dtdx<<16);


566 


567  mov eax,dword ptr [temp_dsdx]


568  mov ebx,dword ptr [temp_dtdx]


569 


570  mov word ptr [dsdx_frac+2],ax


571  mov word ptr [dtdx_frac+2],bx


572 


573  sar eax,16


574  nop //mov dx,word ptr [left_l]


575 


576  sar ebx,16


577  mov cl,byte ptr [r1_software_twidth_log2]


578 


579  shl ebx,cl


580 


581  add eax,ebx


582  nop //mov ebx,0


583 


584  mov dword ptr [s_t_carry+4],eax


585  add eax,dword ptr [r1_software_texture_width]


586 


587  mov dword ptr [s_t_carry],eax


588  mov cl, byte ptr [num_leftover]


589 


590  mov eax,0


591  mov bl,byte ptr [last_alpha_accumulated]


592 


593  ALIGN 16


594 


595  looper3:


596  mov ax,word ptr [esi*2]


597  add edx,dword ptr [dtdx_frac]


598 


599  sbb ebp,ebp


600  add ecx,dword ptr [dsdx_frac]


601 


602  adc esi,dword ptr [4+s_t_carry+ebp*4]


603  add bl,ah


604 


605  jnc skip_a_pixel


606 


607  and eax,4095


608  mov ax,word ptr [alpha_table+eax*2]


609  mov word ptr [edi],ax


610 


611  skip_a_pixel:


612  and bl,240


613  add edi,2


614 


615  dec cl


616  jnz looper3


617 


618  pop ebp


619  }


620  }


621  else


622  {


623  //highly unoptimized single pixel drawer


624  register w16 texel = *( r1_software_texture_ptr + (left_s>>16) + ((left_t>>16) << r1_software_twidth_log2) );


625 


626  if (texel & (15<<12) == (15<<12))


627  {


628  //*start_pixel = alpha_table[texel & 4095];


629  }


630  }


631  }


632  }

