1 | /********************************************************************** <BR>
|
---|
2 | This file is part of Crack dot Com's free source code release of
|
---|
3 | Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for
|
---|
4 | information about compiling & licensing issues visit this URL</a>
|
---|
5 | <PRE> If that doesn't help, contact Jonathan Clark at
|
---|
6 | golgotha_source@usa.net (Subject should have "GOLG" in it)
|
---|
7 | ***********************************************************************/
|
---|
8 |
|
---|
9 | #include "software/r1_software_globals.hh"
|
---|
10 | #include "software/inline_fpu.hh"
|
---|
11 |
|
---|
12 | extern sw32 had_subdivisions;
|
---|
13 | static w8 last_alpha_accumulated;
|
---|
14 |
|
---|
15 | void texture_scanline_perspective_unlit_alpha(w16 *start_pixel,
|
---|
16 | sw32 start_x,
|
---|
17 | void *_left,//perspective_span *left,
|
---|
18 | sw32 width)
|
---|
19 | {
|
---|
20 | start_pixel = (w16 *)((w8 *)start_pixel + start_x);
|
---|
21 |
|
---|
22 | perspective_span *left = (perspective_span *)_left;
|
---|
23 |
|
---|
24 | last_alpha_accumulated = 16;
|
---|
25 |
|
---|
26 | _asm
|
---|
27 | {
|
---|
28 | //left_z = 1.f / left->ooz;
|
---|
29 | //left_s = qftoi(left->soz * left_z) + cur_grads.s_adjust;
|
---|
30 | //left_t = qftoi(left->toz * left_z) + cur_grads.t_adjust;
|
---|
31 |
|
---|
32 | //sw32 had_subdivisions = width & (~15);
|
---|
33 | //num_subdivisions = width >> 4;
|
---|
34 | //num_leftover = width & 15;
|
---|
35 |
|
---|
36 | mov esi,dword ptr [left]
|
---|
37 | mov eax,dword ptr [width]
|
---|
38 |
|
---|
39 | fld1
|
---|
40 | fdiv qword ptr [esi]perspective_span.ooz
|
---|
41 |
|
---|
42 | mov ebx,eax
|
---|
43 | and eax,15
|
---|
44 |
|
---|
45 | shr ebx,4
|
---|
46 | mov ecx,width
|
---|
47 |
|
---|
48 | and ecx,(~15)
|
---|
49 | mov dword ptr [num_leftover],eax
|
---|
50 |
|
---|
51 | mov dword ptr [num_subdivisions],ebx
|
---|
52 | mov dword ptr [had_subdivisions],ecx
|
---|
53 |
|
---|
54 | fld st(0)
|
---|
55 |
|
---|
56 | fmul dword ptr [esi]perspective_span.soz
|
---|
57 | fxch st(1)
|
---|
58 |
|
---|
59 | fmul dword ptr [esi]perspective_span.toz
|
---|
60 | fxch st(1)
|
---|
61 |
|
---|
62 | fistp dword ptr [left_s]
|
---|
63 | fistp dword ptr [left_t]
|
---|
64 |
|
---|
65 | mov eax,dword ptr [cur_grads].s_adjust
|
---|
66 | mov ebx,dword ptr [cur_grads].t_adjust
|
---|
67 |
|
---|
68 | add eax,dword ptr [left_s]
|
---|
69 | add ebx,dword ptr [left_t]
|
---|
70 |
|
---|
71 | mov dword ptr [left_s],eax
|
---|
72 | mov dword ptr [left_t],ebx
|
---|
73 |
|
---|
74 | //clear these out
|
---|
75 | mov dword ptr [dsdx_frac],0
|
---|
76 | mov dword ptr [dtdx_frac],0
|
---|
77 | }
|
---|
78 |
|
---|
79 | if (num_subdivisions)
|
---|
80 | {
|
---|
81 | _asm
|
---|
82 | {
|
---|
83 | //ooz_right = left->ooz + (cur_grads.doozdxspan);
|
---|
84 | //soz_right = left->soz + (cur_grads.dsozdxspan);
|
---|
85 | //toz_right = left->toz + (cur_grads.dtozdxspan);
|
---|
86 |
|
---|
87 | mov esi,dword ptr [left]
|
---|
88 | mov edi,dword ptr [start_pixel]
|
---|
89 |
|
---|
90 | fld qword ptr [esi]perspective_span.ooz
|
---|
91 | fld dword ptr [esi]perspective_span.soz
|
---|
92 | fld dword ptr [esi]perspective_span.toz
|
---|
93 |
|
---|
94 | //t s o
|
---|
95 | fadd dword ptr [cur_grads]tri_gradients.dtozdxspan
|
---|
96 | fxch st(2)
|
---|
97 |
|
---|
98 | //o s t
|
---|
99 |
|
---|
100 | fadd qword ptr [cur_grads]tri_gradients.doozdxspan
|
---|
101 | fxch st(1)
|
---|
102 |
|
---|
103 | //s o t
|
---|
104 |
|
---|
105 | fadd dword ptr [cur_grads]tri_gradients.dsozdxspan
|
---|
106 | fxch st(2)
|
---|
107 |
|
---|
108 | //t o s
|
---|
109 |
|
---|
110 | fstp dword ptr [toz_right]
|
---|
111 | fxch st(1)
|
---|
112 |
|
---|
113 | //s o
|
---|
114 |
|
---|
115 | fstp dword ptr [soz_right]
|
---|
116 |
|
---|
117 | fstp dword ptr [ooz_right]
|
---|
118 |
|
---|
119 | //calculate the 1st right_z
|
---|
120 | fld1
|
---|
121 | fdiv dword ptr [ooz_right]
|
---|
122 |
|
---|
123 | //calculate starting fractional and integral values for s and t
|
---|
124 | //esi = starting_s_coordinate >> 16 + starting_t_coordinate >> 16 << r1_software_twidth_log2
|
---|
125 | //ecx = starting_s_coordinate << 16
|
---|
126 | //edx = starting_t_coordinate << 16
|
---|
127 | //dx = starting_light_value
|
---|
128 |
|
---|
129 | mov esi,dword ptr [r1_software_texture_ptr]
|
---|
130 | mov eax,dword ptr [left_s]
|
---|
131 |
|
---|
132 | shr esi,1
|
---|
133 | mov ebx,dword ptr [left_t]
|
---|
134 |
|
---|
135 | sar eax,16
|
---|
136 | mov edx,dword ptr [left_t]
|
---|
137 |
|
---|
138 | sar ebx,16
|
---|
139 | add esi,eax
|
---|
140 |
|
---|
141 | mov cl,byte ptr [r1_software_twidth_log2]
|
---|
142 | shl ebx,cl
|
---|
143 |
|
---|
144 | sal edx,16
|
---|
145 | mov ecx,dword ptr [left_s]
|
---|
146 |
|
---|
147 | sal ecx,16
|
---|
148 | add esi,ebx
|
---|
149 | }
|
---|
150 |
|
---|
151 | while (num_subdivisions)
|
---|
152 | {
|
---|
153 | _asm
|
---|
154 | {
|
---|
155 | //right_s = qftoi(soz_right * right_z);
|
---|
156 | //right_t = qftoi(toz_right * right_z);
|
---|
157 |
|
---|
158 | //right_z is in st0
|
---|
159 | fld st(0)
|
---|
160 |
|
---|
161 | fmul dword ptr [soz_right]
|
---|
162 | fxch st(1)
|
---|
163 |
|
---|
164 | fmul dword ptr [toz_right]
|
---|
165 | fxch st(1)
|
---|
166 |
|
---|
167 | fistp dword ptr [right_s]
|
---|
168 | fistp dword ptr [right_t]
|
---|
169 |
|
---|
170 | //calculate ooz_right, soz_right, toz_right, and right_z for the end of the next span. if there are
|
---|
171 | //more subdivisions, calculate the end of the next span. if there are no more and there is > 1 leftover
|
---|
172 | //in the leftover span, calculate the end of that.
|
---|
173 |
|
---|
174 | //if (num_subdivisions!=1)
|
---|
175 | //{
|
---|
176 | cmp dword ptr [num_subdivisions],1
|
---|
177 | je last_subdivision
|
---|
178 |
|
---|
179 | //ooz_right += (cur_grads.doozdxspan);
|
---|
180 | //soz_right += (cur_grads.dsozdxspan);
|
---|
181 | //toz_right += (cur_grads.dtozdxspan);
|
---|
182 |
|
---|
183 | fld dword ptr [ooz_right]
|
---|
184 | fadd qword ptr [cur_grads]tri_gradients.doozdxspan
|
---|
185 |
|
---|
186 | fld dword ptr [soz_right]
|
---|
187 | fadd dword ptr [cur_grads]tri_gradients.dsozdxspan
|
---|
188 |
|
---|
189 | fld dword ptr [toz_right]
|
---|
190 | fadd dword ptr [cur_grads]tri_gradients.dtozdxspan
|
---|
191 |
|
---|
192 | fxch st(2)
|
---|
193 | fstp dword ptr [ooz_right]
|
---|
194 |
|
---|
195 | fstp dword ptr [soz_right]
|
---|
196 |
|
---|
197 | fstp dword ptr [toz_right]
|
---|
198 |
|
---|
199 | fld1
|
---|
200 | fdiv dword ptr [ooz_right]
|
---|
201 |
|
---|
202 | jmp not_last_subdivision
|
---|
203 | //}
|
---|
204 | //else
|
---|
205 | //if (num_leftover > 1)
|
---|
206 | //{
|
---|
207 |
|
---|
208 | last_subdivision:
|
---|
209 | cmp dword ptr [num_leftover],1
|
---|
210 | jle not_last_subdivision
|
---|
211 |
|
---|
212 | //calculate the right_z for the end of the leftover span
|
---|
213 | //ooz_right += (cur_grads.doozdx * num_leftover);
|
---|
214 | //soz_right += (cur_grads.dsozdx * num_leftover);
|
---|
215 | //toz_right += (cur_grads.dtozdx * num_leftover);
|
---|
216 |
|
---|
217 | fild dword ptr [num_leftover]
|
---|
218 |
|
---|
219 | //todo: pipeline these fpu ops
|
---|
220 | fld qword ptr [cur_grads]tri_gradients.doozdx
|
---|
221 | fmul st(0),st(1)
|
---|
222 | fadd dword ptr [ooz_right]
|
---|
223 | fstp dword ptr [ooz_right]
|
---|
224 |
|
---|
225 | fld dword ptr [cur_grads]tri_gradients.dsozdx
|
---|
226 | fmul st(0),st(1)
|
---|
227 | fadd dword ptr [soz_right]
|
---|
228 | fstp dword ptr [soz_right]
|
---|
229 |
|
---|
230 | fld dword ptr [cur_grads]tri_gradients.dtozdx
|
---|
231 | fmul st(0),st(1)
|
---|
232 | fadd dword ptr [toz_right]
|
---|
233 | fstp dword ptr [toz_right]
|
---|
234 |
|
---|
235 | fstp st(0) //nifty thing i found, a 1 cycle fpu pop
|
---|
236 |
|
---|
237 | fld1
|
---|
238 | fdiv dword ptr [ooz_right]
|
---|
239 | //}
|
---|
240 |
|
---|
241 | not_last_subdivision:
|
---|
242 | //cap the right_s and right_t's so that they're valid
|
---|
243 |
|
---|
244 | mov eax,dword ptr [right_s]
|
---|
245 | mov ebx,dword ptr [right_t]
|
---|
246 |
|
---|
247 | add eax,dword ptr [cur_grads]tri_gradients.s_adjust
|
---|
248 | add ebx,dword ptr [cur_grads]tri_gradients.t_adjust
|
---|
249 |
|
---|
250 | //cap the right s and t
|
---|
251 | cmp eax,0
|
---|
252 | jge cmp_eax_high
|
---|
253 |
|
---|
254 | mov eax,0
|
---|
255 | jmp cmp_ebx_low
|
---|
256 |
|
---|
257 | cmp_eax_high:
|
---|
258 | cmp eax,dword ptr [s_mask]
|
---|
259 | jle cmp_ebx_low
|
---|
260 |
|
---|
261 | mov eax,dword ptr [s_mask]
|
---|
262 |
|
---|
263 | cmp_ebx_low:
|
---|
264 | cmp ebx,0
|
---|
265 | jge cmp_ebx_high
|
---|
266 |
|
---|
267 | mov ebx,0
|
---|
268 | jmp done_compare
|
---|
269 |
|
---|
270 | cmp_ebx_high:
|
---|
271 | cmp ebx,dword ptr [t_mask]
|
---|
272 | jle done_compare
|
---|
273 |
|
---|
274 | mov ebx,dword ptr [t_mask]
|
---|
275 |
|
---|
276 | done_compare:
|
---|
277 |
|
---|
278 | //store the right_s and right_t
|
---|
279 | //so they can be copied into left_s and left_t at the end of the 16-pixel span
|
---|
280 | //(the cant be copied now because we have to calculate (right_s-left_s)>>4 and (right_t-left_t)>>4
|
---|
281 |
|
---|
282 | mov dword ptr [right_s],eax
|
---|
283 | mov dword ptr [right_t],ebx
|
---|
284 |
|
---|
285 | sub eax,dword ptr [left_s]
|
---|
286 | push ebp
|
---|
287 |
|
---|
288 | sar eax,4
|
---|
289 | sub ebx,dword ptr [left_t]
|
---|
290 |
|
---|
291 | sar ebx,4
|
---|
292 | mov word ptr [dsdx_frac+2],ax //this sets the upper 16 bits of dword ptr [dsdx_frac] to ((right_s-left_s)>>4)<<16
|
---|
293 |
|
---|
294 | sar eax,16
|
---|
295 | mov word ptr [dtdx_frac+2],bx //this sets the upper 16 bits of dword ptr [dtdx_frac] to ((right_t-left_t)>>4)<<16
|
---|
296 |
|
---|
297 | sar ebx,16
|
---|
298 | mov cl,byte ptr [r1_software_twidth_log2]
|
---|
299 |
|
---|
300 | shl ebx,cl
|
---|
301 |
|
---|
302 | add eax,ebx
|
---|
303 | mov ebx,0
|
---|
304 |
|
---|
305 | //s_t_carry[1] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2
|
---|
306 | //s_t_carry[0] = integral_dsdx + integral_dtdx<<r1_software_twidth_log2 + r1_software_texture_width
|
---|
307 |
|
---|
308 | mov dword ptr [s_t_carry+4],eax
|
---|
309 | add eax,dword ptr [r1_software_texture_width]
|
---|
310 |
|
---|
311 | mov dword ptr [s_t_carry],eax
|
---|
312 | mov eax,0 //must make sure the high bits of these are zeroed out
|
---|
313 |
|
---|
314 | mov cl,4 //loop is unrolled to 4 pixels - we want to draw 16, so loop 4 times
|
---|
315 | mov bh,byte ptr [last_alpha_accumulated]
|
---|
316 |
|
---|
317 | ALIGN 16
|
---|
318 |
|
---|
319 | //high 16 bits of ecx is the fractional s component
|
---|
320 | //high 16 bits of edx is the fractional t component
|
---|
321 |
|
---|
322 | //eax is used to lookup the texel as well as the low 8-bits of the lit texel
|
---|
323 | //ebx is used to lookup the high 8-bits of the lit texel
|
---|
324 | //ebp is used to detect a t-carry as well as lookup the lit texel
|
---|
325 | //cl is the loop count variable
|
---|
326 |
|
---|
327 | looper1:
|
---|
328 | mov ax,word ptr [esi*2]
|
---|
329 | add edx,dword ptr [dtdx_frac]
|
---|
330 |
|
---|
331 | sbb ebp,ebp
|
---|
332 | mov bl,ah
|
---|
333 |
|
---|
334 | and eax,4095
|
---|
335 | add ecx,dword ptr [dsdx_frac]
|
---|
336 |
|
---|
337 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
338 | and bl,240
|
---|
339 |
|
---|
340 | mov ax,word ptr [alpha_table+eax*2]
|
---|
341 | add bh,bl
|
---|
342 |
|
---|
343 | jnc skip_pixel_1
|
---|
344 |
|
---|
345 | mov word ptr [edi],ax
|
---|
346 | add bh,16
|
---|
347 |
|
---|
348 | skip_pixel_1:
|
---|
349 | mov ax,word ptr [esi*2]
|
---|
350 | add edx,dword ptr [dtdx_frac]
|
---|
351 |
|
---|
352 | sbb ebp,ebp
|
---|
353 | mov bl,ah
|
---|
354 |
|
---|
355 | and eax,4095
|
---|
356 | add ecx,dword ptr [dsdx_frac]
|
---|
357 |
|
---|
358 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
359 | and bl,240
|
---|
360 |
|
---|
361 | mov ax,word ptr [alpha_table+eax*2]
|
---|
362 | add bh,bl
|
---|
363 |
|
---|
364 | jnc skip_pixel_2
|
---|
365 |
|
---|
366 | mov word ptr [edi+2],ax
|
---|
367 | add bh,16
|
---|
368 |
|
---|
369 | skip_pixel_2:
|
---|
370 | mov ax,word ptr [esi*2]
|
---|
371 | add edx,dword ptr [dtdx_frac]
|
---|
372 |
|
---|
373 | sbb ebp,ebp
|
---|
374 | mov bl,ah
|
---|
375 |
|
---|
376 | and eax,4095
|
---|
377 | add ecx,dword ptr [dsdx_frac]
|
---|
378 |
|
---|
379 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
380 | and bl,240
|
---|
381 |
|
---|
382 | mov ax,word ptr [alpha_table+eax*2]
|
---|
383 | add bh,bl
|
---|
384 |
|
---|
385 | jnc skip_pixel_3
|
---|
386 |
|
---|
387 | mov word ptr [edi+4],ax
|
---|
388 | add bh,16
|
---|
389 |
|
---|
390 | skip_pixel_3:
|
---|
391 | mov ax,word ptr [esi*2]
|
---|
392 | add edx,dword ptr [dtdx_frac]
|
---|
393 |
|
---|
394 | sbb ebp,ebp
|
---|
395 | mov bl,ah
|
---|
396 |
|
---|
397 | and eax,4095
|
---|
398 | add ecx,dword ptr [dsdx_frac]
|
---|
399 |
|
---|
400 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
401 | and bl,240
|
---|
402 |
|
---|
403 | mov ax,word ptr [alpha_table+eax*2]
|
---|
404 | add bh,bl
|
---|
405 |
|
---|
406 | jnc skip_pixel_4
|
---|
407 |
|
---|
408 | mov word ptr [edi+6],ax
|
---|
409 | add bh,16
|
---|
410 |
|
---|
411 | skip_pixel_4:
|
---|
412 | add edi,8
|
---|
413 | dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to effect the adc bh,dh at the top of the loop)
|
---|
414 |
|
---|
415 | jnz looper1
|
---|
416 | pop ebp
|
---|
417 |
|
---|
418 | mov byte ptr [last_alpha_accumulated],bh
|
---|
419 |
|
---|
420 | //store right_s and right_s in left_s and left_t
|
---|
421 | //right_s is what left_s starts at on the next 16 pixel span
|
---|
422 | //right_t is what left_t starts at on the next 16 pixel span
|
---|
423 |
|
---|
424 | mov eax,dword ptr [right_s]
|
---|
425 | mov ebx,dword ptr [right_t]
|
---|
426 |
|
---|
427 | mov dword ptr [left_s],eax
|
---|
428 | mov dword ptr [left_t],ebx
|
---|
429 | }
|
---|
430 |
|
---|
431 | _asm dec dword ptr [num_subdivisions]
|
---|
432 | }
|
---|
433 |
|
---|
434 | //store these so that the C code below actually works
|
---|
435 | _asm mov dword ptr [start_pixel],edi
|
---|
436 | }
|
---|
437 |
|
---|
438 | if (num_leftover)
|
---|
439 | {
|
---|
440 | if (num_leftover > 1)
|
---|
441 | {
|
---|
442 | if (had_subdivisions==0)
|
---|
443 | {
|
---|
444 | //calculate the right_z for the end of span
|
---|
445 | ooz_right = left->ooz + (cur_grads.doozdx * num_leftover);
|
---|
446 | soz_right = left->soz + (cur_grads.dsozdx * num_leftover);
|
---|
447 | toz_right = left->toz + (cur_grads.dtozdx * num_leftover);
|
---|
448 |
|
---|
449 | //calculate the z at the right endpoint
|
---|
450 | _asm fld1
|
---|
451 | _asm fdiv dword ptr [ooz_right]
|
---|
452 | }
|
---|
453 | else
|
---|
454 | {
|
---|
455 | //the correct ending right_z is already being calculated
|
---|
456 | //(see the if (num_subdivisions!=1) case above
|
---|
457 | }
|
---|
458 |
|
---|
459 | _asm
|
---|
460 | {
|
---|
461 | //calculate starting fractional and integral values for s and t
|
---|
462 |
|
---|
463 | mov esi,dword ptr [r1_software_texture_ptr]
|
---|
464 | mov eax,dword ptr [left_s]
|
---|
465 |
|
---|
466 | shr esi,1
|
---|
467 | mov ebx,dword ptr [left_t]
|
---|
468 |
|
---|
469 | sar eax,16
|
---|
470 | mov edx,dword ptr [left_t]
|
---|
471 |
|
---|
472 | sar ebx,16
|
---|
473 | add esi,eax
|
---|
474 |
|
---|
475 | mov cl,byte ptr [r1_software_twidth_log2]
|
---|
476 | shl ebx,cl
|
---|
477 |
|
---|
478 | sal edx,16
|
---|
479 | mov ecx,dword ptr [left_s]
|
---|
480 |
|
---|
481 | sal ecx,16
|
---|
482 | add esi,ebx
|
---|
483 |
|
---|
484 | mov edi,dword ptr [start_pixel]
|
---|
485 |
|
---|
486 | //calculate the right endpoint
|
---|
487 | //right_s = qftoi(soz_right * right_z) + cur_grads.s_adjust;
|
---|
488 | //right_t = qftoi(toz_right * right_z) + cur_grads.t_adjust;
|
---|
489 |
|
---|
490 | //right_z is in st0
|
---|
491 | fld st(0)
|
---|
492 |
|
---|
493 | fmul dword ptr [soz_right]
|
---|
494 | fxch st(1)
|
---|
495 |
|
---|
496 | fmul dword ptr [toz_right]
|
---|
497 | fxch st(1)
|
---|
498 |
|
---|
499 | fistp dword ptr [right_s]
|
---|
500 | fistp dword ptr [right_t]
|
---|
501 |
|
---|
502 | mov eax,dword ptr [right_s]
|
---|
503 | mov ebx,dword ptr [right_t]
|
---|
504 |
|
---|
505 | add eax,dword ptr [cur_grads]tri_gradients.s_adjust
|
---|
506 | add ebx,dword ptr [cur_grads]tri_gradients.t_adjust
|
---|
507 |
|
---|
508 | //cap the right s and t
|
---|
509 | cmp eax,0
|
---|
510 | jge cmp_eax_high_2
|
---|
511 |
|
---|
512 | mov eax,0
|
---|
513 | jmp cmp_ebx_low_2
|
---|
514 |
|
---|
515 | cmp_eax_high_2:
|
---|
516 | cmp eax,dword ptr [s_mask]
|
---|
517 | jle cmp_ebx_low_2
|
---|
518 |
|
---|
519 | mov eax,dword ptr [s_mask]
|
---|
520 |
|
---|
521 | cmp_ebx_low_2:
|
---|
522 | cmp ebx,0
|
---|
523 | jge cmp_ebx_high_2
|
---|
524 |
|
---|
525 | mov ebx,0
|
---|
526 | jmp done_compare_2
|
---|
527 |
|
---|
528 | cmp_ebx_high_2:
|
---|
529 | cmp ebx,dword ptr [t_mask]
|
---|
530 | jle done_compare_2
|
---|
531 |
|
---|
532 | mov ebx,dword ptr [t_mask]
|
---|
533 |
|
---|
534 | done_compare_2:
|
---|
535 |
|
---|
536 | //calculate the deltas (left to right)
|
---|
537 | //temp_dsdx = qftoi((float)(right_s - left_s) * inverse_leftover_lookup[num_leftover]);
|
---|
538 | //temp_dtdx = qftoi((float)(right_t - left_t) * inverse_leftover_lookup[num_leftover]);
|
---|
539 |
|
---|
540 | push ebp
|
---|
541 | mov ebp,num_leftover
|
---|
542 |
|
---|
543 | sub eax,dword ptr [left_s]
|
---|
544 | sub ebx,dword ptr [left_t]
|
---|
545 |
|
---|
546 | mov dword ptr [temp_dsdx],eax
|
---|
547 | mov dword ptr [temp_dtdx],ebx
|
---|
548 |
|
---|
549 | fild dword ptr [temp_dsdx]
|
---|
550 | fild dword ptr [temp_dtdx]
|
---|
551 |
|
---|
552 | fmul dword ptr [inverse_leftover_lookup + ebp*4]
|
---|
553 | fxch st(1)
|
---|
554 |
|
---|
555 | fmul dword ptr [inverse_leftover_lookup + ebp*4]
|
---|
556 | fxch st(1)
|
---|
557 |
|
---|
558 | fistp dword ptr [temp_dtdx]
|
---|
559 | fistp dword ptr [temp_dsdx]
|
---|
560 |
|
---|
561 | //calculate the fractional and integral delta vars
|
---|
562 | //s_t_carry[0] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2) + r1_software_texture_width;
|
---|
563 | //s_t_carry[1] = (temp_dsdx>>16) + ((temp_dtdx>>16)<<r1_software_twidth_log2);
|
---|
564 | //dsdx_frac = (temp_dsdx<<16);
|
---|
565 | //dtdx_frac = (temp_dtdx<<16);
|
---|
566 |
|
---|
567 | mov eax,dword ptr [temp_dsdx]
|
---|
568 | mov ebx,dword ptr [temp_dtdx]
|
---|
569 |
|
---|
570 | mov word ptr [dsdx_frac+2],ax
|
---|
571 | mov word ptr [dtdx_frac+2],bx
|
---|
572 |
|
---|
573 | sar eax,16
|
---|
574 | nop //mov dx,word ptr [left_l]
|
---|
575 |
|
---|
576 | sar ebx,16
|
---|
577 | mov cl,byte ptr [r1_software_twidth_log2]
|
---|
578 |
|
---|
579 | shl ebx,cl
|
---|
580 |
|
---|
581 | add eax,ebx
|
---|
582 | nop //mov ebx,0
|
---|
583 |
|
---|
584 | mov dword ptr [s_t_carry+4],eax
|
---|
585 | add eax,dword ptr [r1_software_texture_width]
|
---|
586 |
|
---|
587 | mov dword ptr [s_t_carry],eax
|
---|
588 | mov cl, byte ptr [num_leftover]
|
---|
589 |
|
---|
590 | mov eax,0
|
---|
591 | mov bl,byte ptr [last_alpha_accumulated]
|
---|
592 |
|
---|
593 | ALIGN 16
|
---|
594 |
|
---|
595 | looper3:
|
---|
596 | mov ax,word ptr [esi*2]
|
---|
597 | add edx,dword ptr [dtdx_frac]
|
---|
598 |
|
---|
599 | sbb ebp,ebp
|
---|
600 | add ecx,dword ptr [dsdx_frac]
|
---|
601 |
|
---|
602 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
603 | add bl,ah
|
---|
604 |
|
---|
605 | jnc skip_a_pixel
|
---|
606 |
|
---|
607 | and eax,4095
|
---|
608 | mov ax,word ptr [alpha_table+eax*2]
|
---|
609 | mov word ptr [edi],ax
|
---|
610 |
|
---|
611 | skip_a_pixel:
|
---|
612 | and bl,240
|
---|
613 | add edi,2
|
---|
614 |
|
---|
615 | dec cl
|
---|
616 | jnz looper3
|
---|
617 |
|
---|
618 | pop ebp
|
---|
619 | }
|
---|
620 | }
|
---|
621 | else
|
---|
622 | {
|
---|
623 | //highly unoptimized single pixel drawer
|
---|
624 | register w16 texel = *( r1_software_texture_ptr + (left_s>>16) + ((left_t>>16) << r1_software_twidth_log2) );
|
---|
625 |
|
---|
626 | if (texel & (15<<12) == (15<<12))
|
---|
627 | {
|
---|
628 | //*start_pixel = alpha_table[texel & 4095];
|
---|
629 | }
|
---|
630 | }
|
---|
631 | }
|
---|
632 | }
|
---|