[80] | 1 | /********************************************************************** <BR>
|
---|
| 2 | This file is part of Crack dot Com's free source code release of
|
---|
| 3 | Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for
|
---|
| 4 | information about compiling & licensing issues visit this URL</a>
|
---|
| 5 | <PRE> If that doesn't help, contact Jonathan Clark at
|
---|
| 6 | golgotha_source@usa.net (Subject should have "GOLG" in it)
|
---|
| 7 | ***********************************************************************/
|
---|
| 8 |
|
---|
| 9 | #include "software/r1_software_globals.hh"
|
---|
| 10 | #include "software/inline_fpu.hh"
|
---|
| 11 |
|
---|
| 12 | w32 *texture_affine_lit_starter_amd3d()
|
---|
| 13 | {
|
---|
| 14 | w32 returnval;
|
---|
| 15 | _asm
|
---|
| 16 | {
|
---|
| 17 | mov eax,OFFSET dumb_addr
|
---|
| 18 | dumb_addr:
|
---|
| 19 | mov returnval,eax
|
---|
| 20 | }
|
---|
| 21 | return (w32 *)returnval;
|
---|
| 22 | }
|
---|
| 23 |
|
---|
| 24 | extern w8 last_bh;
|
---|
| 25 |
|
---|
| 26 | void texture_scanline_affine_lit_amd3d(w16 *start_pixel,
|
---|
| 27 | sw32 start_x,
|
---|
| 28 | void *left,//perspective_span *left,
|
---|
| 29 | sw32 width)
|
---|
| 30 | {
|
---|
| 31 | start_pixel = (w16 *)((w8 *)start_pixel + start_x);
|
---|
| 32 | last_bh = 0;
|
---|
| 33 |
|
---|
| 34 | _asm
|
---|
| 35 | {
|
---|
| 36 | //num_subdivisions = width >> 4;
|
---|
| 37 | //num_leftover = width & 15;
|
---|
| 38 |
|
---|
| 39 | mov edi,dword ptr [left]
|
---|
| 40 |
|
---|
| 41 | mov eax,dword ptr [width]
|
---|
| 42 | mov ebx,dword ptr [width]
|
---|
| 43 |
|
---|
| 44 | shr ebx,4
|
---|
| 45 | and eax,15
|
---|
| 46 |
|
---|
| 47 | mov dword ptr [num_leftover],eax
|
---|
| 48 | mov dword ptr [num_subdivisions],ebx
|
---|
| 49 |
|
---|
| 50 | //esi = starting_s_coordinate >> 16 + starting_t_coordinate >> 16 << r1_software_twidth_log2
|
---|
| 51 | //ecx = starting_s_coordinate << 16
|
---|
| 52 | //edx = starting_t_coordinate << 16
|
---|
| 53 | //dx = starting_light_value
|
---|
| 54 |
|
---|
| 55 | mov eax,dword ptr [edi]affine_span.s
|
---|
| 56 | mov ebx,dword ptr [edi]affine_span.t
|
---|
| 57 |
|
---|
| 58 | sar eax,16
|
---|
| 59 | mov esi,dword ptr [r1_software_texture_ptr]
|
---|
| 60 |
|
---|
| 61 | sar ebx,16
|
---|
| 62 | mov ecx,dword ptr [edi]affine_span.s
|
---|
| 63 |
|
---|
| 64 | shr esi,1
|
---|
| 65 | mov cl,byte ptr [r1_software_twidth_log2]
|
---|
| 66 |
|
---|
| 67 | shl ebx,cl
|
---|
| 68 |
|
---|
| 69 | sal ecx,16
|
---|
| 70 | mov edx,dword ptr [edi]affine_span.t
|
---|
| 71 |
|
---|
| 72 | sal edx,16
|
---|
| 73 | add eax,ebx
|
---|
| 74 |
|
---|
| 75 | mov dx,word ptr [edi+AFFINE_SPAN_L]
|
---|
| 76 | mov edi,dword ptr [start_pixel]
|
---|
| 77 |
|
---|
| 78 | add esi,eax
|
---|
| 79 | push ebp
|
---|
| 80 |
|
---|
| 81 | mov ch,dl //copy the initial error from the 1st lighting value
|
---|
| 82 |
|
---|
| 83 | mov ebx,0
|
---|
| 84 | mov eax,0 //must make sure the high bits of these are zeroed out
|
---|
| 85 | }
|
---|
| 86 |
|
---|
| 87 | while (num_subdivisions)
|
---|
| 88 | {
|
---|
| 89 | _asm
|
---|
| 90 | {
|
---|
| 91 | mov cl,4
|
---|
| 92 | mov bh,byte ptr [last_bh] //necessary? bh should be preserved from the bottom of the loop..
|
---|
| 93 |
|
---|
| 94 | add ch,0 //this is used to clear the carry flag (the actual clc instruction takes 2 cycles. stupid.)
|
---|
| 95 |
|
---|
| 96 | ALIGN 16
|
---|
| 97 |
|
---|
| 98 | //high 16 bits of ecx is the fractional s component
|
---|
| 99 | //high 16 bits of edx is the fractional t component
|
---|
| 100 |
|
---|
| 101 | //eax is used to lookup the texel as well as the low 8-bits of the lit texel
|
---|
| 102 | //ebx is used to lookup the high 8-bits of the lit texel
|
---|
| 103 | //ebp is used to detect a t-carry as well as lookup the lit texel
|
---|
| 104 | //cl is the loop count variable
|
---|
| 105 | //dx is the lighting value (8 bits integer, 8 bits fraction)
|
---|
| 106 | //ch is the lighting error
|
---|
| 107 | //bh is used to dither the lighting (mov bh,0 then add ch,dl then adc bh,dh)
|
---|
| 108 |
|
---|
| 109 | looper1:
|
---|
| 110 | adc bh,dh
|
---|
| 111 | add edi,8 //the only convenient place for the stepping of edi was way up here
|
---|
| 112 |
|
---|
| 113 | movzx eax,word ptr [esi*2]
|
---|
| 114 | add edx,dword ptr [dtdx_frac]
|
---|
| 115 |
|
---|
| 116 | sbb ebp,ebp
|
---|
| 117 | mov bl,ah
|
---|
| 118 |
|
---|
| 119 | add ecx,dword ptr [dsdx_frac]
|
---|
| 120 | mov ah,bh
|
---|
| 121 |
|
---|
| 122 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
| 123 | mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
|
---|
| 124 |
|
---|
| 125 | add ebp,dword ptr [0xDEADBEEF+eax*4]
|
---|
| 126 | add edx,dword ptr [dldx_fixed]
|
---|
| 127 |
|
---|
| 128 | mov bh,0
|
---|
| 129 | add ch,dl
|
---|
| 130 |
|
---|
| 131 | mov word ptr [edi-8],bp
|
---|
| 132 | adc bh,dh
|
---|
| 133 |
|
---|
| 134 | movzx eax,word ptr [esi*2] //first pixel is finished aroundhere
|
---|
| 135 | add edx,dword ptr [dtdx_frac]
|
---|
| 136 |
|
---|
| 137 | sbb ebp,ebp
|
---|
| 138 | mov bl,ah
|
---|
| 139 |
|
---|
| 140 | add ecx,dword ptr [dsdx_frac]
|
---|
| 141 | mov ah,bh
|
---|
| 142 |
|
---|
| 143 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
| 144 | mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
|
---|
| 145 |
|
---|
| 146 | add ebp,dword ptr [0xDEADBEEF+eax*4]
|
---|
| 147 | add edx,dword ptr [dldx_fixed]
|
---|
| 148 |
|
---|
| 149 | mov bh,0
|
---|
| 150 | add ch,dl
|
---|
| 151 |
|
---|
| 152 | mov word ptr [edi-6],bp
|
---|
| 153 | adc bh,dh
|
---|
| 154 |
|
---|
| 155 | movzx eax,word ptr [esi*2]
|
---|
| 156 | add edx,dword ptr [dtdx_frac]
|
---|
| 157 |
|
---|
| 158 | sbb ebp,ebp
|
---|
| 159 | mov bl,ah
|
---|
| 160 |
|
---|
| 161 | add ecx,dword ptr [dsdx_frac]
|
---|
| 162 | mov ah,bh
|
---|
| 163 |
|
---|
| 164 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
| 165 | mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
|
---|
| 166 |
|
---|
| 167 | add ebp,dword ptr [0xDEADBEEF+eax*4]
|
---|
| 168 | add edx,dword ptr [dldx_fixed]
|
---|
| 169 |
|
---|
| 170 | mov bh,0
|
---|
| 171 | add ch,dl
|
---|
| 172 |
|
---|
| 173 | mov word ptr [edi-4],bp
|
---|
| 174 | adc bh,dh
|
---|
| 175 |
|
---|
| 176 | movzx eax,word ptr [esi*2] //first pixel is finished aroundhere
|
---|
| 177 | add edx,dword ptr [dtdx_frac]
|
---|
| 178 |
|
---|
| 179 | sbb ebp,ebp
|
---|
| 180 | mov bl,ah
|
---|
| 181 |
|
---|
| 182 | add ecx,dword ptr [dsdx_frac]
|
---|
| 183 | mov ah,bh
|
---|
| 184 |
|
---|
| 185 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
| 186 | mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
|
---|
| 187 |
|
---|
| 188 | add ebp,dword ptr [0xDEADBEEF+eax*4]
|
---|
| 189 | add edx,dword ptr [dldx_fixed]
|
---|
| 190 |
|
---|
| 191 | mov bh,0
|
---|
| 192 | add ch,dl
|
---|
| 193 |
|
---|
| 194 | mov word ptr [edi-2],bp
|
---|
| 195 | dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to effect the adc bh,dh at the top of the loop)
|
---|
| 196 |
|
---|
| 197 | jnz looper1
|
---|
| 198 |
|
---|
| 199 | adc bh,0 //if we're done looping, save the last carry information here
|
---|
| 200 | mov byte ptr [last_bh],bh
|
---|
| 201 | }
|
---|
| 202 |
|
---|
| 203 | _asm dec dword ptr [num_subdivisions]
|
---|
| 204 | }
|
---|
| 205 |
|
---|
| 206 | if (num_leftover)
|
---|
| 207 | {
|
---|
| 208 | _asm
|
---|
| 209 | {
|
---|
| 210 | mov cl, byte ptr [num_leftover]
|
---|
| 211 | mov bh, byte ptr [last_bh] //necessary?
|
---|
| 212 |
|
---|
| 213 | add ch,0 //this is used to clear the carry flag (the actual clc instruction takes 2 cycles. stupid.)
|
---|
| 214 |
|
---|
| 215 | ALIGN 16
|
---|
| 216 |
|
---|
| 217 | looper3:
|
---|
| 218 | adc bh,dh
|
---|
| 219 | add edi,2
|
---|
| 220 |
|
---|
| 221 | movzx eax,word ptr [esi*2]
|
---|
| 222 | add edx,dword ptr [dtdx_frac]
|
---|
| 223 |
|
---|
| 224 | sbb ebp,ebp
|
---|
| 225 | mov bl,ah
|
---|
| 226 |
|
---|
| 227 | add ecx,dword ptr [dsdx_frac]
|
---|
| 228 | mov ah,bh
|
---|
| 229 |
|
---|
| 230 | adc esi,dword ptr [4+s_t_carry+ebp*4]
|
---|
| 231 | mov ebp,dword ptr [0xDEADBEEF+ctable_size_bytes+ebx*4]
|
---|
| 232 |
|
---|
| 233 | add ebp,dword ptr [0xDEADBEEF+eax*4]
|
---|
| 234 | add edx,dword ptr [dldx_fixed]
|
---|
| 235 |
|
---|
| 236 | mov bh,0
|
---|
| 237 | add ch,dl
|
---|
| 238 |
|
---|
| 239 | mov word ptr [edi-2],bp
|
---|
| 240 | dec cl //thank god this doesnt modify the carry flag (the above add ch,dl needs to effect the adc bh,dh at the top of the loop)
|
---|
| 241 |
|
---|
| 242 | jnz looper3
|
---|
| 243 | }
|
---|
| 244 | }
|
---|
| 245 | _asm pop ebp
|
---|
| 246 | }
|
---|
| 247 |
|
---|
| 248 | w32 *texture_affine_lit_sentinel_amd3d()
|
---|
| 249 | {
|
---|
| 250 | w32 returnval;
|
---|
| 251 | _asm
|
---|
| 252 | {
|
---|
| 253 | mov eax,OFFSET dumb_addr
|
---|
| 254 | dumb_addr:
|
---|
| 255 | mov returnval,eax
|
---|
| 256 | }
|
---|
| 257 | return (w32 *)returnval;
|
---|
| 258 | }
|
---|
| 259 |
|
---|
| 260 | void insert_color_modify_address_low(w32 *address);
|
---|
| 261 | void insert_color_modify_address_high(w32 *address);
|
---|
| 262 | extern w32 color_modify_list[];
|
---|
| 263 | extern sw32 num_color_modifies;
|
---|
| 264 |
|
---|
| 265 | void setup_color_modify_affine_lit_amd3d()
|
---|
| 266 | {
|
---|
| 267 | w32 *stop = texture_affine_lit_sentinel_amd3d();
|
---|
| 268 |
|
---|
| 269 | w32 *search = texture_affine_lit_starter_amd3d();
|
---|
| 270 | //start searching for 0xDEADBEEF
|
---|
| 271 | while (search < stop)
|
---|
| 272 | {
|
---|
| 273 | //casting craziness
|
---|
| 274 | search = (w32 *)((w8 *)search + 1);
|
---|
| 275 | if (*search==0xDEADBEEF)
|
---|
| 276 | {
|
---|
| 277 | insert_color_modify_address_low(search);
|
---|
| 278 | }
|
---|
| 279 | else
|
---|
| 280 | if (*search==(0xDEADBEEF + ctable_size_bytes))
|
---|
| 281 | {
|
---|
| 282 | insert_color_modify_address_high(search);
|
---|
| 283 | }
|
---|
| 284 | }
|
---|
| 285 | }
|
---|