r_spr8.asm (22105B)
.386P
.model FLAT
;
; d_spr8.s
; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
;
; Syntax: MASM (Intel operand order), 32-bit flat model.
;

include qasm.inc
include d_if.inc

if	id386

;----------------------------------------------------------------------
; 8-bpp horizontal span drawing code for polygons, with transparency.
;----------------------------------------------------------------------

_TEXT SEGMENT

;----------------------------------------------------------------------
; out-of-line, rarely-needed clamping code
;
; Each stub loads the clamped value into the register that the main
; path was testing and jumps back to the matching LClampReentryN label.
;
; LClampHighOrLow0/1 are reached through a "ja" (unsigned above) taken
; after a cmp against _bbextents / _bbextentt.  No flag-modifying
; instruction runs in between, so the "jg" here re-tests the same cmp
; as a signed compare: signed-greater means the value really is too
; large (clamp to the extent); otherwise the value was negative and is
; clamped to zero.
;
; LClampLow2..5 / LClampHigh2..5 clamp to the range 2048 .. extent.
;----------------------------------------------------------------------

LClampHigh0:
	mov	esi,ds:dword ptr[_bbextents]	; s too large: clamp to bbextents
	jmp	LClampReentry0
LClampHighOrLow0:
	jg	LClampHigh0			; signed greater: really too high
	xor	esi,esi				; otherwise negative: clamp s to 0
	jmp	LClampReentry0

LClampHigh1:
	mov	edx,ds:dword ptr[_bbextentt]	; t too large: clamp to bbextentt
	jmp	LClampReentry1
LClampHighOrLow1:
	jg	LClampHigh1			; signed greater: really too high
	xor	edx,edx				; otherwise negative: clamp t to 0
	jmp	LClampReentry1

LClampLow2:
	mov	ebp,2048			; snext below minimum
	jmp	LClampReentry2
LClampHigh2:
	mov	ebp,ds:dword ptr[_bbextents]	; snext above maximum
	jmp	LClampReentry2

LClampLow3:
	mov	ecx,2048			; tnext below minimum
	jmp	LClampReentry3
LClampHigh3:
	mov	ecx,ds:dword ptr[_bbextentt]	; tnext above maximum
	jmp	LClampReentry3

LClampLow4:
	mov	eax,2048			; snext below minimum (last segment)
	jmp	LClampReentry4
LClampHigh4:
	mov	eax,ds:dword ptr[_bbextents]	; snext above maximum (last segment)
	jmp	LClampReentry4

LClampLow5:
	mov	ebx,2048			; tnext below minimum (last segment)
	jmp	LClampReentry5
LClampHigh5:
	mov	ebx,ds:dword ptr[_bbextentt]	; tnext above maximum (last segment)
	jmp	LClampReentry5


; offset from esp of the span-list argument once the prologue has run:
; 4 bytes of return address + 16 bytes for the four pushed registers
pspans	equ		4+16

;----------------------------------------------------------------------
; _D_SpriteDrawSpansXXX
;
; Walks a list of span descriptors and draws each span into the view
; buffer in segments of up to 8 pixels.  A pixel is written only if its
; 16-bit depth value passes the compare against the z-buffer and its
; texel is not TRANSPARENT_COLOR; the z-buffer is updated for every
; pixel written.
;
; In:    [esp+4] = pointer to the first span descriptor.  Fields are
;                  read through sspan_t_u, sspan_t_v and sspan_t_count;
;                  consecutive descriptors are sspan_t_size bytes apart.
; Out:   no register result
; Saves: ebp, edi, esi, ebx (pushed on entry, popped before the ret)
; Clobb: eax, ecx, edx, flags
; FPU:   everything pushed on the x87 stack is popped again before
;        returning; up to six stack slots are in use at once.
;
; List walk: a descriptor with count > 0 is drawn, count == 0 is
; skipped, and count < 0 after advancing ends the list.
; NOTE(review): the count of the FIRST descriptor is only tested with
; jle, so a negative count there is skipped rather than treated as the
; end of the list -- confirm callers never pass such a list.
; NOTE(review): looks like a cdecl entry point called from C (leading
; underscore, argument on the stack) -- confirm against the caller.
;----------------------------------------------------------------------

 align 4
 public _D_SpriteDrawSpansXXX
_D_SpriteDrawSpansXXX:
	push	ebp				; preserve caller's stack frame
	push	edi
	push	esi				; preserve register variables
	push	ebx

;
; set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
; and span list pointers, and 1/z step in 0.32 fixed-point
;
; FIXME: any overlap from rearranging?
	fld	ds:dword ptr[_d_sdivzstepu]
	fmul	ds:dword ptr[fp_8]
	mov	edx,ds:dword ptr[_cacheblock]
	fld	ds:dword ptr[_d_tdivzstepu]
	fmul	ds:dword ptr[fp_8]
	mov	ebx,ds:dword ptr[pspans+esp]	; point to the first span descriptor
	fld	ds:dword ptr[_d_zistepu]
	fmul	ds:dword ptr[fp_8]
	mov	ds:dword ptr[pbase],edx	; pbase = cacheblock
	fld	ds:dword ptr[_d_zistepu]
	fmul	ds:dword ptr[fp_64kx64k]
						; zistepu*fp_64kx64k | zistepu*8 |
						;  tdivzstepu*8 | sdivzstepu*8
	fxch	st(3)				; bring sdivzstepu*8 to the top
	fstp	ds:dword ptr[sdivz8stepu]
	fstp	ds:dword ptr[zi8stepu]
	fstp	ds:dword ptr[tdivz8stepu]
	fistp	ds:dword ptr[izistep]		; FP stack is empty again here
	mov	eax,ds:dword ptr[izistep]
	ror	eax,16				; put upper 16 bits in low word
	mov	ecx,ds:dword ptr[sspan_t_count+ebx]	; ecx = pixel count of first span
	mov	ds:dword ptr[izistep],eax

	cmp	ecx,0
	jle	LNextSpan			; nothing to draw in the first span

LSpanLoop:

;
; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
; initial s and t values
;
; on entry here: ebx = span descriptor, ecx = span pixel count (> 0)
;
; FIXME: pipeline FILD?
	fild	ds:dword ptr[sspan_t_v+ebx]
	fild	ds:dword ptr[sspan_t_u+ebx]

	fld	st(1)				; dv | du | dv
	fmul	ds:dword ptr[_d_sdivzstepv]	; dv*d_sdivzstepv | du | dv
	fld	st(1)				; du | dv*d_sdivzstepv | du | dv
	fmul	ds:dword ptr[_d_sdivzstepu]	; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld	st(2)				; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmul	ds:dword ptr[_d_tdivzstepu]	; du*d_tdivzstepu | du*d_sdivzstepu |
						;  dv*d_sdivzstepv | du | dv
	fxch	st(1)				; du*d_sdivzstepu | du*d_tdivzstepu |
						;  dv*d_sdivzstepv | du | dv
	faddp	st(2),st(0)			; du*d_tdivzstepu |
						;  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	st(1)				; du*d_sdivzstepu + dv*d_sdivzstepv |
						;  du*d_tdivzstepu | du | dv
	fld	st(3)				; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
						;  du*d_tdivzstepu | du | dv
	fmul	ds:dword ptr[_d_tdivzstepv]	; dv*d_tdivzstepv |
						;  du*d_sdivzstepu + dv*d_sdivzstepv |
						;  du*d_tdivzstepu | du | dv
	fxch	st(1)				; du*d_sdivzstepu + dv*d_sdivzstepv |
						;  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadd	ds:dword ptr[_d_sdivzorigin]	; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
						;  du*d_sdivzstepu; stays in %st(2) at end
	fxch	st(4)				; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
						;  s/z
	fmul	ds:dword ptr[_d_zistepv]	; dv*d_zistepv | dv*d_tdivzstepv |
						;  du*d_tdivzstepu | du | s/z
	fxch	st(1)				; dv*d_tdivzstepv | dv*d_zistepv |
						;  du*d_tdivzstepu | du | s/z
	faddp	st(2),st(0)			; dv*d_zistepv |
						;  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	st(2)				; du | dv*d_tdivzstepv + du*d_tdivzstepu |
						;  dv*d_zistepv | s/z
	fmul	ds:dword ptr[_d_zistepu]	; du*d_zistepu |
						;  dv*d_tdivzstepv + du*d_tdivzstepu |
						;  dv*d_zistepv | s/z
	fxch	st(1)				; dv*d_tdivzstepv + du*d_tdivzstepu |
						;  du*d_zistepu | dv*d_zistepv | s/z
	fadd	ds:dword ptr[_d_tdivzorigin]	; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
						;  du*d_tdivzstepu; stays in %st(1) at end
	fxch	st(2)				; dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	st(1),st(0)			; dv*d_zistepv + du*d_zistepu | t/z | s/z

	fld	ds:dword ptr[fp_64k]		; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	st(1)				; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadd	ds:dword ptr[_d_ziorigin]	; zi = d_ziorigin + dv*d_zistepv +
						;  du*d_zistepu; stays in %st(0) at end
						; 1/z | fp_64k | t/z | s/z

	fld	st(0)				; FIXME: get rid of stall on FMUL?
	fmul	ds:dword ptr[fp_64kx64k]	; 1/z*fp_64kx64k | 1/z | fp_64k | t/z | s/z
	fxch	st(1)				; 1/z | 1/z*fp_64kx64k | fp_64k | t/z | s/z

;
; calculate and clamp s & t
;
	fdiv	st(2),st(0)			; st(2) = fp_64k / (1/z) = z*64k
	fxch	st(1)				; 1/z*fp_64kx64k | 1/z | z*64k | t/z | s/z

	fistp	ds:dword ptr[izi]		; 0.32 fixed-point 1/z
						; 1/z | z*64k | t/z | s/z
	mov	ebp,ds:dword ptr[izi]

;
; set pz to point to the first z-buffer pixel in the span
;
	ror	ebp,16				; put upper 16 bits in low word
	mov	eax,ds:dword ptr[sspan_t_v+ebx]
	mov	ds:dword ptr[izi],ebp
	mov	ebp,ds:dword ptr[sspan_t_u+ebx]
	imul	ds:dword ptr[_d_zrowbytes]	; edx:eax = v * d_zrowbytes (clobbers edx)
	shl	ebp,1				; a word per pixel
	add	eax,ds:dword ptr[_d_pzbuffer]
	add	eax,ebp
	mov	ds:dword ptr[pz],eax		; pz = d_pzbuffer + v*d_zrowbytes + u*2

;
; point %edi to the first pixel in the span
;
	mov	ebp,ds:dword ptr[_d_viewbuffer]
	mov	eax,ds:dword ptr[sspan_t_v+ebx]
	push	ebx				; preserve spans pointer
	mov	edx,ds:dword ptr[_tadjust]
	mov	esi,ds:dword ptr[_sadjust]
	mov	edi,ds:dword ptr[_d_scantable+eax*4]	; v * screenwidth
	add	edi,ebp
	mov	ebp,ds:dword ptr[sspan_t_u+ebx]
	add	edi,ebp				; pdest = &pdestspan[scans->u];

;
; now start the FDIV for the end of the span
;
	cmp	ecx,8
	ja	LSetupNotLast1			; more than 8 pixels: full segment first

	dec	ecx				; ecx = spancount-1 from here on
	jz	LCleanup1			; if only one pixel, no need to start an FDIV
	mov	ds:dword ptr[spancountminus1],ecx

; finish up the s and t calcs
	fxch	st(1)				; z*64k | 1/z | t/z | s/z

	fld	st(0)				; z*64k | z*64k | 1/z | t/z | s/z
	fmul	st(0),st(4)			; s | z*64k | 1/z | t/z | s/z
	fxch	st(1)				; z*64k | s | 1/z | t/z | s/z
	fmul	st(0),st(3)			; t | s | 1/z | t/z | s/z
	fxch	st(1)				; s | t | 1/z | t/z | s/z
	fistp	ds:dword ptr[s]			; 1/z | t | t/z | s/z
	fistp	ds:dword ptr[t]			; 1/z | t/z | s/z

	fild	ds:dword ptr[spancountminus1]

	fld	ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | spancountminus1
	fld	ds:dword ptr[_d_zistepu]	; _d_zistepu | _d_tdivzstepu | spancountminus1
	fmul	st(0),st(2)			; _d_zistepu*scm1 | _d_tdivzstepu | scm1
	fxch	st(1)				; _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	st(0),st(2)			; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	st(2)				; scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
	fmul	ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
						;  _d_tdivzstepu*scm1
	fxch	st(1)				; _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
						;  _d_tdivzstepu*scm1
	faddp	st(3),st(0)			; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	st(1)				; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	st(3),st(0)			; _d_sdivzstepu*scm1
	faddp	st(3),st(0)			; 1/z | t/z | s/z, all advanced to span end

	fld	ds:dword ptr[fp_64k]
	fdiv	st(0),st(1)			; this is what we've gone to all this trouble to
						;  overlap
	jmp	LFDIVInFlight1

LCleanup1:
; single-pixel span: no end-of-span FDIV is started, so the FP stack
; holds only 1/z | t/z | s/z after this block
; finish up the s and t calcs
	fxch	st(1)				; z*64k | 1/z | t/z | s/z

	fld	st(0)				; z*64k | z*64k | 1/z | t/z | s/z
	fmul	st(0),st(4)			; s | z*64k | 1/z | t/z | s/z
	fxch	st(1)				; z*64k | s | 1/z | t/z | s/z
	fmul	st(0),st(3)			; t | s | 1/z | t/z | s/z
	fxch	st(1)				; s | t | 1/z | t/z | s/z
	fistp	ds:dword ptr[s]			; 1/z | t | t/z | s/z
	fistp	ds:dword ptr[t]			; 1/z | t/z | s/z
	jmp	LFDIVInFlight1

 align 4
LSetupNotLast1:
; finish up the s and t calcs
	fxch	st(1)				; z*64k | 1/z | t/z | s/z

	fld	st(0)				; z*64k | z*64k | 1/z | t/z | s/z
	fmul	st(0),st(4)			; s | z*64k | 1/z | t/z | s/z
	fxch	st(1)				; z*64k | s | 1/z | t/z | s/z
	fmul	st(0),st(3)			; t | s | 1/z | t/z | s/z
	fxch	st(1)				; s | t | 1/z | t/z | s/z
	fistp	ds:dword ptr[s]			; 1/z | t | t/z | s/z
	fistp	ds:dword ptr[t]			; 1/z | t/z | s/z

; advance 1/z, s/z and t/z by one 8-pixel segment
	fadd	ds:dword ptr[zi8stepu]
	fxch	st(2)
	fadd	ds:dword ptr[sdivz8stepu]
	fxch	st(2)
	fld	ds:dword ptr[tdivz8stepu]
	faddp	st(2),st(0)
	fld	ds:dword ptr[fp_64k]
	fdiv	st(0),st(1)			; z = 1/1/z
						; this is what we've gone to all this trouble to
						;  overlap
LFDIVInFlight1:

; esi = sadjust + s, edx = tadjust + t; clamp each to 0 .. bbextent
	add	esi,ds:dword ptr[s]
	add	edx,ds:dword ptr[t]
	mov	ebx,ds:dword ptr[_bbextents]
	mov	ebp,ds:dword ptr[_bbextentt]
	cmp	esi,ebx
	ja	LClampHighOrLow0		; unsigned: catches both too-high and negative
LClampReentry0:
	mov	ds:dword ptr[s],esi
	mov	ebx,ds:dword ptr[pbase]
	shl	esi,16
	cmp	edx,ebp
	mov	ds:dword ptr[sfracf],esi	; mov leaves the cmp flags intact for the ja
	ja	LClampHighOrLow1
LClampReentry1:
	mov	ds:dword ptr[t],edx
	mov	esi,ds:dword ptr[s]		; sfrac = scans->sfrac;
	shl	edx,16
	mov	eax,ds:dword ptr[t]		; tfrac = scans->tfrac;
	sar	esi,16
	mov	ds:dword ptr[tfracf],edx

;
; calculate the texture starting address
;
	sar	eax,16
	add	esi,ebx
	imul	eax,ds:dword ptr[_cachewidth]	; (tfrac >> 16) * cachewidth
	add	esi,eax				; psource = pbase + (sfrac >> 16) +
						;           ((tfrac >> 16) * cachewidth);

;
; determine whether last span or not
;
	cmp	ecx,8
	jna	LLastSegment

;
; not the last segment; do full 8-wide segment
;
LNotLastSegment:

;
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
; get there
;

; pick up after the FDIV that was left in flight previously

	fld	st(0)				; duplicate it
	fmul	st(0),st(4)			; s = s/z * z
	fxch	st(1)
	fmul	st(0),st(3)			; t = t/z * z
	fxch	st(1)
	fistp	ds:dword ptr[snext]
	fistp	ds:dword ptr[tnext]
	mov	eax,ds:dword ptr[snext]
	mov	edx,ds:dword ptr[tnext]

	sub	ecx,8				; count off this segments' pixels
	mov	ebp,ds:dword ptr[_sadjust]
	push	ecx				; remember count of remaining pixels
	mov	ecx,ds:dword ptr[_tadjust]

	add	ebp,eax				; ebp = sadjust + snext
	add	ecx,edx				; ecx = tadjust + tnext

	mov	eax,ds:dword ptr[_bbextents]
	mov	edx,ds:dword ptr[_bbextentt]

	cmp	ebp,2048
	jl	LClampLow2
	cmp	ebp,eax
	ja	LClampHigh2
LClampReentry2:

	cmp	ecx,2048
	jl	LClampLow3
	cmp	ecx,edx
	ja	LClampHigh3
LClampReentry3:

	mov	ds:dword ptr[snext],ebp
	mov	ds:dword ptr[tnext],ecx

	sub	ebp,ds:dword ptr[s]		; ebp = s delta across the 8-pixel segment
	sub	ecx,ds:dword ptr[t]		; ecx = t delta across the 8-pixel segment

;
; set up advancetable
;
; the deltas cover 8 pixels, so the shifts below fold in a divide by 8:
; >>19 is (delta/8)>>16 and <<13 is (delta/8)<<16
;
	mov	eax,ecx
	mov	edx,ebp
	sar	edx,19				; sstep >>= 16;
	mov	ebx,ds:dword ptr[_cachewidth]
	sar	eax,19				; tstep >>= 16;
	jz	LIsZero				; skip the multiply when the whole part is 0
	imul	eax,ebx				; (tstep >> 16) * cachewidth;
LIsZero:
	add	eax,edx				; add in sstep
						; (tstep >> 16) * cachewidth + (sstep >> 16);
	mov	edx,ds:dword ptr[tfracf]
	mov	ds:dword ptr[advancetable+4],eax	; advance base in t
	add	eax,ebx				; ((tstep >> 16) + 1) * cachewidth +
						;  (sstep >> 16);
	shl	ebp,13				; left-justify sstep fractional part
	mov	ds:dword ptr[sstep],ebp
	mov	ebx,ds:dword ptr[sfracf]
	shl	ecx,13				; left-justify tstep fractional part
	mov	ds:dword ptr[advancetable],eax	; advance extra in t
	mov	ds:dword ptr[tstep],ecx

	mov	ecx,ds:dword ptr[pz]
	mov	ebp,ds:dword ptr[izi]

;
; register roles for the per-pixel code below:
;   esi = source texel pointer        edi = destination pixel pointer
;   ecx = z-buffer pointer            bp  = depth value compared and stored
;   ebx = s fraction accumulator      edx = t fraction accumulator
;   eax = scratch (texel in al, then 0/-1 carry mask)
;
; ---- pixel 0 ----
	cmp	bp,ds:word ptr[ecx]		; depth test (signed 16-bit compare)
	jl	Lp1				; fails: leave pixel and z-buffer alone
	mov	al,ds:byte ptr[esi]		; get first source texel
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp1				; transparent texel: draw nothing
	mov	ds:word ptr[ecx],bp		; update z-buffer
	mov	ds:byte ptr[edi],al		; store first dest pixel
Lp1:
; izi and izistep are kept rotated by 16 (see the ror instructions that
; set them up), so a carry out of bit 31 belongs in bit 0: adc ebp,0
; wraps it around
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]		; advance tfrac fractional part by tstep frac

	sbb	eax,eax				; turn tstep carry into -1 (0 if none)
	add	ebx,ds:dword ptr[sstep]		; advance sfrac fractional part by sstep frac
; eax == -1 selects advancetable[0] (extra row in t), eax == 0 selects
; advancetable[1]; the carry from the sstep add contributes one more texel
	adc	esi,ds:dword ptr[advancetable+4+eax*4]	; point to next source texel

; ---- pixel 1 ----
	cmp	bp,ds:word ptr[2+ecx]
	jl	Lp2
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp2
	mov	ds:word ptr[2+ecx],bp
	mov	ds:byte ptr[1+edi],al
Lp2:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

; ---- pixel 2 ----
	cmp	bp,ds:word ptr[4+ecx]
	jl	Lp3
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp3
	mov	ds:word ptr[4+ecx],bp
	mov	ds:byte ptr[2+edi],al
Lp3:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

; ---- pixel 3 ----
	cmp	bp,ds:word ptr[6+ecx]
	jl	Lp4
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp4
	mov	ds:word ptr[6+ecx],bp
	mov	ds:byte ptr[3+edi],al
Lp4:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

; ---- pixel 4 ----
	cmp	bp,ds:word ptr[8+ecx]
	jl	Lp5
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp5
	mov	ds:word ptr[8+ecx],bp
	mov	ds:byte ptr[4+edi],al
Lp5:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

;
; start FDIV for end of next segment in flight, so it can overlap
;
	pop	eax				; eax = pixels remaining after this segment
	cmp	eax,8				; more than one segment after this?
	ja	LSetupNotLast2			; yes

	dec	eax				; eax = remaining-1; pushed back below
	jz	LFDIVInFlight2			; if only one pixel, no need to start an FDIV
	mov	ds:dword ptr[spancountminus1],eax
	fild	ds:dword ptr[spancountminus1]

	fld	ds:dword ptr[_d_zistepu]	; _d_zistepu | spancountminus1
	fmul	st(0),st(1)			; _d_zistepu*scm1 | scm1
	fld	ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	st(0),st(2)			; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	st(1)				; _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
	faddp	st(3),st(0)			; _d_tdivzstepu*scm1 | scm1
	fxch	st(1)				; scm1 | _d_tdivzstepu*scm1
	fmul	ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	st(1)				; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	st(3),st(0)			; _d_sdivzstepu*scm1
	fld	ds:dword ptr[fp_64k]		; 64k | _d_sdivzstepu*scm1
	fxch	st(1)				; _d_sdivzstepu*scm1 | 64k
	faddp	st(4),st(0)			; 64k

	fdiv	st(0),st(1)			; this is what we've gone to all this trouble to
						;  overlap
	jmp	LFDIVInFlight2

 align 4
LSetupNotLast2:
; advance 1/z, s/z and t/z by one 8-pixel segment
	fadd	ds:dword ptr[zi8stepu]
	fxch	st(2)
	fadd	ds:dword ptr[sdivz8stepu]
	fxch	st(2)
	fld	ds:dword ptr[tdivz8stepu]
	faddp	st(2),st(0)
	fld	ds:dword ptr[fp_64k]
	fdiv	st(0),st(1)			; z = 1/1/z
						; this is what we've gone to all this trouble to
						;  overlap
LFDIVInFlight2:
	push	eax				; save the count; eax is scratch again below

; ---- pixel 5 ----
	cmp	bp,ds:word ptr[10+ecx]
	jl	Lp6
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp6
	mov	ds:word ptr[10+ecx],bp
	mov	ds:byte ptr[5+edi],al
Lp6:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

; ---- pixel 6 ----
	cmp	bp,ds:word ptr[12+ecx]
	jl	Lp7
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp7
	mov	ds:word ptr[12+ecx],bp
	mov	ds:byte ptr[6+edi],al
Lp7:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

; ---- pixel 7 ----
	cmp	bp,ds:word ptr[14+ecx]
	jl	Lp8
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp8
	mov	ds:word ptr[14+ecx],bp
	mov	ds:byte ptr[7+edi],al
Lp8:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

; step the destination and z-buffer pointers past this segment and make
; the end-of-segment s/t the start values of the next segment
	add	edi,8
	add	ecx,16
	mov	ds:dword ptr[tfracf],edx
	mov	edx,ds:dword ptr[snext]
	mov	ds:dword ptr[sfracf],ebx
	mov	ebx,ds:dword ptr[tnext]
	mov	ds:dword ptr[s],edx
	mov	ds:dword ptr[t],ebx

	mov	ds:dword ptr[pz],ecx
	mov	ds:dword ptr[izi],ebp

	pop	ecx				; retrieve count

;
; determine whether last span or not
;
	cmp	ecx,8				; are there multiple segments remaining?
	ja	LNotLastSegment			; yes

;
; last segment of scan
;
LLastSegment:

;
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
; get there. The number of pixels left is variable, and we want to land on the
; last pixel, not step one past it, so we can't run into arithmetic problems
;
; ecx = (pixels in this segment) - 1 here
;
	test	ecx,ecx
	jz	LNoSteps			; just draw the last pixel and we're done

; pick up after the FDIV that was left in flight previously


	fld	st(0)				; duplicate it
	fmul	st(0),st(4)			; s = s/z * z
	fxch	st(1)
	fmul	st(0),st(3)			; t = t/z * z
	fxch	st(1)
	fistp	ds:dword ptr[snext]
	fistp	ds:dword ptr[tnext]

	mov	ebx,ds:dword ptr[_tadjust]
	mov	eax,ds:dword ptr[_sadjust]

	add	eax,ds:dword ptr[snext]		; eax = sadjust + snext
	add	ebx,ds:dword ptr[tnext]		; ebx = tadjust + tnext

	mov	ebp,ds:dword ptr[_bbextents]
	mov	edx,ds:dword ptr[_bbextentt]

	cmp	eax,2048
	jl	LClampLow4
	cmp	eax,ebp
	ja	LClampHigh4
LClampReentry4:
	mov	ds:dword ptr[snext],eax

	cmp	ebx,2048
	jl	LClampLow5
	cmp	ebx,edx
	ja	LClampHigh5
LClampReentry5:

	cmp	ecx,1				; don't bother
	je	LOnlyOneStep			; if two pixels in segment, there's only one step,
						;  of the segment length
	sub	eax,ds:dword ptr[s]
	sub	ebx,ds:dword ptr[t]

	add	eax,eax				; convert to 15.17 format so multiply by 1.31
	add	ebx,ebx				;  reciprocal yields 16.48
; one-operand imul: the product lands in edx:eax and the step is taken
; from edx.  ecx >= 2 on this path, so the table index is ecx-2.
	imul	ds:dword ptr[reciprocal_table-8+ecx*4]	; sstep = (snext - s) / (spancount-1)
	mov	ebp,edx				; ebp = sstep

	mov	eax,ebx
	imul	ds:dword ptr[reciprocal_table-8+ecx*4]	; tstep = (tnext - t) / (spancount-1)
						; edx = tstep

LSetEntryvec:
;
; set up advancetable
;
; in: ebp = sstep, edx = tstep, ecx = index into spr8entryvec_table
;
	mov	ebx,ds:dword ptr[spr8entryvec_table+ecx*4]
	mov	eax,edx
	push	ebx				; entry point into code for RET later
	mov	ecx,ebp
	sar	ecx,16				; sstep >>= 16;
	mov	ebx,ds:dword ptr[_cachewidth]
	sar	edx,16				; tstep >>= 16;
	jz	LIsZeroLast			; skip the multiply when the whole part is 0
	imul	edx,ebx				; (tstep >> 16) * cachewidth;
LIsZeroLast:
	add	edx,ecx				; add in sstep
						; (tstep >> 16) * cachewidth + (sstep >> 16);
	mov	ecx,ds:dword ptr[tfracf]
	mov	ds:dword ptr[advancetable+4],edx	; advance base in t
	add	edx,ebx				; ((tstep >> 16) + 1) * cachewidth +
						;  (sstep >> 16);
	shl	ebp,16				; left-justify sstep fractional part
	mov	ebx,ds:dword ptr[sfracf]
	shl	eax,16				; left-justify tstep fractional part
	mov	ds:dword ptr[advancetable],edx	; advance extra in t

	mov	ds:dword ptr[tstep],eax
	mov	ds:dword ptr[sstep],ebp
	mov	edx,ecx				; edx = tfracf, as the pixel code expects

	mov	ecx,ds:dword ptr[pz]
	mov	ebp,ds:dword ptr[izi]

	ret					; jump to the number-of-pixels handler
						; (pops the address pushed above, not a return)

;----------------------------------------

LNoSteps:
; single pixel left: bias the pointers so that LEndSpan's fixed offsets
; (7+edi, 14+ecx) address it
	mov	ecx,ds:dword ptr[pz]
	sub	edi,7				; adjust for hardwired offset
	sub	ecx,14
	jmp	LEndSpan


LOnlyOneStep:
; two pixels left: the single step is the whole delta, no divide needed
	sub	eax,ds:dword ptr[s]
	sub	ebx,ds:dword ptr[t]
	mov	ebp,eax				; ebp = sstep
	mov	edx,ebx				; edx = tstep
	jmp	LSetEntryvec

;----------------------------------------
;
; Entry points reached through the RET in LSetEntryvec.  Each one biases
; edi and ecx so that the hardwired offsets in the shared pixel code
; below finish exactly at 7+edi / 14+ecx, then jumps into the chain at
; the matching LLEntryN_8 label.
;

 public Spr8Entry2_8
Spr8Entry2_8:
	sub	edi,6				; adjust for hardwired offsets
	sub	ecx,12
	mov	al,ds:byte ptr[esi]		; al is loaded again at LLEntry2_8 if the
						;  depth test passes
	jmp	LLEntry2_8

;----------------------------------------

 public Spr8Entry3_8
Spr8Entry3_8:
	sub	edi,5				; adjust for hardwired offsets
	sub	ecx,10
	jmp	LLEntry3_8

;----------------------------------------

 public Spr8Entry4_8
Spr8Entry4_8:
	sub	edi,4				; adjust for hardwired offsets
	sub	ecx,8
	jmp	LLEntry4_8

;----------------------------------------

 public Spr8Entry5_8
Spr8Entry5_8:
	sub	edi,3				; adjust for hardwired offsets
	sub	ecx,6
	jmp	LLEntry5_8

;----------------------------------------

 public Spr8Entry6_8
Spr8Entry6_8:
	sub	edi,2				; adjust for hardwired offsets
	sub	ecx,4
	jmp	LLEntry6_8

;----------------------------------------

 public Spr8Entry7_8
Spr8Entry7_8:
	dec	edi				; adjust for hardwired offsets
	sub	ecx,2
	jmp	LLEntry7_8

;----------------------------------------

 public Spr8Entry8_8
Spr8Entry8_8:
; each stage below is the same depth-test / transparency-test / store /
; step sequence used in the full-segment code, at successive offsets
	cmp	bp,ds:word ptr[ecx]
	jl	Lp9
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp9
	mov	ds:word ptr[ecx],bp
	mov	ds:byte ptr[edi],al
Lp9:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]
LLEntry7_8:
	cmp	bp,ds:word ptr[2+ecx]
	jl	Lp10
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp10
	mov	ds:word ptr[2+ecx],bp
	mov	ds:byte ptr[1+edi],al
Lp10:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]
LLEntry6_8:
	cmp	bp,ds:word ptr[4+ecx]
	jl	Lp11
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp11
	mov	ds:word ptr[4+ecx],bp
	mov	ds:byte ptr[2+edi],al
Lp11:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]
LLEntry5_8:
	cmp	bp,ds:word ptr[6+ecx]
	jl	Lp12
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp12
	mov	ds:word ptr[6+ecx],bp
	mov	ds:byte ptr[3+edi],al
Lp12:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]
LLEntry4_8:
	cmp	bp,ds:word ptr[8+ecx]
	jl	Lp13
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp13
	mov	ds:word ptr[8+ecx],bp
	mov	ds:byte ptr[4+edi],al
Lp13:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]
LLEntry3_8:
	cmp	bp,ds:word ptr[10+ecx]
	jl	Lp14
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp14
	mov	ds:word ptr[10+ecx],bp
	mov	ds:byte ptr[5+edi],al
Lp14:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]
LLEntry2_8:
	cmp	bp,ds:word ptr[12+ecx]
	jl	Lp15
	mov	al,ds:byte ptr[esi]
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp15
	mov	ds:word ptr[12+ecx],bp
	mov	ds:byte ptr[6+edi],al
Lp15:
	add	ebp,ds:dword ptr[izistep]
	adc	ebp,0
	add	edx,ds:dword ptr[tstep]
	sbb	eax,eax
	add	ebx,ds:dword ptr[sstep]
	adc	esi,ds:dword ptr[advancetable+4+eax*4]

LEndSpan:
; final pixel of the span; nothing is stepped afterwards
	cmp	bp,ds:word ptr[14+ecx]
	jl	Lp16
	mov	al,ds:byte ptr[esi]		; get the source texel for the final pixel
	cmp	al,offset TRANSPARENT_COLOR
	jz	Lp16
	mov	ds:word ptr[14+ecx],bp
	mov	ds:byte ptr[7+edi],al
Lp16:

;
; clear s/z, t/z, 1/z from FP stack
;
	fstp	st(0)
	fstp	st(0)
	fstp	st(0)

	pop	ebx				; restore spans pointer
LNextSpan:
	add	ebx,offset sspan_t_size		; point to next span
	mov	ecx,ds:dword ptr[sspan_t_count+ebx]
	cmp	ecx,0				; any more spans?
	jg	LSpanLoop			; yes
	jz	LNextSpan			; yes, but this one's empty
						; negative count: end of list, fall through

	pop	ebx				; restore register variables
	pop	esi
	pop	edi
	pop	ebp				; restore the caller's stack frame
	ret

_TEXT ENDS
endif	; id386
 END