Quake-2

Quake 2 GPL Source Release
Log | Files | Refs

r_spr8.asm (22105B)


      1  .386P
      2  .model FLAT
      3 ;
      4 ; r_spr8.asm (MASM-syntax version of d_spr8.s)
      5 ; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
      6 ;
      7 
      8 include qasm.inc
      9 include d_if.inc
     10 
     11 if id386
     12 
     13 ;----------------------------------------------------------------------
     14 ; 8-bpp horizontal span drawing code for polygons, with transparency.
     15 ;----------------------------------------------------------------------
     16 
     17 _TEXT SEGMENT	
     18 
     19 ; Clamp stubs, kept out of line because they are rarely taken.
     20 ; Each stub loads the limit into the working register and jumps back to LClampReentryN.
     21 LClampHighOrLow0:	; s of the span's first texel is out of range
     22  jg LClampHigh0	; signed greater (flags from the caller's cmp): too high
     23  xor esi,esi	; otherwise negative: clamp to 0
     24  jmp LClampReentry0	
     25 LClampHigh0:	
     26  mov esi,ds:dword ptr[_bbextents]	; clamp to the s extent
     27  jmp LClampReentry0	
     28 
     29 LClampHighOrLow1:	; same for t of the span's first texel
     30  jg LClampHigh1	
     31  xor edx,edx	
     32  jmp LClampReentry1	
     33 LClampHigh1:	
     34  mov edx,ds:dword ptr[_bbextentt]	; clamp to the t extent
     35  jmp LClampReentry1	
     36 
     37 LClampHigh2:	; snext of a full segment, upper limit
     38  mov ebp,ds:dword ptr[_bbextents]	
     39  jmp LClampReentry2	
     40 LClampLow2:	; snext of a full segment, lower limit
     41  mov ebp,2048	
     42  jmp LClampReentry2	
     43 
     44 LClampHigh3:	; tnext of a full segment, upper limit
     45  mov ecx,ds:dword ptr[_bbextentt]	
     46  jmp LClampReentry3	
     47 LClampLow3:	; tnext of a full segment, lower limit
     48  mov ecx,2048	
     49  jmp LClampReentry3	
     50 
     51 LClampHigh4:	; snext of the last segment, upper limit
     52  mov eax,ds:dword ptr[_bbextents]	
     53  jmp LClampReentry4	
     54 LClampLow4:	; snext of the last segment, lower limit
     55  mov eax,2048	
     56  jmp LClampReentry4	
     57 
     58 LClampHigh5:	; tnext of the last segment, upper limit
     59  mov ebx,ds:dword ptr[_bbextentt]	
     60  jmp LClampReentry5	
     61 LClampLow5:	; tnext of the last segment, lower limit
     62  mov ebx,2048	
     63  jmp LClampReentry5	
     64 
     65 
     66 pspans	equ		4+16	; esp offset of the span-list argument once the 4 pushes below are done: 4 (return address) + 16 (saved registers)
     67 ; _D_SpriteDrawSpansXXX: draws a list of spans into _d_viewbuffer with a z-buffer test and TRANSPARENT_COLOR keying
     68  align 4	
     69  public _D_SpriteDrawSpansXXX
     70 _D_SpriteDrawSpansXXX:	; in: one stack argument (first span descriptor); ebx/esi/edi/ebp are preserved, eax/ecx/edx are not
     71  push ebp	; preserve caller's stack frame
     72  push edi	
     73  push esi	; preserve register variables
     74  push ebx	
     75 
     76 ;
     77 ; set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
     78 ; and span list pointers, and 1/z step in 0.32 fixed-point
     79 ;
     80 ; FIXME: any overlap from rearranging?
     81  fld ds:dword ptr[_d_sdivzstepu]	; d_sdivzstepu
     82  fmul ds:dword ptr[fp_8]	; d_sdivzstepu*8
     83  mov edx,ds:dword ptr[_cacheblock]	
     84  fld ds:dword ptr[_d_tdivzstepu]	; d_tdivzstepu | d_sdivzstepu*8
     85  fmul ds:dword ptr[fp_8]	; d_tdivzstepu*8 | d_sdivzstepu*8
     86  mov ebx,ds:dword ptr[pspans+esp]	; point to the first span descriptor
     87  fld ds:dword ptr[_d_zistepu]	; d_zistepu | d_tdivzstepu*8 | d_sdivzstepu*8
     88  fmul ds:dword ptr[fp_8]	; d_zistepu*8 | d_tdivzstepu*8 | d_sdivzstepu*8
     89  mov ds:dword ptr[pbase],edx	; pbase = cacheblock
     90  fld ds:dword ptr[_d_zistepu]	; d_zistepu | d_zistepu*8 | d_tdivzstepu*8 | d_sdivzstepu*8
     91  fmul ds:dword ptr[fp_64kx64k]	; d_zistepu*fp_64kx64k | d_zistepu*8 | d_tdivzstepu*8 | d_sdivzstepu*8
     92  fxch st(3)	; d_sdivzstepu*8 | d_zistepu*8 | d_tdivzstepu*8 | d_zistepu*fp_64kx64k
     93  fstp ds:dword ptr[sdivz8stepu]	
     94  fstp ds:dword ptr[zi8stepu]	
     95  fstp ds:dword ptr[tdivz8stepu]	
     96  fistp ds:dword ptr[izistep]	; integer 1/z step per pixel; all four values are now popped
     97  mov eax,ds:dword ptr[izistep]	
     98  ror eax,16	; put upper 16 bits in low word
     99  mov ecx,ds:dword ptr[sspan_t_count+ebx]	; ecx = pixel count of the first span
    100  mov ds:dword ptr[izistep],eax	; stored word-swapped; the pixel code steps it with add / adc ebp,0
    101 
    102  cmp ecx,0	
    103  jle LNextSpan	; count <= 0: nothing to draw, go look at the next span
    104 
    105 LSpanLoop:	; loop entry: ebx -> current span, ecx = its pixel count (> 0)
    106 
    107 ;
    108 ; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
    109 ; initial s and t values
    110 ;
    111 ; FIXME: pipeline FILD?
    112  fild ds:dword ptr[sspan_t_v+ebx]	; dv
    113  fild ds:dword ptr[sspan_t_u+ebx]	; du | dv
    114 
    115  fld st(1)	; dv | du | dv
    116  fmul ds:dword ptr[_d_sdivzstepv]	; dv*d_sdivzstepv | du | dv
    117  fld st(1)	; du | dv*d_sdivzstepv | du | dv
    118  fmul ds:dword ptr[_d_sdivzstepu]	; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
    119  fld st(2)	; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
    120  fmul ds:dword ptr[_d_tdivzstepu]	; du*d_tdivzstepu | du*d_sdivzstepu |
    121 ;  dv*d_sdivzstepv | du | dv
    122  fxch st(1)	; du*d_sdivzstepu | du*d_tdivzstepu |
    123 ;  dv*d_sdivzstepv | du | dv
    124  faddp st(2),st(0)	; du*d_tdivzstepu |
    125 ;  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
    126  fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
    127 ;  du*d_tdivzstepu | du | dv
    128  fld st(3)	; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
    129 ;  du*d_tdivzstepu | du | dv
    130  fmul ds:dword ptr[_d_tdivzstepv]	; dv*d_tdivzstepv |
    131 ;  du*d_sdivzstepu + dv*d_sdivzstepv |
    132 ;  du*d_tdivzstepu | du | dv
    133  fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
    134 ;  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
    135  fadd ds:dword ptr[_d_sdivzorigin]	; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
    136 ;  du*d_sdivzstepu; stays in %st(2) at end
    137  fxch st(4)	; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
    138 ;  s/z
    139  fmul ds:dword ptr[_d_zistepv]	; dv*d_zistepv | dv*d_tdivzstepv |
    140 ;  du*d_tdivzstepu | du | s/z
    141  fxch st(1)	; dv*d_tdivzstepv |  dv*d_zistepv |
    142 ;  du*d_tdivzstepu | du | s/z
    143  faddp st(2),st(0)	; dv*d_zistepv |
    144 ;  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
    145  fxch st(2)	; du | dv*d_tdivzstepv + du*d_tdivzstepu |
    146 ;  dv*d_zistepv | s/z
    147  fmul ds:dword ptr[_d_zistepu]	; du*d_zistepu |
    148 ;  dv*d_tdivzstepv + du*d_tdivzstepu |
    149 ;  dv*d_zistepv | s/z
    150  fxch st(1)	; dv*d_tdivzstepv + du*d_tdivzstepu |
    151 ;  du*d_zistepu | dv*d_zistepv | s/z
    152  fadd ds:dword ptr[_d_tdivzorigin]	; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
    153 ;  du*d_tdivzstepu; stays in %st(1) at end
    154  fxch st(2)	; dv*d_zistepv | du*d_zistepu | t/z | s/z
    155  faddp st(1),st(0)	; dv*d_zistepv + du*d_zistepu | t/z | s/z
    156 
    157  fld ds:dword ptr[fp_64k]	; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
    158  fxch st(1)	; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
    159  fadd ds:dword ptr[_d_ziorigin]	; zi = d_ziorigin + dv*d_zistepv +
    160 ;  du*d_zistepu; stays in %st(0) at end
    161 ; 1/z | fp_64k | t/z | s/z
    162 
    163  fld st(0)	; FIXME: get rid of stall on FMUL?
    164  fmul ds:dword ptr[fp_64kx64k]	; 1/z*fp_64kx64k | 1/z | fp_64k | t/z | s/z
    165  fxch st(1)	; 1/z | 1/z*fp_64kx64k | fp_64k | t/z | s/z
    166 
    167 ;
    168 ; calculate and clamp s & t
    169 ;
    170  fdiv st(2),st(0)	; st(2) = fp_64k / (1/z) = z*64k; once izi is popped below: 1/z | z*64k | t/z | s/z
    171  fxch st(1)	; 1/z*fp_64kx64k | 1/z | z*64k | t/z | s/z
    172 
    173  fistp ds:dword ptr[izi]	; 0.32 fixed-point 1/z
    174  mov ebp,ds:dword ptr[izi]	
    175 
    176 ;
    177 ; set pz to point to the first z-buffer pixel in the span
    178 ;
    179  ror ebp,16	; put upper 16 bits in low word
    180  mov eax,ds:dword ptr[sspan_t_v+ebx]	; eax = v
    181  mov ds:dword ptr[izi],ebp	; izi is kept word-swapped, like izistep
    182  mov ebp,ds:dword ptr[sspan_t_u+ebx]	; ebp = u
    183  imul ds:dword ptr[_d_zrowbytes]	; edx:eax = v * d_zrowbytes (one-operand form, overwrites edx)
    184  shl ebp,1	; a word per pixel
    185  add eax,ds:dword ptr[_d_pzbuffer]	
    186  add eax,ebp	
    187  mov ds:dword ptr[pz],eax	; pz = d_pzbuffer + v*d_zrowbytes + u*2
    188 
    189 ;
    190 ; point %edi to the first pixel in the span
    191 ;
    192  mov ebp,ds:dword ptr[_d_viewbuffer]	
    193  mov eax,ds:dword ptr[sspan_t_v+ebx]	
    194  push ebx	; preserve spans pointer
    195  mov edx,ds:dword ptr[_tadjust]	; edx = tadjust; t is added to it at LFDIVInFlight1
    196  mov esi,ds:dword ptr[_sadjust]	; esi = sadjust; s is added to it at LFDIVInFlight1
    197  mov edi,ds:dword ptr[_d_scantable+eax*4]	; v * screenwidth
    198  add edi,ebp	
    199  mov ebp,ds:dword ptr[sspan_t_u+ebx]	
    200  add edi,ebp	; pdest = &pdestspan[scans->u];
    201 
    202 ;
    203 ; now start the FDIV for the end of the span
    204 ;
    205  cmp ecx,8	
    206  ja LSetupNotLast1	; more than 8 pixels: the first segment is a full 8-pixel one
    207 
    208  dec ecx	; 8 pixels or fewer: ecx = spancount-1 from here on
    209  jz LCleanup1	; if only one pixel, no need to start an FDIV
    210  mov ds:dword ptr[spancountminus1],ecx	
    211 
    212 ; finish up the s and t calcs
    213  fxch st(1)	; z*64k | 1/z | t/z | s/z
    214 
    215  fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
    216  fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
    217  fxch st(1)	; z*64k | s | 1/z | t/z | s/z
    218  fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
    219  fxch st(1)	; s | t | 1/z | t/z | s/z
    220  fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
    221  fistp ds:dword ptr[t]	; 1/z | t/z | s/z
    222 
    223  fild ds:dword ptr[spancountminus1]	; scm1 | 1/z | t/z | s/z (the comments below omit the last three)
    224 
    225  fld ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | spancountminus1
    226  fld ds:dword ptr[_d_zistepu]	; _d_zistepu | _d_tdivzstepu | spancountminus1
    227  fmul st(0),st(2)	; _d_zistepu*scm1 | _d_tdivzstepu | scm1
    228  fxch st(1)	; _d_tdivzstepu | _d_zistepu*scm1 | scm1
    229  fmul st(0),st(2)	; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
    230  fxch st(2)	; scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
    231  fmul ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
    232 ;  _d_tdivzstepu*scm1
    233  fxch st(1)	; _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
    234 ;  _d_tdivzstepu*scm1
    235  faddp st(3),st(0)	; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
    236  fxch st(1)	; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
    237  faddp st(3),st(0)	; _d_sdivzstepu*scm1
    238  faddp st(3),st(0)	; 1/z | t/z | s/z, each advanced by scm1 steps (to the span's last pixel)
    239 
    240  fld ds:dword ptr[fp_64k]	; fp_64k | 1/z | t/z | s/z
    241  fdiv st(0),st(1)	; this is what we've gone to all this trouble to
    242 ;  overlap
    243  jmp LFDIVInFlight1	
    244 
    245 LCleanup1:	; one-pixel span: compute s and t only, no FDIV is started
    246 ; finish up the s and t calcs
    247  fxch st(1)	; z*64k | 1/z | t/z | s/z
    248 
    249  fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
    250  fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
    251  fxch st(1)	; z*64k | s | 1/z | t/z | s/z
    252  fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
    253  fxch st(1)	; s | t | 1/z | t/z | s/z
    254  fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
    255  fistp ds:dword ptr[t]	; 1/z | t/z | s/z
    256  jmp LFDIVInFlight1	
    257 
    258  align 4	
    259 LSetupNotLast1:	; more than 8 pixels: advance s/z, t/z, 1/z by one 8-pixel step
    260 ; finish up the s and t calcs
    261  fxch st(1)	; z*64k | 1/z | t/z | s/z
    262 
    263  fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
    264  fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
    265  fxch st(1)	; z*64k | s | 1/z | t/z | s/z
    266  fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
    267  fxch st(1)	; s | t | 1/z | t/z | s/z
    268  fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
    269  fistp ds:dword ptr[t]	; 1/z | t/z | s/z
    270 
    271  fadd ds:dword ptr[zi8stepu]	; 1/z += zi8stepu
    272  fxch st(2)	; s/z | t/z | 1/z
    273  fadd ds:dword ptr[sdivz8stepu]	; s/z += sdivz8stepu
    274  fxch st(2)	; 1/z | t/z | s/z
    275  fld ds:dword ptr[tdivz8stepu]	; tdivz8stepu | 1/z | t/z | s/z
    276  faddp st(2),st(0)	; t/z += tdivz8stepu; 1/z | t/z | s/z
    277  fld ds:dword ptr[fp_64k]	; fp_64k | 1/z | t/z | s/z
    278  fdiv st(0),st(1)	; z = 1/1/z
    279 ; this is what we've gone to all this trouble to
    280 ;  overlap
    281 LFDIVInFlight1:	; integer work from here runs while the FDIV (if one was started) completes
    282 
    283  add esi,ds:dword ptr[s]	; esi = s + sadjust
    284  add edx,ds:dword ptr[t]	; edx = t + tadjust
    285  mov ebx,ds:dword ptr[_bbextents]	
    286  mov ebp,ds:dword ptr[_bbextentt]	
    287  cmp esi,ebx	
    288  ja LClampHighOrLow0	; unsigned compare: a negative s looks like a huge value and is caught here too
    289 LClampReentry0:	
    290  mov ds:dword ptr[s],esi	; s = clamped start value
    291  mov ebx,ds:dword ptr[pbase]	
    292  shl esi,16	; keep only the low 16 bits of s, left-justified
    293  cmp edx,ebp	
    294  mov ds:dword ptr[sfracf],esi	; mov leaves the flags from the cmp above intact for the ja below
    295  ja LClampHighOrLow1	
    296 LClampReentry1:	
    297  mov ds:dword ptr[t],edx	; t = clamped start value
    298  mov esi,ds:dword ptr[s]	; sfrac = scans->sfrac;
    299  shl edx,16	; low 16 bits of t, left-justified
    300  mov eax,ds:dword ptr[t]	; tfrac = scans->tfrac;
    301  sar esi,16	; esi = s >> 16
    302  mov ds:dword ptr[tfracf],edx	
    303 
    304 ;
    305 ; calculate the texture starting address
    306 ;
    307  sar eax,16	; eax = t >> 16
    308  add esi,ebx	; esi = pbase + (s >> 16)
    309  imul eax,ds:dword ptr[_cachewidth]	; (tfrac >> 16) * cachewidth
    310  add esi,eax	; psource = pbase + (sfrac >> 16) +
    311 ;           ((tfrac >> 16) * cachewidth);
    312 
    313 ;
    314 ; determine whether last span or not
    315 ;
    316  cmp ecx,8	
    317  jna LLastSegment	; not above 8: only the final segment remains (ecx was already decremented on that path)
    318 
    319 ;
    320 ; not the last segment; do full 8-wide segment
    321 ;
    322 LNotLastSegment:	; full 8-pixel segment; FP stack on entry: z*64k (FDIV result) | 1/z | t/z | s/z
    323 
    324 ;
    325 ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
    326 ; get there
    327 ;
    328 
    329 ; pick up after the FDIV that was left in flight previously
    330 
    331  fld st(0)	; duplicate it
    332  fmul st(0),st(4)	; s = s/z * z
    333  fxch st(1)	
    334  fmul st(0),st(3)	; t = t/z * z
    335  fxch st(1)	
    336  fistp ds:dword ptr[snext]	
    337  fistp ds:dword ptr[tnext]	; FP stack is back to 1/z | t/z | s/z
    338  mov eax,ds:dword ptr[snext]	
    339  mov edx,ds:dword ptr[tnext]	
    340 
    341  sub ecx,8	; count off this segments' pixels
    342  mov ebp,ds:dword ptr[_sadjust]	
    343  push ecx	; remember count of remaining pixels
    344  mov ecx,ds:dword ptr[_tadjust]	
    345 
    346  add ebp,eax	; ebp = snext + sadjust
    347  add ecx,edx	; ecx = tnext + tadjust
    348 
    349  mov eax,ds:dword ptr[_bbextents]	
    350  mov edx,ds:dword ptr[_bbextentt]	
    351 
    352  cmp ebp,2048	
    353  jl LClampLow2	; signed: anything below 2048, including negatives, becomes 2048
    354  cmp ebp,eax	
    355  ja LClampHigh2	; above bbextents: clamp to bbextents
    356 LClampReentry2:	
    357 
    358  cmp ecx,2048	
    359  jl LClampLow3	; same two-sided clamp for tnext
    360  cmp ecx,edx	
    361  ja LClampHigh3	
    362 LClampReentry3:	
    363 
    364  mov ds:dword ptr[snext],ebp	
    365  mov ds:dword ptr[tnext],ecx	
    366 
    367  sub ebp,ds:dword ptr[s]	; ebp = snext - s, the s delta across the 8 pixels
    368  sub ecx,ds:dword ptr[t]	; ecx = tnext - t, the t delta across the 8 pixels
    369 
    370 ;
    371 ; set up advancetable
    372 ;
    373  mov eax,ecx	
    374  mov edx,ebp	
    375  sar edx,19	; sstep >>= 16; (19 = 16 + 3: the delta is divided by the 8 pixels as well)
    376  mov ebx,ds:dword ptr[_cachewidth]	
    377  sar eax,19	; tstep >>= 16; (same /8 folded in)
    378  jz LIsZero	; integer t step is 0: skip the multiply
    379  imul eax,ebx	; (tstep >> 16) * cachewidth;
    380 LIsZero:	
    381  add eax,edx	; add in sstep
    382 ; (tstep >> 16) * cachewidth + (sstep >> 16);
    383  mov edx,ds:dword ptr[tfracf]	
    384  mov ds:dword ptr[advancetable+4],eax	; advance base in t
    385  add eax,ebx	; ((tstep >> 16) + 1) * cachewidth +
    386 ;  (sstep >> 16);
    387  shl ebp,13	; left-justify sstep fractional part (13 = 16 - 3, again dividing by 8)
    388  mov ds:dword ptr[sstep],ebp	
    389  mov ebx,ds:dword ptr[sfracf]	
    390  shl ecx,13	; left-justify tstep fractional part
    391  mov ds:dword ptr[advancetable],eax	; advance extra in t
    392  mov ds:dword ptr[tstep],ecx	
    393 
    394  mov ecx,ds:dword ptr[pz]	; ecx = z-buffer pointer for this segment
    395  mov ebp,ds:dword ptr[izi]	; ebp = word-swapped 1/z; bp is what gets compared with the z-buffer
    396 
    397  cmp bp,ds:word ptr[ecx]	; pixel 1
    398  jl Lp1	; signed: skip the pixel when bp is less than the stored z word
    399  mov al,ds:byte ptr[esi]	; get first source texel
    400  cmp al,offset TRANSPARENT_COLOR	
    401  jz Lp1	; transparent texel: leave z-buffer and screen untouched
    402  mov ds:word ptr[ecx],bp	; write the new z value
    403  mov ds:byte ptr[edi],al	; store first dest pixel
    404 Lp1:	
    405  add ebp,ds:dword ptr[izistep]	; step 1/z
    406  adc ebp,0	; fold the carry back into the low word (both values are word-swapped)
    407  add edx,ds:dword ptr[tstep]	; advance tfrac fractional part by tstep frac
    408 
    409  sbb eax,eax	; turn tstep carry into -1 (0 if none)
    410  add ebx,ds:dword ptr[sstep]	; advance sfrac fractional part by sstep frac
    411  adc esi,ds:dword ptr[advancetable+4+eax*4]	; point to next source texel
    412 
    413  cmp bp,ds:word ptr[2+ecx]	; pixel 2: same test / store / step sequence
    414  jl Lp2	
    415  mov al,ds:byte ptr[esi]	
    416  cmp al,offset TRANSPARENT_COLOR	
    417  jz Lp2	
    418  mov ds:word ptr[2+ecx],bp	
    419  mov ds:byte ptr[1+edi],al	
    420 Lp2:	
    421  add ebp,ds:dword ptr[izistep]	
    422  adc ebp,0	
    423  add edx,ds:dword ptr[tstep]	
    424  sbb eax,eax	; eax = -1 selects advancetable[0] (extra row), 0 selects advancetable[1]
    425  add ebx,ds:dword ptr[sstep]	
    426  adc esi,ds:dword ptr[advancetable+4+eax*4]	; the carry from the sfrac add contributes one more texel
    427 
    428  cmp bp,ds:word ptr[4+ecx]	; pixel 3
    429  jl Lp3	
    430  mov al,ds:byte ptr[esi]	
    431  cmp al,offset TRANSPARENT_COLOR	
    432  jz Lp3	
    433  mov ds:word ptr[4+ecx],bp	
    434  mov ds:byte ptr[2+edi],al	
    435 Lp3:	
    436  add ebp,ds:dword ptr[izistep]	
    437  adc ebp,0	
    438  add edx,ds:dword ptr[tstep]	
    439  sbb eax,eax	
    440  add ebx,ds:dword ptr[sstep]	
    441  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    442 
    443  cmp bp,ds:word ptr[6+ecx]	; pixel 4
    444  jl Lp4	
    445  mov al,ds:byte ptr[esi]	
    446  cmp al,offset TRANSPARENT_COLOR	
    447  jz Lp4	
    448  mov ds:word ptr[6+ecx],bp	
    449  mov ds:byte ptr[3+edi],al	
    450 Lp4:	
    451  add ebp,ds:dword ptr[izistep]	
    452  adc ebp,0	
    453  add edx,ds:dword ptr[tstep]	
    454  sbb eax,eax	
    455  add ebx,ds:dword ptr[sstep]	
    456  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    457 
    458  cmp bp,ds:word ptr[8+ecx]	; pixel 5
    459  jl Lp5	
    460  mov al,ds:byte ptr[esi]	
    461  cmp al,offset TRANSPARENT_COLOR	
    462  jz Lp5	
    463  mov ds:word ptr[8+ecx],bp	
    464  mov ds:byte ptr[4+edi],al	
    465 Lp5:	
    466  add ebp,ds:dword ptr[izistep]	
    467  adc ebp,0	
    468  add edx,ds:dword ptr[tstep]	
    469  sbb eax,eax	
    470  add ebx,ds:dword ptr[sstep]	
    471  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    472 
    473 ;
    474 ; start FDIV for end of next segment in flight, so it can overlap
    475 ;
    476  pop eax	; eax = remaining pixel count pushed at the top of this segment
    477  cmp eax,8	; more than one segment after this?
    478  ja LSetupNotLast2	; yes
    479 
    480  dec eax	; eax = remaining count - 1
    481  jz LFDIVInFlight2	; if only one pixel, no need to start an FDIV
    482  mov ds:dword ptr[spancountminus1],eax	
    483  fild ds:dword ptr[spancountminus1]	; scm1 | 1/z | t/z | s/z (the comments below omit the last three)
    484 
    485  fld ds:dword ptr[_d_zistepu]	; _d_zistepu | spancountminus1
    486  fmul st(0),st(1)	; _d_zistepu*scm1 | scm1
    487  fld ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | _d_zistepu*scm1 | scm1
    488  fmul st(0),st(2)	; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
    489  fxch st(1)	; _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
    490  faddp st(3),st(0)	; _d_tdivzstepu*scm1 | scm1
    491  fxch st(1)	; scm1 | _d_tdivzstepu*scm1
    492  fmul ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
    493  fxch st(1)	; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
    494  faddp st(3),st(0)	; _d_sdivzstepu*scm1
    495  fld ds:dword ptr[fp_64k]	; 64k | _d_sdivzstepu*scm1
    496  fxch st(1)	; _d_sdivzstepu*scm1 | 64k
    497  faddp st(4),st(0)	; 64k
    498 
    499  fdiv st(0),st(1)	; this is what we've gone to all this trouble to
    500 ;  overlap
    501  jmp LFDIVInFlight2	
    502 
    503  align 4	
    504 LSetupNotLast2:	; more than 8 pixels remain: advance by one full 8-pixel step
    505  fadd ds:dword ptr[zi8stepu]	; 1/z += zi8stepu
    506  fxch st(2)	; s/z | t/z | 1/z
    507  fadd ds:dword ptr[sdivz8stepu]	; s/z += sdivz8stepu
    508  fxch st(2)	; 1/z | t/z | s/z
    509  fld ds:dword ptr[tdivz8stepu]	; tdivz8stepu | 1/z | t/z | s/z
    510  faddp st(2),st(0)	; t/z += tdivz8stepu; 1/z | t/z | s/z
    511  fld ds:dword ptr[fp_64k]	; fp_64k | 1/z | t/z | s/z
    512  fdiv st(0),st(1)	; z = 1/1/z
    513 ; this is what we've gone to all this trouble to
    514 ;  overlap
    515 LFDIVInFlight2:	
    516  push eax	; save the count again (already decremented if this was the last step); the pixel code below overwrites eax
    517 
    518  cmp bp,ds:word ptr[10+ecx]	; pixel 6: same z test / color key / step sequence as pixels 1-5
    519  jl Lp6	
    520  mov al,ds:byte ptr[esi]	
    521  cmp al,offset TRANSPARENT_COLOR	
    522  jz Lp6	
    523  mov ds:word ptr[10+ecx],bp	
    524  mov ds:byte ptr[5+edi],al	
    525 Lp6:	
    526  add ebp,ds:dword ptr[izistep]	
    527  adc ebp,0	
    528  add edx,ds:dword ptr[tstep]	
    529  sbb eax,eax	
    530  add ebx,ds:dword ptr[sstep]	
    531  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    532 
    533  cmp bp,ds:word ptr[12+ecx]	; pixel 7
    534  jl Lp7	
    535  mov al,ds:byte ptr[esi]	
    536  cmp al,offset TRANSPARENT_COLOR	
    537  jz Lp7	
    538  mov ds:word ptr[12+ecx],bp	
    539  mov ds:byte ptr[6+edi],al	
    540 Lp7:	
    541  add ebp,ds:dword ptr[izistep]	
    542  adc ebp,0	
    543  add edx,ds:dword ptr[tstep]	
    544  sbb eax,eax	
    545  add ebx,ds:dword ptr[sstep]	
    546  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    547 
    548  cmp bp,ds:word ptr[14+ecx]	; pixel 8
    549  jl Lp8	
    550  mov al,ds:byte ptr[esi]	
    551  cmp al,offset TRANSPARENT_COLOR	
    552  jz Lp8	
    553  mov ds:word ptr[14+ecx],bp	
    554  mov ds:byte ptr[7+edi],al	
    555 Lp8:	
    556  add ebp,ds:dword ptr[izistep]	
    557  adc ebp,0	
    558  add edx,ds:dword ptr[tstep]	
    559  sbb eax,eax	
    560  add ebx,ds:dword ptr[sstep]	
    561  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    562 
    563  add edi,8	; advance the dest pointer past the 8 pixels just handled
    564  add ecx,16	; and the z pointer past their 8 words
    565  mov ds:dword ptr[tfracf],edx	; save the running t fraction
    566  mov edx,ds:dword ptr[snext]	
    567  mov ds:dword ptr[sfracf],ebx	; save the running s fraction
    568  mov ebx,ds:dword ptr[tnext]	
    569  mov ds:dword ptr[s],edx	; s = snext: the next segment's deltas are measured from here
    570  mov ds:dword ptr[t],ebx	; t = tnext
    571 
    572  mov ds:dword ptr[pz],ecx	
    573  mov ds:dword ptr[izi],ebp	
    574 
    575  pop ecx	; retrieve count
    576 
    577 ;
    578 ; determine whether last span or not
    579 ;
    580  cmp ecx,8	; are there multiple segments remaining?
    581  ja LNotLastSegment	; yes
    582 
    583 ;
    584 ; last segment of scan
    585 ;
    586 LLastSegment:	; ecx = number of pixels left minus 1 (0..7)
    587 
    588 ;
    589 ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
    590 ; get there. The number of pixels left is variable, and we want to land on the
    591 ; last pixel, not step one past it, so we can't run into arithmetic problems
    592 ;
    593  test ecx,ecx	
    594  jz LNoSteps	; just draw the last pixel and we're done
    595 
    596 ; pick up after the FDIV that was left in flight previously
    597 
    598 
    599  fld st(0)	; duplicate it
    600  fmul st(0),st(4)	; s = s/z * z
    601  fxch st(1)	
    602  fmul st(0),st(3)	; t = t/z * z
    603  fxch st(1)	
    604  fistp ds:dword ptr[snext]	
    605  fistp ds:dword ptr[tnext]	; FP stack is back to 1/z | t/z | s/z
    606 
    607  mov ebx,ds:dword ptr[_tadjust]	
    608  mov eax,ds:dword ptr[_sadjust]	
    609 
    610  add eax,ds:dword ptr[snext]	; eax = snext + sadjust
    611  add ebx,ds:dword ptr[tnext]	; ebx = tnext + tadjust
    612 
    613  mov ebp,ds:dword ptr[_bbextents]	
    614  mov edx,ds:dword ptr[_bbextentt]	
    615 
    616  cmp eax,2048	
    617  jl LClampLow4	; signed: below 2048 clamps to 2048
    618  cmp eax,ebp	
    619  ja LClampHigh4	; above bbextents clamps to bbextents
    620 LClampReentry4:	
    621  mov ds:dword ptr[snext],eax	
    622 
    623  cmp ebx,2048	
    624  jl LClampLow5	; same two-sided clamp for tnext (kept in ebx only)
    625  cmp ebx,edx	
    626  ja LClampHigh5	
    627 LClampReentry5:	
    628 
    629  cmp ecx,1	; don't bother 
    630  je LOnlyOneStep	; if two pixels in segment, there's only one step,
    631 ;  of the segment length
    632  sub eax,ds:dword ptr[s]	; eax = snext - s
    633  sub ebx,ds:dword ptr[t]	; ebx = tnext - t
    634 
    635  add eax,eax	; convert to 15.17 format so multiply by 1.31
    636  add ebx,ebx	;  reciprocal yields 16.48
    637  imul ds:dword ptr[reciprocal_table-8+ecx*4]	; sstep = (snext - s) / (spancount-1)
    638  mov ebp,edx	; ebp = sstep, taken from the high dword of the edx:eax product
    639 
    640  mov eax,ebx	
    641  imul ds:dword ptr[reciprocal_table-8+ecx*4]	; tstep = (tnext - t) / (spancount-1)
    642 
    643 LSetEntryvec:	; in: ebp = sstep, edx = tstep, ecx = spancount-1
    644 ;
    645 ; set up advancetable
    646 ;
    647  mov ebx,ds:dword ptr[spr8entryvec_table+ecx*4]	; handler address for this pixel count (table is defined outside this file section)
    648  mov eax,edx	; eax = tstep, kept for the fractional part
    649  push ebx	; entry point into code for RET later
    650  mov ecx,ebp	
    651  sar ecx,16	; sstep >>= 16;
    652  mov ebx,ds:dword ptr[_cachewidth]	
    653  sar edx,16	; tstep >>= 16;
    654  jz LIsZeroLast	; integer t step is 0: skip the multiply
    655  imul edx,ebx	; (tstep >> 16) * cachewidth;
    656 LIsZeroLast:	
    657  add edx,ecx	; add in sstep
    658 ; (tstep >> 16) * cachewidth + (sstep >> 16);
    659  mov ecx,ds:dword ptr[tfracf]	
    660  mov ds:dword ptr[advancetable+4],edx	; advance base in t
    661  add edx,ebx	; ((tstep >> 16) + 1) * cachewidth +
    662 ;  (sstep >> 16);
    663  shl ebp,16	; left-justify sstep fractional part
    664  mov ebx,ds:dword ptr[sfracf]	
    665  shl eax,16	; left-justify tstep fractional part
    666  mov ds:dword ptr[advancetable],edx	; advance extra in t
    667 
    668  mov ds:dword ptr[tstep],eax	
    669  mov ds:dword ptr[sstep],ebp	
    670  mov edx,ecx	; edx = tfracf, ebx = sfracf: the registers the pixel code steps
    671 
    672  mov ecx,ds:dword ptr[pz]	
    673  mov ebp,ds:dword ptr[izi]	
    674 
    675  ret	; jump to the number-of-pixels handler
    676 
    677 ;----------------------------------------
    678 
    679 LNoSteps:	; exactly one pixel left: let LEndSpan draw it
    680  sub edi,7	; LEndSpan stores the pixel at [7+edi] ...
    681  mov ecx,ds:dword ptr[pz]	
    682  sub ecx,14	; ... and tests the z word at [14+ecx]
    683  jmp LEndSpan	
    684 
    685 
    686 LOnlyOneStep:	; two pixels left: the single step is the whole delta, no reciprocal multiply
    687  sub ebx,ds:dword ptr[t]	; ebx = tnext - t
    688  sub eax,ds:dword ptr[s]	; eax = snext - s
    689  mov edx,ebx	; edx = tstep, as LSetEntryvec expects
    690  mov ebp,eax	; ebp = sstep, as LSetEntryvec expects
    691  jmp LSetEntryvec	
    692 
    693 ;----------------------------------------
    694 
    695  public Spr8Entry2_8	
    696 Spr8Entry2_8:	; entry that draws the final 2 pixels (LLEntry2_8, then LEndSpan)
    697  sub edi,6	; adjust for hardwired offsets
    698  sub ecx,12	; z pointer: 6 words, matching the 6 bytes taken off edi
    699 ; no texel load here: LLEntry2_8 reads al itself after its z test, and eax is overwritten when that test fails
    700  jmp LLEntry2_8	
    701 
    702 ;----------------------------------------
    703 
    704  public Spr8Entry3_8	
    705 Spr8Entry3_8:	; entry that draws the final 3 pixels
    706  sub ecx,10	; z pointer back 5 words
    707  sub edi,5	; dest pointer back 5 bytes, to match LLEntry3_8's fixed offsets
    708  jmp LLEntry3_8	
    709 
    710 ;----------------------------------------
    711 
    712  public Spr8Entry4_8	
    713 Spr8Entry4_8:	; entry that draws the final 4 pixels
    714  sub ecx,8	; z pointer back 4 words
    715  sub edi,4	; dest pointer back 4 bytes
    716  jmp LLEntry4_8	
    717 
    718 ;----------------------------------------
    719 
    720  public Spr8Entry5_8	
    721 Spr8Entry5_8:	; entry that draws the final 5 pixels
    722  sub ecx,6	; z pointer back 3 words
    723  sub edi,3	; dest pointer back 3 bytes
    724  jmp LLEntry5_8	
    725 
    726 ;----------------------------------------
    727 
    728  public Spr8Entry6_8	
    729 Spr8Entry6_8:	; entry that draws the final 6 pixels
    730  sub ecx,4	; z pointer back 2 words
    731  sub edi,2	; dest pointer back 2 bytes
    732  jmp LLEntry6_8	
    733 
    734 ;----------------------------------------
    735 
    736  public Spr8Entry7_8	
    737 Spr8Entry7_8:	; entry that draws the final 7 pixels
    738  sub ecx,2	; z pointer back 1 word
    739  sub edi,1	; dest pointer back 1 byte
    740  jmp LLEntry7_8	
    741 
    742 ;----------------------------------------
    743 
    744  public Spr8Entry8_8	
    745 Spr8Entry8_8:	; 8-pixel entry; registers as set by LSetEntryvec: esi = source, edi = dest, ecx = pz, ebp = izi, edx = tfracf, ebx = sfracf
    746  cmp bp,ds:word ptr[ecx]	
    747  jl Lp9	; signed: skip the pixel when bp is less than the stored z word
    748  mov al,ds:byte ptr[esi]	
    749  cmp al,offset TRANSPARENT_COLOR	
    750  jz Lp9	; transparent texel: nothing is written
    751  mov ds:word ptr[ecx],bp	
    752  mov ds:byte ptr[edi],al	
    753 Lp9:	
    754  add ebp,ds:dword ptr[izistep]	; step 1/z
    755  adc ebp,0	; fold the carry back into the low word (values are word-swapped)
    756  add edx,ds:dword ptr[tstep]	; step the t fraction
    757  sbb eax,eax	; eax = -1 if the t fraction carried, else 0
    758  add ebx,ds:dword ptr[sstep]	; step the s fraction
    759  adc esi,ds:dword ptr[advancetable+4+eax*4]	; advance the source pointer (plus the s carry)
    760 LLEntry7_8:	; 7 pixels remain from here (reached from Spr8Entry7_8 with edi/ecx pre-biased)
    761  cmp bp,ds:word ptr[2+ecx]	
    762  jl Lp10	
    763  mov al,ds:byte ptr[esi]	
    764  cmp al,offset TRANSPARENT_COLOR	
    765  jz Lp10	
    766  mov ds:word ptr[2+ecx],bp	
    767  mov ds:byte ptr[1+edi],al	
    768 Lp10:	
    769  add ebp,ds:dword ptr[izistep]	
    770  adc ebp,0	
    771  add edx,ds:dword ptr[tstep]	
    772  sbb eax,eax	
    773  add ebx,ds:dword ptr[sstep]	
    774  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    775 LLEntry6_8:	; 6 pixels remain from here
    776  cmp bp,ds:word ptr[4+ecx]	
    777  jl Lp11	
    778  mov al,ds:byte ptr[esi]	
    779  cmp al,offset TRANSPARENT_COLOR	
    780  jz Lp11	
    781  mov ds:word ptr[4+ecx],bp	
    782  mov ds:byte ptr[2+edi],al	
    783 Lp11:	
    784  add ebp,ds:dword ptr[izistep]	
    785  adc ebp,0	
    786  add edx,ds:dword ptr[tstep]	
    787  sbb eax,eax	
    788  add ebx,ds:dword ptr[sstep]	
    789  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    790 LLEntry5_8:	; 5 pixels remain from here
    791  cmp bp,ds:word ptr[6+ecx]	
    792  jl Lp12	
    793  mov al,ds:byte ptr[esi]	
    794  cmp al,offset TRANSPARENT_COLOR	
    795  jz Lp12	
    796  mov ds:word ptr[6+ecx],bp	
    797  mov ds:byte ptr[3+edi],al	
    798 Lp12:	
    799  add ebp,ds:dword ptr[izistep]	
    800  adc ebp,0	
    801  add edx,ds:dword ptr[tstep]	
    802  sbb eax,eax	
    803  add ebx,ds:dword ptr[sstep]	
    804  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    805 LLEntry4_8:	; 4 pixels remain from here
    806  cmp bp,ds:word ptr[8+ecx]	
    807  jl Lp13	
    808  mov al,ds:byte ptr[esi]	
    809  cmp al,offset TRANSPARENT_COLOR	
    810  jz Lp13	
    811  mov ds:word ptr[8+ecx],bp	
    812  mov ds:byte ptr[4+edi],al	
    813 Lp13:	
    814  add ebp,ds:dword ptr[izistep]	
    815  adc ebp,0	
    816  add edx,ds:dword ptr[tstep]	
    817  sbb eax,eax	
    818  add ebx,ds:dword ptr[sstep]	
    819  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    820 LLEntry3_8:	; 3 pixels remain from here
    821  cmp bp,ds:word ptr[10+ecx]	
    822  jl Lp14	
    823  mov al,ds:byte ptr[esi]	
    824  cmp al,offset TRANSPARENT_COLOR	
    825  jz Lp14	
    826  mov ds:word ptr[10+ecx],bp	
    827  mov ds:byte ptr[5+edi],al	
    828 Lp14:	
    829  add ebp,ds:dword ptr[izistep]	
    830  adc ebp,0	
    831  add edx,ds:dword ptr[tstep]	
    832  sbb eax,eax	
    833  add ebx,ds:dword ptr[sstep]	
    834  adc esi,ds:dword ptr[advancetable+4+eax*4]	
    835 LLEntry2_8:	; 2 pixels remain from here
    836  cmp bp,ds:word ptr[12+ecx]	
    837  jl Lp15	
    838  mov al,ds:byte ptr[esi]	
    839  cmp al,offset TRANSPARENT_COLOR	
    840  jz Lp15	
    841  mov ds:word ptr[12+ecx],bp	
    842  mov ds:byte ptr[6+edi],al	
    843 Lp15:	
    844  add ebp,ds:dword ptr[izistep]	
    845  adc ebp,0	
    846  add edx,ds:dword ptr[tstep]	
    847  sbb eax,eax	
    848  add ebx,ds:dword ptr[sstep]	
    849  adc esi,ds:dword ptr[advancetable+4+eax*4]	; falls through into LEndSpan for the last pixel
    850 
    851 LEndSpan:	; draws the span's last pixel at the fixed offsets [14+ecx] / [7+edi]; no stepping follows
    852  cmp bp,ds:word ptr[14+ecx]	
    853  jl Lp16	
    854  mov al,ds:byte ptr[esi]	; load the span's last texel
    855  cmp al,offset TRANSPARENT_COLOR	
    856  jz Lp16	
    857  mov ds:word ptr[14+ecx],bp	
    858  mov ds:byte ptr[7+edi],al	
    859 Lp16:	
    860 
    861 ;
    862 ; clear s/z, t/z, 1/z from FP stack
    863 ;
    864  fstp st(0)	
    865  fstp st(0)	
    866  fstp st(0)	
    867 
    868  pop ebx	; restore spans pointer
    869 LNextSpan:	; also entered directly, without a pushed ebx, when a span's count is <= 0
    870  add ebx,offset sspan_t_size	; point to next span
    871  mov ecx,ds:dword ptr[sspan_t_count+ebx]	
    872  cmp ecx,0	; any more spans?
    873  jg LSpanLoop	; yes
    874  jz LNextSpan	; yes, but this one's empty
    875 ; falling through means the count is negative, which ends the span list
    876  pop ebx	; restore register variables
    877  pop esi	
    878  pop edi	
    879  pop ebp	; restore the caller's stack frame
    880  ret	
    881 
    882 _TEXT ENDS
    883 endif	; id386
    884  END