Quake-2

Quake 2 GPL Source Release
Log | Files | Refs

r_draw16.asm (30058B)


      1  .386P
      2  .model FLAT
      3 ;
      4 ; d_draw16.s
      5 ; x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
      6 ; subdivision.
      7 ;
      8 
      9 include qasm.inc
     10 include d_if.inc
     11 
     12 if	id386
     13 
     14 ;----------------------------------------------------------------------
     15 ; 8-bpp horizontal span drawing code for polygons, with no transparency and
     16 ; 16-pixel subdivision.
     17 ;
     18 ; Assumes there is at least one span in pspans, and that every span
     19 ; contains at least one pixel
     20 ;----------------------------------------------------------------------
     21 
     22 _DATA SEGMENT	
     23 
     24 _DATA ENDS
     25 _TEXT SEGMENT	
     26 
     27 ; out-of-line, rarely-needed clamping code
     28 ; each stub forces one coordinate register to a bound, then jumps back to the matching LClampReentryN label
     29 LClampHigh0:	
     30  mov esi,ds:dword ptr[_bbextents]	; s above the bound: clamp to bbextents
     31  jmp LClampReentry0	
     32 LClampHighOrLow0:	; reached via ja after cmp esi,ebx; flags still hold that compare
     33  jg LClampHigh0	; signed greater: s really is above bbextents
     34  xor esi,esi	; otherwise s was negative (only looked large as unsigned): clamp to 0
     35  jmp LClampReentry0	
     36 
     37 LClampHigh1:	
     38  mov edx,ds:dword ptr[_bbextentt]	; t above the bound: clamp to bbextentt
     39  jmp LClampReentry1	
     40 LClampHighOrLow1:	; reached via ja after cmp edx,ebp; flags still hold that compare
     41  jg LClampHigh1	; signed greater: t really is above bbextentt
     42  xor edx,edx	; otherwise t was negative: clamp to 0
     43  jmp LClampReentry1	
     44 
     45 LClampLow2:	
     46  mov ebp,4096	; ebp (sadjust+snext) was below 4096: raise it to 4096
     47  jmp LClampReentry2	
     48 LClampHigh2:	
     49  mov ebp,ds:dword ptr[_bbextents]	; ebp was above bbextents: clamp to bbextents
     50  jmp LClampReentry2	
     51 
     52 LClampLow3:	
     53  mov ecx,4096	; ecx (tadjust+tnext) was below 4096: raise it to 4096
     54  jmp LClampReentry3	
     55 LClampHigh3:	
     56  mov ecx,ds:dword ptr[_bbextentt]	; ecx was above bbextentt: clamp to bbextentt
     57  jmp LClampReentry3	
     58 
     59 LClampLow4:	
     60  mov eax,4096	; eax (sadjust+snext, last segment) was below 4096: raise it to 4096
     61  jmp LClampReentry4	
     62 LClampHigh4:	
     63  mov eax,ds:dword ptr[_bbextents]	; eax was above bbextents: clamp to bbextents
     64  jmp LClampReentry4	
     65 
     66 LClampLow5:	
     67  mov ebx,4096	; ebx (tadjust+tnext, last segment) was below 4096: raise it to 4096
     68  jmp LClampReentry5	
     69 LClampHigh5:	
     70  mov ebx,ds:dword ptr[_bbextentt]	; ebx was above bbextentt: clamp to bbextentt
     71  jmp LClampReentry5	
     72 
     73 
     74 pspans	equ		4+16	; stack offset of the span-list argument: return address (4) + the four registers pushed below (16)
     75 ; _D_DrawSpans16: walks the span list passed on the stack, texture-mapping each span in 16-pixel segments
     76  align 4	
     77  public _D_DrawSpans16	
     78 _D_DrawSpans16:	
     79  push ebp	; preserve caller's stack frame
     80  push edi	
     81  push esi	; preserve register variables
     82  push ebx	
     83 
     84 ;
     85 ; set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
     86 ; and span list pointers
     87 ;
     88 ; TODO: any overlap from rearranging?
     89  fld ds:dword ptr[_d_sdivzstepu]	; sdivzstepu
     90  fmul ds:dword ptr[fp_16]	; sdivzstepu*fp_16
     91  mov edx,ds:dword ptr[_cacheblock]	
     92  fld ds:dword ptr[_d_tdivzstepu]	; tdivzstepu | sdivz16
     93  fmul ds:dword ptr[fp_16]	; tdivz16 | sdivz16
     94  mov ebx,ds:dword ptr[pspans+esp]	; point to the first span descriptor
     95  fld ds:dword ptr[_d_zistepu]	; zistepu | tdivz16 | sdivz16
     96  fmul ds:dword ptr[fp_16]	; zi16 | tdivz16 | sdivz16
     97  mov ds:dword ptr[pbase],edx	; pbase = cacheblock
     98  fstp ds:dword ptr[zi16stepu]	; stores pop in reverse order of the loads, leaving the FP stack empty
     99  fstp ds:dword ptr[tdivz16stepu]	
    100  fstp ds:dword ptr[sdivz16stepu]	
    101 
    102 LSpanLoop:	; top of the per-span loop; ebx points at the current span descriptor
    103 ;
    104 ; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
    105 ; initial s and t values
    106 ;
    107 ; FIXME: pipeline FILD?
    108  fild ds:dword ptr[espan_t_v+ebx]	; dv
    109  fild ds:dword ptr[espan_t_u+ebx]	; du | dv
    110 
    111  fld st(1)	; dv | du | dv
    112  fmul ds:dword ptr[_d_sdivzstepv]	; dv*d_sdivzstepv | du | dv
    113  fld st(1)	; du | dv*d_sdivzstepv | du | dv
    114  fmul ds:dword ptr[_d_sdivzstepu]	; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
    115  fld st(2)	; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
    116  fmul ds:dword ptr[_d_tdivzstepu]	; du*d_tdivzstepu | du*d_sdivzstepu |
    117 ;  dv*d_sdivzstepv | du | dv
    118  fxch st(1)	; du*d_sdivzstepu | du*d_tdivzstepu |
    119 ;  dv*d_sdivzstepv | du | dv
    120  faddp st(2),st(0)	; du*d_tdivzstepu |
    121 ;  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
    122  fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
    123 ;  du*d_tdivzstepu | du | dv
    124  fld st(3)	; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
    125 ;  du*d_tdivzstepu | du | dv
    126  fmul ds:dword ptr[_d_tdivzstepv]	; dv*d_tdivzstepv |
    127 ;  du*d_sdivzstepu + dv*d_sdivzstepv |
    128 ;  du*d_tdivzstepu | du | dv
    129  fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
    130 ;  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
    131  fadd ds:dword ptr[_d_sdivzorigin]	; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
    132 ;  du*d_sdivzstepu; stays in st(2) at end
    133  fxch st(4)	; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
    134 ;  s/z
    135  fmul ds:dword ptr[_d_zistepv]	; dv*d_zistepv | dv*d_tdivzstepv |
    136 ;  du*d_tdivzstepu | du | s/z
    137  fxch st(1)	; dv*d_tdivzstepv |  dv*d_zistepv |
    138 ;  du*d_tdivzstepu | du | s/z
    139  faddp st(2),st(0)	; dv*d_zistepv |
    140 ;  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
    141  fxch st(2)	; du | dv*d_tdivzstepv + du*d_tdivzstepu |
    142 ;  dv*d_zistepv | s/z
    143  fmul ds:dword ptr[_d_zistepu]	; du*d_zistepu |
    144 ;  dv*d_tdivzstepv + du*d_tdivzstepu |
    145 ;  dv*d_zistepv | s/z
    146  fxch st(1)	; dv*d_tdivzstepv + du*d_tdivzstepu |
    147 ;  du*d_zistepu | dv*d_zistepv | s/z
    148  fadd ds:dword ptr[_d_tdivzorigin]	; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
    149 ;  du*d_tdivzstepu; stays in st(1) at end
    150  fxch st(2)	; dv*d_zistepv | du*d_zistepu | t/z | s/z
    151  faddp st(1),st(0)	; dv*d_zistepv + du*d_zistepu | t/z | s/z
    152 
    153  fld ds:dword ptr[fp_64k]	; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
    154  fxch st(1)	; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
    155  fadd ds:dword ptr[_d_ziorigin]	; zi = d_ziorigin + dv*d_zistepv +
    156 ;  du*d_zistepu; stays in st(0) at end
    157 ; 1/z | fp_64k | t/z | s/z
    158 ;
    159 ; calculate and clamp s & t
    160 ;
    161  fdiv st(1),st(0)	; 1/z | z*64k | t/z | s/z
    162 
    163 ;
    164 ; point edi to the first pixel in the span
    165 ;
    166  mov ecx,ds:dword ptr[_d_viewbuffer]	
    167  mov eax,ds:dword ptr[espan_t_v+ebx]	
    168  mov ds:dword ptr[pspantemp],ebx	; preserve spans pointer
    169 
    170  mov edx,ds:dword ptr[_tadjust]	; edx = tadjust; t is added to it at LFDIVInFlight1
    171  mov esi,ds:dword ptr[_sadjust]	; esi = sadjust; s is added to it at LFDIVInFlight1
    172  mov edi,ds:dword ptr[_d_scantable+eax*4]	; v * screenwidth
    173  add edi,ecx	
    174  mov ecx,ds:dword ptr[espan_t_u+ebx]	
    175  add edi,ecx	; pdest = &pdestspan[scans->u];
    176  mov ecx,ds:dword ptr[espan_t_count+ebx]	; ecx = pixel count of this span
    177 
    178 ;
    179 ; now start the FDIV for the end of the span
    180 ;
    181  cmp ecx,16	
    182  ja LSetupNotLast1	; more than 16 pixels: first segment is a full 16-pixel one
    183 
    184  dec ecx	; ecx = count-1 = number of steps in this (only) segment
    185  jz LCleanup1	; if only one pixel, no need to start an FDIV
    186  mov ds:dword ptr[spancountminus1],ecx	
    187 
    188 ; finish up the s and t calcs
    189  fxch st(1)	; z*64k | 1/z | t/z | s/z
    190 
    191  fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
    192  fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
    193  fxch st(1)	; z*64k | s | 1/z | t/z | s/z
    194  fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
    195  fxch st(1)	; s | t | 1/z | t/z | s/z
    196  fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
    197  fistp ds:dword ptr[t]	; 1/z | t/z | s/z
    198 
    199  fild ds:dword ptr[spancountminus1]	; scm1 | 1/z | t/z | s/z
    200 
    201  fld ds:dword ptr[_d_tdivzstepu]	; C(d_tdivzstepu) | spancountminus1
    202  fld ds:dword ptr[_d_zistepu]	; C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
    203  fmul st(0),st(2)	; C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
    204  fxch st(1)	; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
    205  fmul st(0),st(2)	; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
    206  fxch st(2)	; scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
    207  fmul ds:dword ptr[_d_sdivzstepu]	; C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
    208 ;  C(d_tdivzstepu)*scm1
    209  fxch st(1)	; C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
    210 ;  C(d_tdivzstepu)*scm1
    211  faddp st(3),st(0)	; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
    212  fxch st(1)	; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
    213  faddp st(3),st(0)	; C(d_sdivzstepu)*scm1
    214  faddp st(3),st(0)	; 1/z | t/z | s/z, each now advanced by scm1 steps
    215 
    216  fld ds:dword ptr[fp_64k]	; 64k | 1/z | t/z | s/z
    217  fdiv st(0),st(1)	; this is what we've gone to all this trouble to
    218 ;  overlap
    219  jmp LFDIVInFlight1	
    220 
    221 LCleanup1:	
    222 ; finish up the s and t calcs
    223  fxch st(1)	; z*64k | 1/z | t/z | s/z
    224 
    225  fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
    226  fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
    227  fxch st(1)	; z*64k | s | 1/z | t/z | s/z
    228  fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
    229  fxch st(1)	; s | t | 1/z | t/z | s/z
    230  fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
    231  fistp ds:dword ptr[t]	; 1/z | t/z | s/z
    232  jmp LFDIVInFlight1	; single-pixel span: no divide started, only three FP entries remain
    233 
    234  align 4	
    235 LSetupNotLast1:	
    236 ; finish up the s and t calcs
    237  fxch st(1)	; z*64k | 1/z | t/z | s/z
    238 
    239  fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
    240  fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
    241  fxch st(1)	; z*64k | s | 1/z | t/z | s/z
    242  fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
    243  fxch st(1)	; s | t | 1/z | t/z | s/z
    244  fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
    245  fistp ds:dword ptr[t]	; 1/z | t/z | s/z
    246 
    247  fadd ds:dword ptr[zi16stepu]	; 1/z + zi16stepu | t/z | s/z
    248  fxch st(2)	; s/z | t/z | 1/z
    249  fadd ds:dword ptr[sdivz16stepu]	; s/z + sdivz16stepu | t/z | 1/z
    250  fxch st(2)	; 1/z | t/z | s/z
    251  fld ds:dword ptr[tdivz16stepu]	; tdivz16stepu | 1/z | t/z | s/z
    252  faddp st(2),st(0)	; 1/z | t/z + tdivz16stepu | s/z
    253  fld ds:dword ptr[fp_64k]	; 64k | 1/z | t/z | s/z
    254  fdiv st(0),st(1)	; z = 1/1/z
    255 ; this is what we've gone to all this trouble to
    256 ;  overlap
    257 LFDIVInFlight1:	
    258 ; integer work from here on runs while any FDIV started above is still executing
    259  add esi,ds:dword ptr[s]	; esi = sadjust + s
    260  add edx,ds:dword ptr[t]	; edx = tadjust + t
    261  mov ebx,ds:dword ptr[_bbextents]	
    262  mov ebp,ds:dword ptr[_bbextentt]	
    263  cmp esi,ebx	
    264  ja LClampHighOrLow0	; unsigned test catches both s > bbextents and negative s
    265 LClampReentry0:	
    266  mov ds:dword ptr[s],esi	
    267  mov ebx,ds:dword ptr[pbase]	
    268  shl esi,16	; left-justify the low 16 bits of s
    269  cmp edx,ebp	
    270  mov ds:dword ptr[sfracf],esi	; mov leaves the flags from cmp edx,ebp intact for the ja below
    271  ja LClampHighOrLow1	; unsigned test catches both t > bbextentt and negative t
    272 LClampReentry1:	
    273  mov ds:dword ptr[t],edx	
    274  mov esi,ds:dword ptr[s]	; sfrac = scans->sfrac;
    275  shl edx,16	; left-justify the low 16 bits of t
    276  mov eax,ds:dword ptr[t]	; tfrac = scans->tfrac;
    277  sar esi,16	; esi = s >> 16
    278  mov ds:dword ptr[tfracf],edx	
    279 
    280 ;
    281 ; calculate the texture starting address
    282 ;
    283  sar eax,16	; eax = t >> 16
    284  mov edx,ds:dword ptr[_cachewidth]	
    285  imul eax,edx	; (tfrac >> 16) * cachewidth
    286  add esi,ebx	
    287  add esi,eax	; psource = pbase + (sfrac >> 16) +
    288 ;           ((tfrac >> 16) * cachewidth);
    289 ;
    290 ; determine whether last segment or not
    291 ;
    292  cmp ecx,16	; ecx is the full count (>16) or count-1 (short span)
    293  jna LLastSegment	
    294 
    295 ;
    296 ; not the last segment; do full 16-wide segment
    297 ;
    298 LNotLastSegment:	
    299 
    300 ;
    301 ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
    302 ; get there
    303 ;
    304 
    305 ; pick up after the FDIV that was left in flight previously
    306 
    307  fld st(0)	; duplicate it
    308  fmul st(0),st(4)	; s = s/z * z
    309  fxch st(1)	
    310  fmul st(0),st(3)	; t = t/z * z
    311  fxch st(1)	; s | t | 1/z | t/z | s/z
    312  fistp ds:dword ptr[snext]	
    313  fistp ds:dword ptr[tnext]	; 1/z | t/z | s/z
    314  mov eax,ds:dword ptr[snext]	
    315  mov edx,ds:dword ptr[tnext]	
    316 
    317  mov bl,ds:byte ptr[esi]	; get first source texel
    318  sub ecx,16	; count off this segment's pixels
    319  mov ebp,ds:dword ptr[_sadjust]	
    320  mov ds:dword ptr[counttemp],ecx	; remember count of remaining pixels
    321 
    322  mov ecx,ds:dword ptr[_tadjust]	
    323  mov ds:byte ptr[edi],bl	; store first dest pixel
    324 
    325  add ebp,eax	; ebp = sadjust + snext
    326  add ecx,edx	; ecx = tadjust + tnext
    327 
    328  mov eax,ds:dword ptr[_bbextents]	
    329  mov edx,ds:dword ptr[_bbextentt]	
    330 
    331  cmp ebp,4096	; keep the segment-end s within [4096, bbextents]
    332  jl LClampLow2	
    333  cmp ebp,eax	
    334  ja LClampHigh2	
    335 LClampReentry2:	
    336 
    337  cmp ecx,4096	; keep the segment-end t within [4096, bbextentt]
    338  jl LClampLow3	
    339  cmp ecx,edx	
    340  ja LClampHigh3	
    341 LClampReentry3:	
    342 
    343  mov ds:dword ptr[snext],ebp	
    344  mov ds:dword ptr[tnext],ecx	
    345 
    346  sub ebp,ds:dword ptr[s]	; ebp = s delta across the 16-pixel segment
    347  sub ecx,ds:dword ptr[t]	; ecx = t delta across the 16-pixel segment
    348 
    349 ;
    350 ; set up advancetable
    351 ;
    352  mov eax,ecx	
    353  mov edx,ebp	
    354  sar eax,20	; integer part of per-pixel tstep: delta/16 (>>4), then >>16
    355  jz LZero	; whole-texel t step is 0: skip the multiply by cachewidth
    356  sar edx,20	; integer part of per-pixel sstep: delta/16 (>>4), then >>16
    357  mov ebx,ds:dword ptr[_cachewidth]	
    358  imul eax,ebx	
    359  jmp LSetUp1	
    360 
    361 LZero:	
    362  sar edx,20	; integer part of per-pixel sstep: delta/16 (>>4), then >>16
    363  mov ebx,ds:dword ptr[_cachewidth]	
    364 
    365 LSetUp1:	
    366 
    367  add eax,edx	; add in sstep
    368 ; (tstep >> 16) * cachewidth + (sstep >> 16);
    369  mov edx,ds:dword ptr[tfracf]	
    370  mov ds:dword ptr[advancetable+4],eax	; advance base in t
    371  add eax,ebx	; ((tstep >> 16) + 1) * cachewidth +
    372 ;  (sstep >> 16);
    373  shl ebp,12	; left-justify sstep fractional part
    374  mov ebx,ds:dword ptr[sfracf]	
    375  shl ecx,12	; left-justify tstep fractional part
    376  mov ds:dword ptr[advancetable],eax	; advance extra in t
    377 
    378  mov ds:dword ptr[tstep],ecx	
    379  add edx,ecx	; advance tfrac fractional part by tstep frac
    380 
    381  sbb ecx,ecx	; turn tstep carry into -1 (0 if none)
    382  add ebx,ebp	; advance sfrac fractional part by sstep frac
    383  adc esi,ds:dword ptr[advancetable+4+ecx*4]	; point to next source texel
    384 ; unrolled steps for pixels 1-7: each group advances the t and s fractions, moves a texel through al, and advances esi via advancetable
    385  add edx,ds:dword ptr[tstep]	
    386  sbb ecx,ecx	
    387  mov al,ds:byte ptr[esi]	
    388  add ebx,ebp	
    389  mov ds:byte ptr[1+edi],al	
    390  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    391 
    392  add edx,ds:dword ptr[tstep]	
    393  sbb ecx,ecx	
    394  add ebx,ebp	
    395  mov al,ds:byte ptr[esi]	; fetched here, stored as pixel 2 at the start of the next group
    396  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    397 
    398  add edx,ds:dword ptr[tstep]	
    399  sbb ecx,ecx	
    400  mov ds:byte ptr[2+edi],al	
    401  add ebx,ebp	
    402  mov al,ds:byte ptr[esi]	
    403  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    404 
    405  add edx,ds:dword ptr[tstep]	
    406  sbb ecx,ecx	
    407  mov ds:byte ptr[3+edi],al	
    408  add ebx,ebp	
    409  mov al,ds:byte ptr[esi]	
    410  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    411 
    412  add edx,ds:dword ptr[tstep]	
    413  sbb ecx,ecx	
    414  mov ds:byte ptr[4+edi],al	
    415  add ebx,ebp	
    416  mov al,ds:byte ptr[esi]	
    417  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    418 
    419  add edx,ds:dword ptr[tstep]	
    420  sbb ecx,ecx	
    421  mov ds:byte ptr[5+edi],al	
    422  add ebx,ebp	
    423  mov al,ds:byte ptr[esi]	
    424  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    425 
    426  add edx,ds:dword ptr[tstep]	
    427  sbb ecx,ecx	
    428  mov ds:byte ptr[6+edi],al	
    429  add ebx,ebp	
    430  mov al,ds:byte ptr[esi]	
    431  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    432 
    433  add edx,ds:dword ptr[tstep]	
    434  sbb ecx,ecx	
    435  mov ds:byte ptr[7+edi],al	
    436  add ebx,ebp	
    437  mov al,ds:byte ptr[esi]	
    438  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    439 
    440 
    441 ;
    442 ; start FDIV for end of next segment in flight, so it can overlap
    443 ;
    444  mov ecx,ds:dword ptr[counttemp]	
    445  cmp ecx,16	; more than one segment after this?
    446  ja LSetupNotLast2	; yes
    447 
    448  dec ecx	; ecx = steps in the final segment (remaining pixels - 1)
    449  jz LFDIVInFlight2	; if only one pixel, no need to start an FDIV
    450  mov ds:dword ptr[spancountminus1],ecx	
    451  fild ds:dword ptr[spancountminus1]	; scm1 | 1/z | t/z | s/z
    452 
    453  fld ds:dword ptr[_d_zistepu]	; C(d_zistepu) | spancountminus1
    454  fmul st(0),st(1)	; C(d_zistepu)*scm1 | scm1
    455  fld ds:dword ptr[_d_tdivzstepu]	; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
    456  fmul st(0),st(2)	; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
    457  fxch st(1)	; C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
    458  faddp st(3),st(0)	; C(d_tdivzstepu)*scm1 | scm1
    459  fxch st(1)	; scm1 | C(d_tdivzstepu)*scm1
    460  fmul ds:dword ptr[_d_sdivzstepu]	; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
    461  fxch st(1)	; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
    462  faddp st(3),st(0)	; C(d_sdivzstepu)*scm1
    463  fld ds:dword ptr[fp_64k]	; 64k | C(d_sdivzstepu)*scm1
    464  fxch st(1)	; C(d_sdivzstepu)*scm1 | 64k
    465  faddp st(4),st(0)	; 64k
    466 
    467  fdiv st(0),st(1)	; this is what we've gone to all this trouble to
    468 ;  overlap
    469  jmp LFDIVInFlight2	
    470 
    471  align 4	
    472 LSetupNotLast2:	
    473  fadd ds:dword ptr[zi16stepu]	; 1/z + zi16stepu | t/z | s/z
    474  fxch st(2)	; s/z | t/z | 1/z
    475  fadd ds:dword ptr[sdivz16stepu]	; s/z + sdivz16stepu | t/z | 1/z
    476  fxch st(2)	; 1/z | t/z | s/z
    477  fld ds:dword ptr[tdivz16stepu]	; tdivz16stepu | 1/z | t/z | s/z
    478  faddp st(2),st(0)	; 1/z | t/z + tdivz16stepu | s/z
    479  fld ds:dword ptr[fp_64k]	; 64k | 1/z | t/z | s/z
    480  fdiv st(0),st(1)	; z = 1/1/z
    481 ; this is what we've gone to all this trouble to
    482 ;  overlap
    483 LFDIVInFlight2:	
    484  mov ds:dword ptr[counttemp],ecx	; save ecx (already decremented on the short-tail path) before it is reused as carry scratch
    485 ; unrolled steps for pixels 8-15 of this segment, overlapping the FDIV started above
    486  add edx,ds:dword ptr[tstep]	
    487  sbb ecx,ecx	
    488  mov ds:byte ptr[8+edi],al	
    489  add ebx,ebp	
    490  mov al,ds:byte ptr[esi]	
    491  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    492 
    493  add edx,ds:dword ptr[tstep]	
    494  sbb ecx,ecx	
    495  mov ds:byte ptr[9+edi],al	
    496  add ebx,ebp	
    497  mov al,ds:byte ptr[esi]	
    498  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    499 
    500  add edx,ds:dword ptr[tstep]	
    501  sbb ecx,ecx	
    502  mov ds:byte ptr[10+edi],al	
    503  add ebx,ebp	
    504  mov al,ds:byte ptr[esi]	
    505  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    506 
    507  add edx,ds:dword ptr[tstep]	
    508  sbb ecx,ecx	
    509  mov ds:byte ptr[11+edi],al	
    510  add ebx,ebp	
    511  mov al,ds:byte ptr[esi]	
    512  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    513 
    514  add edx,ds:dword ptr[tstep]	
    515  sbb ecx,ecx	
    516  mov ds:byte ptr[12+edi],al	
    517  add ebx,ebp	
    518  mov al,ds:byte ptr[esi]	
    519  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    520 
    521  add edx,ds:dword ptr[tstep]	
    522  sbb ecx,ecx	
    523  mov ds:byte ptr[13+edi],al	
    524  add ebx,ebp	
    525  mov al,ds:byte ptr[esi]	
    526  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    527 
    528  add edx,ds:dword ptr[tstep]	
    529  sbb ecx,ecx	
    530  mov ds:byte ptr[14+edi],al	
    531  add ebx,ebp	
    532  mov al,ds:byte ptr[esi]	
    533  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    534 
    535  add edi,16	; step pdest to the next segment
    536  mov ds:dword ptr[tfracf],edx	
    537  mov edx,ds:dword ptr[snext]	
    538  mov ds:dword ptr[sfracf],ebx	
    539  mov ebx,ds:dword ptr[tnext]	
    540  mov ds:dword ptr[s],edx	; s = snext
    541  mov ds:dword ptr[t],ebx	; t = tnext
    542 
    543  mov ecx,ds:dword ptr[counttemp]	; retrieve count
    544 
    545 ;
    546 ; determine whether last segment or not
    547 ;
    548  cmp ecx,16	; are there multiple segments remaining?
    549  mov ds:byte ptr[-1+edi],al	; store pixel 15 of the finished segment (edi already advanced); mov keeps the cmp flags
    550  ja LNotLastSegment	; yes
    551 
    552 ;
    553 ; last segment of scan
    554 ;
    555 LLastSegment:	
    556 
    557 ;
    558 ; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
    559 ; get there. The number of pixels left is variable, and we want to land on the
    560 ; last pixel, not step one past it, so we can't run into arithmetic problems
    561 ;
    562  test ecx,ecx	; ecx = pixels in this segment minus one
    563  jz LNoSteps	; just draw the last pixel and we're done
    564 
    565 ; pick up after the FDIV that was left in flight previously
    566 
    567 
    568  fld st(0)	; duplicate it
    569  fmul st(0),st(4)	; s = s/z * z
    570  fxch st(1)	
    571  fmul st(0),st(3)	; t = t/z * z
    572  fxch st(1)	; s | t | 1/z | t/z | s/z
    573  fistp ds:dword ptr[snext]	
    574  fistp ds:dword ptr[tnext]	; 1/z | t/z | s/z
    575 
    576  mov al,ds:byte ptr[esi]	; load first texel in segment
    577  mov ebx,ds:dword ptr[_tadjust]	
    578  mov ds:byte ptr[edi],al	; store first pixel in segment
    579  mov eax,ds:dword ptr[_sadjust]	
    580 
    581  add eax,ds:dword ptr[snext]	; eax = sadjust + snext
    582  add ebx,ds:dword ptr[tnext]	; ebx = tadjust + tnext
    583 
    584  mov ebp,ds:dword ptr[_bbextents]	
    585  mov edx,ds:dword ptr[_bbextentt]	
    586 
    587  cmp eax,4096	; keep the end-of-span s within [4096, bbextents]
    588  jl LClampLow4	
    589  cmp eax,ebp	
    590  ja LClampHigh4	
    591 LClampReentry4:	
    592  mov ds:dword ptr[snext],eax	
    593 
    594  cmp ebx,4096	; keep the end-of-span t within [4096, bbextentt]
    595  jl LClampLow5	
    596  cmp ebx,edx	
    597  ja LClampHigh5	
    598 LClampReentry5:	
    599 
    600  cmp ecx,1	; don't bother 
    601  je LOnlyOneStep	; if two pixels in segment, there's only one step,
    602 ;  of the segment length
    603  sub eax,ds:dword ptr[s]	; eax = s delta across the segment
    604  sub ebx,ds:dword ptr[t]	; ebx = t delta across the segment
    605 
    606  add eax,eax	; convert to 15.17 format so multiply by 1.31
    607  add ebx,ebx	;  reciprocal yields 16.48
    608 
    609  imul ds:dword ptr[reciprocal_table_16-8+ecx*4]	; sstep = (snext - s) /
    610 ;  (spancount-1)
    611  mov ebp,edx	; one-operand imul leaves the high dword in edx: ebp = sstep
    612 
    613  mov eax,ebx	
    614  imul ds:dword ptr[reciprocal_table_16-8+ecx*4]	; tstep = (tnext - t) /
    615 ;  (spancount-1)
    616 LSetEntryvec:	; here ebp = sstep, edx = tstep, ecx = step count
    617 ;
    618 ; set up advancetable
    619 ;
    620  mov ebx,ds:dword ptr[entryvec_table_16+ecx*4]	; handler address selected by the step count
    621  mov eax,edx	
    622  mov ds:dword ptr[jumptemp],ebx	; entry point into code for RET later
    623  mov ecx,ebp	
    624  sar edx,16	; tstep >>= 16;
    625  mov ebx,ds:dword ptr[_cachewidth]	
    626  sar ecx,16	; sstep >>= 16;
    627  imul edx,ebx	
    628 
    629  add edx,ecx	; add in sstep
    630 ; (tstep >> 16) * cachewidth + (sstep >> 16);
    631  mov ecx,ds:dword ptr[tfracf]	
    632  mov ds:dword ptr[advancetable+4],edx	; advance base in t
    633  add edx,ebx	; ((tstep >> 16) + 1) * cachewidth +
    634 ;  (sstep >> 16);
    635  shl ebp,16	; left-justify sstep fractional part
    636  mov ebx,ds:dword ptr[sfracf]	
    637  shl eax,16	; left-justify tstep fractional part
    638  mov ds:dword ptr[advancetable],edx	; advance extra in t
    639 
    640  mov ds:dword ptr[tstep],eax	; eax keeps the tstep fraction for the EntryN_16 handlers
    641  mov edx,ecx	
    642  add edx,eax	; advance tfrac by the tstep fraction
    643  sbb ecx,ecx	; -1 if that add carried, else 0
    644  add ebx,ebp	; advance sfrac by the sstep fraction
    645  adc esi,ds:dword ptr[advancetable+4+ecx*4]	; point to next source texel
    646 
    647  jmp dword ptr[jumptemp]	; jump to the number-of-pixels handler
    648 
    649 ;----------------------------------------
    650 
    651 LNoSteps:	; single-pixel segment: no stepping needed
    652  mov al,ds:byte ptr[esi]	; load first texel in segment
    653  sub edi,15	; adjust for hardwired offset
    654  jmp LEndSpan	; LEndSpan stores al at [15+edi], i.e. at the original edi
    655 
    656 
    657 LOnlyOneStep:	; two-pixel segment: the step is the whole delta, so no reciprocal multiply
    658  sub eax,ds:dword ptr[s]	
    659  sub ebx,ds:dword ptr[t]	
    660  mov ebp,eax	; ebp = sstep, as LSetEntryvec expects
    661  mov edx,ebx	; edx = tstep, as LSetEntryvec expects
    662  jmp LSetEntryvec	
    663 
    664 ;----------------------------------------
    665 
    666  public Entry2_16, Entry3_16, Entry4_16, Entry5_16	
    667  public Entry6_16, Entry7_16, Entry8_16, Entry9_16	
    668  public Entry10_16, Entry11_16, Entry12_16, Entry13_16	
    669  public Entry14_16, Entry15_16, Entry16_16	
    670 ; EntryN_16: handler for a final segment of N pixels; biases edi so the chain's fixed offsets land on the right pixels, then joins the chain at LEntryN_16
    671 Entry2_16:	
    672  sub edi,14	; adjust for hardwired offsets
    673  mov al,ds:byte ptr[esi]	
    674  jmp LEntry2_16	
    675 
    676 ;----------------------------------------
    677 
    678 Entry3_16:	
    679  sub edi,13	; adjust for hardwired offsets
    680  add edx,eax	; eax still holds the tstep fraction from LSetEntryvec
    681  mov al,ds:byte ptr[esi]	
    682  sbb ecx,ecx	
    683  add ebx,ebp	
    684  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    685  jmp LEntry3_16	
    686 
    687 ;----------------------------------------
    688 
    689 Entry4_16:	
    690  sub edi,12	; adjust for hardwired offsets
    691  add edx,eax	
    692  mov al,ds:byte ptr[esi]	
    693  sbb ecx,ecx	
    694  add ebx,ebp	
    695  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    696  add edx,ds:dword ptr[tstep]	; carry from this add is consumed by the sbb at the LEntryN_16 target
    697  jmp LEntry4_16	
    698 
    699 ;----------------------------------------
    700 
    701 Entry5_16:	
    702  sub edi,11	; adjust for hardwired offsets
    703  add edx,eax	
    704  mov al,ds:byte ptr[esi]	
    705  sbb ecx,ecx	
    706  add ebx,ebp	
    707  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    708  add edx,ds:dword ptr[tstep]	
    709  jmp LEntry5_16	
    710 
    711 ;----------------------------------------
    712 
    713 Entry6_16:	
    714  sub edi,10	; adjust for hardwired offsets
    715  add edx,eax	
    716  mov al,ds:byte ptr[esi]	
    717  sbb ecx,ecx	
    718  add ebx,ebp	
    719  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    720  add edx,ds:dword ptr[tstep]	
    721  jmp LEntry6_16	
    722 
    723 ;----------------------------------------
    724 
    725 Entry7_16:	
    726  sub edi,9	; adjust for hardwired offsets
    727  add edx,eax	
    728  mov al,ds:byte ptr[esi]	
    729  sbb ecx,ecx	
    730  add ebx,ebp	
    731  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    732  add edx,ds:dword ptr[tstep]	
    733  jmp LEntry7_16	
    734 
    735 ;----------------------------------------
    736 
    737 Entry8_16:	
    738  sub edi,8	; adjust for hardwired offsets
    739  add edx,eax	
    740  mov al,ds:byte ptr[esi]	
    741  sbb ecx,ecx	
    742  add ebx,ebp	
    743  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    744  add edx,ds:dword ptr[tstep]	
    745  jmp LEntry8_16	
    746 
    747 ;----------------------------------------
    748 
    749 Entry9_16:	
    750  sub edi,7	; adjust for hardwired offsets
    751  add edx,eax	
    752  mov al,ds:byte ptr[esi]	
    753  sbb ecx,ecx	
    754  add ebx,ebp	
    755  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    756  add edx,ds:dword ptr[tstep]	
    757  jmp LEntry9_16	
    758 
    759 ;----------------------------------------
    760 
    761 Entry10_16:	
    762  sub edi,6	; adjust for hardwired offsets
    763  add edx,eax	
    764  mov al,ds:byte ptr[esi]	
    765  sbb ecx,ecx	
    766  add ebx,ebp	
    767  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    768  add edx,ds:dword ptr[tstep]	
    769  jmp LEntry10_16	
    770 
    771 ;----------------------------------------
    772 
    773 Entry11_16:	
    774  sub edi,5	; adjust for hardwired offsets
    775  add edx,eax	
    776  mov al,ds:byte ptr[esi]	
    777  sbb ecx,ecx	
    778  add ebx,ebp	
    779  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    780  add edx,ds:dword ptr[tstep]	
    781  jmp LEntry11_16	
    782 
    783 ;----------------------------------------
    784 
    785 Entry12_16:	
    786  sub edi,4	; adjust for hardwired offsets
    787  add edx,eax	
    788  mov al,ds:byte ptr[esi]	
    789  sbb ecx,ecx	
    790  add ebx,ebp	
    791  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    792  add edx,ds:dword ptr[tstep]	
    793  jmp LEntry12_16	
    794 
    795 ;----------------------------------------
    796 
    797 Entry13_16:	
    798  sub edi,3	; adjust for hardwired offsets
    799  add edx,eax	
    800  mov al,ds:byte ptr[esi]	
    801  sbb ecx,ecx	
    802  add ebx,ebp	
    803  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    804  add edx,ds:dword ptr[tstep]	
    805  jmp LEntry13_16	
    806 
    807 ;----------------------------------------
    808 
    809 Entry14_16:	
    810  sub edi,2	; adjust for hardwired offsets
    811  add edx,eax	
    812  mov al,ds:byte ptr[esi]	
    813  sbb ecx,ecx	
    814  add ebx,ebp	
    815  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    816  add edx,ds:dword ptr[tstep]	
    817  jmp LEntry14_16	
    818 
    819 ;----------------------------------------
    820 
    821 Entry15_16:	
    822  dec edi	; adjust for hardwired offsets
    823  add edx,eax	
    824  mov al,ds:byte ptr[esi]	
    825  sbb ecx,ecx	
    826  add ebx,ebp	
    827  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    828  add edx,ds:dword ptr[tstep]	
    829  jmp LEntry15_16	
    830 
    831 ;----------------------------------------
    832 
    833 Entry16_16:	; full 16-pixel final segment: no edi bias, falls straight through the whole chain
    834  add edx,eax	; eax still holds the tstep fraction from LSetEntryvec
    835  mov al,ds:byte ptr[esi]	
    836  sbb ecx,ecx	
    837  add ebx,ebp	
    838  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    839 
    840  add edx,ds:dword ptr[tstep]	
    841  sbb ecx,ecx	
    842  mov ds:byte ptr[1+edi],al	
    843  add ebx,ebp	
    844  mov al,ds:byte ptr[esi]	
    845  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    846  add edx,ds:dword ptr[tstep]	
    847 LEntry15_16:	; each LEntryN_16 expects the carry from the preceding add edx,tstep still in the flags
    848  sbb ecx,ecx	
    849  mov ds:byte ptr[2+edi],al	
    850  add ebx,ebp	
    851  mov al,ds:byte ptr[esi]	
    852  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    853  add edx,ds:dword ptr[tstep]	
    854 LEntry14_16:	
    855  sbb ecx,ecx	
    856  mov ds:byte ptr[3+edi],al	
    857  add ebx,ebp	
    858  mov al,ds:byte ptr[esi]	
    859  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    860  add edx,ds:dword ptr[tstep]	
    861 LEntry13_16:	
    862  sbb ecx,ecx	
    863  mov ds:byte ptr[4+edi],al	
    864  add ebx,ebp	
    865  mov al,ds:byte ptr[esi]	
    866  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    867  add edx,ds:dword ptr[tstep]	
    868 LEntry12_16:	
    869  sbb ecx,ecx	
    870  mov ds:byte ptr[5+edi],al	
    871  add ebx,ebp	
    872  mov al,ds:byte ptr[esi]	
    873  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    874  add edx,ds:dword ptr[tstep]	
    875 LEntry11_16:	
    876  sbb ecx,ecx	
    877  mov ds:byte ptr[6+edi],al	
    878  add ebx,ebp	
    879  mov al,ds:byte ptr[esi]	
    880  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    881  add edx,ds:dword ptr[tstep]	
    882 LEntry10_16:	
    883  sbb ecx,ecx	
    884  mov ds:byte ptr[7+edi],al	
    885  add ebx,ebp	
    886  mov al,ds:byte ptr[esi]	
    887  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    888  add edx,ds:dword ptr[tstep]	
    889 LEntry9_16:	
    890  sbb ecx,ecx	
    891  mov ds:byte ptr[8+edi],al	
    892  add ebx,ebp	
    893  mov al,ds:byte ptr[esi]	
    894  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    895  add edx,ds:dword ptr[tstep]	
    896 LEntry8_16:	
    897  sbb ecx,ecx	
    898  mov ds:byte ptr[9+edi],al	
    899  add ebx,ebp	
    900  mov al,ds:byte ptr[esi]	
    901  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    902  add edx,ds:dword ptr[tstep]	
    903 LEntry7_16:	
    904  sbb ecx,ecx	
    905  mov ds:byte ptr[10+edi],al	
    906  add ebx,ebp	
    907  mov al,ds:byte ptr[esi]	
    908  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    909  add edx,ds:dword ptr[tstep]	
    910 LEntry6_16:	
    911  sbb ecx,ecx	
    912  mov ds:byte ptr[11+edi],al	
    913  add ebx,ebp	
    914  mov al,ds:byte ptr[esi]	
    915  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    916  add edx,ds:dword ptr[tstep]	
    917 LEntry5_16:	
    918  sbb ecx,ecx	
    919  mov ds:byte ptr[12+edi],al	
    920  add ebx,ebp	
    921  mov al,ds:byte ptr[esi]	
    922  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    923  add edx,ds:dword ptr[tstep]	
    924 LEntry4_16:	
    925  sbb ecx,ecx	
    926  mov ds:byte ptr[13+edi],al	
    927  add ebx,ebp	
    928  mov al,ds:byte ptr[esi]	
    929  adc esi,ds:dword ptr[advancetable+4+ecx*4]	
    930 LEntry3_16:	; no further stepping needed: only the last two stores remain
    931  mov ds:byte ptr[14+edi],al	
    932  mov al,ds:byte ptr[esi]	; final texel; stored at [15+edi] after LEndSpan
    933 LEntry2_16:	
    934 
    935 LEndSpan:	; al holds the span's final texel on every path that reaches here
    936 
    937 ;
    938 ; clear s/z, t/z, 1/z from FP stack
    939 ;
    940  fstp st(0)	
    941  fstp st(0)	
    942  fstp st(0)	
    943 
    944  mov ebx,ds:dword ptr[pspantemp]	; restore spans pointer
    945  mov ebx,ds:dword ptr[espan_t_pnext+ebx]	; point to next span
    946  test ebx,ebx	; any more spans?
    947  mov ds:byte ptr[15+edi],al	; store the span's final pixel; mov leaves the test flags intact
    948  jnz LSpanLoop	; more spans
    949 
    950  pop ebx	; restore register variables
    951  pop esi	
    952  pop edi	
    953  pop ebp	; restore the caller's stack frame
    954  ret	
    955 
    956 
    957 ;----------------------------------------------------------------------
    958 ; 8-bpp horizontal span z drawing code for polygons, with no transparency.
    959 ;
    960 ; Assumes there is at least one span in pzspans, and that every span
    961 ; contains at least one pixel
    962 ;----------------------------------------------------------------------
    963 
    964 	
    965 
    966 ; clamp path for the add-stepped span loop, entered when the start 1/z compared above 0.5
    967 LClamp:	
    968  fstp st(0)	; drop the unconverted 1/z still sitting on the FP stack
    969  xor ebx,ebx	; draw this span with a z step of zero
    970  mov edx,040000000h	; fixed start value used in place of izi
    971  jmp LZDraw	
    972 
    973 ; clamp path for the sub-stepped span loop, entered when the start 1/z compared above 0.5
    974 LClampNeg:	
    975  fstp st(0)	; drop the unconverted 1/z still sitting on the FP stack
    976  xor ebx,ebx	; draw this span with a z step of zero
    977  mov edx,040000000h	; fixed start value used in place of izi
    978  jmp LZDrawNeg	
    979 
    980 
    981 pzspans	equ		4+16	; esp offset of the span-list argument: 4 (return address) + 16 (four pushed registers)
    982 ; _D_DrawZSpans: for every span in the list, writes one 16-bit 1/z value per pixel into the buffer at _d_pzbuffer
    983  public _D_DrawZSpans	
    984 _D_DrawZSpans:	
    985  push ebp	; preserve caller's stack frame
    986  push edi	
    987  push esi	; preserve register variables
    988  push ebx	
    989 ; esi walks the span list; ebp/edi/esi/ebx are restored at LFDone, eax/ecx/edx are left clobbered
    990  fld ds:dword ptr[_d_zistepu]	
    991  mov eax,ds:dword ptr[_d_zistepu]	; raw bits of the same float, for the integer test below
    992  mov esi,ds:dword ptr[pzspans+esp]	; esi = first span
    993  test eax,eax	
    994  jz LFNegSpan	; taken only when the bit pattern is zero; any other step (either sign) falls through
    995 
    996  fmul ds:dword ptr[Float2ToThe31nd]	
    997  fistp ds:dword ptr[izistep]	; note: we are relying on FP exceptions being turned
    998 ; off here to avoid range problems
    999 
   1000 LFSpanLoop:	
   1001  mov ebx,ds:dword ptr[izistep]	; reloaded for every span: the LClamp path zeroes ebx and does not restore it
   1002 ; set up the initial 1/z value
   1003  fild ds:dword ptr[espan_t_v+esi]	
   1004  fild ds:dword ptr[espan_t_u+esi]	
   1005  mov ecx,ds:dword ptr[espan_t_v+esi]	
   1006  mov edi,ds:dword ptr[_d_pzbuffer]	
   1007  fmul ds:dword ptr[_d_zistepu]	; u * zistepu
   1008  fxch st(1)	
   1009  fmul ds:dword ptr[_d_zistepv]	; v * zistepv
   1010  fxch st(1)	
   1011  fadd ds:dword ptr[_d_ziorigin]	
   1012  imul ecx,ds:dword ptr[_d_zrowbytes]	; byte offset of row v
   1013  faddp st(1),st(0)	; st(0) = ziorigin + u*zistepu + v*zistepv
   1014 
   1015 ; clamp if z is nearer than 2 (1/z > 0.5)
   1016  fcom ds:dword ptr[float_point5]	
   1017  add edi,ecx	
   1018  mov edx,ds:dword ptr[espan_t_u+esi]	
   1019  add edx,edx	; u * 2: each z entry is a 16-bit word
   1020  mov ecx,ds:dword ptr[espan_t_count+esi]	
   1021  add edi,edx	; pdest = &pdestspan[scans->u];
   1022  push esi	; preserve spans pointer (esi is scratch in the middle loop)
   1023  fnstsw ax	
   1024  test ah,045h	; C3, C2 and C0 all clear means st(0) > 0.5
   1025  jz LClamp	
   1026 
   1027  fmul ds:dword ptr[Float2ToThe31nd]	
   1028  fistp ds:dword ptr[izi]	; note: we are relying on FP exceptions being turned
   1029 ; off here to avoid problems when the span is closer
   1030 ; than 1/(2**31)
   1031  mov edx,ds:dword ptr[izi]	
   1032 
   1033 ; at this point:
   1034 ; %ebx = izistep (0 when entered from LClamp)
   1035 ; %ecx = count
   1036 ; %edx = izi
   1037 ; %edi = pdest
   1038 
   1039 LZDraw:	
   1040 
   1041 ; do a single pixel up front, if necessary to dword align the destination
   1042  test edi,2	
   1043  jz LFMiddle	
   1044  mov eax,edx	
   1045  add edx,ebx	
   1046  shr eax,16	; the stored z is the high 16 bits of izi
   1047  dec ecx	
   1048  mov ds:word ptr[edi],ax	
   1049  add edi,2	
   1050 
   1051 ; do middle a pair of aligned dwords at a time
   1052 LFMiddle:	
   1053  push ecx	; saved so LFLast can test for an odd trailing pixel
   1054  shr ecx,1	; count / 2
   1055  jz LFLast	; no aligned dwords to do
   1056  shr ecx,1	; (count / 2) / 2
   1057  jnc LFMiddleLoop	; even number of aligned dwords to do
   1058 
   1059  mov eax,edx	
   1060  add edx,ebx	
   1061  shr eax,16	; low word = z of the first pixel
   1062  mov esi,edx	
   1063  add edx,ebx	
   1064  and esi,0FFFF0000h	; high word = z of the second pixel
   1065  or eax,esi	
   1066  mov ds:dword ptr[edi],eax	
   1067  add edi,4	
   1068  and ecx,ecx	; any dword pairs left?
   1069  jz LFLast	
   1070 
   1071 LFMiddleLoop:	
   1072  mov eax,edx	
   1073  add edx,ebx	
   1074  shr eax,16	
   1075  mov esi,edx	
   1076  add edx,ebx	
   1077  and esi,0FFFF0000h	
   1078  or eax,esi	; eax = pixels 0 and 1 packed
   1079  mov ebp,edx	
   1080  mov ds:dword ptr[edi],eax	
   1081  add edx,ebx	
   1082  shr ebp,16	
   1083  mov esi,edx	
   1084  add edx,ebx	
   1085  and esi,0FFFF0000h	
   1086  or ebp,esi	; ebp = pixels 2 and 3 packed
   1087  mov ds:dword ptr[4+edi],ebp	; FIXME: eliminate register contention
   1088  add edi,8	
   1089 
   1090  dec ecx	
   1091  jnz LFMiddleLoop	
   1092 
   1093 LFLast:	
   1094  pop ecx	; retrieve count
   1095  pop esi	; retrieve span pointer
   1096 
   1097 ; do the last, unaligned pixel, if there is one
   1098  and ecx,1	; is there an odd pixel left to do?
   1099  jz LFSpanDone	; no
   1100  shr edx,16	
   1101  mov ds:word ptr[edi],dx	; do the final pixel's z
   1102 
   1103 LFSpanDone:	
   1104  mov esi,ds:dword ptr[espan_t_pnext+esi]	; advance to the next span
   1105  test esi,esi	
   1106  jnz LFSpanLoop	
   1107 
   1108  jmp LFDone	
   1109 
   1110 LFNegSpan:	
   1111  fmul ds:dword ptr[FloatMinus2ToThe31nd]	
   1112  fistp ds:dword ptr[izistep]	; note: we are relying on FP exceptions being turned
   1113 ; off here to avoid range problems
   1114 
   1115 LFNegSpanLoop:	
   1116  mov ebx,ds:dword ptr[izistep]	; reloaded for every span: the LClampNeg path zeroes ebx and does not restore it
   1117 ; set up the initial 1/z value
   1118  fild ds:dword ptr[espan_t_v+esi]	
   1119  fild ds:dword ptr[espan_t_u+esi]	
   1120  mov ecx,ds:dword ptr[espan_t_v+esi]	
   1121  mov edi,ds:dword ptr[_d_pzbuffer]	
   1122  fmul ds:dword ptr[_d_zistepu]	; u * zistepu
   1123  fxch st(1)	
   1124  fmul ds:dword ptr[_d_zistepv]	; v * zistepv
   1125  fxch st(1)	
   1126  fadd ds:dword ptr[_d_ziorigin]	
   1127  imul ecx,ds:dword ptr[_d_zrowbytes]	; byte offset of row v
   1128  faddp st(1),st(0)	; st(0) = ziorigin + u*zistepu + v*zistepv
   1129 
   1130 ; clamp if z is nearer than 2 (1/z > 0.5)
   1131  fcom ds:dword ptr[float_point5]	
   1132  add edi,ecx	
   1133  mov edx,ds:dword ptr[espan_t_u+esi]	
   1134  add edx,edx	; u * 2: each z entry is a 16-bit word
   1135  mov ecx,ds:dword ptr[espan_t_count+esi]	
   1136  add edi,edx	; pdest = &pdestspan[scans->u];
   1137  push esi	; preserve spans pointer (esi is scratch in the middle loop)
   1138  fnstsw ax	
   1139  test ah,045h	; C3, C2 and C0 all clear means st(0) > 0.5
   1140  jz LClampNeg	
   1141 
   1142  fmul ds:dword ptr[Float2ToThe31nd]	
   1143  fistp ds:dword ptr[izi]	; note: we are relying on FP exceptions being turned
   1144 ; off here to avoid problems when the span is closer
   1145 ; than 1/(2**31)
   1146  mov edx,ds:dword ptr[izi]	
   1147 
   1148 ; at this point:
   1149 ; %ebx = izistep (0 when entered from LClampNeg); subtracted per pixel on this path
   1150 ; %ecx = count
   1151 ; %edx = izi
   1152 ; %edi = pdest
   1153 
   1154 LZDrawNeg:	
   1155 
   1156 ; do a single pixel up front, if necessary to dword align the destination
   1157  test edi,2	
   1158  jz LFNegMiddle	
   1159  mov eax,edx	
   1160  sub edx,ebx	
   1161  shr eax,16	; the stored z is the high 16 bits of izi
   1162  dec ecx	
   1163  mov ds:word ptr[edi],ax	
   1164  add edi,2	
   1165 
   1166 ; do middle a pair of aligned dwords at a time
   1167 LFNegMiddle:	
   1168  push ecx	; saved so LFNegLast can test for an odd trailing pixel
   1169  shr ecx,1	; count / 2
   1170  jz LFNegLast	; no aligned dwords to do
   1171  shr ecx,1	; (count / 2) / 2
   1172  jnc LFNegMiddleLoop	; even number of aligned dwords to do
   1173 
   1174  mov eax,edx	
   1175  sub edx,ebx	
   1176  shr eax,16	; low word = z of the first pixel
   1177  mov esi,edx	
   1178  sub edx,ebx	
   1179  and esi,0FFFF0000h	; high word = z of the second pixel
   1180  or eax,esi	
   1181  mov ds:dword ptr[edi],eax	
   1182  add edi,4	
   1183  and ecx,ecx	; any dword pairs left?
   1184  jz LFNegLast	
   1185 
   1186 LFNegMiddleLoop:	
   1187  mov eax,edx	
   1188  sub edx,ebx	
   1189  shr eax,16	
   1190  mov esi,edx	
   1191  sub edx,ebx	
   1192  and esi,0FFFF0000h	
   1193  or eax,esi	; eax = pixels 0 and 1 packed
   1194  mov ebp,edx	
   1195  mov ds:dword ptr[edi],eax	
   1196  sub edx,ebx	
   1197  shr ebp,16	
   1198  mov esi,edx	
   1199  sub edx,ebx	
   1200  and esi,0FFFF0000h	
   1201  or ebp,esi	; ebp = pixels 2 and 3 packed
   1202  mov ds:dword ptr[4+edi],ebp	; FIXME: eliminate register contention
   1203  add edi,8	
   1204 
   1205  dec ecx	
   1206  jnz LFNegMiddleLoop	
   1207 
   1208 LFNegLast:	
   1209  pop ecx	; retrieve count
   1210  pop esi	; retrieve span pointer
   1211 
   1212 ; do the last, unaligned pixel, if there is one
   1213  and ecx,1	; is there an odd pixel left to do?
   1214  jz LFNegSpanDone	; no
   1215  shr edx,16	
   1216  mov ds:word ptr[edi],dx	; do the final pixel's z
   1217 
   1218 LFNegSpanDone:	
   1219  mov esi,ds:dword ptr[espan_t_pnext+esi]	; advance to the next span
   1220  test esi,esi	
   1221  jnz LFNegSpanLoop	
   1222 
   1223 LFDone:	
   1224  pop ebx	; restore register variables
   1225  pop esi	
   1226  pop edi	
   1227  pop ebp	; restore the caller's stack frame
   1228  ret	
   1229 
   1230 
   1231 
   1232 _TEXT ENDS
   1233 endif	;id386
   1234  END