Quake-2

Quake 2 GPL Source Release
Log | Files | Refs

r_spr8.s (19905B)


      1 //
      2 // r_spr8.s
      3 // x86 assembly-language horizontal 8-bpp transparent span-drawing code.
      4 //
      5 
      6 #include "qasm.h"
      7 
      8 #if id386
      9 
     10 //----------------------------------------------------------------------
     11 // 8-bpp horizontal span drawing code for polygons, with transparency.
     12 //----------------------------------------------------------------------
     13 
     14 	.text
     15 
     16 // out-of-line, rarely-needed clamping code
     17 // Stubs 0 and 1 are reached by a "ja" and rely on the flags of the preceding cmpl (value vs. extent) still being live.
     18 LClampHigh0:
     19 	movl	C(bbextents),%esi	// s is above the extent: clamp it to bbextents
     20 	jmp		LClampReentry0
     21 LClampHighOrLow0:
     22 	jg		LClampHigh0			// signed-greater as well: genuinely too high
     23 	xorl	%esi,%esi			// "above" only as unsigned, i.e. negative: clamp to 0
     24 	jmp		LClampReentry0
     25 
     26 LClampHigh1:
     27 	movl	C(bbextentt),%edx	// t is above the extent: clamp it to bbextentt
     28 	jmp		LClampReentry1
     29 LClampHighOrLow1:
     30 	jg		LClampHigh1			// signed-greater as well: genuinely too high
     31 	xorl	%edx,%edx			// "above" only as unsigned, i.e. negative: clamp to 0
     32 	jmp		LClampReentry1
     33 
     34 LClampLow2:
     35 	movl	$2048,%ebp			// segment-end s below 2048: clamp up to 2048
     36 	jmp		LClampReentry2
     37 LClampHigh2:
     38 	movl	C(bbextents),%ebp	// segment-end s above bbextents: clamp down
     39 	jmp		LClampReentry2
     40 
     41 LClampLow3:
     42 	movl	$2048,%ecx			// segment-end t below 2048: clamp up to 2048
     43 	jmp		LClampReentry3
     44 LClampHigh3:
     45 	movl	C(bbextentt),%ecx	// segment-end t above bbextentt: clamp down
     46 	jmp		LClampReentry3
     47 
     48 LClampLow4:
     49 	movl	$2048,%eax			// last-segment end s below 2048: clamp up to 2048
     50 	jmp		LClampReentry4
     51 LClampHigh4:
     52 	movl	C(bbextents),%eax	// last-segment end s above bbextents: clamp down
     53 	jmp		LClampReentry4
     54 
     55 LClampLow5:
     56 	movl	$2048,%ebx			// last-segment end t below 2048: clamp up to 2048
     57 	jmp		LClampReentry5
     58 LClampHigh5:
     59 	movl	C(bbextentt),%ebx	// last-segment end t above bbextentt: clamp down
     60 	jmp		LClampReentry5
     61 
     62 
     63 #define pspans	4+16
     64 // pspans = return address (4 bytes) + the four registers pushed on entry (16 bytes)
     65 	.align 4
     66 .globl C(D_SpriteDrawSpans)
     67 C(D_SpriteDrawSpans):
     68 	pushl	%ebp				// preserve caller's stack frame
     69 	pushl	%edi
     70 	pushl	%esi				// preserve register variables
     71 	pushl	%ebx
     72 
     73 //
     74 // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
     75 // and span list pointers, and 1/z step in 0.32 fixed-point
     76 //
     77 // FIXME: any overlap from rearranging?
     78 	flds	C(d_sdivzstepu)
     79 	fmuls	fp_8				// d_sdivzstepu*8
     80 	movl	C(cacheblock),%edx
     81 	flds	C(d_tdivzstepu)
     82 	fmuls	fp_8				// d_tdivzstepu*8 | d_sdivzstepu*8
     83 	movl	pspans(%esp),%ebx	// point to the first span descriptor
     84 	flds	C(d_zistepu)
     85 	fmuls	fp_8				// d_zistepu*8 | d_tdivzstepu*8 | d_sdivzstepu*8
     86 	movl	%edx,pbase			// pbase = cacheblock
     87 	flds	C(d_zistepu)
     88 	fmuls	fp_64kx64k			// d_zistepu*64k*64k | zi*8 | tdivz*8 | sdivz*8
     89 	fxch	%st(3)				// sdivz*8 | zi*8 | tdivz*8 | d_zistepu*64k*64k
     90 	fstps	sdivz8stepu
     91 	fstps	zi8stepu
     92 	fstps	tdivz8stepu
     93 	fistpl	izistep				// integer 1/z step; FP stack is empty again
     94 	movl	izistep,%eax
     95 	rorl	$16,%eax		// put upper 16 bits in low word
     96 	movl	sspan_t_count(%ebx),%ecx
     97 	movl	%eax,izistep		// keep the step in word-swapped form
     98 
     99 	cmpl	$0,%ecx
    100 	jle		LNextSpan			// first span has no pixels: step to the next descriptor
    101 
    102 LSpanLoop:
    103 // per-span setup; on entry %ebx points at the span descriptor and %ecx holds its pixel count
    104 //
    105 // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
    106 // initial s and t values
    107 //
    108 // FIXME: pipeline FILD?
    109 	fildl	sspan_t_v(%ebx)
    110 	fildl	sspan_t_u(%ebx)
    111 
    112 	fld		%st(1)			// dv | du | dv
    113 	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
    114 	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
    115 	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
    116 	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
    117 	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
    118 							//  dv*d_sdivzstepv | du | dv
    119 	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
    120 							//  dv*d_sdivzstepv | du | dv
    121 	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
    122 							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
    123 	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
    124 							//  du*d_tdivzstepu | du | dv
    125 	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
    126 							//  du*d_tdivzstepu | du | dv
    127 	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
    128 							//  du*d_sdivzstepu + dv*d_sdivzstepv |
    129 							//  du*d_tdivzstepu | du | dv
    130 	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
    131 							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
    132 	fadds	C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
    133 							//  du*d_sdivzstepu; stays in %st(2) at end
    134 	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
    135 							//  s/z
    136 	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
    137 							//  du*d_tdivzstepu | du | s/z
    138 	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
    139 							//  du*d_tdivzstepu | du | s/z
    140 	faddp	%st(0),%st(2)	// dv*d_zistepv |
    141 							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
    142 	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
    143 							//  dv*d_zistepv | s/z
    144 	fmuls	C(d_zistepu)		// du*d_zistepu |
    145 							//  dv*d_tdivzstepv + du*d_tdivzstepu |
    146 							//  dv*d_zistepv | s/z
    147 	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
    148 							//  du*d_zistepu | dv*d_zistepv | s/z
    149 	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
    150 							//  du*d_tdivzstepu; stays in %st(1) at end
    151 	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
    152 	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
    153 
    154 	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
    155 	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
    156 	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
    157 							//  du*d_zistepu; stays in %st(0) at end
    158 							// 1/z | fp_64k | t/z | s/z
    159 
    160 	fld		%st(0)			// FIXME: get rid of stall on FMUL?
    161 	fmuls	fp_64kx64k		// 1/z*64k*64k | 1/z | fp_64k | t/z | s/z
    162 	fxch	%st(1)			// 1/z | 1/z*64k*64k | fp_64k | t/z | s/z
    163 
    164 //
    165 // calculate and clamp s & t
    166 //
    167 	fdivr	%st(0),%st(2)	// 1/z | 1/z*64k*64k | z*64k | t/z | s/z
    168 	fxch	%st(1)			// 1/z*64k*64k | 1/z | z*64k | t/z | s/z
    169 
    170 	fistpl	izi				// 0.32 fixed-point 1/z
    171 	movl	izi,%ebp		// FP stack is now 1/z | z*64k | t/z | s/z
    172 
    173 //
    174 // set pz to point to the first z-buffer pixel in the span
    175 //
    176 	rorl	$16,%ebp		// put upper 16 bits in low word
    177 	movl	sspan_t_v(%ebx),%eax
    178 	movl	%ebp,izi		// izi is kept word-swapped, like izistep
    179 	movl	sspan_t_u(%ebx),%ebp
    180 	imull	C(d_zrowbytes)	// %eax = v * d_zrowbytes (one-operand form, also overwrites %edx)
    181 	shll	$1,%ebp					// a word per pixel
    182 	addl	C(d_pzbuffer),%eax
    183 	addl	%ebp,%eax
    184 	movl	%eax,pz			// pz = d_pzbuffer + v*d_zrowbytes + u*2
    185 
    186 //
    187 // point %edi to the first pixel in the span
    188 //
    189 	movl	C(d_viewbuffer),%ebp
    190 	movl	sspan_t_v(%ebx),%eax
    191 	pushl	%ebx		// preserve spans pointer
    192 	movl	C(tadjust),%edx	// %edx and %esi get s and t added to them at LFDIVInFlight1
    193 	movl	C(sadjust),%esi
    194 	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
    195 	addl	%ebp,%edi
    196 	movl	sspan_t_u(%ebx),%ebp
    197 	addl	%ebp,%edi				// pdest = &pdestspan[scans->u];
    198 
    199 //
    200 // now start the FDIV for the end of the span
    201 //
    202 	cmpl	$8,%ecx
    203 	ja		LSetupNotLast1	// more than 8 pixels: a full 8-pixel segment comes first
    204 
    205 	decl	%ecx			// short span: %ecx is now the pixel count minus one
    206 	jz		LCleanup1		// if only one pixel, no need to start an FDIV
    207 	movl	%ecx,spancountminus1
    208 
    209 // finish up the s and t calcs
    210 	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
    211 
    212 	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
    213 	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
    214 	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
    215 	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
    216 	fxch	%st(1)			// s | t | 1/z | t/z | s/z
    217 	fistpl	s				// 1/z | t | t/z | s/z
    218 	fistpl	t				// 1/z | t/z | s/z
    219 
    220 	fildl	spancountminus1	// spancountminus1 | 1/z | t/z | s/z
    221 
    222 	flds	C(d_tdivzstepu)	// _d_tdivzstepu | spancountminus1
    223 	flds	C(d_zistepu)	// _d_zistepu | _d_tdivzstepu | spancountminus1
    224 	fmul	%st(2),%st(0)	// _d_zistepu*scm1 | _d_tdivzstepu | scm1
    225 	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
    226 	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
    227 	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
    228 	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
    229 							//  _d_tdivzstepu*scm1
    230 	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
    231 							//  _d_tdivzstepu*scm1
    232 	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
    233 	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
    234 	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
    235 	faddp	%st(0),%st(3)	// 1/z | t/z | s/z, each advanced to the last pixel
    236 
    237 	flds	fp_64k
    238 	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
    239 							//  overlap
    240 	jmp		LFDIVInFlight1
    241 
    242 LCleanup1:
    243 // finish up the s and t calcs
    244 	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
    245 
    246 	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
    247 	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
    248 	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
    249 	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
    250 	fxch	%st(1)			// s | t | 1/z | t/z | s/z
    251 	fistpl	s				// 1/z | t | t/z | s/z
    252 	fistpl	t				// 1/z | t/z | s/z
    253 	jmp		LFDIVInFlight1	// one-pixel span: no divide started, %ecx is 0
    254 
    255 	.align	4
    256 LSetupNotLast1:
    257 // finish up the s and t calcs
    258 	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
    259 
    260 	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
    261 	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
    262 	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
    263 	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
    264 	fxch	%st(1)			// s | t | 1/z | t/z | s/z
    265 	fistpl	s				// 1/z | t | t/z | s/z
    266 	fistpl	t				// 1/z | t/z | s/z
    267 
    268 	fadds	zi8stepu		// 1/z advanced by 8 pixels
    269 	fxch	%st(2)
    270 	fadds	sdivz8stepu		// s/z advanced by 8 pixels
    271 	fxch	%st(2)
    272 	flds	tdivz8stepu
    273 	faddp	%st(0),%st(2)	// t/z advanced by 8 pixels; 1/z | t/z | s/z
    274 	flds	fp_64k
    275 	fdiv	%st(1),%st(0)	// z = 1/1/z
    276 							// this is what we've gone to all this trouble to
    277 							//  overlap
    278 LFDIVInFlight1:
    279 // FP stack: z*64k | 1/z | t/z | s/z when a divide was started, or 1/z | t/z | s/z for a one-pixel span
    280 	addl	s,%esi			// %esi = sadjust + s
    281 	addl	t,%edx			// %edx = tadjust + t
    282 	movl	C(bbextents),%ebx
    283 	movl	C(bbextentt),%ebp
    284 	cmpl	%ebx,%esi
    285 	ja		LClampHighOrLow0	// unsigned test: catches both s > bbextents and s < 0
    286 LClampReentry0:
    287 	movl	%esi,s
    288 	movl	pbase,%ebx
    289 	shll	$16,%esi
    290 	cmpl	%ebp,%edx
    291 	movl	%esi,sfracf		// low 16 bits of s, left-justified (mov leaves the cmpl flags intact)
    292 	ja		LClampHighOrLow1	// unsigned test: catches both t > bbextentt and t < 0
    293 LClampReentry1:
    294 	movl	%edx,t
    295 	movl	s,%esi					// reload the clamped s
    296 	shll	$16,%edx
    297 	movl	t,%eax					// reload the clamped t
    298 	sarl	$16,%esi				// whole-texel part of s
    299 	movl	%edx,tfracf				// low 16 bits of t, left-justified
    300 
    301 //
    302 // calculate the texture starting address
    303 //
    304 	sarl	$16,%eax
    305 	addl	%ebx,%esi
    306 	imull	C(cachewidth),%eax		// (tfrac >> 16) * cachewidth
    307 	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
    308 									//           ((tfrac >> 16) * cachewidth);
    309 
    310 //
    311 // determine whether this is the last segment or not
    312 //
    313 	cmpl	$8,%ecx
    314 	jna		LLastSegment	// 8 or fewer: %ecx already holds count-1 here
    315 
    316 //
    317 // not the last segment; do full 8-wide segment
    318 //
    319 LNotLastSegment:
    320 
    321 //
    322 // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
    323 // get there
    324 //
    325 
    326 // pick up after the FDIV that was left in flight previously
    327 
    328 	fld		%st(0)			// duplicate it
    329 	fmul	%st(4),%st(0)	// s = s/z * z
    330 	fxch	%st(1)
    331 	fmul	%st(3),%st(0)	// t = t/z * z
    332 	fxch	%st(1)
    333 	fistpl	snext
    334 	fistpl	tnext			// FP stack is back to 1/z | t/z | s/z
    335 	movl	snext,%eax
    336 	movl	tnext,%edx
    337 
    338 	subl	$8,%ecx		// count off this segments' pixels
    339 	movl	C(sadjust),%ebp
    340 	pushl	%ecx		// remember count of remaining pixels
    341 	movl	C(tadjust),%ecx
    342 
    343 	addl	%eax,%ebp	// %ebp = sadjust + snext
    344 	addl	%edx,%ecx	// %ecx = tadjust + tnext
    345 
    346 	movl	C(bbextents),%eax
    347 	movl	C(bbextentt),%edx
    348 
    349 	cmpl	$2048,%ebp
    350 	jl		LClampLow2
    351 	cmpl	%eax,%ebp
    352 	ja		LClampHigh2
    353 LClampReentry2:
    354 
    355 	cmpl	$2048,%ecx
    356 	jl		LClampLow3
    357 	cmpl	%edx,%ecx
    358 	ja		LClampHigh3
    359 LClampReentry3:
    360 
    361 	movl	%ebp,snext
    362 	movl	%ecx,tnext
    363 
    364 	subl	s,%ebp		// s delta across the 8-pixel segment
    365 	subl	t,%ecx		// t delta across the 8-pixel segment
    366 	
    367 //
    368 // set up advancetable
    369 //
    370 	movl	%ecx,%eax
    371 	movl	%ebp,%edx
    372 	sarl	$19,%edx			// whole part of per-pixel sstep: (delta / 8) >> 16
    373 	movl	C(cachewidth),%ebx
    374 	sarl	$19,%eax			// whole part of per-pixel tstep: (delta / 8) >> 16
    375 	jz		LIsZero				// flags are from the sarl above; skip the multiply by 0
    376 	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
    377 LIsZero:
    378 	addl	%edx,%eax			// add in sstep
    379 								// (tstep >> 16) * cachewidth + (sstep >> 16);
    380 	movl	tfracf,%edx
    381 	movl	%eax,advancetable+4	// advance base in t
    382 	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
    383 								//  (sstep >> 16);
    384 	shll	$13,%ebp			// left-justify sstep fractional part
    385 	movl	%ebp,sstep
    386 	movl	sfracf,%ebx
    387 	shll	$13,%ecx			// left-justify tstep fractional part
    388 	movl	%eax,advancetable	// advance extra in t
    389 	movl	%ecx,tstep
    390 
    391 	movl	pz,%ecx				// %ecx = z-buffer pointer for the pixel loop
    392 	movl	izi,%ebp			// %ebp = word-swapped 1/z; %bp is its upper 16 bits
    393 
    394 	cmpw	(%ecx),%bp			// pixel 0: signed 16-bit compare of 1/z against the z-buffer
    395 	jl		Lp1					// 1/z is lower than the stored value: skip the pixel
    396 	movb	(%esi),%al			// get first source texel
    397 	cmpb	$(TRANSPARENT_COLOR),%al
    398 	jz		Lp1					// transparent texel: leave z-buffer and screen untouched
    399 	movw	%bp,(%ecx)			// update the z-buffer
    400 	movb	%al,(%edi)			// store first dest pixel
    401 Lp1:
    402 	addl	izistep,%ebp		// step 1/z (both operands are word-swapped)
    403 	adcl	$0,%ebp				// carry out of the upper half wraps round into bit 0
    404 	addl	tstep,%edx			// advance tfrac fractional part by tstep frac
    405 
    406 	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
    407 	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
    408 	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel
    409 
    410 	cmpw	2(%ecx),%bp			// pixel 1: same test / store / step sequence
    411 	jl		Lp2
    412 	movb	(%esi),%al
    413 	cmpb	$(TRANSPARENT_COLOR),%al
    414 	jz		Lp2
    415 	movw	%bp,2(%ecx)
    416 	movb	%al,1(%edi)
    417 Lp2:
    418 	addl	izistep,%ebp
    419 	adcl	$0,%ebp
    420 	addl	tstep,%edx
    421 	sbbl	%eax,%eax
    422 	addl	sstep,%ebx
    423 	adcl	advancetable+4(,%eax,4),%esi
    424 
    425 	cmpw	4(%ecx),%bp			// pixel 2
    426 	jl		Lp3
    427 	movb	(%esi),%al
    428 	cmpb	$(TRANSPARENT_COLOR),%al
    429 	jz		Lp3
    430 	movw	%bp,4(%ecx)
    431 	movb	%al,2(%edi)
    432 Lp3:
    433 	addl	izistep,%ebp
    434 	adcl	$0,%ebp
    435 	addl	tstep,%edx
    436 	sbbl	%eax,%eax
    437 	addl	sstep,%ebx
    438 	adcl	advancetable+4(,%eax,4),%esi
    439 
    440 	cmpw	6(%ecx),%bp			// pixel 3
    441 	jl		Lp4
    442 	movb	(%esi),%al
    443 	cmpb	$(TRANSPARENT_COLOR),%al
    444 	jz		Lp4
    445 	movw	%bp,6(%ecx)
    446 	movb	%al,3(%edi)
    447 Lp4:
    448 	addl	izistep,%ebp
    449 	adcl	$0,%ebp
    450 	addl	tstep,%edx
    451 	sbbl	%eax,%eax
    452 	addl	sstep,%ebx
    453 	adcl	advancetable+4(,%eax,4),%esi
    454 
    455 	cmpw	8(%ecx),%bp			// pixel 4
    456 	jl		Lp5
    457 	movb	(%esi),%al
    458 	cmpb	$(TRANSPARENT_COLOR),%al
    459 	jz		Lp5
    460 	movw	%bp,8(%ecx)
    461 	movb	%al,4(%edi)
    462 Lp5:
    463 	addl	izistep,%ebp
    464 	adcl	$0,%ebp
    465 	addl	tstep,%edx
    466 	sbbl	%eax,%eax
    467 	addl	sstep,%ebx
    468 	adcl	advancetable+4(,%eax,4),%esi
    469 
    470 //
    471 // start FDIV for end of next segment in flight, so it can overlap
    472 //
    473 	popl	%eax			// pixels remaining after this segment (pushed in LNotLastSegment)
    474 	cmpl	$8,%eax			// more than one segment after this?
    475 	ja		LSetupNotLast2	// yes
    476 
    477 	decl	%eax			// final segment follows: keep its count as count-1
    478 	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
    479 	movl	%eax,spancountminus1
    480 	fildl	spancountminus1	// spancountminus1 | 1/z | t/z | s/z
    481 
    482 	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
    483 	fmul	%st(1),%st(0)	// _d_zistepu*scm1 | scm1
    484 	flds	C(d_tdivzstepu)	// _d_tdivzstepu | _d_zistepu*scm1 | scm1
    485 	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
    486 	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
    487 	faddp	%st(0),%st(3)	// _d_tdivzstepu*scm1 | scm1
    488 	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
    489 	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
    490 	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
    491 	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
    492 	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
    493 	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
    494 	faddp	%st(0),%st(4)	// 64k
    495 
    496 	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
    497 							//  overlap
    498 	jmp		LFDIVInFlight2
    499 
    500 	.align	4
    501 LSetupNotLast2:
    502 	fadds	zi8stepu		// 1/z advanced by 8 pixels
    503 	fxch	%st(2)
    504 	fadds	sdivz8stepu		// s/z advanced by 8 pixels
    505 	fxch	%st(2)
    506 	flds	tdivz8stepu
    507 	faddp	%st(0),%st(2)	// t/z advanced by 8 pixels; 1/z | t/z | s/z
    508 	flds	fp_64k
    509 	fdiv	%st(1),%st(0)	// z = 1/1/z
    510 							// this is what we've gone to all this trouble to
    511 							//  overlap
    512 LFDIVInFlight2:
    513 	pushl	%eax			// save the remaining count again; %eax is scratch in the pixel code
    514 
    515 	cmpw	10(%ecx),%bp		// pixel 5: skip if 1/z is lower than the z-buffer value
    516 	jl		Lp6
    517 	movb	(%esi),%al
    518 	cmpb	$(TRANSPARENT_COLOR),%al
    519 	jz		Lp6					// transparent texel: nothing is written
    520 	movw	%bp,10(%ecx)
    521 	movb	%al,5(%edi)
    522 Lp6:
    523 	addl	izistep,%ebp		// step 1/z (word-swapped), wrapping the carry round
    524 	adcl	$0,%ebp
    525 	addl	tstep,%edx			// step t fraction; carry selects the advancetable entry
    526 	sbbl	%eax,%eax
    527 	addl	sstep,%ebx			// step s fraction; its carry is added in by the adcl
    528 	adcl	advancetable+4(,%eax,4),%esi
    529 
    530 	cmpw	12(%ecx),%bp		// pixel 6
    531 	jl		Lp7
    532 	movb	(%esi),%al
    533 	cmpb	$(TRANSPARENT_COLOR),%al
    534 	jz		Lp7
    535 	movw	%bp,12(%ecx)
    536 	movb	%al,6(%edi)
    537 Lp7:
    538 	addl	izistep,%ebp
    539 	adcl	$0,%ebp
    540 	addl	tstep,%edx
    541 	sbbl	%eax,%eax
    542 	addl	sstep,%ebx
    543 	adcl	advancetable+4(,%eax,4),%esi
    544 
    545 	cmpw	14(%ecx),%bp		// pixel 7
    546 	jl		Lp8
    547 	movb	(%esi),%al
    548 	cmpb	$(TRANSPARENT_COLOR),%al
    549 	jz		Lp8
    550 	movw	%bp,14(%ecx)
    551 	movb	%al,7(%edi)
    552 Lp8:
    553 	addl	izistep,%ebp
    554 	adcl	$0,%ebp
    555 	addl	tstep,%edx
    556 	sbbl	%eax,%eax
    557 	addl	sstep,%ebx
    558 	adcl	advancetable+4(,%eax,4),%esi
    559 
    560 	addl	$8,%edi				// move on 8 screen pixels
    561 	addl	$16,%ecx			// and 8 z-buffer words
    562 	movl	%edx,tfracf
    563 	movl	snext,%edx
    564 	movl	%ebx,sfracf
    565 	movl	tnext,%ebx
    566 	movl	%edx,s				// the segment's end point becomes the next start point
    567 	movl	%ebx,t
    568 
    569 	movl	%ecx,pz
    570 	movl	%ebp,izi
    571 
    572 	popl	%ecx				// retrieve count
    573 
    574 //
    575 // determine whether this is the last segment or not
    576 //
    577 	cmpl	$8,%ecx				// are there multiple segments remaining?
    578 	ja		LNotLastSegment		// yes
    579 
    580 //
    581 // last segment of scan
    582 //
    583 LLastSegment:
    584 
    585 //
    586 // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
    587 // get there. The number of pixels left is variable, and we want to land on the
    588 // last pixel, not step one past it, so we can't run into arithmetic problems
    589 //
    590 	testl	%ecx,%ecx		// %ecx = pixels in this segment minus one
    591 	jz		LNoSteps		// just draw the last pixel and we're done
    592 
    593 // pick up after the FDIV that was left in flight previously
    594 
    595 
    596 	fld		%st(0)			// duplicate it
    597 	fmul	%st(4),%st(0)	// s = s/z * z
    598 	fxch	%st(1)
    599 	fmul	%st(3),%st(0)	// t = t/z * z
    600 	fxch	%st(1)
    601 	fistpl	snext
    602 	fistpl	tnext			// FP stack is back to 1/z | t/z | s/z
    603 
    604 	movl	C(tadjust),%ebx
    605 	movl	C(sadjust),%eax
    606 
    607 	addl	snext,%eax		// %eax = sadjust + snext
    608 	addl	tnext,%ebx		// %ebx = tadjust + tnext
    609 
    610 	movl	C(bbextents),%ebp
    611 	movl	C(bbextentt),%edx
    612 
    613 	cmpl	$2048,%eax
    614 	jl		LClampLow4
    615 	cmpl	%ebp,%eax
    616 	ja		LClampHigh4
    617 LClampReentry4:
    618 	movl	%eax,snext
    619 
    620 	cmpl	$2048,%ebx
    621 	jl		LClampLow5
    622 	cmpl	%edx,%ebx
    623 	ja		LClampHigh5
    624 LClampReentry5:
    625 
    626 	cmpl	$1,%ecx			// exactly one step in this segment?
    627 	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
    628 							//  of the segment length
    629 	subl	s,%eax			// s delta across the segment
    630 	subl	t,%ebx			// t delta across the segment
    631 
    632 	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
    633 	addl	%ebx,%ebx		//  reciprocal yields 16.48
    634 	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
    635 	movl	%edx,%ebp		// keep the high dword of the product as sstep
    636 
    637 	movl	%ebx,%eax
    638 	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
    639 
    640 LSetEntryvec:
    641 //
    642 // set up advancetable
    643 // (on entry: %ebp = sstep, %edx = tstep, %ecx = index into spr8entryvec_table)
    644 	movl	spr8entryvec_table(,%ecx,4),%ebx
    645 	movl	%edx,%eax
    646 	pushl	%ebx				// entry point into code for RET later
    647 	movl	%ebp,%ecx
    648 	sarl	$16,%ecx			// sstep >>= 16;
    649 	movl	C(cachewidth),%ebx
    650 	sarl	$16,%edx			// tstep >>= 16;
    651 	jz		LIsZeroLast			// flags are from the sarl above; skip the multiply by 0
    652 	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
    653 LIsZeroLast:
    654 	addl	%ecx,%edx			// add in sstep
    655 								// (tstep >> 16) * cachewidth + (sstep >> 16);
    656 	movl	tfracf,%ecx
    657 	movl	%edx,advancetable+4	// advance base in t
    658 	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
    659 								//  (sstep >> 16);
    660 	shll	$16,%ebp			// left-justify sstep fractional part
    661 	movl	sfracf,%ebx
    662 	shll	$16,%eax			// left-justify tstep fractional part
    663 	movl	%edx,advancetable	// advance extra in t
    664 
    665 	movl	%eax,tstep
    666 	movl	%ebp,sstep
    667 	movl	%ecx,%edx			// %edx = tfracf, as the pixel code expects
    668 
    669 	movl	pz,%ecx				// %ecx = z-buffer pointer
    670 	movl	izi,%ebp			// %ebp = word-swapped 1/z
    671 
    672 	ret							// jump to the number-of-pixels handler
    673 
    674 //----------------------------------------
    675 
    676 LNoSteps:
    677 	movl	pz,%ecx
    678 	movl	izi,%ebp		// reload 1/z: a one-pixel span arrives here with %ebp = bbextentt
    679 	subl	$7,%edi			// adjust for hardwired offset
    680 	subl	$14,%ecx		// LEndSpan addresses the z-buffer at 14(%ecx)
    681 	jmp		LEndSpan		// single pixel left: draw it with no stepping
    682 
    683 LOnlyOneStep:
    684 	subl	s,%eax			// two pixels in the segment: the single step is the whole s delta
    685 	subl	t,%ebx			// likewise for t
    686 	movl	%eax,%ebp		// LSetEntryvec expects sstep in %ebp
    687 	movl	%ebx,%edx		//  and tstep in %edx
    688 	jmp		LSetEntryvec
    689 
    690 //----------------------------------------
    691 
    692 .globl	Spr8Entry2_8
    693 Spr8Entry2_8:
    694 	subl	$6,%edi		// adjust for hardwired offsets
    695 	subl	$12,%ecx
    696 // no texel preload here: LLEntry2_8 fetches (%esi) itself once its z test passes
    697 	jmp		LLEntry2_8
    698 
    699 //----------------------------------------
    700 
    701 .globl	Spr8Entry3_8
    702 Spr8Entry3_8:
    703 	subl	$5,%edi		// adjust for hardwired offsets
    704 	subl	$10,%ecx	// z-buffer pointer moves back by the same number of 2-byte entries
    705 	jmp		LLEntry3_8	// draws pixel slots 5, 6 and 7
    706 
    707 //----------------------------------------
    708 
    709 .globl	Spr8Entry4_8
    710 Spr8Entry4_8:
    711 	subl	$4,%edi		// adjust for hardwired offsets
    712 	subl	$8,%ecx
    713 	jmp		LLEntry4_8	// draws pixel slots 4 through 7
    714 
    715 //----------------------------------------
    716 
    717 .globl	Spr8Entry5_8
    718 Spr8Entry5_8:
    719 	subl	$3,%edi		// adjust for hardwired offsets
    720 	subl	$6,%ecx
    721 	jmp		LLEntry5_8	// draws pixel slots 3 through 7
    722 
    723 //----------------------------------------
    724 
    725 .globl	Spr8Entry6_8
    726 Spr8Entry6_8:
    727 	subl	$2,%edi		// adjust for hardwired offsets
    728 	subl	$4,%ecx
    729 	jmp		LLEntry6_8	// draws pixel slots 2 through 7
    730 
    731 //----------------------------------------
    732 
    733 .globl	Spr8Entry7_8
    734 Spr8Entry7_8:
    735 	decl	%edi		// adjust for hardwired offsets
    736 	subl	$2,%ecx
    737 	jmp		LLEntry7_8	// draws pixel slots 1 through 7
    738 
    739 //----------------------------------------
    740 
    741 .globl	Spr8Entry8_8
    742 Spr8Entry8_8:
    743 	cmpw	(%ecx),%bp			// slot 0: skip if 1/z is lower than the z-buffer value
    744 	jl		Lp9
    745 	movb	(%esi),%al
    746 	cmpb	$(TRANSPARENT_COLOR),%al
    747 	jz		Lp9					// transparent texel: nothing is written
    748 	movw	%bp,(%ecx)
    749 	movb	%al,(%edi)
    750 Lp9:
    751 	addl	izistep,%ebp		// step 1/z (word-swapped), wrapping the carry round
    752 	adcl	$0,%ebp
    753 	addl	tstep,%edx			// step t fraction
    754 	sbbl	%eax,%eax			// t carry becomes -1 or 0 to pick the advancetable entry
    755 	addl	sstep,%ebx			// step s fraction; its carry is added in by the adcl
    756 	adcl	advancetable+4(,%eax,4),%esi
    757 LLEntry7_8:
    758 	cmpw	2(%ecx),%bp			// slot 1
    759 	jl		Lp10
    760 	movb	(%esi),%al
    761 	cmpb	$(TRANSPARENT_COLOR),%al
    762 	jz		Lp10
    763 	movw	%bp,2(%ecx)
    764 	movb	%al,1(%edi)
    765 Lp10:
    766 	addl	izistep,%ebp
    767 	adcl	$0,%ebp
    768 	addl	tstep,%edx
    769 	sbbl	%eax,%eax
    770 	addl	sstep,%ebx
    771 	adcl	advancetable+4(,%eax,4),%esi
    772 LLEntry6_8:
    773 	cmpw	4(%ecx),%bp			// slot 2
    774 	jl		Lp11
    775 	movb	(%esi),%al
    776 	cmpb	$(TRANSPARENT_COLOR),%al
    777 	jz		Lp11
    778 	movw	%bp,4(%ecx)
    779 	movb	%al,2(%edi)
    780 Lp11:
    781 	addl	izistep,%ebp
    782 	adcl	$0,%ebp
    783 	addl	tstep,%edx
    784 	sbbl	%eax,%eax
    785 	addl	sstep,%ebx
    786 	adcl	advancetable+4(,%eax,4),%esi
    787 LLEntry5_8:
    788 	cmpw	6(%ecx),%bp			// slot 3
    789 	jl		Lp12
    790 	movb	(%esi),%al
    791 	cmpb	$(TRANSPARENT_COLOR),%al
    792 	jz		Lp12
    793 	movw	%bp,6(%ecx)
    794 	movb	%al,3(%edi)
    795 Lp12:
    796 	addl	izistep,%ebp
    797 	adcl	$0,%ebp
    798 	addl	tstep,%edx
    799 	sbbl	%eax,%eax
    800 	addl	sstep,%ebx
    801 	adcl	advancetable+4(,%eax,4),%esi
    802 LLEntry4_8:
    803 	cmpw	8(%ecx),%bp			// slot 4
    804 	jl		Lp13
    805 	movb	(%esi),%al
    806 	cmpb	$(TRANSPARENT_COLOR),%al
    807 	jz		Lp13
    808 	movw	%bp,8(%ecx)
    809 	movb	%al,4(%edi)
    810 Lp13:
    811 	addl	izistep,%ebp
    812 	adcl	$0,%ebp
    813 	addl	tstep,%edx
    814 	sbbl	%eax,%eax
    815 	addl	sstep,%ebx
    816 	adcl	advancetable+4(,%eax,4),%esi
    817 LLEntry3_8:
    818 	cmpw	10(%ecx),%bp		// slot 5
    819 	jl		Lp14
    820 	movb	(%esi),%al
    821 	cmpb	$(TRANSPARENT_COLOR),%al
    822 	jz		Lp14
    823 	movw	%bp,10(%ecx)
    824 	movb	%al,5(%edi)
    825 Lp14:
    826 	addl	izistep,%ebp
    827 	adcl	$0,%ebp
    828 	addl	tstep,%edx
    829 	sbbl	%eax,%eax
    830 	addl	sstep,%ebx
    831 	adcl	advancetable+4(,%eax,4),%esi
    832 LLEntry2_8:
    833 	cmpw	12(%ecx),%bp		// slot 6
    834 	jl		Lp15
    835 	movb	(%esi),%al
    836 	cmpb	$(TRANSPARENT_COLOR),%al
    837 	jz		Lp15
    838 	movw	%bp,12(%ecx)
    839 	movb	%al,6(%edi)
    840 Lp15:
    841 	addl	izistep,%ebp
    842 	adcl	$0,%ebp
    843 	addl	tstep,%edx
    844 	sbbl	%eax,%eax
    845 	addl	sstep,%ebx
    846 	adcl	advancetable+4(,%eax,4),%esi
    847 
    848 LEndSpan:
    849 	cmpw	14(%ecx),%bp	// slot 7, the final pixel: skip if 1/z is lower than the z-buffer value
    850 	jl		Lp16
    851 	movb	(%esi),%al		// fetch the texel for the final pixel of the span
    852 	cmpb	$(TRANSPARENT_COLOR),%al
    853 	jz		Lp16			// transparent texel: nothing is written
    854 	movw	%bp,14(%ecx)
    855 	movb	%al,7(%edi)
    856 Lp16:
    857 
    858 //
    859 // clear s/z, t/z, 1/z from FP stack
    860 //
    861 	fstp %st(0)
    862 	fstp %st(0)
    863 	fstp %st(0)
    864 
    865 	popl	%ebx				// restore spans pointer
    866 LNextSpan:
    867 	addl	$(sspan_t_size),%ebx // point to next span
    868 	movl	sspan_t_count(%ebx),%ecx
    869 	cmpl	$0,%ecx				// any more spans?
    870 	jg		LSpanLoop			// yes
    871 	jz		LNextSpan			// yes, but this one's empty
    872 // a negative count falls through: end of the span list
    873 	popl	%ebx				// restore register variables
    874 	popl	%esi
    875 	popl	%edi
    876 	popl	%ebp				// restore the caller's stack frame
    877 	ret
    878 
    879 #endif	// id386