Quake-III-Arena

Quake III Arena GPL Source Release
Log | Files | Refs

matha.s (11341B)


      1 /*
      2 ===========================================================================
      3 Copyright (C) 1999-2005 Id Software, Inc.
      4 
      5 This file is part of Quake III Arena source code.
      6 
      7 Quake III Arena source code is free software; you can redistribute it
      8 and/or modify it under the terms of the GNU General Public License as
      9 published by the Free Software Foundation; either version 2 of the License,
     10 or (at your option) any later version.
     11 
     12 Quake III Arena source code is distributed in the hope that it will be
     13 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 GNU General Public License for more details.
     16 
     17 You should have received a copy of the GNU General Public License
     18 along with Quake III Arena source code; if not, write to the Free Software
     19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     20 ===========================================================================
     21 */
     22 //
     23 // matha.s
     24 // x86 assembly-language math routines.
     25 
     26 #define GLQUAKE	1	// don't include unneeded defs
     27 #include "qasm.h"
     28 
     29 
     30 #if	id386
     31 
     32 	.data
     33 
     34 	.align	4
     35 Ljmptab:				// BoxOnPlaneSide branch targets; entry i is Lcase<i>, selected by p->signbits
     36 	.long	Lcase0, Lcase1, Lcase2, Lcase3, Lcase4, Lcase5, Lcase6, Lcase7
     37 
     38 	.text
     39 
     40 // TODO: rounding needed?
     41 // Invert24To16(val): %eax = 0x10000000000 / val (unsigned divide), or 0xFFFFFFFF when val <= 0x100.
     42 #define	val	4			// offset of the only stack argument at entry
     43
     44 .globl C(Invert24To16)
     45 C(Invert24To16):
     46 	movl	$0x100,%edx		// high dword of the dividend (%edx:%eax)
     47 	movl	val(%esp),%ecx	// divisor
     48 	cmpl	$0x100,%ecx		// signed: negative inputs also take the out-of-range path
     49 	jle		LOutOfRange
     50
     51 	xorl	%eax,%eax		// low dword = 0, so %edx:%eax = 0x10000000000
     52 	divl	%ecx			// quotient fits in 32 bits because val > 0x100
     53 	ret
     54
     55 // val <= 0x100: the quotient would not fit in %eax (divl would fault),
     56 // so hand back the saturated value instead.
     57 LOutOfRange:
     58 	movl	$0xFFFFFFFF,%eax
     59 	ret
     60 
     61 #if 0
        // NOTE: everything down to the matching #endif is compiled out.
        //
        // TransformVector(in, out)
        //   in  (4(%esp) at entry): pointer to three floats
        //   out (8(%esp) at entry): pointer to three floats, written as
        //        out[0] = in[0]*vright[0] + in[1]*vright[1] + in[2]*vright[2]
        //        out[1] = in[0]*vup[0]    + in[1]*vup[1]    + in[2]*vup[2]
        //        out[2] = in[0]*vpn[0]    + in[1]*vpn[1]    + in[2]*vpn[2]
        // Uses %eax (in), %edx (out) and up to six x87 stack slots; every value
        // pushed is popped again before the ret.
        // The stack pictures in the comments list st(0) first.
     62 
     63 #define	in	4
     64 #define out	8
     65 
     66 	.align 2
     67 .globl C(TransformVector)
     68 C(TransformVector):
     69 	movl	in(%esp),%eax
     70 	movl	out(%esp),%edx
     71 
     72 	flds	(%eax)		// in[0]
     73 	fmuls	C(vright)		// in[0]*vright[0]
     74 	flds	(%eax)		// in[0] | in[0]*vright[0]
     75 	fmuls	C(vup)		// in[0]*vup[0] | in[0]*vright[0]
     76 	flds	(%eax)		// in[0] | in[0]*vup[0] | in[0]*vright[0]
     77 	fmuls	C(vpn)		// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
     78 
     79 	flds	4(%eax)		// in[1] | ...
     80 	fmuls	C(vright)+4	// in[1]*vright[1] | ...
     81 	flds	4(%eax)		// in[1] | in[1]*vright[1] | ...
     82 	fmuls	C(vup)+4		// in[1]*vup[1] | in[1]*vright[1] | ...
     83 	flds	4(%eax)		// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
     84 	fmuls	C(vpn)+4		// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
     85 	fxch	%st(2)		// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
     86 
        // Each faddp folds the top product into its running sum and pops it.
     87 	faddp	%st(0),%st(5)	// in[1]*vup[1] | in[1]*vpn[1] | ...
     88 	faddp	%st(0),%st(3)	// in[1]*vpn[1] | ...
     89 	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum
     90 
     91 	flds	8(%eax)		// in[2] | ...
     92 	fmuls	C(vright)+8	// in[2]*vright[2] | ...
     93 	flds	8(%eax)		// in[2] | in[2]*vright[2] | ...
     94 	fmuls	C(vup)+8		// in[2]*vup[2] | in[2]*vright[2] | ...
     95 	flds	8(%eax)		// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
     96 	fmuls	C(vpn)+8		// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
     97 	fxch	%st(2)		// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
     98 
     99 	faddp	%st(0),%st(5)	// in[2]*vup[2] | in[2]*vpn[2] | ...
    100 	faddp	%st(0),%st(3)	// in[2]*vpn[2] | ...
    101 	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum
    102 
        // Store and pop the three sums, top of stack first.
    103 	fstps	8(%edx)		// out[2]
    104 	fstps	4(%edx)		// out[1]
    105 	fstps	(%edx)		// out[0]
    106 
    107 	ret
    108 
    109 #endif
    110 
    111 #define EMINS	4+4
    112 #define EMAXS	4+8
    113 #define P		4+12
        // The offsets above are relative to %esp after the pushl %ebx below
        // (return address + saved %ebx come before the first argument).
        //
        // BoxOnPlaneSide(emins, emaxs, p)
        //   emins, emaxs : pointers to three floats each
        //   p            : pointer to a plane record; pl_normal, pl_dist and
        //                  pl_signbits are its field offsets (defined outside
        //                  this file)
        //   result %eax  : bit 0 set when dist1 >= p->dist,
        //                  bit 1 set when dist2 <  p->dist (computed at LSetSides)
        // Register roles: %edx = p, %ecx = emins, %ebx = emaxs.  %ebx is saved
        // here and restored at LSetSides.
        // p->signbits (0..7) selects, through Ljmptab, which mix of emins/emaxs
        // components goes into dist1 and dist2.
    114 
    115 	.align 2
    116 .globl C(BoxOnPlaneSide)
    117 C(BoxOnPlaneSide):
    118 	pushl	%ebx
    119 
    120 	movl	P(%esp),%edx
    121 	movl	EMINS(%esp),%ecx
    122 	xorl	%eax,%eax			// clear the upper bits; %eax becomes the table index
    123 	movl	EMAXS(%esp),%ebx
    124 	movb	pl_signbits(%edx),%al
    125 	cmpb	$8,%al
        	// Unsigned test: signbits is a byte, and with a signed jge any value
        	// >= 0x80 compares as negative, passes the check and indexes far
        	// beyond the eight entries of Ljmptab.
    126 	jae		Lerror
        	// Both Lcase blocks expect p->normal[0] in st(0) and st(1) on entry.
    127 	flds	pl_normal(%edx)		// p->normal[0]
    128 	fld		%st(0)				// p->normal[0] | p->normal[0]
    129 	// bk000422 - warning: missing prefix `*' in absolute indirect address, maybe misassembled!
    130 	// bk001129 - fix from Andrew Henderson, was: Ljmptab(,%eax,4)
    131 	jmp		*Ljmptab(,%eax,4)
    132 
    133 
    134 //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
    135 //dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  The stack pictures below list
        // st(0) first.  The two sums are built interleaved; the last dist2
        // addition is left for LSetSides.
    136 Lcase0:
    137 	fmuls	(%ebx)				// p->normal[0]*emaxs[0] | p->normal[0]
    138 	flds	pl_normal+4(%edx)	// p->normal[1] | p->normal[0]*emaxs[0] |
    139 								//  p->normal[0]
    140 	fxch	%st(2)				// p->normal[0] | p->normal[0]*emaxs[0] |
    141 								//  p->normal[1]
    142 	fmuls	(%ecx)				// p->normal[0]*emins[0] |
    143 								//  p->normal[0]*emaxs[0] | p->normal[1]
    144 	fxch	%st(2)				// p->normal[1] | p->normal[0]*emaxs[0] |
    145 								//  p->normal[0]*emins[0]
    146 	fld		%st(0)				// p->normal[1] | p->normal[1] |
    147 								//  p->normal[0]*emaxs[0] |
    148 								//  p->normal[0]*emins[0]
    149 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1] | p->normal[1] |
    150 								//  p->normal[0]*emaxs[0] |
    151 								//  p->normal[0]*emins[0]
    152 	flds	pl_normal+8(%edx)	// p->normal[2] | p->normal[1]*emaxs[1] |
    153 								//  p->normal[1] | p->normal[0]*emaxs[0] |
    154 								//  p->normal[0]*emins[0]
    155 	fxch	%st(2)				// p->normal[1] | p->normal[1]*emaxs[1] |
    156 								//  p->normal[2] | p->normal[0]*emaxs[0] |
    157 								//  p->normal[0]*emins[0]
    158 	fmuls	4(%ecx)				// p->normal[1]*emins[1] |
    159 								//  p->normal[1]*emaxs[1] |
    160 								//  p->normal[2] | p->normal[0]*emaxs[0] |
    161 								//  p->normal[0]*emins[0]
    162 	fxch	%st(2)				// p->normal[2] | p->normal[1]*emaxs[1] |
    163 								//  p->normal[1]*emins[1] |
    164 								//  p->normal[0]*emaxs[0] |
    165 								//  p->normal[0]*emins[0]
    166 	fld		%st(0)				// p->normal[2] | p->normal[2] |
    167 								//  p->normal[1]*emaxs[1] |
    168 								//  p->normal[1]*emins[1] |
    169 								//  p->normal[0]*emaxs[0] |
    170 								//  p->normal[0]*emins[0]
    171 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2] |
    172 								//  p->normal[2] |
    173 								//  p->normal[1]*emaxs[1] |
    174 								//  p->normal[1]*emins[1] |
    175 								//  p->normal[0]*emaxs[0] |
    176 								//  p->normal[0]*emins[0]
    177 	fxch	%st(5)				// p->normal[0]*emins[0] |
    178 								//  p->normal[2] |
    179 								//  p->normal[1]*emaxs[1] |
    180 								//  p->normal[1]*emins[1] |
    181 								//  p->normal[0]*emaxs[0] |
    182 								//  p->normal[2]*emaxs[2]
    183 	faddp	%st(0),%st(3)		//p->normal[2] |
    184 								// p->normal[1]*emaxs[1] |
    185 								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
    186 								// p->normal[0]*emaxs[0] |
    187 								// p->normal[2]*emaxs[2]
    188 	fmuls	8(%ecx)				//p->normal[2]*emins[2] |
    189 								// p->normal[1]*emaxs[1] |
    190 								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
    191 								// p->normal[0]*emaxs[0] |
    192 								// p->normal[2]*emaxs[2]
    193 	fxch	%st(1)				//p->normal[1]*emaxs[1] |
    194 								// p->normal[2]*emins[2] |
    195 								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
    196 								// p->normal[0]*emaxs[0] |
    197 								// p->normal[2]*emaxs[2]
    198 	faddp	%st(0),%st(3)		//p->normal[2]*emins[2] |
    199 								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
    200 								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
    201 								// p->normal[2]*emaxs[2]
    202 	fxch	%st(3)				//p->normal[2]*emaxs[2] |
    203 								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
    204 								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
    205 								// p->normal[2]*emins[2]
    206 	faddp	%st(0),%st(2)		//p->normal[1]*emins[1]+p->normal[0]*emins[0]|
    207 								// dist1 | p->normal[2]*emins[2]
    208 
    209 	jmp		LSetSides
    210 
    211 //dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
    212 //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
    213 Lcase1:
    214 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist1 term)
    215 	flds	pl_normal+4(%edx)	// push p->normal[1]
    216 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    217 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist2 term)
    218 	fxch	%st(2)				// p->normal[1] to st(0)
    219 	fld		%st(0)				// duplicate p->normal[1]
    220 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist1 term)
    221 	flds	pl_normal+8(%edx)	// push p->normal[2]
    222 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    223 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist2 term)
    224 	fxch	%st(2)				// p->normal[2] to st(0)
    225 	fld		%st(0)				// duplicate p->normal[2]
    226 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist1 term)
    227 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    228 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    229 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist2 term)
    230 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    231 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    232 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    233 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    234 
    235 	jmp		LSetSides
    236 
    237 //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
    238 //dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
    239 Lcase2:
    240 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist1 term)
    241 	flds	pl_normal+4(%edx)	// push p->normal[1]
    242 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    243 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist2 term)
    244 	fxch	%st(2)				// p->normal[1] to st(0)
    245 	fld		%st(0)				// duplicate p->normal[1]
    246 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist1 term)
    247 	flds	pl_normal+8(%edx)	// push p->normal[2]
    248 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    249 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist2 term)
    250 	fxch	%st(2)				// p->normal[2] to st(0)
    251 	fld		%st(0)				// duplicate p->normal[2]
    252 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist1 term)
    253 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    254 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    255 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist2 term)
    256 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    257 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    258 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    259 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    260 
    261 	jmp		LSetSides
    262 
    263 //dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
    264 //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
    265 Lcase3:
    266 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist1 term)
    267 	flds	pl_normal+4(%edx)	// push p->normal[1]
    268 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    269 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist2 term)
    270 	fxch	%st(2)				// p->normal[1] to st(0)
    271 	fld		%st(0)				// duplicate p->normal[1]
    272 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist1 term)
    273 	flds	pl_normal+8(%edx)	// push p->normal[2]
    274 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    275 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist2 term)
    276 	fxch	%st(2)				// p->normal[2] to st(0)
    277 	fld		%st(0)				// duplicate p->normal[2]
    278 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist1 term)
    279 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    280 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    281 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist2 term)
    282 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    283 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    284 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    285 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    286 
    287 	jmp		LSetSides
    288 
    289 //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
    290 //dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
    291 Lcase4:
    292 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist1 term)
    293 	flds	pl_normal+4(%edx)	// push p->normal[1]
    294 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    295 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist2 term)
    296 	fxch	%st(2)				// p->normal[1] to st(0)
    297 	fld		%st(0)				// duplicate p->normal[1]
    298 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist1 term)
    299 	flds	pl_normal+8(%edx)	// push p->normal[2]
    300 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    301 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist2 term)
    302 	fxch	%st(2)				// p->normal[2] to st(0)
    303 	fld		%st(0)				// duplicate p->normal[2]
    304 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist1 term)
    305 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    306 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    307 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist2 term)
    308 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    309 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    310 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    311 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    312 
    313 	jmp		LSetSides
    314 
    315 //dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
    316 //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
    317 Lcase5:
    318 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist1 term)
    319 	flds	pl_normal+4(%edx)	// push p->normal[1]
    320 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    321 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist2 term)
    322 	fxch	%st(2)				// p->normal[1] to st(0)
    323 	fld		%st(0)				// duplicate p->normal[1]
    324 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist1 term)
    325 	flds	pl_normal+8(%edx)	// push p->normal[2]
    326 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    327 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist2 term)
    328 	fxch	%st(2)				// p->normal[2] to st(0)
    329 	fld		%st(0)				// duplicate p->normal[2]
    330 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist1 term)
    331 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    332 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    333 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist2 term)
    334 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    335 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    336 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    337 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    338 
    339 	jmp		LSetSides
    340 
    341 //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
    342 //dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
    343 Lcase6:
    344 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist1 term)
    345 	flds	pl_normal+4(%edx)	// push p->normal[1]
    346 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    347 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist2 term)
    348 	fxch	%st(2)				// p->normal[1] to st(0)
    349 	fld		%st(0)				// duplicate p->normal[1]
    350 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist1 term)
    351 	flds	pl_normal+8(%edx)	// push p->normal[2]
    352 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    353 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist2 term)
    354 	fxch	%st(2)				// p->normal[2] to st(0)
    355 	fld		%st(0)				// duplicate p->normal[2]
    356 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist1 term)
    357 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    358 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    359 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist2 term)
    360 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    361 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    362 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    363 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    364 
    365 	jmp		LSetSides
    366 
    367 //dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
    368 //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
        // Entered through Ljmptab with p->normal[0] in st(0) and st(1),
        // %ebx = emaxs, %ecx = emins, %edx = p.  For each axis the first
        // multiply produces the dist1 term and the second the dist2 term.
        // This is the last case, so it has no jmp and runs straight on into
        // LSetSides.
    369 Lcase7:
    370 	fmuls	(%ecx)				// p->normal[0]*emins[0]  (dist1 term)
    371 	flds	pl_normal+4(%edx)	// push p->normal[1]
    372 	fxch	%st(2)				// spare copy of p->normal[0] to st(0)
    373 	fmuls	(%ebx)				// p->normal[0]*emaxs[0]  (dist2 term)
    374 	fxch	%st(2)				// p->normal[1] to st(0)
    375 	fld		%st(0)				// duplicate p->normal[1]
    376 	fmuls	4(%ecx)				// p->normal[1]*emins[1]  (dist1 term)
    377 	flds	pl_normal+8(%edx)	// push p->normal[2]
    378 	fxch	%st(2)				// spare copy of p->normal[1] to st(0)
    379 	fmuls	4(%ebx)				// p->normal[1]*emaxs[1]  (dist2 term)
    380 	fxch	%st(2)				// p->normal[2] to st(0)
    381 	fld		%st(0)				// duplicate p->normal[2]
    382 	fmuls	8(%ecx)				// p->normal[2]*emins[2]  (dist1 term)
    383 	fxch	%st(5)				// axis-0 dist2 term to st(0)
    384 	faddp	%st(0),%st(3)		// add it to the axis-1 dist2 term, pop
    385 	fmuls	8(%ebx)				// p->normal[2]*emaxs[2]  (dist2 term)
    386 	fxch	%st(1)				// axis-1 dist1 term to st(0)
    387 	faddp	%st(0),%st(3)		// add it to the axis-0 dist1 term, pop
    388 	fxch	%st(3)				// axis-2 dist1 term to st(0)
    389 	faddp	%st(0),%st(2)		// dist1 done: dist2 axes 0+1 | dist1 | dist2 axis 2
    390 
    391 LSetSides:
        // Common tail of all Lcase blocks.  On arrival the x87 stack holds
        //   st(0) = two summed dist2 terms | st(1) = dist1 | st(2) = last dist2 term
        // and %edx = p.  %eax and %ecx are scratch from here on.
    392 
    393 //	sides = 0;
    394 //	if (dist1 >= p->dist)
    395 //		sides = 1;
    396 //	if (dist2 < p->dist)
    397 //		sides |= 2;
    398 
    399 	faddp	%st(0),%st(2)		// dist1 | dist2
    400 	fcomps	pl_dist(%edx)		// compare dist1 with p->dist, pop -> dist2
    401 	xorl	%ecx,%ecx			// sides = 0
    402 	fnstsw	%ax					// status word of the dist1 compare -> %ax
    403 	fcomps	pl_dist(%edx)		// compare dist2 with p->dist, pop (issued early; %ax already holds the first result)
    404 	andb	$1,%ah				// keep C0 (status bit 8 = bit 0 of %ah): set when dist1 < p->dist or unordered
    405 	xorb	$1,%ah				// invert: 1 when dist1 >= p->dist
    406 	addb	%ah,%cl				// sides = 1 in that case
    407 
    408 	fnstsw	%ax					// status word of the dist2 compare -> %ax
    409 	andb	$1,%ah				// C0: 1 when dist2 < p->dist (or unordered)
    410 	addb	%ah,%ah				// 1 -> 2
    411 	addb	%ah,%cl				// sides |= 2
    412 
    413 //	return sides;
    414 
    415 	popl	%ebx				// undo the pushl %ebx done at function entry
    416 	movl	%ecx,%eax	// return status
    417 
    418 	ret
    419 
    420 
    421 Lerror:
        // Reached from the entry code when p->signbits is not a valid index
        // into Ljmptab.  Nothing has been pushed on the x87 stack yet, but the
        // %ebx saved at function entry is still on top of the CPU stack.
        //
        // Two fixes relative to the original sequence:
        //  - pop %ebx first; otherwise ret would use the saved %ebx value as
        //    the return address.
        //  - load the immediate $1; without the `$`, `movl 1, %eax` is a load
        //    from absolute address 1.
        // NOTE(review): returning 1 is inferred from the original operand; if
        // the faulting load was meant as a deliberate trap, replace the movl
        // with an explicit trap instead.
        	popl	%ebx
    422 	movl	$1, %eax
    423 	ret
    424 
    425 #endif	// id386