matha.s (11341B)
/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.

This file is part of Quake III Arena source code.

Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
===========================================================================
*/
//
// math.s
// x86 assembly-language math routines.
//
// Syntax: AT&T (GNU as), run through the C preprocessor.
// Target: 32-bit x86, arguments on the stack, integer result in %eax.

#define GLQUAKE	1	// don't include unneeded defs
#include "qasm.h"


#if	id386

	.data

// Dispatch table for BoxOnPlaneSide, indexed by the plane signbits value
// (valid indices are 0..7 only).
	.align	4
Ljmptab:	.long	Lcase0, Lcase1, Lcase2, Lcase3
			.long	Lcase4, Lcase5, Lcase6, Lcase7

	.text

// TODO: rounding needed?

//----------------------------------------------------------------------
// Invert24To16
//
// In:    val at 4(%esp)
// Out:   %eax = (0x100 << 32) / val, unsigned 64/32 divide,
//        or 0xFFFFFFFF when val <= 0x100 as a signed compare.
//        That guard covers zero, values whose quotient would not fit
//        in 32 bits (divl would fault), and negative values.
// Clobb: %ecx, %edx, flags
//----------------------------------------------------------------------

// stack parameter offset
#define	val	4

.globl C(Invert24To16)
C(Invert24To16):

	movl	val(%esp),%ecx
	movl	$0x100,%edx		// 0x10000000000 as dividend
	cmpl	%edx,%ecx
	jle		LOutOfRange		// signed on purpose: rejects negatives too

	xorl	%eax,%eax		// low half of the dividend = 0
	divl	%ecx			// %eax = %edx:%eax / val

	ret

LOutOfRange:
	movl	$0xFFFFFFFF,%eax
	ret

#if 0

#define	in	4
#define out	8

	.align 2
.globl C(TransformVector)
C(TransformVector):
	movl	in(%esp),%eax
	movl	out(%esp),%edx

	flds	(%eax)		// in[0]
	fmuls	C(vright)	// in[0]*vright[0]
	flds	(%eax)		// in[0] | in[0]*vright[0]
	fmuls	C(vup)		// in[0]*vup[0] | in[0]*vright[0]
	flds	(%eax)		// in[0] | in[0]*vup[0] | in[0]*vright[0]
	fmuls	C(vpn)		// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]

	flds	4(%eax)		// in[1] | ...
	fmuls	C(vright)+4	// in[1]*vright[1] | ...
	flds	4(%eax)		// in[1] | in[1]*vright[1] | ...
	fmuls	C(vup)+4	// in[1]*vup[1] | in[1]*vright[1] | ...
	flds	4(%eax)		// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fmuls	C(vpn)+4	// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fxch	%st(2)		// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...

	faddp	%st(0),%st(5)	// in[1]*vup[1] | in[1]*vpn[1] | ...
	faddp	%st(0),%st(3)	// in[1]*vpn[1] | ...
	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum

	flds	8(%eax)		// in[2] | ...
	fmuls	C(vright)+8	// in[2]*vright[2] | ...
	flds	8(%eax)		// in[2] | in[2]*vright[2] | ...
	fmuls	C(vup)+8	// in[2]*vup[2] | in[2]*vright[2] | ...
	flds	8(%eax)		// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fmuls	C(vpn)+8	// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fxch	%st(2)		// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...

	faddp	%st(0),%st(5)	// in[2]*vup[2] | in[2]*vpn[2] | ...
	faddp	%st(0),%st(3)	// in[2]*vpn[2] | ...
	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum

	fstps	8(%edx)		// out[2]
	fstps	4(%edx)		// out[1]
	fstps	(%edx)		// out[0]

	ret

#endif

//----------------------------------------------------------------------
// BoxOnPlaneSide
//
// In:    emins at 4(%esp), emaxs at 8(%esp), p at 12(%esp) on entry
//        (the offsets below are +4 because %ebx is pushed first).
//        pl_normal, pl_dist and pl_signbits are not defined in this
//        file; presumably they are the plane field offsets supplied
//        by qasm.h.
// Out:   %eax = sides:
//            bit 0 set when dist1 >= p->dist
//            bit 1 set when dist2 <  p->dist
//        %eax = 1 when the signbits byte is not in 0..7.
// Regs:  %ebx = emaxs, %ecx = emins, %edx = p, %eax = signbits index
// Clobb: %eax, %ecx, %edx, flags, x87 stack (left empty on return);
//        %ebx is saved and restored on every exit path.
//----------------------------------------------------------------------

#define EMINS	4+4
#define EMAXS	4+8
#define P		4+12

	.align 2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
	pushl	%ebx

	movl	P(%esp),%edx
	movl	EMINS(%esp),%ecx
	xorl	%eax,%eax
	movl	EMAXS(%esp),%ebx
	movb	pl_signbits(%edx),%al
	cmpb	$8,%al
	jae		Lerror			// unsigned: bytes 0x80..0xFF must not reach Ljmptab
	flds	pl_normal(%edx)		// p->normal[0]
	fld		%st(0)				// p->normal[0] | p->normal[0]
// bk000422 - warning: missing prefix `*' in absolute indirect address, maybe misassembled!
// bk001129 - fix from Andrew Henderson, was: Ljmptab(,%eax,4)
	jmp		*Ljmptab(,%eax,4)


//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase0:
	fmuls	(%ebx)				// p->normal[0]*emaxs[0] | p->normal[0]
	flds	pl_normal+4(%edx)	// p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]
	fxch	%st(2)				// p->normal[0] | p->normal[0]*emaxs[0] |
								//  p->normal[1]
	fmuls	(%ecx)				// p->normal[0]*emins[0] |
								//  p->normal[0]*emaxs[0] | p->normal[1]
	fxch	%st(2)				// p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fld		%st(0)				// p->normal[1] | p->normal[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	4(%ebx)				// p->normal[1]*emaxs[1] | p->normal[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	flds	pl_normal+8(%edx)	// p->normal[2] | p->normal[1]*emaxs[1] |
								//  p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(2)				// p->normal[1] | p->normal[1]*emaxs[1] |
								//  p->normal[2] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	4(%ecx)				// p->normal[1]*emins[1] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[2] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(2)				// p->normal[2] | p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fld		%st(0)				// p->normal[2] | p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	8(%ebx)				// p->normal[2]*emaxs[2] |
								//  p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(5)				// p->normal[0]*emins[0] |
								//  p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[2]*emaxs[2]
	faddp	%st(0),%st(3)		//p->normal[2] |
								// p->normal[1]*emaxs[1] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	fmuls	8(%ecx)				//p->normal[2]*emins[2] |
								// p->normal[1]*emaxs[1] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	fxch	%st(1)				//p->normal[1]*emaxs[1] |
								// p->normal[2]*emins[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	faddp	%st(0),%st(3)		//p->normal[2]*emins[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
								// p->normal[2]*emaxs[2]
	fxch	%st(3)				//p->normal[2]*emaxs[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
								// p->normal[2]*emins[2]
	faddp	%st(0),%st(2)		//p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// dist1 | p->normal[2]*emins[2]

	jmp		LSetSides

// Lcase1..Lcase7 use the same instruction schedule and x87 stack layout
// as Lcase0; only the choice of emins (%ecx) or emaxs (%ebx) per
// component differs, as given by the formulas above each label.

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase1:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase2:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase3:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase4:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase5:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase6:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase7:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

LSetSides:

//	sides = 0;
//	if (dist1 >= p->dist)
//		sides = 1;
//	if (dist2 < p->dist)
//		sides |= 2;

	faddp	%st(0),%st(2)		// dist1 | dist2
	fcomps	pl_dist(%edx)		// compare dist1 with p->dist, pop
	xorl	%ecx,%ecx			// sides = 0
	fnstsw	%ax					// status of the dist1 compare
	fcomps	pl_dist(%edx)		// compare dist2 with p->dist, pop
	andb	$1,%ah				// C0 is bit 0 of %ah: set when dist1 < p->dist
	xorb	$1,%ah				// invert: 1 when dist1 >= p->dist
	addb	%ah,%cl

	fnstsw	%ax					// status of the dist2 compare
	andb	$1,%ah				// 1 when dist2 < p->dist
	addb	%ah,%ah				// move it to bit 1
	addb	%ah,%cl

//	return sides;

	popl	%ebx
	movl	%ecx,%eax	// return status

	ret


// signbits was outside 0..7, so there is no jump table entry for it.
// Nothing has been loaded on the x87 stack at this point.
Lerror:
	popl	%ebx			// balance the pushl at entry so ret pops the return address
	movl	$1,%eax			// immediate 1; without the $ this is a load from address 1
	ret

#endif	// id386