r_part.c (16587B)
1 /* 2 Copyright (C) 1997-2001 Id Software, Inc. 3 4 This program is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public License 6 as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13 See the GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 19 */ 20 #include "r_local.h" 21 22 vec3_t r_pright, r_pup, r_ppn; 23 24 #define PARTICLE_33 0 25 #define PARTICLE_66 1 26 #define PARTICLE_OPAQUE 2 27 28 typedef struct 29 { 30 particle_t *particle; 31 int level; 32 int color; 33 } partparms_t; 34 35 static partparms_t partparms; 36 37 #if id386 && !defined __linux__ 38 39 static unsigned s_prefetch_address; 40 41 /* 42 ** BlendParticleXX 43 ** 44 ** Inputs: 45 ** EAX = color 46 ** EDI = pdest 47 ** 48 ** Scratch: 49 ** EBX = scratch (dstcolor) 50 ** EBP = scratch 51 ** 52 ** Outputs: 53 ** none 54 */ 55 __declspec(naked) void BlendParticle33( void ) 56 { 57 // return vid.alphamap[color + dstcolor*256]; 58 __asm mov ebp, vid.alphamap 59 __asm xor ebx, ebx 60 61 __asm mov bl, byte ptr [edi] 62 __asm shl ebx, 8 63 64 __asm add ebp, ebx 65 __asm add ebp, eax 66 67 __asm mov al, byte ptr [ebp] 68 69 __asm mov byte ptr [edi], al 70 71 __asm ret 72 } 73 74 __declspec(naked) void BlendParticle66( void ) 75 { 76 // return vid.alphamap[pcolor*256 + dstcolor]; 77 __asm mov ebp, vid.alphamap 78 __asm xor ebx, ebx 79 80 __asm shl eax, 8 81 __asm mov bl, byte ptr [edi] 82 83 __asm add ebp, ebx 84 __asm add ebp, eax 85 86 __asm mov al, byte ptr [ebp] 87 88 __asm mov byte ptr [edi], al 89 90 __asm ret 91 } 92 93 __declspec(naked) void BlendParticle100( void ) 94 { 95 __asm mov byte ptr [edi], al 96 __asm ret 97 } 98 99 /* 100 ** R_DrawParticle (asm version) 101 ** 102 ** Since we use __declspec( naked ) we don't have a stack frame 103 ** that we can use. Since I want to reserve EBP anyway, I tossed 104 ** all the important variables into statics. This routine isn't 105 ** meant to be re-entrant, so this shouldn't cause any problems 106 ** other than a slightly higher global memory footprint. 107 ** 108 */ 109 __declspec(naked) void R_DrawParticle( void ) 110 { 111 static vec3_t local, transformed; 112 static float zi; 113 static int u, v, tmp; 114 static short izi; 115 static int ebpsave; 116 117 static byte (*blendfunc)(void); 118 119 /* 120 ** must be memvars since x86 can't load constants 121 ** directly. I guess I could use fld1, but that 122 ** actually costs one more clock than fld [one]! 123 */ 124 static float particle_z_clip = PARTICLE_Z_CLIP; 125 static float one = 1.0F; 126 static float point_five = 0.5F; 127 static float eight_thousand_hex = 0x8000; 128 129 /* 130 ** save trashed variables 131 */ 132 __asm mov ebpsave, ebp 133 __asm push esi 134 __asm push edi 135 136 /* 137 ** transform the particle 138 */ 139 // VectorSubtract (pparticle->origin, r_origin, local); 140 __asm mov esi, partparms.particle 141 __asm fld dword ptr [esi+0] ; p_o.x 142 __asm fsub dword ptr [r_origin+0] ; p_o.x-r_o.x 143 __asm fld dword ptr [esi+4] ; p_o.y | p_o.x-r_o.x 144 __asm fsub dword ptr [r_origin+4] ; p_o.y-r_o.y | p_o.x-r_o.x 145 __asm fld dword ptr [esi+8] ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x 146 __asm fsub dword ptr [r_origin+8] ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x 147 __asm fxch st(2) ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z 148 __asm fstp dword ptr [local+0] ; p_o.y-r_o.y | p_o.z-r_o.z 149 __asm fstp dword ptr [local+4] ; p_o.z-r_o.z 150 __asm fstp dword ptr [local+8] ; (empty) 151 152 // transformed[0] = DotProduct(local, r_pright); 153 // transformed[1] = DotProduct(local, r_pup); 154 // transformed[2] = DotProduct(local, r_ppn); 155 __asm fld dword ptr [local+0] ; l.x 156 __asm fmul dword ptr [r_pright+0] ; l.x*pr.x 157 __asm fld dword ptr [local+4] ; l.y | l.x*pr.x 158 __asm fmul dword ptr [r_pright+4] ; l.y*pr.y | l.x*pr.x 159 __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x 160 __asm fmul dword ptr [r_pright+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x 161 __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z 162 __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z 163 __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z 164 __asm fstp dword ptr [transformed+0] ; (empty) 165 166 __asm fld dword ptr [local+0] ; l.x 167 __asm fmul dword ptr [r_pup+0] ; l.x*pr.x 168 __asm fld dword ptr [local+4] ; l.y | l.x*pr.x 169 __asm fmul dword ptr [r_pup+4] ; l.y*pr.y | l.x*pr.x 170 __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x 171 __asm fmul dword ptr [r_pup+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x 172 __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z 173 __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z 174 __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z 175 __asm fstp dword ptr [transformed+4] ; (empty) 176 177 __asm fld dword ptr [local+0] ; l.x 178 __asm fmul dword ptr [r_ppn+0] ; l.x*pr.x 179 __asm fld dword ptr [local+4] ; l.y | l.x*pr.x 180 __asm fmul dword ptr [r_ppn+4] ; l.y*pr.y | l.x*pr.x 181 __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x 182 __asm fmul dword ptr [r_ppn+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x 183 __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z 184 __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z 185 __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z 186 __asm fstp dword ptr [transformed+8] ; (empty) 187 188 /* 189 ** make sure that the transformed particle is not in front of 190 ** the particle Z clip plane. We can do the comparison in 191 ** integer space since we know the sign of one of the inputs 192 ** and can figure out the sign of the other easily enough. 193 */ 194 // if (transformed[2] < PARTICLE_Z_CLIP) 195 // return; 196 197 __asm mov eax, dword ptr [transformed+8] 198 __asm and eax, eax 199 __asm js end 200 __asm cmp eax, particle_z_clip 201 __asm jl end 202 203 /* 204 ** project the point by initiating the 1/z calc 205 */ 206 // zi = 1.0 / transformed[2]; 207 __asm fld one 208 __asm fdiv dword ptr [transformed+8] 209 210 /* 211 ** bind the blend function pointer to the appropriate blender 212 ** while we're dividing 213 */ 214 //if ( level == PARTICLE_33 ) 215 // blendparticle = BlendParticle33; 216 //else if ( level == PARTICLE_66 ) 217 // blendparticle = BlendParticle66; 218 //else 219 // blendparticle = BlendParticle100; 220 221 __asm cmp partparms.level, PARTICLE_66 222 __asm je blendfunc_66 223 __asm jl blendfunc_33 224 __asm lea ebx, BlendParticle100 225 __asm jmp done_selecting_blend_func 226 blendfunc_33: 227 __asm lea ebx, BlendParticle33 228 __asm jmp done_selecting_blend_func 229 blendfunc_66: 230 __asm lea ebx, BlendParticle66 231 done_selecting_blend_func: 232 __asm mov blendfunc, ebx 233 234 // prefetch the next particle 235 __asm mov ebp, s_prefetch_address 236 __asm mov ebp, [ebp] 237 238 // finish the above divide 239 __asm fstp zi 240 241 // u = (int)(xcenter + zi * transformed[0] + 0.5); 242 // v = (int)(ycenter - zi * transformed[1] + 0.5); 243 __asm fld zi ; zi 244 __asm fmul dword ptr [transformed+0] ; zi * transformed[0] 245 __asm fld zi ; zi | zi * transformed[0] 246 __asm fmul dword ptr [transformed+4] ; zi * transformed[1] | zi * transformed[0] 247 __asm fxch st(1) ; zi * transformed[0] | zi * transformed[1] 248 __asm fadd xcenter ; xcenter + zi * transformed[0] | zi * transformed[1] 249 __asm fxch st(1) ; zi * transformed[1] | xcenter + zi * transformed[0] 250 __asm fld ycenter ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0] 251 __asm fsubrp st(1), st(0) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] 252 __asm fxch st(1) ; xcenter + zi * transformed[0] | ycenter + zi * transformed[1] 253 __asm fadd point_five ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1] 254 __asm fxch st(1) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5 255 __asm fadd point_five ; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5 256 __asm fxch st(1) ; u | v 257 __asm fistp dword ptr [u] ; v 258 __asm fistp dword ptr [v] ; (empty) 259 260 /* 261 ** clip out the particle 262 */ 263 264 // if ((v > d_vrectbottom_particle) || 265 // (u > d_vrectright_particle) || 266 // (v < d_vrecty) || 267 // (u < d_vrectx)) 268 // { 269 // return; 270 // } 271 272 __asm mov ebx, u 273 __asm mov ecx, v 274 __asm cmp ecx, d_vrectbottom_particle 275 __asm jg end 276 __asm cmp ecx, d_vrecty 277 __asm jl end 278 __asm cmp ebx, d_vrectright_particle 279 __asm jg end 280 __asm cmp ebx, d_vrectx 281 __asm jl end 282 283 /* 284 ** compute addresses of zbuffer, framebuffer, and 285 ** compute the Z-buffer reference value. 286 ** 287 ** EBX = U 288 ** ECX = V 289 ** 290 ** Outputs: 291 ** ESI = Z-buffer address 292 ** EDI = framebuffer address 293 */ 294 // ESI = d_pzbuffer + (d_zwidth * v) + u; 295 __asm mov esi, d_pzbuffer ; esi = d_pzbuffer 296 __asm mov eax, d_zwidth ; eax = d_zwidth 297 __asm mul ecx ; eax = d_zwidth*v 298 __asm add eax, ebx ; eax = d_zwidth*v+u 299 __asm shl eax, 1 ; eax = 2*(d_zwidth*v+u) 300 __asm add esi, eax ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u ) 301 302 // initiate 303 // izi = (int)(zi * 0x8000); 304 __asm fld zi 305 __asm fmul eight_thousand_hex 306 307 // EDI = pdest = d_viewbuffer + d_scantable[v] + u; 308 __asm lea edi, [d_scantable+ecx*4] 309 __asm mov edi, [edi] 310 __asm add edi, d_viewbuffer 311 __asm add edi, ebx 312 313 // complete 314 // izi = (int)(zi * 0x8000); 315 __asm fistp tmp 316 __asm mov eax, tmp 317 __asm mov izi, ax 318 319 /* 320 ** determine the screen area covered by the particle, 321 ** which also means clamping to a min and max 322 */ 323 // pix = izi >> d_pix_shift; 324 __asm xor edx, edx 325 __asm mov dx, izi 326 __asm mov ecx, d_pix_shift 327 __asm shr dx, cl 328 329 // if (pix < d_pix_min) 330 // pix = d_pix_min; 331 __asm cmp edx, d_pix_min 332 __asm jge check_pix_max 333 __asm mov edx, d_pix_min 334 __asm jmp skip_pix_clamp 335 336 // else if (pix > d_pix_max) 337 // pix = d_pix_max; 338 check_pix_max: 339 __asm cmp edx, d_pix_max 340 __asm jle skip_pix_clamp 341 __asm mov edx, d_pix_max 342 343 skip_pix_clamp: 344 345 /* 346 ** render the appropriate pixels 347 ** 348 ** ECX = count (used for inner loop) 349 ** EDX = count (used for outer loop) 350 ** ESI = zbuffer 351 ** EDI = framebuffer 352 */ 353 __asm mov ecx, edx 354 355 __asm cmp ecx, 1 356 __asm ja over 357 358 over: 359 360 /* 361 ** at this point: 362 ** 363 ** ECX = count 364 */ 365 __asm push ecx 366 __asm push edi 367 __asm push esi 368 369 top_of_pix_vert_loop: 370 371 top_of_pix_horiz_loop: 372 373 // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth) 374 // { 375 // for (i=0 ; i<pix ; i++) 376 // { 377 // if (pz[i] <= izi) 378 // { 379 // pdest[i] = blendparticle( color, pdest[i] ); 380 // } 381 // } 382 // } 383 __asm xor eax, eax 384 385 __asm mov ax, word ptr [esi] 386 387 __asm cmp ax, izi 388 __asm jg end_of_horiz_loop 389 390 #if ENABLE_ZWRITES_FOR_PARTICLES 391 __asm mov bp, izi 392 __asm mov word ptr [esi], bp 393 #endif 394 395 __asm mov eax, partparms.color 396 397 __asm call [blendfunc] 398 399 __asm add edi, 1 400 __asm add esi, 2 401 402 end_of_horiz_loop: 403 404 __asm dec ecx 405 __asm jnz top_of_pix_horiz_loop 406 407 __asm pop esi 408 __asm pop edi 409 410 __asm mov ebp, d_zwidth 411 __asm shl ebp, 1 412 413 __asm add esi, ebp 414 __asm add edi, [r_screenwidth] 415 416 __asm pop ecx 417 __asm push ecx 418 419 __asm push edi 420 __asm push esi 421 422 __asm dec edx 423 __asm jnz top_of_pix_vert_loop 424 425 __asm pop ecx 426 __asm pop ecx 427 __asm pop ecx 428 429 end: 430 __asm pop edi 431 __asm pop esi 432 __asm mov ebp, ebpsave 433 __asm ret 434 } 435 436 #else 437 438 static byte BlendParticle33( int pcolor, int dstcolor ) 439 { 440 return vid.alphamap[pcolor + dstcolor*256]; 441 } 442 443 static byte BlendParticle66( int pcolor, int dstcolor ) 444 { 445 return vid.alphamap[pcolor*256+dstcolor]; 446 } 447 448 static byte BlendParticle100( int pcolor, int dstcolor ) 449 { 450 dstcolor = dstcolor; 451 return pcolor; 452 } 453 454 /* 455 ** R_DrawParticle 456 ** 457 ** Yes, this is amazingly slow, but it's the C reference 458 ** implementation and should be both robust and vaguely 459 ** understandable. The only time this path should be 460 ** executed is if we're debugging on x86 or if we're 461 ** recompiling and deploying on a non-x86 platform. 462 ** 463 ** To minimize error and improve readability I went the 464 ** function pointer route. This exacts some overhead, but 465 ** it pays off in clean and easy to understand code. 466 */ 467 void R_DrawParticle( void ) 468 { 469 particle_t *pparticle = partparms.particle; 470 int level = partparms.level; 471 vec3_t local, transformed; 472 float zi; 473 byte *pdest; 474 short *pz; 475 int color = pparticle->color; 476 int i, izi, pix, count, u, v; 477 byte (*blendparticle)( int, int ); 478 479 /* 480 ** transform the particle 481 */ 482 VectorSubtract (pparticle->origin, r_origin, local); 483 484 transformed[0] = DotProduct(local, r_pright); 485 transformed[1] = DotProduct(local, r_pup); 486 transformed[2] = DotProduct(local, r_ppn); 487 488 if (transformed[2] < PARTICLE_Z_CLIP) 489 return; 490 491 /* 492 ** bind the blend function pointer to the appropriate blender 493 */ 494 if ( level == PARTICLE_33 ) 495 blendparticle = BlendParticle33; 496 else if ( level == PARTICLE_66 ) 497 blendparticle = BlendParticle66; 498 else 499 blendparticle = BlendParticle100; 500 501 /* 502 ** project the point 503 */ 504 // FIXME: preadjust xcenter and ycenter 505 zi = 1.0 / transformed[2]; 506 u = (int)(xcenter + zi * transformed[0] + 0.5); 507 v = (int)(ycenter - zi * transformed[1] + 0.5); 508 509 if ((v > d_vrectbottom_particle) || 510 (u > d_vrectright_particle) || 511 (v < d_vrecty) || 512 (u < d_vrectx)) 513 { 514 return; 515 } 516 517 /* 518 ** compute addresses of zbuffer, framebuffer, and 519 ** compute the Z-buffer reference value. 520 */ 521 pz = d_pzbuffer + (d_zwidth * v) + u; 522 pdest = d_viewbuffer + d_scantable[v] + u; 523 izi = (int)(zi * 0x8000); 524 525 /* 526 ** determine the screen area covered by the particle, 527 ** which also means clamping to a min and max 528 */ 529 pix = izi >> d_pix_shift; 530 if (pix < d_pix_min) 531 pix = d_pix_min; 532 else if (pix > d_pix_max) 533 pix = d_pix_max; 534 535 /* 536 ** render the appropriate pixels 537 */ 538 count = pix; 539 540 switch (level) { 541 case PARTICLE_33 : 542 for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) 543 { 544 //FIXME--do it in blocks of 8? 545 for (i=0 ; i<pix ; i++) 546 { 547 if (pz[i] <= izi) 548 { 549 pz[i] = izi; 550 pdest[i] = vid.alphamap[color + ((int)pdest[i]<<8)]; 551 } 552 } 553 } 554 break; 555 556 case PARTICLE_66 : 557 for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) 558 { 559 for (i=0 ; i<pix ; i++) 560 { 561 if (pz[i] <= izi) 562 { 563 pz[i] = izi; 564 pdest[i] = vid.alphamap[(color<<8) + (int)pdest[i]]; 565 } 566 } 567 } 568 break; 569 570 default: //100 571 for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) 572 { 573 for (i=0 ; i<pix ; i++) 574 { 575 if (pz[i] <= izi) 576 { 577 pz[i] = izi; 578 pdest[i] = color; 579 } 580 } 581 } 582 break; 583 } 584 } 585 586 #endif // !id386 587 588 /* 589 ** R_DrawParticles 590 ** 591 ** Responsible for drawing all of the particles in the particle list 592 ** throughout the world. Doesn't care if we're using the C path or 593 ** if we're using the asm path, it simply assigns a function pointer 594 ** and goes. 595 */ 596 void R_DrawParticles (void) 597 { 598 particle_t *p; 599 int i; 600 extern unsigned long fpu_sp24_cw, fpu_chop_cw; 601 602 VectorScale( vright, xscaleshrink, r_pright ); 603 VectorScale( vup, yscaleshrink, r_pup ); 604 VectorCopy( vpn, r_ppn ); 605 606 #if id386 && !defined __linux__ 607 __asm fldcw word ptr [fpu_sp24_cw] 608 #endif 609 610 for (p=r_newrefdef.particles, i=0 ; i<r_newrefdef.num_particles ; i++,p++) 611 { 612 613 if ( p->alpha > 0.66 ) 614 partparms.level = PARTICLE_OPAQUE; 615 else if ( p->alpha > 0.33 ) 616 partparms.level = PARTICLE_66; 617 else 618 partparms.level = PARTICLE_33; 619 620 partparms.particle = p; 621 partparms.color = p->color; 622 623 #if id386 && !defined __linux__ 624 if ( i < r_newrefdef.num_particles-1 ) 625 s_prefetch_address = ( unsigned int ) ( p + 1 ); 626 else 627 s_prefetch_address = ( unsigned int ) r_newrefdef.particles; 628 #endif 629 630 R_DrawParticle(); 631 } 632 633 #if id386 && !defined __linux__ 634 __asm fldcw word ptr [fpu_chop_cw] 635 #endif 636 637 } 638