ModelOverlay.cpp (25473B)
1 /* 2 =========================================================================== 3 4 Doom 3 BFG Edition GPL Source Code 5 Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company. 6 7 This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code"). 8 9 Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation, either version 3 of the License, or 12 (at your option) any later version. 13 14 Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>. 21 22 In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below. 23 24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA. 25 26 =========================================================================== 27 */ 28 29 #pragma hdrstop 30 #include "../idlib/precompiled.h" 31 32 #include "tr_local.h" 33 #include "Model_local.h" 34 35 #include "../idlib/geometry/DrawVert_intrinsics.h" 36 37 /* 38 ==================== 39 idRenderModelOverlay::idRenderModelOverlay 40 ==================== 41 */ 42 idRenderModelOverlay::idRenderModelOverlay() : 43 firstOverlay( 0 ), 44 nextOverlay( 0 ), 45 firstDeferredOverlay( 0 ), 46 nextDeferredOverlay( 0 ), 47 numOverlayMaterials( 0 ) { 48 memset( overlays, 0, sizeof( overlays ) ); 49 } 50 51 /* 52 ==================== 53 idRenderModelOverlay::~idRenderModelOverlay 54 ==================== 55 */ 56 idRenderModelOverlay::~idRenderModelOverlay() { 57 for ( unsigned int i = 0; i < MAX_OVERLAYS; i++ ) { 58 FreeOverlay( overlays[i] ); 59 } 60 } 61 62 /* 63 ================= 64 idRenderModelOverlay::ReUse 65 ================= 66 */ 67 void idRenderModelOverlay::ReUse() { 68 firstOverlay = 0; 69 nextOverlay = 0; 70 firstDeferredOverlay = 0; 71 nextDeferredOverlay = 0; 72 numOverlayMaterials = 0; 73 74 for ( unsigned int i = 0; i < MAX_OVERLAYS; i++ ) { 75 FreeOverlay( overlays[i] ); 76 } 77 } 78 79 /* 80 ==================== 81 idRenderModelOverlay::FreeOverlay 82 ==================== 83 */ 84 void idRenderModelOverlay::FreeOverlay( overlay_t & overlay ) { 85 if ( overlay.verts != NULL ) { 86 Mem_Free( overlay.verts ); 87 } 88 if ( overlay.indexes != NULL ) { 89 Mem_Free( overlay.indexes ); 90 } 91 memset( &overlay, 0, sizeof( overlay ) ); 92 } 93 94 /* 95 ==================== 96 R_OverlayPointCullStatic 97 ==================== 98 */ 99 static void R_OverlayPointCullStatic( byte * cullBits, halfFloat_t * texCoordS, halfFloat_t * texCoordT, const idPlane * planes, const idDrawVert * verts, const int numVerts ) { 100 assert_16_byte_aligned( cullBits ); 101 assert_16_byte_aligned( texCoordS ); 102 assert_16_byte_aligned( texCoordT ); 103 assert_16_byte_aligned( verts ); 104 105 #ifdef ID_WIN_X86_SSE2_INTRIN 106 107 idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); 108 109 const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f }; 110 const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f }; 111 const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 ); 112 const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 ); 113 const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 ); 114 const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 ); 115 116 const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() ); 117 const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() ); 118 119 const __m128 p0X = _mm_splat_ps( p0, 0 ); 120 const __m128 p0Y = _mm_splat_ps( p0, 1 ); 121 const __m128 p0Z = _mm_splat_ps( p0, 2 ); 122 const __m128 p0W = _mm_splat_ps( p0, 3 ); 123 124 const __m128 p1X = _mm_splat_ps( p1, 0 ); 125 const __m128 p1Y = _mm_splat_ps( p1, 1 ); 126 const __m128 p1Z = _mm_splat_ps( p1, 2 ); 127 const __m128 p1W = _mm_splat_ps( p1, 3 ); 128 129 for ( int i = 0; i < numVerts; ) { 130 131 const int nextNumVerts = vertsODS.FetchNextBatch() - 4; 132 133 for ( ; i <= nextNumVerts; i += 4 ) { 134 const __m128 v0 = _mm_load_ps( vertsODS[i + 0].xyz.ToFloatPtr() ); 135 const __m128 v1 = _mm_load_ps( vertsODS[i + 1].xyz.ToFloatPtr() ); 136 const __m128 v2 = _mm_load_ps( vertsODS[i + 2].xyz.ToFloatPtr() ); 137 const __m128 v3 = _mm_load_ps( vertsODS[i + 3].xyz.ToFloatPtr() ); 138 139 const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.z, v2.z 140 const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.y, v2.y, v0.w, v2.w 141 const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.z, v3.z 142 const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.y, v3.y, v1.w, v3.w 143 144 const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x 145 const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y 146 const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z 147 148 const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) ); 149 const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) ); 150 const __m128 d2 = _mm_sub_ps( vector_float_one, d0 ); 151 const __m128 d3 = _mm_sub_ps( vector_float_one, d1 ); 152 153 __m128i flt16S = FastF32toF16( __m128c( d0 ) ); 154 __m128i flt16T = FastF32toF16( __m128c( d1 ) ); 155 156 _mm_storel_epi64( (__m128i *)&texCoordS[i], flt16S ); 157 _mm_storel_epi64( (__m128i *)&texCoordT[i], flt16T ); 158 159 __m128i c0 = __m128c( _mm_cmplt_ps( d0, vector_float_zero ) ); 160 __m128i c1 = __m128c( _mm_cmplt_ps( d1, vector_float_zero ) ); 161 __m128i c2 = __m128c( _mm_cmplt_ps( d2, vector_float_zero ) ); 162 __m128i c3 = __m128c( _mm_cmplt_ps( d3, vector_float_zero ) ); 163 164 c0 = _mm_and_si128( c0, vector_int_mask0 ); 165 c1 = _mm_and_si128( c1, vector_int_mask1 ); 166 c2 = _mm_and_si128( c2, vector_int_mask2 ); 167 c3 = _mm_and_si128( c3, vector_int_mask3 ); 168 169 c0 = _mm_or_si128( c0, c1 ); 170 c2 = _mm_or_si128( c2, c3 ); 171 c0 = _mm_or_si128( c0, c2 ); 172 173 c0 = _mm_packs_epi32( c0, c0 ); 174 c0 = _mm_packus_epi16( c0, c0 ); 175 176 *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( c0 ); 177 } 178 } 179 180 #else 181 182 idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts ); 183 184 for ( int i = 0; i < numVerts; ) { 185 186 const int nextNumVerts = vertsODS.FetchNextBatch() - 1; 187 188 for ( ; i <= nextNumVerts; i++ ) { 189 const idVec3 & v = vertsODS[i].xyz; 190 191 const float d0 = planes[0].Distance( v ); 192 const float d1 = planes[1].Distance( v ); 193 const float d2 = 1.0f - d0; 194 const float d3 = 1.0f - d1; 195 196 halfFloat_t s = Scalar_FastF32toF16( d0 ); 197 halfFloat_t t = Scalar_FastF32toF16( d1 ); 198 199 texCoordS[i] = s; 200 texCoordT[i] = t; 201 202 byte bits; 203 bits = IEEE_FLT_SIGNBITSET( d0 ) << 0; 204 bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1; 205 bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2; 206 bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3; 207 208 cullBits[i] = bits; 209 } 210 } 211 212 #endif 213 } 214 215 /* 216 ==================== 217 R_OverlayPointCullSkinned 218 ==================== 219 */ 220 static void R_OverlayPointCullSkinned( byte * cullBits, halfFloat_t * texCoordS, halfFloat_t * texCoordT, const idPlane * planes, const idDrawVert * verts, const int numVerts, const idJointMat * joints ) { 221 assert_16_byte_aligned( cullBits ); 222 assert_16_byte_aligned( texCoordS ); 223 assert_16_byte_aligned( texCoordT ); 224 assert_16_byte_aligned( verts ); 225 226 #ifdef ID_WIN_X86_SSE2_INTRIN 227 228 idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); 229 230 const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f }; 231 const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f }; 232 const __m128i vector_int_mask0 = _mm_set1_epi32( 1 << 0 ); 233 const __m128i vector_int_mask1 = _mm_set1_epi32( 1 << 1 ); 234 const __m128i vector_int_mask2 = _mm_set1_epi32( 1 << 2 ); 235 const __m128i vector_int_mask3 = _mm_set1_epi32( 1 << 3 ); 236 237 const __m128 p0 = _mm_loadu_ps( planes[0].ToFloatPtr() ); 238 const __m128 p1 = _mm_loadu_ps( planes[1].ToFloatPtr() ); 239 240 const __m128 p0X = _mm_splat_ps( p0, 0 ); 241 const __m128 p0Y = _mm_splat_ps( p0, 1 ); 242 const __m128 p0Z = _mm_splat_ps( p0, 2 ); 243 const __m128 p0W = _mm_splat_ps( p0, 3 ); 244 245 const __m128 p1X = _mm_splat_ps( p1, 0 ); 246 const __m128 p1Y = _mm_splat_ps( p1, 1 ); 247 const __m128 p1Z = _mm_splat_ps( p1, 2 ); 248 const __m128 p1W = _mm_splat_ps( p1, 3 ); 249 250 for ( int i = 0; i < numVerts; ) { 251 252 const int nextNumVerts = vertsODS.FetchNextBatch() - 4; 253 254 for ( ; i <= nextNumVerts; i += 4 ) { 255 const __m128 v0 = LoadSkinnedDrawVertPosition( vertsODS[i + 0], joints ); 256 const __m128 v1 = LoadSkinnedDrawVertPosition( vertsODS[i + 1], joints ); 257 const __m128 v2 = LoadSkinnedDrawVertPosition( vertsODS[i + 2], joints ); 258 const __m128 v3 = LoadSkinnedDrawVertPosition( vertsODS[i + 3], joints ); 259 260 const __m128 r0 = _mm_unpacklo_ps( v0, v2 ); // v0.x, v2.x, v0.z, v2.z 261 const __m128 r1 = _mm_unpackhi_ps( v0, v2 ); // v0.y, v2.y, v0.w, v2.w 262 const __m128 r2 = _mm_unpacklo_ps( v1, v3 ); // v1.x, v3.x, v1.z, v3.z 263 const __m128 r3 = _mm_unpackhi_ps( v1, v3 ); // v1.y, v3.y, v1.w, v3.w 264 265 const __m128 vX = _mm_unpacklo_ps( r0, r2 ); // v0.x, v1.x, v2.x, v3.x 266 const __m128 vY = _mm_unpackhi_ps( r0, r2 ); // v0.y, v1.y, v2.y, v3.y 267 const __m128 vZ = _mm_unpacklo_ps( r1, r3 ); // v0.z, v1.z, v2.z, v3.z 268 269 const __m128 d0 = _mm_madd_ps( vX, p0X, _mm_madd_ps( vY, p0Y, _mm_madd_ps( vZ, p0Z, p0W ) ) ); 270 const __m128 d1 = _mm_madd_ps( vX, p1X, _mm_madd_ps( vY, p1Y, _mm_madd_ps( vZ, p1Z, p1W ) ) ); 271 const __m128 d2 = _mm_sub_ps( vector_float_one, d0 ); 272 const __m128 d3 = _mm_sub_ps( vector_float_one, d1 ); 273 274 __m128i flt16S = FastF32toF16( __m128c( d0 ) ); 275 __m128i flt16T = FastF32toF16( __m128c( d1 ) ); 276 277 _mm_storel_epi64( (__m128i *)&texCoordS[i], flt16S ); 278 _mm_storel_epi64( (__m128i *)&texCoordT[i], flt16T ); 279 280 __m128i c0 = __m128c( _mm_cmplt_ps( d0, vector_float_zero ) ); 281 __m128i c1 = __m128c( _mm_cmplt_ps( d1, vector_float_zero ) ); 282 __m128i c2 = __m128c( _mm_cmplt_ps( d2, vector_float_zero ) ); 283 __m128i c3 = __m128c( _mm_cmplt_ps( d3, vector_float_zero ) ); 284 285 c0 = _mm_and_si128( c0, vector_int_mask0 ); 286 c1 = _mm_and_si128( c1, vector_int_mask1 ); 287 c2 = _mm_and_si128( c2, vector_int_mask2 ); 288 c3 = _mm_and_si128( c3, vector_int_mask3 ); 289 290 c0 = _mm_or_si128( c0, c1 ); 291 c2 = _mm_or_si128( c2, c3 ); 292 c0 = _mm_or_si128( c0, c2 ); 293 294 c0 = _mm_packs_epi32( c0, c0 ); 295 c0 = _mm_packus_epi16( c0, c0 ); 296 297 *(unsigned int *)&cullBits[i] = _mm_cvtsi128_si32( c0 ); 298 } 299 } 300 301 #else 302 303 idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 1 > vertsODS( verts, numVerts ); 304 305 for ( int i = 0; i < numVerts; ) { 306 307 const int nextNumVerts = vertsODS.FetchNextBatch() - 1; 308 309 for ( ; i <= nextNumVerts; i++ ) { 310 const idVec3 transformed = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints ); 311 312 const float d0 = planes[0].Distance( transformed ); 313 const float d1 = planes[1].Distance( transformed ); 314 const float d2 = 1.0f - d0; 315 const float d3 = 1.0f - d1; 316 317 halfFloat_t s = Scalar_FastF32toF16( d0 ); 318 halfFloat_t t = Scalar_FastF32toF16( d1 ); 319 320 texCoordS[i] = s; 321 texCoordT[i] = t; 322 323 byte bits; 324 bits = IEEE_FLT_SIGNBITSET( d0 ) << 0; 325 bits |= IEEE_FLT_SIGNBITSET( d1 ) << 1; 326 bits |= IEEE_FLT_SIGNBITSET( d2 ) << 2; 327 bits |= IEEE_FLT_SIGNBITSET( d3 ) << 3; 328 329 cullBits[i] = bits; 330 } 331 } 332 333 #endif 334 } 335 336 /* 337 ===================== 338 idRenderModelOverlay::CreateOverlay 339 340 This projects on both front and back sides to avoid seams 341 The material should be clamped, because entire triangles are added, some of which 342 may extend well past the 0.0 to 1.0 texture range 343 ===================== 344 */ 345 void idRenderModelOverlay::CreateOverlay( const idRenderModel *model, const idPlane localTextureAxis[2], const idMaterial *material ) { 346 // count up the maximum possible vertices and indexes per surface 347 int maxVerts = 0; 348 int maxIndexes = 0; 349 for ( int surfNum = 0; surfNum < model->NumSurfaces(); surfNum++ ) { 350 const modelSurface_t *surf = model->Surface( surfNum ); 351 if ( surf->geometry->numVerts > maxVerts ) { 352 maxVerts = surf->geometry->numVerts; 353 } 354 if ( surf->geometry->numIndexes > maxIndexes ) { 355 maxIndexes = surf->geometry->numIndexes; 356 } 357 } 358 maxIndexes += 3 * 16 / sizeof( triIndex_t ); // to allow the index size to be a multiple of 16 bytes 359 360 // make temporary buffers for the building process 361 idTempArray< byte > cullBits( maxVerts ); 362 idTempArray< halfFloat_t > texCoordS( maxVerts ); 363 idTempArray< halfFloat_t > texCoordT( maxVerts ); 364 idTempArray< triIndex_t > vertexRemap( maxVerts ); 365 idTempArray< overlayVertex_t > overlayVerts( maxVerts ); 366 idTempArray< triIndex_t > overlayIndexes( maxIndexes ); 367 368 // pull out the triangles we need from the base surfaces 369 for ( int surfNum = 0; surfNum < model->NumBaseSurfaces(); surfNum++ ) { 370 const modelSurface_t *surf = model->Surface( surfNum ); 371 372 if ( surf->geometry == NULL || surf->shader == NULL ) { 373 continue; 374 } 375 376 // some surfaces can explicitly disallow overlays 377 if ( !surf->shader->AllowOverlays() ) { 378 continue; 379 } 380 381 const srfTriangles_t *tri = surf->geometry; 382 383 // try to cull the whole surface along the first texture axis 384 const float d0 = tri->bounds.PlaneDistance( localTextureAxis[0] ); 385 if ( d0 < 0.0f || d0 > 1.0f ) { 386 continue; 387 } 388 389 // try to cull the whole surface along the second texture axis 390 const float d1 = tri->bounds.PlaneDistance( localTextureAxis[1] ); 391 if ( d1 < 0.0f || d1 > 1.0f ) { 392 continue; 393 } 394 395 if ( tri->staticModelWithJoints != NULL && r_useGPUSkinning.GetBool() ) { 396 R_OverlayPointCullSkinned( cullBits.Ptr(), texCoordS.Ptr(), texCoordT.Ptr(), localTextureAxis, tri->verts, tri->numVerts, tri->staticModelWithJoints->jointsInverted ); 397 } else { 398 R_OverlayPointCullStatic( cullBits.Ptr(), texCoordS.Ptr(), texCoordT.Ptr(), localTextureAxis, tri->verts, tri->numVerts ); 399 } 400 401 // start streaming the indexes 402 idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 3 > indexesODS( tri->indexes, tri->numIndexes ); 403 404 memset( vertexRemap.Ptr(), -1, vertexRemap.Size() ); 405 int numIndexes = 0; 406 int numVerts = 0; 407 int maxReferencedVertex = 0; 408 409 // find triangles that need the overlay 410 for ( int i = 0; i < tri->numIndexes; ) { 411 412 const int nextNumIndexes = indexesODS.FetchNextBatch() - 3; 413 414 for ( ; i <= nextNumIndexes; i += 3 ) { 415 const int i0 = indexesODS[i + 0]; 416 const int i1 = indexesODS[i + 1]; 417 const int i2 = indexesODS[i + 2]; 418 419 // skip triangles completely off one side 420 if ( cullBits[i0] & cullBits[i1] & cullBits[i2] ) { 421 continue; 422 } 423 424 // we could do more precise triangle culling, like a light interaction does, but it's not worth it 425 426 // keep this triangle 427 for ( int j = 0; j < 3; j++ ) { 428 int index = tri->indexes[i + j]; 429 if ( vertexRemap[index] == (triIndex_t) -1 ) { 430 vertexRemap[index] = numVerts; 431 432 overlayVerts[numVerts].vertexNum = index; 433 overlayVerts[numVerts].st[0] = texCoordS[index]; 434 overlayVerts[numVerts].st[1] = texCoordT[index]; 435 numVerts++; 436 437 maxReferencedVertex = Max( maxReferencedVertex, index ); 438 } 439 overlayIndexes[numIndexes] = vertexRemap[index]; 440 numIndexes++; 441 } 442 } 443 } 444 445 if ( numIndexes == 0 ) { 446 continue; 447 } 448 449 // add degenerate triangles until the index size is a multiple of 16 bytes 450 for ( ; ( ( ( numIndexes * sizeof( triIndex_t ) ) & 15 ) != 0 ); numIndexes += 3 ) { 451 overlayIndexes[numIndexes + 0] = 0; 452 overlayIndexes[numIndexes + 1] = 0; 453 overlayIndexes[numIndexes + 2] = 0; 454 } 455 456 // allocate a new overlay 457 overlay_t & overlay = overlays[nextOverlay++ & ( MAX_OVERLAYS - 1 )]; 458 FreeOverlay( overlay ); 459 overlay.material = material; 460 overlay.surfaceNum = surfNum; 461 overlay.surfaceId = surf->id; 462 overlay.numIndexes = numIndexes; 463 overlay.indexes = (triIndex_t *)Mem_Alloc( numIndexes * sizeof( overlay.indexes[0] ), TAG_MODEL ); 464 memcpy( overlay.indexes, overlayIndexes.Ptr(), numIndexes * sizeof( overlay.indexes[0] ) ); 465 overlay.numVerts = numVerts; 466 overlay.verts = (overlayVertex_t *)Mem_Alloc( numVerts * sizeof( overlay.verts[0] ), TAG_MODEL ); 467 memcpy( overlay.verts, overlayVerts.Ptr(), numVerts * sizeof( overlay.verts[0] ) ); 468 overlay.maxReferencedVertex = maxReferencedVertex; 469 470 if ( nextOverlay - firstOverlay > MAX_OVERLAYS ) { 471 firstOverlay = nextOverlay - MAX_OVERLAYS; 472 } 473 } 474 } 475 476 /* 477 ==================== 478 idRenderModelOverlay::CreateDeferredOverlays 479 ==================== 480 */ 481 void idRenderModelOverlay::CreateDeferredOverlays( const idRenderModel * model ) { 482 for ( unsigned int i = firstDeferredOverlay; i < nextDeferredOverlay; i++ ) { 483 const overlayProjectionParms_t & parms = deferredOverlays[i & ( MAX_DEFERRED_OVERLAYS - 1 )]; 484 if ( parms.startTime > tr.viewDef->renderView.time[0] - DEFFERED_OVERLAY_TIMEOUT ) { 485 CreateOverlay( model, parms.localTextureAxis, parms.material ); 486 } 487 } 488 firstDeferredOverlay = 0; 489 nextDeferredOverlay = 0; 490 } 491 492 /* 493 ==================== 494 idRenderModelOverlay::AddDeferredOverlay 495 ==================== 496 */ 497 void idRenderModelOverlay::AddDeferredOverlay( const overlayProjectionParms_t & localParms ) { 498 deferredOverlays[nextDeferredOverlay++ & ( MAX_DEFERRED_OVERLAYS - 1 )] = localParms; 499 if ( nextDeferredOverlay - firstDeferredOverlay > MAX_DEFERRED_OVERLAYS ) { 500 firstDeferredOverlay = nextDeferredOverlay - MAX_DEFERRED_OVERLAYS; 501 } 502 } 503 504 /* 505 ==================== 506 R_CopyOverlaySurface 507 ==================== 508 */ 509 static void R_CopyOverlaySurface( idDrawVert * verts, int numVerts, triIndex_t * indexes, int numIndexes, const overlay_t * overlay, const idDrawVert * sourceVerts ) { 510 assert_16_byte_aligned( &verts[numVerts] ); 511 assert_16_byte_aligned( &indexes[numIndexes] ); 512 assert_16_byte_aligned( overlay->verts ); 513 assert_16_byte_aligned( overlay->indexes ); 514 assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 ); 515 assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 ); 516 517 #ifdef ID_WIN_X86_SSE2_INTRIN 518 519 const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 ); 520 const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 ); 521 const __m128i vector_short_num_verts = _mm_packs_epi32( vector_int_num_verts, vector_int_num_verts ); 522 523 // copy vertices 524 for ( int i = 0; i < overlay->numVerts; i++ ) { 525 const overlayVertex_t &overlayVert = overlay->verts[i]; 526 const idDrawVert &srcVert = sourceVerts[overlayVert.vertexNum]; 527 idDrawVert &dstVert = verts[numVerts + i]; 528 529 __m128i v0 = _mm_load_si128( (const __m128i *)( (byte *)&srcVert + 0 ) ); 530 __m128i v1 = _mm_load_si128( (const __m128i *)( (byte *)&srcVert + 16 ) ); 531 __m128i st = _mm_cvtsi32_si128( *(unsigned int *)overlayVert.st ); 532 533 st = _mm_shuffle_epi32( st, _MM_SHUFFLE( 0, 1, 2, 3 ) ); 534 v0 = _mm_and_si128( v0, vector_int_clear_last ); 535 v0 = _mm_or_si128( v0, st ); 536 537 _mm_stream_si128( (__m128i *)( (byte *)&dstVert + 0 ), v0 ); 538 _mm_stream_si128( (__m128i *)( (byte *)&dstVert + 16 ), v1 ); 539 } 540 541 // copy indexes 542 assert( ( overlay->numIndexes & 7 ) == 0 ); 543 assert( sizeof( triIndex_t ) == 2 ); 544 for ( int i = 0; i < overlay->numIndexes; i += 8 ) { 545 __m128i vi = _mm_load_si128( (const __m128i *)&overlay->indexes[i] ); 546 547 vi = _mm_add_epi16( vi, vector_short_num_verts ); 548 549 _mm_stream_si128( (__m128i *)&indexes[numIndexes + i], vi ); 550 } 551 552 _mm_sfence(); 553 554 #else 555 556 // copy vertices 557 for ( int i = 0; i < overlay->numVerts; i++ ) { 558 const overlayVertex_t &overlayVert = overlay->verts[i]; 559 560 // NOTE: bad out-of-order write-combined write, SIMD code does the right thing 561 verts[numVerts + i] = sourceVerts[overlayVert.vertexNum]; 562 verts[numVerts + i].st[0] = overlayVert.st[0]; 563 verts[numVerts + i].st[1] = overlayVert.st[1]; 564 } 565 566 // copy indexes 567 for ( int i = 0; i < overlay->numIndexes; i += 2 ) { 568 assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts ); 569 WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] ); 570 } 571 572 #endif 573 } 574 575 /* 576 ===================== 577 idRenderModelOverlay::GetNumOverlayDrawSurfs 578 ===================== 579 */ 580 unsigned int idRenderModelOverlay::GetNumOverlayDrawSurfs() { 581 numOverlayMaterials = 0; 582 583 for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) { 584 const overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )]; 585 586 unsigned int j = 0; 587 for ( ; j < numOverlayMaterials; j++ ) { 588 if ( overlayMaterials[j] == overlay.material ) { 589 break; 590 } 591 } 592 if ( j >= numOverlayMaterials ) { 593 overlayMaterials[numOverlayMaterials++] = overlay.material; 594 } 595 } 596 597 return numOverlayMaterials; 598 } 599 600 /* 601 ==================== 602 idRenderModelOverlay::CreateOverlayDrawSurf 603 ==================== 604 */ 605 drawSurf_t * idRenderModelOverlay::CreateOverlayDrawSurf( const viewEntity_t *space, const idRenderModel *baseModel, unsigned int index ) { 606 if ( index < 0 || index >= numOverlayMaterials ) { 607 return NULL; 608 } 609 610 // md5 models won't have any surfaces when r_showSkel is set 611 if ( baseModel == NULL || baseModel->IsDefaultModel() || baseModel->NumSurfaces() == 0 ) { 612 return NULL; 613 } 614 615 assert( baseModel->IsDynamicModel() == DM_STATIC ); 616 617 const idRenderModelStatic * staticModel = static_cast< const idRenderModelStatic * >( baseModel ); 618 619 const idMaterial * material = overlayMaterials[index]; 620 621 int maxVerts = 0; 622 int maxIndexes = 0; 623 for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) { 624 const overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )]; 625 if ( overlay.material == material ) { 626 maxVerts += overlay.numVerts; 627 maxIndexes += overlay.numIndexes; 628 } 629 } 630 631 if ( maxVerts == 0 || maxIndexes == 0 ) { 632 return NULL; 633 } 634 635 // create a new triangle surface in frame memory so it gets automatically disposed of 636 srfTriangles_t *newTri = (srfTriangles_t *)R_ClearedFrameAlloc( sizeof( *newTri ), FRAME_ALLOC_SURFACE_TRIANGLES ); 637 newTri->staticModelWithJoints = ( staticModel->jointsInverted != NULL ) ? const_cast< idRenderModelStatic * >( staticModel ) : NULL; // allow GPU skinning 638 639 newTri->ambientCache = vertexCache.AllocVertex( NULL, ALIGN( maxVerts * sizeof( idDrawVert ), VERTEX_CACHE_ALIGN ) ); 640 newTri->indexCache = vertexCache.AllocIndex( NULL, ALIGN( maxIndexes * sizeof( triIndex_t ), INDEX_CACHE_ALIGN ) ); 641 642 idDrawVert * mappedVerts = (idDrawVert *)vertexCache.MappedVertexBuffer( newTri->ambientCache ); 643 triIndex_t * mappedIndexes = (triIndex_t *)vertexCache.MappedIndexBuffer( newTri->indexCache ); 644 645 int numVerts = 0; 646 int numIndexes = 0; 647 648 for ( unsigned int i = firstOverlay; i < nextOverlay; i++ ) { 649 overlay_t & overlay = overlays[i & ( MAX_OVERLAYS - 1 )]; 650 651 if ( overlay.numVerts == 0 ) { 652 if ( i == firstOverlay ) { 653 firstOverlay++; 654 } 655 continue; 656 } 657 658 if ( overlay.material != material ) { 659 continue; 660 } 661 662 // get the source model surface for this overlay surface 663 const modelSurface_t * baseSurf = ( overlay.surfaceNum < staticModel->NumSurfaces() ) ? staticModel->Surface( overlay.surfaceNum ) : NULL; 664 665 // if the surface ids no longer match 666 if ( baseSurf == NULL || baseSurf->id != overlay.surfaceId ) { 667 // find the surface with the correct id 668 if ( staticModel->FindSurfaceWithId( overlay.surfaceId, overlay.surfaceNum ) ) { 669 baseSurf = staticModel->Surface( overlay.surfaceNum ); 670 } else { 671 // the surface with this id no longer exists 672 FreeOverlay( overlay ); 673 if ( i == firstOverlay ) { 674 firstOverlay++; 675 } 676 continue; 677 } 678 } 679 680 // check for out of range vertex references 681 const srfTriangles_t * baseTri = baseSurf->geometry; 682 if ( overlay.maxReferencedVertex >= baseTri->numVerts ) { 683 // This can happen when playing a demofile and a model has been changed since it was recorded, so just issue a warning and go on. 684 common->Warning( "idRenderModelOverlay::CreateOverlayDrawSurf: overlay vertex out of range. Model has probably changed since generating the overlay." ); 685 FreeOverlay( overlay ); 686 if ( i == firstOverlay ) { 687 firstOverlay++; 688 } 689 continue; 690 } 691 692 // use SIMD optimized routine to copy the vertices and indices directly to write-combined memory 693 R_CopyOverlaySurface( mappedVerts, numVerts, mappedIndexes, numIndexes, &overlay, baseTri->verts ); 694 695 numIndexes += overlay.numIndexes; 696 numVerts += overlay.numVerts; 697 } 698 699 newTri->numVerts = numVerts; 700 newTri->numIndexes = numIndexes; 701 702 // create the drawsurf 703 drawSurf_t * drawSurf = (drawSurf_t *)R_FrameAlloc( sizeof( *drawSurf ), FRAME_ALLOC_DRAW_SURFACE ); 704 drawSurf->frontEndGeo = newTri; 705 drawSurf->numIndexes = newTri->numIndexes; 706 drawSurf->ambientCache = newTri->ambientCache; 707 drawSurf->indexCache = newTri->indexCache; 708 drawSurf->shadowCache = 0; 709 drawSurf->space = space; 710 drawSurf->scissorRect = space->scissorRect; 711 drawSurf->extraGLState = 0; 712 drawSurf->renderZFail = 0; 713 714 R_SetupDrawSurfShader( drawSurf, material, &space->entityDef->parms ); 715 R_SetupDrawSurfJoints( drawSurf, newTri, NULL ); 716 717 return drawSurf; 718 } 719 720 /* 721 ==================== 722 idRenderModelOverlay::ReadFromDemoFile 723 ==================== 724 */ 725 void idRenderModelOverlay::ReadFromDemoFile( idDemoFile *f ) { 726 // FIXME: implement 727 } 728 729 /* 730 ==================== 731 idRenderModelOverlay::WriteToDemoFile 732 ==================== 733 */ 734 void idRenderModelOverlay::WriteToDemoFile( idDemoFile *f ) const { 735 // FIXME: implement 736 }