DynamicShadowVolume.cpp
/*
===========================================================================

Doom 3 BFG Edition GPL Source Code
Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.

This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").

Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.

In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.

If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.

===========================================================================
*/

#include "DynamicShadowVolume_local.h"

#include "../../../idlib/sys/sys_intrinsics.h"
#include "../../../idlib/geometry/DrawVert_intrinsics.h"

#ifdef ID_WIN_X86_SSE2_INTRIN

static const __m128i vector_int_neg_one = _mm_set_epi32( -1, -1, -1, -1 );

/*
=====================
TriangleFacing_SSE2
=====================
*/
static __forceinline __m128i TriangleFacing_SSE2( const __m128 & vert0X, const __m128 & vert0Y, const __m128 & vert0Z,
													const __m128 & vert1X, const __m128 & vert1Y, const __m128 & vert1Z,
													const __m128 & vert2X, const __m128 & vert2Y, const __m128 & vert2Z,
													const __m128 & lightOriginX, const __m128 & lightOriginY, const __m128 & lightOriginZ ) {
	const __m128 sX = _mm_sub_ps( vert1X, vert0X );
	const __m128 sY = _mm_sub_ps( vert1Y, vert0Y );
	const __m128 sZ = _mm_sub_ps( vert1Z, vert0Z );

	const __m128 tX = _mm_sub_ps( vert2X, vert0X );
	const __m128 tY = _mm_sub_ps( vert2Y, vert0Y );
	const __m128 tZ = _mm_sub_ps( vert2Z, vert0Z );

	const __m128 normalX = _mm_nmsub_ps( tZ, sY, _mm_mul_ps( tY, sZ ) );
	const __m128 normalY = _mm_nmsub_ps( tX, sZ, _mm_mul_ps( tZ, sX ) );
	const __m128 normalZ = _mm_nmsub_ps( tY, sX, _mm_mul_ps( tX, sY ) );
	const __m128 normalW = _mm_madd_ps( normalX, vert0X, _mm_madd_ps( normalY, vert0Y, _mm_mul_ps( normalZ, vert0Z ) ) );

	const __m128 delta = _mm_nmsub_ps( lightOriginX, normalX, _mm_nmsub_ps( lightOriginY, normalY, _mm_nmsub_ps( lightOriginZ, normalZ, normalW ) ) );
	return _mm_castps_si128( _mm_cmplt_ps( delta, _mm_setzero_ps() ) );
}

/*
=====================
TriangleCulled

The clip space of the 'lightProject' is assumed to be in the range [0, 1].
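Each vertex is transformed by the four rows of 'lightProject' and tested against the six
clip planes 0 <= x, y, z <= w. The triangle is only reported as culled when all three
vertices are outside at least one of those planes, so the test is conservative: a triangle
that straddles a frustum corner may still be kept. TriangleCulled_Generic() below is the
scalar equivalent of this routine.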
=====================
*/
static __forceinline __m128i TriangleCulled_SSE2( const __m128 & vert0X, const __m128 & vert0Y, const __m128 & vert0Z,
													const __m128 & vert1X, const __m128 & vert1Y, const __m128 & vert1Z,
													const __m128 & vert2X, const __m128 & vert2Y, const __m128 & vert2Z,
													const __m128 & lightProjectX, const __m128 & lightProjectY, const __m128 & lightProjectZ, const __m128 & lightProjectW ) {

	const __m128 mvpX0 = _mm_splat_ps( lightProjectX, 0 );
	const __m128 mvpX1 = _mm_splat_ps( lightProjectX, 1 );
	const __m128 mvpX2 = _mm_splat_ps( lightProjectX, 2 );
	const __m128 mvpX3 = _mm_splat_ps( lightProjectX, 3 );

	const __m128 c0X = _mm_madd_ps( vert0X, mvpX0, _mm_madd_ps( vert0Y, mvpX1, _mm_madd_ps( vert0Z, mvpX2, mvpX3 ) ) );
	const __m128 c1X = _mm_madd_ps( vert1X, mvpX0, _mm_madd_ps( vert1Y, mvpX1, _mm_madd_ps( vert1Z, mvpX2, mvpX3 ) ) );
	const __m128 c2X = _mm_madd_ps( vert2X, mvpX0, _mm_madd_ps( vert2Y, mvpX1, _mm_madd_ps( vert2Z, mvpX2, mvpX3 ) ) );

	const __m128 mvpY0 = _mm_splat_ps( lightProjectY, 0 );
	const __m128 mvpY1 = _mm_splat_ps( lightProjectY, 1 );
	const __m128 mvpY2 = _mm_splat_ps( lightProjectY, 2 );
	const __m128 mvpY3 = _mm_splat_ps( lightProjectY, 3 );

	const __m128 c0Y = _mm_madd_ps( vert0X, mvpY0, _mm_madd_ps( vert0Y, mvpY1, _mm_madd_ps( vert0Z, mvpY2, mvpY3 ) ) );
	const __m128 c1Y = _mm_madd_ps( vert1X, mvpY0, _mm_madd_ps( vert1Y, mvpY1, _mm_madd_ps( vert1Z, mvpY2, mvpY3 ) ) );
	const __m128 c2Y = _mm_madd_ps( vert2X, mvpY0, _mm_madd_ps( vert2Y, mvpY1, _mm_madd_ps( vert2Z, mvpY2, mvpY3 ) ) );

	const __m128 mvpZ0 = _mm_splat_ps( lightProjectZ, 0 );
	const __m128 mvpZ1 = _mm_splat_ps( lightProjectZ, 1 );
	const __m128 mvpZ2 = _mm_splat_ps( lightProjectZ, 2 );
	const __m128 mvpZ3 = _mm_splat_ps( lightProjectZ, 3 );

	const __m128 c0Z = _mm_madd_ps( vert0X, mvpZ0, _mm_madd_ps( vert0Y, mvpZ1, _mm_madd_ps( vert0Z, mvpZ2, mvpZ3 ) ) );
	const __m128 c1Z = _mm_madd_ps( vert1X, mvpZ0, _mm_madd_ps( vert1Y, mvpZ1, _mm_madd_ps( vert1Z, mvpZ2, mvpZ3 ) ) );
	const __m128 c2Z = _mm_madd_ps( vert2X, mvpZ0, _mm_madd_ps( vert2Y, mvpZ1, _mm_madd_ps( vert2Z, mvpZ2, mvpZ3 ) ) );

	const __m128 mvpW0 = _mm_splat_ps( lightProjectW, 0 );
	const __m128 mvpW1 = _mm_splat_ps( lightProjectW, 1 );
	const __m128 mvpW2 = _mm_splat_ps( lightProjectW, 2 );
	const __m128 mvpW3 = _mm_splat_ps( lightProjectW, 3 );

	const __m128 c0W = _mm_madd_ps( vert0X, mvpW0, _mm_madd_ps( vert0Y, mvpW1, _mm_madd_ps( vert0Z, mvpW2, mvpW3 ) ) );
	const __m128 c1W = _mm_madd_ps( vert1X, mvpW0, _mm_madd_ps( vert1Y, mvpW1, _mm_madd_ps( vert1Z, mvpW2, mvpW3 ) ) );
	const __m128 c2W = _mm_madd_ps( vert2X, mvpW0, _mm_madd_ps( vert2Y, mvpW1, _mm_madd_ps( vert2Z, mvpW2, mvpW3 ) ) );

	const __m128 zero = _mm_setzero_ps();

	__m128 b0 = _mm_or_ps( _mm_or_ps( _mm_cmpgt_ps( c0X, zero ), _mm_cmpgt_ps( c1X, zero ) ), _mm_cmpgt_ps( c2X, zero ) );
	__m128 b1 = _mm_or_ps( _mm_or_ps( _mm_cmpgt_ps( c0Y, zero ), _mm_cmpgt_ps( c1Y, zero ) ), _mm_cmpgt_ps( c2Y, zero ) );
	__m128 b2 = _mm_or_ps( _mm_or_ps( _mm_cmpgt_ps( c0Z, zero ), _mm_cmpgt_ps( c1Z, zero ) ), _mm_cmpgt_ps( c2Z, zero ) );
	__m128 b3 = _mm_or_ps( _mm_or_ps( _mm_cmpgt_ps( c0W, c0X ), _mm_cmpgt_ps( c1W, c1X ) ), _mm_cmpgt_ps( c2W, c2X ) );
	__m128 b4 = _mm_or_ps( _mm_or_ps( _mm_cmpgt_ps( c0W, c0Y ), _mm_cmpgt_ps( c1W, c1Y ) ), _mm_cmpgt_ps( c2W, c2Y ) );
	__m128 b5 = _mm_or_ps( _mm_or_ps( _mm_cmpgt_ps( c0W, c0Z ), _mm_cmpgt_ps( c1W, c1Z ) ), _mm_cmpgt_ps( c2W, c2Z ) );

	b0 = _mm_and_ps( b0, b1 );
	b2 = _mm_and_ps( b2, b3 );
	b4 = _mm_and_ps( b4, b5 );
	b0 = _mm_and_ps( b0, b2 );
	b0 = _mm_and_ps( b0, b4 );

	return _mm_castps_si128( _mm_cmpeq_ps( b0, zero ) );
}

#else

/*
=====================
TriangleFacing

Returns 255 if the triangle is facing the light origin, otherwise returns 0.
=====================
*/
static byte TriangleFacing_Generic( const idVec3 & v1, const idVec3 & v2, const idVec3 & v3, const idVec3 & lightOrigin ) {
	const float sx = v2.x - v1.x;
	const float sy = v2.y - v1.y;
	const float sz = v2.z - v1.z;

	const float tx = v3.x - v1.x;
	const float ty = v3.y - v1.y;
	const float tz = v3.z - v1.z;

	const float normalX = ty * sz - tz * sy;
	const float normalY = tz * sx - tx * sz;
	const float normalZ = tx * sy - ty * sx;
	const float normalW = normalX * v1.x + normalY * v1.y + normalZ * v1.z;

	const float d = lightOrigin.x * normalX + lightOrigin.y * normalY + lightOrigin.z * normalZ - normalW;
	return ( d > 0.0f ) ? 255 : 0;
}

/*
=====================
TriangleCulled

Returns 255 if the triangle is culled to the light projection matrix, otherwise returns 0.
The clip space of the 'lightProject' is assumed to be in the range [0, 1].
=====================
*/
static byte TriangleCulled_Generic( const idVec3 & v1, const idVec3 & v2, const idVec3 & v3, const idRenderMatrix & lightProject ) {
	// transform the triangle
	idVec4 c[3];
	for ( int i = 0; i < 4; i++ ) {
		c[0][i] = v1[0] * lightProject[i][0] + v1[1] * lightProject[i][1] + v1[2] * lightProject[i][2] + lightProject[i][3];
		c[1][i] = v2[0] * lightProject[i][0] + v2[1] * lightProject[i][1] + v2[2] * lightProject[i][2] + lightProject[i][3];
		c[2][i] = v3[0] * lightProject[i][0] + v3[1] * lightProject[i][1] + v3[2] * lightProject[i][2] + lightProject[i][3];
	}

	// calculate the culled bits
	int bits = 0;
	for ( int i = 0; i < 3; i++ ) {
		const float minW = 0.0f;
		const float maxW = c[i][3];

		if ( c[i][0] > minW ) { bits |= ( 1 << 0 ); }
		if ( c[i][0] < maxW ) { bits |= ( 1 << 1 ); }
		if ( c[i][1] > minW ) { bits |= ( 1 << 2 ); }
		if ( c[i][1] < maxW ) { bits |= ( 1 << 3 ); }
		if ( c[i][2] > minW ) { bits |= ( 1 << 4 ); }
		if ( c[i][2] < maxW ) { bits |= ( 1 << 5 ); }
	}

	// if any bits weren't set, the triangle is completely off one side of the frustum
	return ( bits != 63 ) ? 255 : 0;
}

#endif

/*
=====================
CalculateTriangleFacingCulledStatic
=====================
*/
static int CalculateTriangleFacingCulledStatic( byte * __restrict facing, byte * __restrict culled, const triIndex_t * __restrict indexes, int numIndexes,
												const idDrawVert * __restrict verts, const int numVerts,
												const idVec3 & lightOrigin, const idVec3 & viewOrigin,
												bool cullShadowTrianglesToLight, const idRenderMatrix & lightProject,
												bool * insideShadowVolume, const float radius ) {

	assert_spu_local_store( facing );
	assert_not_spu_local_store( indexes );
	assert_not_spu_local_store( verts );

	if ( insideShadowVolume != NULL ) {
		*insideShadowVolume = false;
	}

	// calculate the start, end, dir and length of the line from the view origin to the light origin
	const idVec3 lineStart = viewOrigin;
	const idVec3 lineEnd = lightOrigin;
	const idVec3 lineDelta = lineEnd - lineStart;
	const float lineLengthSqr = lineDelta.LengthSqr();
	const float lineLengthRcp = idMath::InvSqrt( lineLengthSqr );
	const idVec3 lineDir = lineDelta * lineLengthRcp;
	const float lineLength = lineLengthSqr * lineLengthRcp;

#ifdef ID_WIN_X86_SSE2_INTRIN

	idODSStreamedIndexedArray< idDrawVert, triIndex_t, 32, SBT_QUAD, 4 * 3 > indexedVertsODS( verts, numVerts, indexes, numIndexes );

	const __m128 lightOriginX = _mm_splat_ps( _mm_load_ss( &lightOrigin.x ), 0 );
	const __m128 lightOriginY = _mm_splat_ps( _mm_load_ss( &lightOrigin.y ), 0 );
	const __m128 lightOriginZ = _mm_splat_ps( _mm_load_ss( &lightOrigin.z ), 0 );

	const __m128 lightProjectX = _mm_loadu_ps( lightProject[0] );
	const __m128 lightProjectY = _mm_loadu_ps( lightProject[1] );
	const __m128 lightProjectZ = _mm_loadu_ps( lightProject[2] );
	const __m128 lightProjectW = _mm_loadu_ps( lightProject[3] );

	const __m128i cullShadowTrianglesToLightMask = cullShadowTrianglesToLight ?
vector_int_neg_one : vector_int_zero; 236 237 __m128i numFrontFacing = _mm_setzero_si128(); 238 239 for ( int i = 0, j = 0; i < numIndexes; ) { 240 241 const int batchStart = i; 242 const int batchEnd = indexedVertsODS.FetchNextBatch(); 243 const int batchEnd4x = batchEnd - 4 * 3; 244 const int indexStart = j; 245 246 for ( ; i <= batchEnd4x; i += 4 * 3, j += 4 ) { 247 const __m128 vertA0 = _mm_load_ps( indexedVertsODS[i + 0 * 3 + 0].xyz.ToFloatPtr() ); 248 const __m128 vertA1 = _mm_load_ps( indexedVertsODS[i + 0 * 3 + 1].xyz.ToFloatPtr() ); 249 const __m128 vertA2 = _mm_load_ps( indexedVertsODS[i + 0 * 3 + 2].xyz.ToFloatPtr() ); 250 251 const __m128 vertB0 = _mm_load_ps( indexedVertsODS[i + 1 * 3 + 0].xyz.ToFloatPtr() ); 252 const __m128 vertB1 = _mm_load_ps( indexedVertsODS[i + 1 * 3 + 1].xyz.ToFloatPtr() ); 253 const __m128 vertB2 = _mm_load_ps( indexedVertsODS[i + 1 * 3 + 2].xyz.ToFloatPtr() ); 254 255 const __m128 vertC0 = _mm_load_ps( indexedVertsODS[i + 2 * 3 + 0].xyz.ToFloatPtr() ); 256 const __m128 vertC1 = _mm_load_ps( indexedVertsODS[i + 2 * 3 + 1].xyz.ToFloatPtr() ); 257 const __m128 vertC2 = _mm_load_ps( indexedVertsODS[i + 2 * 3 + 2].xyz.ToFloatPtr() ); 258 259 const __m128 vertD0 = _mm_load_ps( indexedVertsODS[i + 3 * 3 + 0].xyz.ToFloatPtr() ); 260 const __m128 vertD1 = _mm_load_ps( indexedVertsODS[i + 3 * 3 + 1].xyz.ToFloatPtr() ); 261 const __m128 vertD2 = _mm_load_ps( indexedVertsODS[i + 3 * 3 + 2].xyz.ToFloatPtr() ); 262 263 const __m128 r0X = _mm_unpacklo_ps( vertA0, vertC0 ); // vertA0.x, vertC0.x, vertA0.z, vertC0.z 264 const __m128 r0Y = _mm_unpackhi_ps( vertA0, vertC0 ); // vertA0.y, vertC0.y, vertA0.w, vertC0.w 265 const __m128 r0Z = _mm_unpacklo_ps( vertB0, vertD0 ); // vertB0.x, vertD0.x, vertB0.z, vertD0.z 266 const __m128 r0W = _mm_unpackhi_ps( vertB0, vertD0 ); // vertB0.y, vertD0.y, vertB0.w, vertD0.w 267 268 const __m128 vert0X = _mm_unpacklo_ps( r0X, r0Z ); // vertA0.x, vertB0.x, vertC0.x, vertD0.x 269 const __m128 vert0Y = _mm_unpackhi_ps( r0X, r0Z ); // vertA0.y, vertB0.y, vertC0.y, vertD0.y 270 const __m128 vert0Z = _mm_unpacklo_ps( r0Y, r0W ); // vertA0.z, vertB0.z, vertC0.z, vertD0.z 271 272 const __m128 r1X = _mm_unpacklo_ps( vertA1, vertC1 ); // vertA1.x, vertC1.x, vertA1.z, vertC1.z 273 const __m128 r1Y = _mm_unpackhi_ps( vertA1, vertC1 ); // vertA1.y, vertC1.y, vertA1.w, vertC1.w 274 const __m128 r1Z = _mm_unpacklo_ps( vertB1, vertD1 ); // vertB1.x, vertD1.x, vertB1.z, vertD1.z 275 const __m128 r1W = _mm_unpackhi_ps( vertB1, vertD1 ); // vertB1.y, vertD1.y, vertB1.w, vertD1.w 276 277 const __m128 vert1X = _mm_unpacklo_ps( r1X, r1Z ); // vertA1.x, vertB1.x, vertC1.x, vertD1.x 278 const __m128 vert1Y = _mm_unpackhi_ps( r1X, r1Z ); // vertA1.y, vertB1.y, vertC1.y, vertD1.y 279 const __m128 vert1Z = _mm_unpacklo_ps( r1Y, r1W ); // vertA1.z, vertB1.z, vertC1.z, vertD1.z 280 281 const __m128 r2X = _mm_unpacklo_ps( vertA2, vertC2 ); // vertA2.x, vertC2.x, vertA2.z, vertC2.z 282 const __m128 r2Y = _mm_unpackhi_ps( vertA2, vertC2 ); // vertA2.y, vertC2.y, vertA2.w, vertC2.w 283 const __m128 r2Z = _mm_unpacklo_ps( vertB2, vertD2 ); // vertB2.x, vertD2.x, vertB2.z, vertD2.z 284 const __m128 r2W = _mm_unpackhi_ps( vertB2, vertD2 ); // vertB2.y, vertD2.y, vertB2.w, vertD2.w 285 286 const __m128 vert2X = _mm_unpacklo_ps( r2X, r2Z ); // vertA2.x, vertB2.x, vertC2.x, vertD2.x 287 const __m128 vert2Y = _mm_unpackhi_ps( r2X, r2Z ); // vertA2.y, vertB2.y, vertC2.y, vertD2.y 288 const __m128 vert2Z = _mm_unpacklo_ps( r2Y, r2W ); // vertA2.z, vertB2.z, vertC2.z, 
vertD2.z 289 290 const __m128i triangleCulled = TriangleCulled_SSE2( vert0X, vert0Y, vert0Z, vert1X, vert1Y, vert1Z, vert2X, vert2Y, vert2Z, lightProjectX, lightProjectY, lightProjectZ, lightProjectW ); 291 292 __m128i triangleFacing = TriangleFacing_SSE2( vert0X, vert0Y, vert0Z, vert1X, vert1Y, vert1Z, vert2X, vert2Y, vert2Z, lightOriginX, lightOriginY, lightOriginZ ); 293 294 // optionally make triangles that are outside the light frustum facing so they do not contribute to the shadow volume 295 triangleFacing = _mm_or_si128( triangleFacing, _mm_and_si128( triangleCulled, cullShadowTrianglesToLightMask ) ); 296 297 // store culled 298 const __m128i culled_s = _mm_packs_epi32( triangleCulled, triangleCulled ); 299 const __m128i culled_b = _mm_packs_epi16( culled_s, culled_s ); 300 *(int *)&culled[j] = _mm_cvtsi128_si32( culled_b ); 301 302 // store facing 303 const __m128i facing_s = _mm_packs_epi32( triangleFacing, triangleFacing ); 304 const __m128i facing_b = _mm_packs_epi16( facing_s, facing_s ); 305 *(int *)&facing[j] = _mm_cvtsi128_si32( facing_b ); 306 307 // count the number of facing triangles 308 numFrontFacing = _mm_add_epi32( numFrontFacing, _mm_and_si128( triangleFacing, vector_int_one ) ); 309 } 310 311 if ( insideShadowVolume != NULL ) { 312 for ( int k = batchStart, n = indexStart; k <= batchEnd - 3; k += 3, n++ ) { 313 if ( !facing[n] ) { 314 if ( R_LineIntersectsTriangleExpandedWithSphere( lineStart, lineEnd, lineDir, lineLength, radius, indexedVertsODS[k + 2].xyz, indexedVertsODS[k + 1].xyz, indexedVertsODS[k + 0].xyz ) ) { 315 *insideShadowVolume = true; 316 insideShadowVolume = NULL; 317 break; 318 } 319 } 320 } 321 } 322 } 323 324 numFrontFacing = _mm_add_epi32( numFrontFacing, _mm_shuffle_epi32( numFrontFacing, _MM_SHUFFLE( 1, 0, 3, 2 ) ) ); 325 numFrontFacing = _mm_add_epi32( numFrontFacing, _mm_shuffle_epi32( numFrontFacing, _MM_SHUFFLE( 2, 3, 0, 1 ) ) ); 326 327 return _mm_cvtsi128_si32( numFrontFacing ); 328 329 #else 330 331 idODSStreamedIndexedArray< idDrawVert, triIndex_t, 32, SBT_QUAD, 1 > indexedVertsODS( verts, numVerts, indexes, numIndexes ); 332 333 const byte cullShadowTrianglesToLightMask = cullShadowTrianglesToLight ? 
255 : 0; 334 335 int numFrontFacing = 0; 336 337 for ( int i = 0, j = 0; i < numIndexes; ) { 338 339 const int batchStart = i; 340 const int batchEnd = indexedVertsODS.FetchNextBatch(); 341 const int indexStart = j; 342 343 for ( ; i <= batchEnd - 3; i += 3, j++ ) { 344 const idVec3 & v1 = indexedVertsODS[i + 0].xyz; 345 const idVec3 & v2 = indexedVertsODS[i + 1].xyz; 346 const idVec3 & v3 = indexedVertsODS[i + 2].xyz; 347 348 const byte triangleCulled = TriangleCulled_Generic( v1, v2, v3, lightProject ); 349 350 byte triangleFacing = TriangleFacing_Generic( v1, v2, v3, lightOrigin ); 351 352 // optionally make triangles that are outside the light frustum facing so they do not contribute to the shadow volume 353 triangleFacing |= ( triangleCulled & cullShadowTrianglesToLightMask ); 354 355 culled[j] = triangleCulled; 356 facing[j] = triangleFacing; 357 358 // count the number of facing triangles 359 numFrontFacing += ( triangleFacing & 1 ); 360 } 361 362 if ( insideShadowVolume != NULL ) { 363 for ( int k = batchStart, n = indexStart; k <= batchEnd - 3; k += 3, n++ ) { 364 if ( !facing[n] ) { 365 if ( R_LineIntersectsTriangleExpandedWithSphere( lineStart, lineEnd, lineDir, lineLength, radius, indexedVertsODS[k + 2].xyz, indexedVertsODS[k + 1].xyz, indexedVertsODS[k + 0].xyz ) ) { 366 *insideShadowVolume = true; 367 insideShadowVolume = NULL; 368 break; 369 } 370 } 371 } 372 } 373 } 374 375 return numFrontFacing; 376 377 #endif 378 } 379 380 /* 381 ===================== 382 CalculateTriangleFacingCulledSkinned 383 ===================== 384 */ 385 static int CalculateTriangleFacingCulledSkinned( byte * __restrict facing, byte * __restrict culled, idVec4 * __restrict tempVerts, const triIndex_t * __restrict indexes, int numIndexes, 386 const idDrawVert * __restrict verts, const int numVerts, const idJointMat * __restrict joints, 387 const idVec3 & lightOrigin, const idVec3 & viewOrigin, 388 bool cullShadowTrianglesToLight, const idRenderMatrix & lightProject, 389 bool * insideShadowVolume, const float radius ) { 390 assert_spu_local_store( facing ); 391 assert_spu_local_store( joints ); 392 assert_not_spu_local_store( indexes ); 393 assert_not_spu_local_store( verts ); 394 395 if ( insideShadowVolume != NULL ) { 396 *insideShadowVolume = false; 397 } 398 399 // calculate the start, end, dir and length of the line from the view origin to the light origin 400 const idVec3 lineStart = viewOrigin; 401 const idVec3 lineEnd = lightOrigin; 402 const idVec3 lineDelta = lineEnd - lineStart; 403 const float lineLengthSqr = lineDelta.LengthSqr(); 404 const float lineLengthRcp = idMath::InvSqrt( lineLengthSqr ); 405 const idVec3 lineDir = lineDelta * lineLengthRcp; 406 const float lineLength = lineLengthSqr * lineLengthRcp; 407 408 #ifdef ID_WIN_X86_SSE2_INTRIN 409 410 idODSStreamedArray< idDrawVert, 32, SBT_DOUBLE, 1 > vertsODS( verts, numVerts ); 411 412 for ( int i = 0; i < numVerts; ) { 413 414 const int nextNumVerts = vertsODS.FetchNextBatch() - 1; 415 416 for ( ; i <= nextNumVerts; i++ ) { 417 __m128 v = LoadSkinnedDrawVertPosition( vertsODS[i], joints ); 418 _mm_store_ps( tempVerts[i].ToFloatPtr(), v ); 419 } 420 } 421 422 idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 4 * 3 > indexesODS( indexes, numIndexes ); 423 424 const __m128 lightOriginX = _mm_splat_ps( _mm_load_ss( &lightOrigin.x ), 0 ); 425 const __m128 lightOriginY = _mm_splat_ps( _mm_load_ss( &lightOrigin.y ), 0 ); 426 const __m128 lightOriginZ = _mm_splat_ps( _mm_load_ss( &lightOrigin.z ), 0 ); 427 428 const __m128 lightProjectX = 
_mm_loadu_ps( lightProject[0] ); 429 const __m128 lightProjectY = _mm_loadu_ps( lightProject[1] ); 430 const __m128 lightProjectZ = _mm_loadu_ps( lightProject[2] ); 431 const __m128 lightProjectW = _mm_loadu_ps( lightProject[3] ); 432 433 const __m128i cullShadowTrianglesToLightMask = cullShadowTrianglesToLight ? vector_int_neg_one : vector_int_zero; 434 435 __m128i numFrontFacing = _mm_setzero_si128(); 436 437 for ( int i = 0, j = 0; i < numIndexes; ) { 438 439 const int batchStart = i; 440 const int batchEnd = indexesODS.FetchNextBatch(); 441 const int batchEnd4x = batchEnd - 4 * 3; 442 const int indexStart = j; 443 444 for ( ; i <= batchEnd4x; i += 4 * 3, j += 4 ) { 445 const int indexA0 = indexesODS[( i + 0 * 3 + 0 )]; 446 const int indexA1 = indexesODS[( i + 0 * 3 + 1 )]; 447 const int indexA2 = indexesODS[( i + 0 * 3 + 2 )]; 448 449 const int indexB0 = indexesODS[( i + 1 * 3 + 0 )]; 450 const int indexB1 = indexesODS[( i + 1 * 3 + 1 )]; 451 const int indexB2 = indexesODS[( i + 1 * 3 + 2 )]; 452 453 const int indexC0 = indexesODS[( i + 2 * 3 + 0 )]; 454 const int indexC1 = indexesODS[( i + 2 * 3 + 1 )]; 455 const int indexC2 = indexesODS[( i + 2 * 3 + 2 )]; 456 457 const int indexD0 = indexesODS[( i + 3 * 3 + 0 )]; 458 const int indexD1 = indexesODS[( i + 3 * 3 + 1 )]; 459 const int indexD2 = indexesODS[( i + 3 * 3 + 2 )]; 460 461 const __m128 vertA0 = _mm_load_ps( tempVerts[indexA0].ToFloatPtr() ); 462 const __m128 vertA1 = _mm_load_ps( tempVerts[indexA1].ToFloatPtr() ); 463 const __m128 vertA2 = _mm_load_ps( tempVerts[indexA2].ToFloatPtr() ); 464 465 const __m128 vertB0 = _mm_load_ps( tempVerts[indexB0].ToFloatPtr() ); 466 const __m128 vertB1 = _mm_load_ps( tempVerts[indexB1].ToFloatPtr() ); 467 const __m128 vertB2 = _mm_load_ps( tempVerts[indexB2].ToFloatPtr() ); 468 469 const __m128 vertC0 = _mm_load_ps( tempVerts[indexC0].ToFloatPtr() ); 470 const __m128 vertC1 = _mm_load_ps( tempVerts[indexC1].ToFloatPtr() ); 471 const __m128 vertC2 = _mm_load_ps( tempVerts[indexC2].ToFloatPtr() ); 472 473 const __m128 vertD0 = _mm_load_ps( tempVerts[indexD0].ToFloatPtr() ); 474 const __m128 vertD1 = _mm_load_ps( tempVerts[indexD1].ToFloatPtr() ); 475 const __m128 vertD2 = _mm_load_ps( tempVerts[indexD2].ToFloatPtr() ); 476 477 const __m128 r0X = _mm_unpacklo_ps( vertA0, vertC0 ); // vertA0.x, vertC0.x, vertA0.z, vertC0.z 478 const __m128 r0Y = _mm_unpackhi_ps( vertA0, vertC0 ); // vertA0.y, vertC0.y, vertA0.w, vertC0.w 479 const __m128 r0Z = _mm_unpacklo_ps( vertB0, vertD0 ); // vertB0.x, vertD0.x, vertB0.z, vertD0.z 480 const __m128 r0W = _mm_unpackhi_ps( vertB0, vertD0 ); // vertB0.y, vertD0.y, vertB0.w, vertD0.w 481 482 const __m128 vert0X = _mm_unpacklo_ps( r0X, r0Z ); // vertA0.x, vertB0.x, vertC0.x, vertD0.x 483 const __m128 vert0Y = _mm_unpackhi_ps( r0X, r0Z ); // vertA0.y, vertB0.y, vertC0.y, vertD0.y 484 const __m128 vert0Z = _mm_unpacklo_ps( r0Y, r0W ); // vertA0.z, vertB0.z, vertC0.z, vertD0.z 485 486 const __m128 r1X = _mm_unpacklo_ps( vertA1, vertC1 ); // vertA1.x, vertC1.x, vertA1.z, vertC1.z 487 const __m128 r1Y = _mm_unpackhi_ps( vertA1, vertC1 ); // vertA1.y, vertC1.y, vertA1.w, vertC1.w 488 const __m128 r1Z = _mm_unpacklo_ps( vertB1, vertD1 ); // vertB1.x, vertD1.x, vertB1.z, vertD1.z 489 const __m128 r1W = _mm_unpackhi_ps( vertB1, vertD1 ); // vertB1.y, vertD1.y, vertB1.w, vertD1.w 490 491 const __m128 vert1X = _mm_unpacklo_ps( r1X, r1Z ); // vertA1.x, vertB1.x, vertC1.x, vertD1.x 492 const __m128 vert1Y = _mm_unpackhi_ps( r1X, r1Z ); // vertA1.y, vertB1.y, vertC1.y, vertD1.y 
493 const __m128 vert1Z = _mm_unpacklo_ps( r1Y, r1W ); // vertA1.z, vertB1.z, vertC1.z, vertD1.z 494 495 const __m128 r2X = _mm_unpacklo_ps( vertA2, vertC2 ); // vertA2.x, vertC2.x, vertA2.z, vertC2.z 496 const __m128 r2Y = _mm_unpackhi_ps( vertA2, vertC2 ); // vertA2.y, vertC2.y, vertA2.w, vertC2.w 497 const __m128 r2Z = _mm_unpacklo_ps( vertB2, vertD2 ); // vertB2.x, vertD2.x, vertB2.z, vertD2.z 498 const __m128 r2W = _mm_unpackhi_ps( vertB2, vertD2 ); // vertB2.y, vertD2.y, vertB2.w, vertD2.w 499 500 const __m128 vert2X = _mm_unpacklo_ps( r2X, r2Z ); // vertA2.x, vertB2.x, vertC2.x, vertD2.x 501 const __m128 vert2Y = _mm_unpackhi_ps( r2X, r2Z ); // vertA2.y, vertB2.y, vertC2.y, vertD2.y 502 const __m128 vert2Z = _mm_unpacklo_ps( r2Y, r2W ); // vertA2.z, vertB2.z, vertC2.z, vertD2.z 503 504 const __m128i triangleCulled = TriangleCulled_SSE2( vert0X, vert0Y, vert0Z, vert1X, vert1Y, vert1Z, vert2X, vert2Y, vert2Z, lightProjectX, lightProjectY, lightProjectZ, lightProjectW ); 505 506 __m128i triangleFacing = TriangleFacing_SSE2( vert0X, vert0Y, vert0Z, vert1X, vert1Y, vert1Z, vert2X, vert2Y, vert2Z, lightOriginX, lightOriginY, lightOriginZ ); 507 508 // optionally make triangles that are outside the light frustum facing so they do not contribute to the shadow volume 509 triangleFacing = _mm_or_si128( triangleFacing, _mm_and_si128( triangleCulled, cullShadowTrianglesToLightMask ) ); 510 511 // store culled 512 const __m128i culled_s = _mm_packs_epi32( triangleCulled, triangleCulled ); 513 const __m128i culled_b = _mm_packs_epi16( culled_s, culled_s ); 514 *(int *)&culled[j] = _mm_cvtsi128_si32( culled_b ); 515 516 // store facing 517 const __m128i facing_s = _mm_packs_epi32( triangleFacing, triangleFacing ); 518 const __m128i facing_b = _mm_packs_epi16( facing_s, facing_s ); 519 *(int *)&facing[j] = _mm_cvtsi128_si32( facing_b ); 520 521 // count the number of facing triangles 522 numFrontFacing = _mm_add_epi32( numFrontFacing, _mm_and_si128( triangleFacing, vector_int_one ) ); 523 } 524 525 if ( insideShadowVolume != NULL ) { 526 for ( int k = batchStart, n = indexStart; k <= batchEnd - 3; k += 3, n++ ) { 527 if ( !facing[n] ) { 528 const int i0 = indexesODS[k + 0]; 529 const int i1 = indexesODS[k + 1]; 530 const int i2 = indexesODS[k + 2]; 531 if ( R_LineIntersectsTriangleExpandedWithSphere( lineStart, lineEnd, lineDir, lineLength, radius, tempVerts[i2].ToVec3(), tempVerts[i1].ToVec3(), tempVerts[i0].ToVec3() ) ) { 532 *insideShadowVolume = true; 533 insideShadowVolume = NULL; 534 break; 535 } 536 } 537 } 538 } 539 } 540 541 numFrontFacing = _mm_add_epi32( numFrontFacing, _mm_shuffle_epi32( numFrontFacing, _MM_SHUFFLE( 1, 0, 3, 2 ) ) ); 542 numFrontFacing = _mm_add_epi32( numFrontFacing, _mm_shuffle_epi32( numFrontFacing, _MM_SHUFFLE( 2, 3, 0, 1 ) ) ); 543 544 return _mm_cvtsi128_si32( numFrontFacing ); 545 546 #else 547 548 idODSStreamedArray< idDrawVert, 32, SBT_DOUBLE, 1 > vertsODS( verts, numVerts ); 549 550 for ( int i = 0; i < numVerts; ) { 551 552 const int nextNumVerts = vertsODS.FetchNextBatch() - 1; 553 554 for ( ; i <= nextNumVerts; i++ ) { 555 tempVerts[i].ToVec3() = Scalar_LoadSkinnedDrawVertPosition( vertsODS[i], joints ); 556 tempVerts[i].w = 1.0f; 557 } 558 } 559 560 idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 1 > indexesODS( indexes, numIndexes ); 561 562 const byte cullShadowTrianglesToLightMask = cullShadowTrianglesToLight ? 
255 : 0; 563 564 int numFrontFacing = 0; 565 566 for ( int i = 0, j = 0; i < numIndexes; ) { 567 568 const int batchStart = i; 569 const int batchEnd = indexesODS.FetchNextBatch(); 570 const int indexStart = j; 571 572 for ( ; i <= batchEnd - 3; i += 3, j++ ) { 573 const int i0 = indexesODS[i + 0]; 574 const int i1 = indexesODS[i + 1]; 575 const int i2 = indexesODS[i + 2]; 576 577 const idVec3 & v1 = tempVerts[i0].ToVec3(); 578 const idVec3 & v2 = tempVerts[i1].ToVec3(); 579 const idVec3 & v3 = tempVerts[i2].ToVec3(); 580 581 const byte triangleCulled = TriangleCulled_Generic( v1, v2, v3, lightProject ); 582 583 byte triangleFacing = TriangleFacing_Generic( v1, v2, v3, lightOrigin ); 584 585 // optionally make triangles that are outside the light frustum facing so they do not contribute to the shadow volume 586 triangleFacing |= ( triangleCulled & cullShadowTrianglesToLightMask ); 587 588 culled[j] = triangleCulled; 589 facing[j] = triangleFacing; 590 591 // count the number of facing triangles 592 numFrontFacing += ( triangleFacing & 1 ); 593 } 594 595 if ( insideShadowVolume != NULL ) { 596 for ( int k = batchStart, n = indexStart; k <= batchEnd - 3; k += 3, n++ ) { 597 if ( !facing[n] ) { 598 const int i0 = indexesODS[k + 0]; 599 const int i1 = indexesODS[k + 1]; 600 const int i2 = indexesODS[k + 2]; 601 if ( R_LineIntersectsTriangleExpandedWithSphere( lineStart, lineEnd, lineDir, lineLength, radius, tempVerts[i2].ToVec3(), tempVerts[i1].ToVec3(), tempVerts[i0].ToVec3() ) ) { 602 *insideShadowVolume = true; 603 insideShadowVolume = NULL; 604 break; 605 } 606 } 607 } 608 } 609 } 610 611 return numFrontFacing; 612 613 #endif 614 } 615 616 /* 617 ============ 618 StreamOut 619 ============ 620 */ 621 static void StreamOut( void * dst, const void * src, int numBytes ) { 622 numBytes = ( numBytes + 15 ) & ~15; 623 assert_16_byte_aligned( dst ); 624 assert_16_byte_aligned( src ); 625 626 #ifdef ID_WIN_X86_SSE2_INTRIN 627 int i = 0; 628 for ( ; i + 128 <= numBytes; i += 128 ) { 629 __m128i d0 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 0*16 ) ); 630 __m128i d1 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 1*16 ) ); 631 __m128i d2 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 2*16 ) ); 632 __m128i d3 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 3*16 ) ); 633 __m128i d4 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 4*16 ) ); 634 __m128i d5 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 5*16 ) ); 635 __m128i d6 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 6*16 ) ); 636 __m128i d7 = _mm_load_si128( (const __m128i *)( (byte *)src + i + 7*16 ) ); 637 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 0*16 ), d0 ); 638 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 1*16 ), d1 ); 639 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 2*16 ), d2 ); 640 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 3*16 ), d3 ); 641 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 4*16 ), d4 ); 642 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 5*16 ), d5 ); 643 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 6*16 ), d6 ); 644 _mm_stream_si128( (__m128i *)( (byte *)dst + i + 7*16 ), d7 ); 645 } 646 for ( ; i + 16 <= numBytes; i += 16 ) { 647 __m128i d = _mm_load_si128( (__m128i *)( (byte *)src + i ) ); 648 _mm_stream_si128( (__m128i *)( (byte *)dst + i ), d ); 649 } 650 #else 651 memcpy( dst, src, numBytes ); 652 #endif 653 } 654 655 /* 656 ============ 657 R_CreateShadowVolumeTriangles 658 ============ 659 */ 660 static void 
R_CreateShadowVolumeTriangles( triIndex_t *__restrict shadowIndices, triIndex_t *__restrict indexBuffer, int & numShadowIndexesTotal, 661 const byte *__restrict facing, const silEdge_t *__restrict silEdges, const int numSilEdges, 662 const triIndex_t *__restrict indexes, const int numIndexes, const bool includeCaps ) { 663 assert_spu_local_store( facing ); 664 assert_not_spu_local_store( shadowIndices ); 665 assert_not_spu_local_store( silEdges ); 666 assert_not_spu_local_store( indexes ); 667 668 #if 1 669 670 const int IN_BUFFER_SIZE = 64; 671 const int OUT_BUFFER_SIZE = IN_BUFFER_SIZE * 8; // each silhouette edge or cap triangle may create 6 indices (8 > 6) 672 const int OUT_BUFFER_DEPTH = 4; // quad buffer to allow overlapped output streaming 673 const int OUT_BUFFER_MASK = ( OUT_BUFFER_SIZE * OUT_BUFFER_DEPTH - 1 ); 674 675 compile_time_assert( OUT_BUFFER_SIZE * OUT_BUFFER_DEPTH * sizeof( triIndex_t ) == OUTPUT_INDEX_BUFFER_SIZE ); 676 assert_16_byte_aligned( indexBuffer ); 677 678 int numShadowIndices = 0; 679 int numStreamedIndices = 0; 680 681 { 682 idODSStreamedArray< silEdge_t, IN_BUFFER_SIZE, SBT_DOUBLE, 1 > silEdgesODS( silEdges, numSilEdges ); 683 684 for ( int i = 0; i < numSilEdges; ) { 685 686 const int nextNumSilEdges = silEdgesODS.FetchNextBatch(); 687 688 // NOTE: we rely on FetchNextBatch() to wait for all previous DMAs to complete 689 while( numShadowIndices - numStreamedIndices >= OUT_BUFFER_SIZE ) { 690 StreamOut( shadowIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], OUT_BUFFER_SIZE * sizeof( triIndex_t ) ); 691 numStreamedIndices += OUT_BUFFER_SIZE; 692 } 693 694 for ( ; i + 4 <= nextNumSilEdges; i += 4 ) { 695 const silEdge_t & sil0 = silEdgesODS[i + 0]; 696 const silEdge_t & sil1 = silEdgesODS[i + 1]; 697 const silEdge_t & sil2 = silEdgesODS[i + 2]; 698 const silEdge_t & sil3 = silEdgesODS[i + 3]; 699 700 { 701 const byte f1a = facing[sil0.p1]; 702 const byte f2a = facing[sil0.p2]; 703 const byte ta = ( f1a ^ f2a ) & 6; 704 const triIndex_t v1a = sil0.v1 << 1; 705 const triIndex_t v2a = sil0.v2 << 1; 706 707 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], v1a ^ ( 0 & 1 ), v2a ^ ( f1a & 1 ) ); 708 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], v2a ^ ( f2a & 1 ), v1a ^ ( f2a & 1 ) ); 709 WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], v1a ^ ( f1a & 1 ), v2a ^ ( 1 & 1 ) ); 710 711 numShadowIndices += ta; 712 } 713 714 { 715 const byte f1b = facing[sil1.p1]; 716 const byte f2b = facing[sil1.p2]; 717 const byte tb = ( f1b ^ f2b ) & 6; 718 const triIndex_t v1b = sil1.v1 << 1; 719 const triIndex_t v2b = sil1.v2 << 1; 720 721 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], v1b ^ ( 0 & 1 ), v2b ^ ( f1b & 1 ) ); 722 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], v2b ^ ( f2b & 1 ), v1b ^ ( f2b & 1 ) ); 723 WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], v1b ^ ( f1b & 1 ), v2b ^ ( 1 & 1 ) ); 724 725 numShadowIndices += tb; 726 } 727 728 { 729 const byte f1c = facing[sil2.p1]; 730 const byte f2c = facing[sil2.p2]; 731 const byte tc = ( f1c ^ f2c ) & 6; 732 const triIndex_t v1c = sil2.v1 << 1; 733 const triIndex_t v2c = sil2.v2 << 1; 734 735 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], v1c ^ ( 0 & 1 ), v2c ^ ( f1c & 1 ) ); 736 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], v2c ^ ( f2c & 1 ), v1c ^ ( f2c & 1 ) ); 737 WriteIndexPair( 
&indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], v1c ^ ( f1c & 1 ), v2c ^ ( 1 & 1 ) ); 738 739 numShadowIndices += tc; 740 } 741 742 { 743 const byte f1d = facing[sil3.p1]; 744 const byte f2d = facing[sil3.p2]; 745 const byte td = ( f1d ^ f2d ) & 6; 746 const triIndex_t v1d = sil3.v1 << 1; 747 const triIndex_t v2d = sil3.v2 << 1; 748 749 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], v1d ^ ( 0 & 1 ), v2d ^ ( f1d & 1 ) ); 750 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], v2d ^ ( f2d & 1 ), v1d ^ ( f2d & 1 ) ); 751 WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], v1d ^ ( f1d & 1 ), v2d ^ ( 1 & 1 ) ); 752 753 numShadowIndices += td; 754 } 755 } 756 for ( ; i + 1 <= nextNumSilEdges; i++ ) { 757 const silEdge_t & sil = silEdgesODS[i]; 758 759 const byte f1 = facing[sil.p1]; 760 const byte f2 = facing[sil.p2]; 761 const byte t = ( f1 ^ f2 ) & 6; 762 const triIndex_t v1 = sil.v1 << 1; 763 const triIndex_t v2 = sil.v2 << 1; 764 765 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], v1 ^ ( 0 & 1 ), v2 ^ ( f1 & 1 ) ); 766 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], v2 ^ ( f2 & 1 ), v1 ^ ( f2 & 1 ) ); 767 WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], v1 ^ ( f1 & 1 ), v2 ^ ( 1 & 1 ) ); 768 769 numShadowIndices += t; 770 } 771 } 772 } 773 774 if ( includeCaps ) { 775 idODSStreamedArray< triIndex_t, IN_BUFFER_SIZE, SBT_QUAD, 1 > indexesODS( indexes, numIndexes ); 776 777 for ( int i = 0, j = 0; i < numIndexes; ) { 778 779 const int nextNumIndexes = indexesODS.FetchNextBatch(); 780 781 // NOTE: we rely on FetchNextBatch() to wait for all previous DMAs to complete 782 while( numShadowIndices - numStreamedIndices >= OUT_BUFFER_SIZE ) { 783 StreamOut( shadowIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], OUT_BUFFER_SIZE * sizeof( triIndex_t ) ); 784 numStreamedIndices += OUT_BUFFER_SIZE; 785 } 786 787 for ( ; i + 4 * 3 <= nextNumIndexes; i += 4 * 3, j += 4 ) { 788 const byte ta = ~facing[j + 0] & 6; 789 const byte tb = ~facing[j + 1] & 6; 790 const byte tc = ~facing[j + 2] & 6; 791 const byte td = ~facing[j + 3] & 6; 792 793 const triIndex_t i0a = indexesODS[i + 0 * 3 + 0] << 1; 794 const triIndex_t i1a = indexesODS[i + 0 * 3 + 1] << 1; 795 const triIndex_t i2a = indexesODS[i + 0 * 3 + 2] << 1; 796 797 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], i2a + 0, i1a + 0 ); 798 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], i0a + 0, i0a + 1 ); 799 WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], i1a + 1, i2a + 1 ); 800 801 numShadowIndices += ta; 802 803 const triIndex_t i0b = indexesODS[i + 1 * 3 + 0] << 1; 804 const triIndex_t i1b = indexesODS[i + 1 * 3 + 1] << 1; 805 const triIndex_t i2b = indexesODS[i + 1 * 3 + 2] << 1; 806 807 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], i2b + 0, i1b + 0 ); 808 WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], i0b + 0, i0b + 1 ); 809 WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], i1b + 1, i2b + 1 ); 810 811 numShadowIndices += tb; 812 813 const triIndex_t i0c = indexesODS[i + 2 * 3 + 0] << 1; 814 const triIndex_t i1c = indexesODS[i + 2 * 3 + 1] << 1; 815 const triIndex_t i2c = indexesODS[i + 2 * 3 + 2] << 1; 816 817 WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], i2c + 0, i1c + 0 ); 818 
					WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], i0c + 0, i0c + 1 );
					WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], i1c + 1, i2c + 1 );

					numShadowIndices += tc;

					const triIndex_t i0d = indexesODS[i + 3 * 3 + 0] << 1;
					const triIndex_t i1d = indexesODS[i + 3 * 3 + 1] << 1;
					const triIndex_t i2d = indexesODS[i + 3 * 3 + 2] << 1;

					WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], i2d + 0, i1d + 0 );
					WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], i0d + 0, i0d + 1 );
					WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], i1d + 1, i2d + 1 );

					numShadowIndices += td;
				}

				for ( ; i + 3 <= nextNumIndexes; i += 3, j++ ) {
					const byte t = ~facing[j] & 6;

					const triIndex_t i0 = indexesODS[i + 0] << 1;
					const triIndex_t i1 = indexesODS[i + 1] << 1;
					const triIndex_t i2 = indexesODS[i + 2] << 1;

					WriteIndexPair( &indexBuffer[( numShadowIndices + 0 ) & OUT_BUFFER_MASK], i2 + 0, i1 + 0 );
					WriteIndexPair( &indexBuffer[( numShadowIndices + 2 ) & OUT_BUFFER_MASK], i0 + 0, i0 + 1 );
					WriteIndexPair( &indexBuffer[( numShadowIndices + 4 ) & OUT_BUFFER_MASK], i1 + 1, i2 + 1 );

					numShadowIndices += t;
				}
			}
		}

		while( numShadowIndices - numStreamedIndices >= OUT_BUFFER_SIZE ) {
			StreamOut( shadowIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], OUT_BUFFER_SIZE * sizeof( triIndex_t ) );
			numStreamedIndices += OUT_BUFFER_SIZE;
		}
		if ( numShadowIndices > numStreamedIndices ) {
			assert( numShadowIndices - numStreamedIndices < OUT_BUFFER_SIZE );
			StreamOut( shadowIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], ( numShadowIndices - numStreamedIndices ) * sizeof( triIndex_t ) );
		}

		numShadowIndexesTotal = numShadowIndices;

#if defined( ID_WIN_X86_SSE2_INTRIN )
		_mm_sfence();
#endif

#else	// NOTE: this code will not work on the SPU because it tries to write directly to the destination

	triIndex_t * shadowIndexPtr = shadowIndices;

	{
		idODSStreamedArray< silEdge_t, 128, SBT_DOUBLE, 1 > silEdgesODS( silEdges, numSilEdges );

		for ( int i = 0; i < numSilEdges; ) {

			const int nextNumSilEdges = silEdgesODS.FetchNextBatch() - 1;

			for ( ; i <= nextNumSilEdges; i++ ) {
				const silEdge_t & sil = silEdgesODS[i];

				const byte f1 = facing[sil.p1] & 1;
				const byte f2 = facing[sil.p2] & 1;

				if ( ( f1 ^ f2 ) == 0 ) {
					continue;
				}

				const triIndex_t v1 = sil.v1 << 1;
				const triIndex_t v2 = sil.v2 << 1;

				// set the two triangle winding orders based on facing
				// without using a poorly-predictable branch
#if 1
				// only write dwords to write combined memory
				WriteIndexPair( shadowIndexPtr + 0, v1 ^ 0, v2 ^ f1 );
				WriteIndexPair( shadowIndexPtr + 2, v2 ^ f2, v1 ^ f2 );
				WriteIndexPair( shadowIndexPtr + 4, v1 ^ f1, v2 ^ 1 );
#else
				shadowIndexPtr[0] = v1;
				shadowIndexPtr[1] = v2 ^ f1;
				shadowIndexPtr[2] = v2 ^ f2;
				shadowIndexPtr[3] = v1 ^ f2;
				shadowIndexPtr[4] = v1 ^ f1;
				shadowIndexPtr[5] = v2 ^ 1;
#endif
				shadowIndexPtr += 6;
			}
		}
	}

	if ( includeCaps ) {
		idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 1 > indexesODS( indexes, numIndexes );

		for ( int i = 0, j = 0; i < numIndexes; ) {

			const int nextNumIndexes =
indexesODS.FetchNextBatch() - 3; 915 916 for ( ; i <= nextNumIndexes; i += 3, j++ ) { 917 if ( facing[j] ) { 918 continue; 919 } 920 921 const triIndex_t i0 = indexesODS[i + 0] << 1; 922 const triIndex_t i1 = indexesODS[i + 1] << 1; 923 const triIndex_t i2 = indexesODS[i + 2] << 1; 924 #if 1 925 // only write dwords to write combined memory 926 WriteIndexPair( shadowIndexPtr + 0, i2 + 0, i1 + 0 ); 927 WriteIndexPair( shadowIndexPtr + 2, i0 + 0, i0 + 1 ); 928 WriteIndexPair( shadowIndexPtr + 4, i1 + 1, i2 + 1 ); 929 #else 930 shadowIndexPtr[0] = i2; 931 shadowIndexPtr[1] = i1; 932 shadowIndexPtr[2] = i0; 933 shadowIndexPtr[3] = i0 + 1; 934 shadowIndexPtr[4] = i1 + 1; 935 shadowIndexPtr[5] = i2 + 1; 936 #endif 937 shadowIndexPtr += 6; 938 } 939 } 940 } 941 942 numShadowIndexesTotal = shadowIndexPtr - shadowIndices; 943 944 #endif 945 } 946 947 /* 948 ===================== 949 R_CreateLightTriangles 950 ===================== 951 */ 952 void R_CreateLightTriangles( triIndex_t * __restrict lightIndices, triIndex_t * __restrict indexBuffer, int & numLightIndicesTotal, 953 const byte * __restrict culled, const triIndex_t * __restrict indexes, const int numIndexes ) { 954 assert_spu_local_store( culled ); 955 assert_not_spu_local_store( lightIndices ); 956 assert_not_spu_local_store( indexes ); 957 958 #if 1 959 960 const int IN_BUFFER_SIZE = 256; 961 const int OUT_BUFFER_SIZE = IN_BUFFER_SIZE * 2; // there are never more indices generated than the original indices 962 const int OUT_BUFFER_DEPTH = 4; // quad buffer to allow overlapped output streaming 963 const int OUT_BUFFER_MASK = ( OUT_BUFFER_SIZE * OUT_BUFFER_DEPTH - 1 ); 964 965 compile_time_assert( OUT_BUFFER_SIZE * OUT_BUFFER_DEPTH * sizeof( triIndex_t ) == OUTPUT_INDEX_BUFFER_SIZE ); 966 assert_16_byte_aligned( indexBuffer ); 967 968 int numLightIndices = 0; 969 int numStreamedIndices = 0; 970 971 idODSStreamedArray< triIndex_t, IN_BUFFER_SIZE, SBT_QUAD, 1 > indexesODS( indexes, numIndexes ); 972 973 for ( int i = 0, j = 0; i < numIndexes; ) { 974 975 const int nextNumIndexes = indexesODS.FetchNextBatch(); 976 977 // NOTE: we rely on FetchNextBatch() to wait for all previous DMAs to complete 978 while( numLightIndices - numStreamedIndices >= OUT_BUFFER_SIZE ) { 979 StreamOut( lightIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], OUT_BUFFER_SIZE * sizeof( triIndex_t ) ); 980 numStreamedIndices += OUT_BUFFER_SIZE; 981 } 982 983 for ( ; i + 4 * 3 <= nextNumIndexes; i += 4 * 3, j += 4 ) { 984 const byte ta = ~culled[j + 0] & 3; 985 const byte tb = ~culled[j + 1] & 3; 986 const byte tc = ~culled[j + 2] & 3; 987 const byte td = ~culled[j + 3] & 3; 988 989 indexBuffer[( numLightIndices + 0 ) & OUT_BUFFER_MASK] = indexesODS[i + 0 * 3 + 0]; 990 indexBuffer[( numLightIndices + 1 ) & OUT_BUFFER_MASK] = indexesODS[i + 0 * 3 + 1]; 991 indexBuffer[( numLightIndices + 2 ) & OUT_BUFFER_MASK] = indexesODS[i + 0 * 3 + 2]; 992 993 numLightIndices += ta; 994 995 indexBuffer[( numLightIndices + 0 ) & OUT_BUFFER_MASK] = indexesODS[i + 1 * 3 + 0]; 996 indexBuffer[( numLightIndices + 1 ) & OUT_BUFFER_MASK] = indexesODS[i + 1 * 3 + 1]; 997 indexBuffer[( numLightIndices + 2 ) & OUT_BUFFER_MASK] = indexesODS[i + 1 * 3 + 2]; 998 999 numLightIndices += tb; 1000 1001 indexBuffer[( numLightIndices + 0 ) & OUT_BUFFER_MASK] = indexesODS[i + 2 * 3 + 0]; 1002 indexBuffer[( numLightIndices + 1 ) & OUT_BUFFER_MASK] = indexesODS[i + 2 * 3 + 1]; 1003 indexBuffer[( numLightIndices + 2 ) & OUT_BUFFER_MASK] = indexesODS[i + 2 * 3 + 2]; 1004 1005 
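			// Branchless compaction: the three indices are always written, but the write position
			// only advances by 3 when the triangle survived the cull and by 0 when it was culled,
			// so a culled triangle's indices are simply overwritten by the next triangle.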
			numLightIndices += tc;

			indexBuffer[( numLightIndices + 0 ) & OUT_BUFFER_MASK] = indexesODS[i + 3 * 3 + 0];
			indexBuffer[( numLightIndices + 1 ) & OUT_BUFFER_MASK] = indexesODS[i + 3 * 3 + 1];
			indexBuffer[( numLightIndices + 2 ) & OUT_BUFFER_MASK] = indexesODS[i + 3 * 3 + 2];

			numLightIndices += td;
		}

		for ( ; i + 3 <= nextNumIndexes; i += 3, j++ ) {
			const byte t = ~culled[j] & 3;

			indexBuffer[( numLightIndices + 0 ) & OUT_BUFFER_MASK] = indexesODS[i + 0];
			indexBuffer[( numLightIndices + 1 ) & OUT_BUFFER_MASK] = indexesODS[i + 1];
			indexBuffer[( numLightIndices + 2 ) & OUT_BUFFER_MASK] = indexesODS[i + 2];

			numLightIndices += t;
		}
	}

	while( numLightIndices - numStreamedIndices >= OUT_BUFFER_SIZE ) {
		StreamOut( lightIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], OUT_BUFFER_SIZE * sizeof( triIndex_t ) );
		numStreamedIndices += OUT_BUFFER_SIZE;
	}
	if ( numLightIndices > numStreamedIndices ) {
		assert( numLightIndices - numStreamedIndices < OUT_BUFFER_SIZE );
		StreamOut( lightIndices + numStreamedIndices, & indexBuffer[numStreamedIndices & OUT_BUFFER_MASK], ( numLightIndices - numStreamedIndices ) * sizeof( triIndex_t ) );
	}

	numLightIndicesTotal = numLightIndices;

#if defined( ID_WIN_X86_SSE2_INTRIN )
	_mm_sfence();
#endif

#else	// NOTE: this code will not work on the SPU because it tries to write directly to the destination

	int numLightIndices = 0;

	idODSStreamedArray< triIndex_t, 256, SBT_QUAD, 1 > indexesODS( indexes, numIndexes );

	for ( int i = 0, j = 0; i < numIndexes; ) {

		const int nextNumIndexes = indexesODS.FetchNextBatch() - 3;

		for ( ; i <= nextNumIndexes; i += 3, j++ ) {
			if ( culled[j] ) {
				continue;
			}

			lightIndices[numLightIndices + 0] = indexesODS[i + 0];
			lightIndices[numLightIndices + 1] = indexesODS[i + 1];
			lightIndices[numLightIndices + 2] = indexesODS[i + 2];

			numLightIndices += 3;
		}
	}

	numLightIndicesTotal = numLightIndices;

#endif
}

/*
=====================
DynamicShadowVolumeJob

Creates shadow volume indices for a surface that intersects a light.
Optionally also creates new surface indices with just the triangles
inside the light volume. These indices will be unique for a given
light / surface combination.

The shadow volume indices are created using the original surface vertices.
However, the indices are set up to be used with a shadow volume vertex buffer
with all vertices duplicated where the even vertices have the same positions
as the surface vertices (at the near cap) and each odd vertex has the
same position as the previous even vertex but is projected to infinity
(the far cap) in the vertex program.
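For illustration only (this is not the actual vertex program, just the convention the
shadow volume indices rely on), the expansion amounts to:

	shadow vertex 2 * n + 0 : ( vert[n].xyz, 1.0 )                      near cap, on the model
	shadow vertex 2 * n + 1 : ( vert[n].xyz - lightOrigin.xyz, 0.0 )    pushed away from the light to infinity

which is why every index written by this job is either ( vertexNum << 1 ) for the near cap
or ( vertexNum << 1 ) | 1 for the projected copy.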
=====================
*/
void DynamicShadowVolumeJob( const dynamicShadowVolumeParms_t * parms ) {
	if ( parms->tempFacing == NULL ) {
		*const_cast< byte ** >( &parms->tempFacing ) = (byte *)_alloca16( TEMP_FACING( parms->numIndexes ) );
	}
	if ( parms->tempCulled == NULL ) {
		*const_cast< byte ** >( &parms->tempCulled ) = (byte *)_alloca16( TEMP_CULL( parms->numIndexes ) );
	}
	if ( parms->tempVerts == NULL && parms->joints != NULL ) {
		*const_cast< idVec4 ** >( &parms->tempVerts ) = (idVec4 *)_alloca16( TEMP_VERTS( parms->numVerts ) );
	}
	if ( parms->indexBuffer == NULL ) {
		*const_cast< triIndex_t ** >( &parms->indexBuffer ) = (triIndex_t *)_alloca16( OUTPUT_INDEX_BUFFER_SIZE );
	}

	assert( parms->joints == NULL || parms->numJoints > 0 );

	// Calculate the shadow depth bounds.
	float shadowZMin = parms->lightZMin;
	float shadowZMax = parms->lightZMax;
	if ( parms->useShadowDepthBounds ) {
		idRenderMatrix::DepthBoundsForShadowBounds( shadowZMin, shadowZMax, parms->triangleMVP, parms->triangleBounds, parms->localLightOrigin, true );
		shadowZMin = Max( shadowZMin, parms->lightZMin );
		shadowZMax = Min( shadowZMax, parms->lightZMax );
	}

	bool renderZFail = false;
	int numShadowIndices = 0;
	int numLightIndices = 0;

	// The shadow volume may be depth culled if either the shadow volume was culled to the view frustum or if the
	// depth range of the visible part of the shadow volume is outside the depth range of the light volume.
	if ( shadowZMin < shadowZMax ) {

		// Check if we need to render the shadow volume with Z-fail.
		bool * preciseInsideShadowVolume = NULL;
		// If the view is potentially inside the shadow volume bounds we may need to render with Z-fail.
		if ( R_ViewPotentiallyInsideInfiniteShadowVolume( parms->triangleBounds, parms->localLightOrigin, parms->localViewOrigin, parms->zNear * INSIDE_SHADOW_VOLUME_EXTRA_STRETCH ) ) {
			// Optionally perform a more precise test to see whether or not the view is inside the shadow volume.
			if ( parms->useShadowPreciseInsideTest ) {
				preciseInsideShadowVolume = & renderZFail;
			} else {
				renderZFail = true;
			}
		}

		// Calculate the facing of each triangle and cull each triangle to the light volume.
		// Optionally also calculate more precisely whether or not the view is inside the shadow volume.
		int numFrontFacing = 0;
		if ( parms->joints != NULL ) {
			numFrontFacing = CalculateTriangleFacingCulledSkinned( parms->tempFacing, parms->tempCulled, parms->tempVerts, parms->indexes, parms->numIndexes,
																	parms->verts, parms->numVerts, parms->joints,
																	parms->localLightOrigin, parms->localViewOrigin,
																	parms->cullShadowTrianglesToLight, parms->localLightProject,
																	preciseInsideShadowVolume, parms->zNear * INSIDE_SHADOW_VOLUME_EXTRA_STRETCH );
		} else {
			numFrontFacing = CalculateTriangleFacingCulledStatic( parms->tempFacing, parms->tempCulled, parms->indexes, parms->numIndexes,
																	parms->verts, parms->numVerts,
																	parms->localLightOrigin, parms->localViewOrigin,
																	parms->cullShadowTrianglesToLight, parms->localLightProject,
																	preciseInsideShadowVolume, parms->zNear * INSIDE_SHADOW_VOLUME_EXTRA_STRETCH );
		}

		// Create shadow volume indices.
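		// Shadow indices are only generated when the caller asked for them and at least one
		// triangle faces away from the light; a surface where every triangle is front facing
		// (or was forced to facing because it lies outside the light frustum) produces no
		// silhouette and no caps, so the volume would be empty.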
		if ( parms->shadowIndices != NULL ) {
			const int numTriangles = parms->numIndexes / 3;

			// If there are any triangles facing away from the light.
			if ( numTriangles - numFrontFacing > 0 ) {
				// Set the "fake triangle" used by dangling edges to facing so a dangling edge will
				// make a silhouette if the triangle that uses the dangling edges is not facing.
				// Note that dangling edges outside the light frustum do not make silhouettes because
				// a triangle outside the light frustum is also set to facing just like the "fake triangle"
				// used by a dangling edge.
				parms->tempFacing[numTriangles] = 255;

				// Check if we can avoid rendering the shadow volume caps.
				bool renderShadowCaps = parms->forceShadowCaps || renderZFail;

				// Create new triangles along the silhouette planes and optionally add end-cap triangles on the model and on the distant projection.
				R_CreateShadowVolumeTriangles( parms->shadowIndices, parms->indexBuffer, numShadowIndices, parms->tempFacing,
												parms->silEdges, parms->numSilEdges, parms->indexes, parms->numIndexes, renderShadowCaps );

				assert( numShadowIndices <= parms->maxShadowIndices );
			}
		}

		// Create new indices with only the triangles that are inside the light volume.
		if ( parms->lightIndices != NULL ) {
			R_CreateLightTriangles( parms->lightIndices, parms->indexBuffer, numLightIndices, parms->tempCulled, parms->indexes, parms->numIndexes );

			assert( numLightIndices <= parms->maxLightIndices );
		}
	}

	// write out the number of shadow indices
	if ( parms->numShadowIndices != NULL ) {
		*parms->numShadowIndices = numShadowIndices;
	}
	// write out the number of light indices
	if ( parms->numLightIndices != NULL ) {
		*parms->numLightIndices = numLightIndices;
	}
	// write out whether or not the shadow volume needs to be rendered with Z-Fail
	if ( parms->renderZFail != NULL ) {
		*parms->renderZFail = renderZFail;
	}
	// write out the shadow depth bounds
	if ( parms->shadowZMin != NULL ) {
		*parms->shadowZMin = shadowZMin;
	}
	if ( parms->shadowZMax != NULL ) {
		*parms->shadowZMax = shadowZMax;
	}
	// write out the shadow volume state
	if ( parms->shadowVolumeState != NULL ) {
		*parms->shadowVolumeState = SHADOWVOLUME_DONE;
	}
}

REGISTER_PARALLEL_JOB( DynamicShadowVolumeJob, "DynamicShadowVolumeJob" );