DXTEncoder.cpp - DOOM-3-BFG - DOOM 3 BFG Edition

DXTEncoder.cpp (135362B)
      1 /*
      2 ===========================================================================
      3 
      4 Doom 3 BFG Edition GPL Source Code
      5 Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company. 
      6 
      7 This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").  
      8 
      9 Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
     10 it under the terms of the GNU General Public License as published by
     11 the Free Software Foundation, either version 3 of the License, or
     12 (at your option) any later version.
     13 
     14 Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
     15 but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17 GNU General Public License for more details.
     18 
     19 You should have received a copy of the GNU General Public License
     20 along with Doom 3 BFG Edition Source Code.  If not, see <http://www.gnu.org/licenses/>.
     21 
     22 In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code.  If not, please request a copy in writing from id Software at the address below.
     23 
     24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
     25 
     26 ===========================================================================
     27 */
     28 /*
     29 ================================================================================================
     30 Contains the DxtEncoder implementation.
     31 ================================================================================================
     32 */
     33 
     34 #pragma hdrstop
     35 #include "DXTCodec_local.h"
     36 #include "DXTCodec.h"
     37 
     38 #define INSET_COLOR_SHIFT		4		// inset the color bounding box with ( range >> shift )
     39 #define INSET_ALPHA_SHIFT		5		// inset alpha channel
     40 
     41 #define C565_5_MASK				0xF8	// keeps the top five bits of a byte ( 0xFF minus last three bits )
     42 #define C565_6_MASK				0xFC	// keeps the top six bits of a byte ( 0xFF minus last two bits )
     43 
     44 #define NVIDIA_7X_HARDWARE_BUG_FIX		// keep the DXT5 colors sorted as: max, min
     45 
     46 typedef uint16	word;		// shorthand for the project's uint16 type
     47 typedef uint32	dword;		// shorthand for the project's uint32 type
     48 
     49 /*
     50 ========================
     51 idDxtEncoder::NV4XHardwareBugFix
     52 ========================
     53 */
     54 void idDxtEncoder::NV4XHardwareBugFix( byte *minColor, byte *maxColor ) const {
     55 #ifdef ID_WIN_X86_ASM
     56 	int minq = ( ( minColor[0] << 16 ) | ( minColor[1] << 8 ) | minColor[2] ) & 0x00F8FCF8;
     57 	int maxq = ( ( maxColor[0] << 16 ) | ( maxColor[1] << 8 ) | maxColor[2] ) & 0x00F8FCF8;
     58 	int mask = -( minq > maxq ) & 0x00FFFFFF;
     59 	int min = *(int *)minColor;
     60 	int max = *(int *)maxColor;
     61 	min ^= max;
     62 	max ^= ( min & mask );
     63 	min ^= max;
     64 	*(int *)minColor = min;
     65 	*(int *)maxColor = max;
     66 #else
     67 	if ( ColorTo565( minColor ) > ColorTo565( maxColor ) ) {
     68 		SwapValues( minColor[0], maxColor[0] );
     69 		SwapValues( minColor[1], maxColor[1] );
     70 		SwapValues( minColor[2], maxColor[2] );
     71 	}
     72 #endif
     73 }
     74 
     75 /*
     76 ========================
     77 idDxtEncoder::HasConstantValuePer4x4Block
     78 ========================
     79 */
     80 bool idDxtEncoder::HasConstantValuePer4x4Block( const byte *inBuf, int width, int height, int channel ) const {
     81 	if ( width < 4 || height < 4 ) {
     82 		byte value = inBuf[channel];
     83 		for ( int k = 0; k < height; k++ ) {
     84 			for ( int l = 0; l < width; l++ ) {
     85 				if ( inBuf[(k*width+l)*4+channel] != value ) {
     86 					return false;
     87 				}
     88 			}
     89 		}
     90 		return true;
     91 	}
     92 
     93 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
     94 		for ( int i = 0; i < width; i += 4 ) {
     95 			const byte *inPtr = inBuf + i * 4;
     96 			byte value = inPtr[channel];
     97 			for ( int k = 0; k < 4; k++ ) {
     98 				for ( int l = 0; l < 4; l++ ) {
     99 					if ( inPtr[(k*width+l)*4+channel] != value ) {
    100 						return false;
    101 					}
    102 				}
    103 			}
    104 		}
    105 		inBuf += srcPadding;
    106 	}
    107 	return true;
    108 }
    109 
    110 /*
    111 ========================
    112 idDxtEncoder::WriteTinyColorDXT1
    113 ========================
    114 */
    115 void idDxtEncoder::WriteTinyColorDXT1( const byte *inBuf, int width, int height ) {
    116 	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
    117 	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
    118 	// example: 2x8 pixels
    119 	// numBlocks = 2
    120 	// stride = 32 bytes (8 pixels)
    121 
    122 	for ( int i = 0; i < numBlocks; i++ ) {
    123 		// FIXME: This just emits a fake block based on the color at position 0,0
    124 		EmitUShort( ColorTo565( inBuf ) );
    125 		EmitUShort( 0 );	// dummy, never used
    126 		EmitUInt( 0 );		// 4 color index bytes all use the first color
    127 
    128 		inBuf += stride;
    129 	}
    130 }
    131 
    132 /*
    133 ========================
    134 idDxtEncoder::WriteTinyColorDXT5
    135 ========================
    136 */
    137 void idDxtEncoder::WriteTinyColorDXT5( const byte *inBuf, int width, int height ) {
    138 	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
    139 	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
    140 	// example: 2x8 pixels
    141 	// numBlocks = 2
    142 	// stride = 32 bytes (8 pixels)
    143 
    144 	for ( int i = 0; i < numBlocks; i++ ) {
    145 		// FIXME: This just emits a fake block based on the color at position 0,0
    146 		EmitByte( inBuf[3] );
    147 		EmitByte( 0 );		// dummy, never used
    148 		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
    149 		EmitByte( 0 );
    150 		EmitByte( 0 );
    151 		EmitByte( 0 );
    152 		EmitByte( 0 );
    153 		EmitByte( 0 );
    154 
    155 		EmitUShort( ColorTo565( inBuf ) );
    156 		EmitUShort( 0 );	// dummy, never used
    157 		EmitUInt( 0 );		// 4 color index bytes all use the first color
    158 
    159 		inBuf += stride;
    160 	}
    161 }
    162 
    163 /*
    164 ========================
    165 idDxtEncoder::WriteTinyColorCTX1DXT5A
    166 ========================
    167 */
    168 void idDxtEncoder::WriteTinyColorCTX1DXT5A( const byte *inBuf, int width, int height ) {
    169 	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
    170 	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
    171 	// example: 2x8 pixels
    172 	// numBlocks = 2
    173 	// stride = 32 bytes (8 pixels)
    174 
    175 	for ( int i = 0; i < numBlocks; i++ ) {
    176 		// FIXME: This just emits a fake block based on the color at position 0,0
    177 		EmitByte( inBuf[0] );
    178 		EmitByte( inBuf[1] );
    179 		EmitByte( inBuf[0] );
    180 		EmitByte( inBuf[1] );
    181 		EmitUInt( 0 );		// 4 color index bytes all use the first color
    182 
    183 		EmitByte( inBuf[3] );
    184 		EmitByte( 0 );		// dummy, never used
    185 		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
    186 		EmitByte( 0 );
    187 		EmitByte( 0 );
    188 		EmitByte( 0 );
    189 		EmitByte( 0 );
    190 		EmitByte( 0 );
    191 
    192 		inBuf += stride;
    193 	}
    194 }
    195 
    196 /*
    197 ========================
    198 idDxtEncoder::WriteTinyNormalMapDXT5
    199 ========================
    200 */
    201 void idDxtEncoder::WriteTinyNormalMapDXT5( const byte *inBuf, int width, int height ) {
    202 	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
    203 	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
    204 	// example: 2x8 pixels
    205 	// numBlocks = 2
    206 	// stride = 32 bytes (8 pixels)
    207 
    208 	for ( int i = 0; i < numBlocks; i++ ) {
    209 		// FIXME: This just emits a fake block based on the normal at position 0,0
    210 		EmitByte( inBuf[3] );
    211 		EmitByte( 0 );		// dummy, never used
    212 		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
    213 		EmitByte( 0 );
    214 		EmitByte( 0 );
    215 		EmitByte( 0 );
    216 		EmitByte( 0 );
    217 		EmitByte( 0 );
    218 
    219 		EmitUShort( ColorTo565( inBuf[0], inBuf[1], inBuf[2] ) );
    220 		EmitUShort( 0 );	// dummy, never used
    221 		EmitUInt( 0 );		// 4 color index bytes all use the first color
    222 
    223 		inBuf += stride;
    224 	}
    225 }
    226 
    227 /*
    228 ========================
    229 idDxtEncoder::WriteTinyNormalMapDXN
    230 ========================
    231 */
    232 void idDxtEncoder::WriteTinyNormalMapDXN( const byte *inBuf, int width, int height ) {
    233 	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
    234 	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
    235 	// example: 2x8 pixels
    236 	// numBlocks = 2
    237 	// stride = 32 bytes (8 pixels)
    238 
    239 	for ( int i = 0; i < numBlocks; i++ ) {
    240 		// FIXME: This just emits a fake block based on the normal at position 0,0
    241 		EmitByte( inBuf[0] );
    242 		EmitByte( 0 );		// dummy, never used
    243 		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
    244 		EmitByte( 0 );
    245 		EmitByte( 0 );
    246 		EmitByte( 0 );
    247 		EmitByte( 0 );
    248 		EmitByte( 0 );
    249 
    250 		EmitByte( inBuf[1] );
    251 		EmitByte( 0 );		// dummy, never used
    252 		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
    253 		EmitByte( 0 );
    254 		EmitByte( 0 );
    255 		EmitByte( 0 );
    256 		EmitByte( 0 );
    257 		EmitByte( 0 );
    258 
    259 		inBuf += stride;
    260 	}
    261 }
    262 
    263 /*
    264 ========================
    265 idDxtEncoder::WriteTinyDXT5A
    266 ========================
    267 */
    268 void idDxtEncoder::WriteTinyDXT5A( const byte *inBuf, int width, int height ) {
    269 	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
    270 	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
    271 	// example: 2x8 pixels
    272 	// numBlocks = 2
    273 	// stride = 32 bytes (8 pixels)
    274 
    275 	for ( int i = 0; i < numBlocks; i++ ) {
    276 		// FIXME: This just emits a fake block based on the normal at position 0,0
    277 		EmitByte( inBuf[0] );
    278 		EmitByte( 0 );		// dummy, never used
    279 		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
    280 		EmitByte( 0 );
    281 		EmitByte( 0 );
    282 		EmitByte( 0 );
    283 		EmitByte( 0 );
    284 		EmitByte( 0 );
    285 
    286 		inBuf += stride;
    287 	}
    288 }
    289 
    290 /*
    291 ========================
    292 idDxtEncoder::ExtractBlock
    293 
    294 params:	inPtr		- input image, 4 bytes per pixel
    295 paramO:	colorBlock	- 4*4 output tile, 4 bytes per pixel
    296 ========================
    297 */
    298 ID_INLINE void idDxtEncoder::ExtractBlock( const byte *inPtr, int width, byte *colorBlock ) const {
    299 	for ( int j = 0; j < 4; j++ ) {
    300 		memcpy( &colorBlock[j*4*4], inPtr, 4*4 );
    301 		inPtr += width * 4;
    302 	}
    303 }
    304 
    305 /*
    306 ========================
    307 SwapColors
    308 ========================
    309 */
    310 void SwapColors( byte *c1, byte *c2 ) {
    311 	byte tm[3];
    312 	memcpy( tm, c1, 3 );
    313 	memcpy( c1, c2, 3 );
    314 	memcpy( c2, tm, 3 );
    315 }
    316 
    317 /*
    318 ========================
    319 idDxtEncoder::GetMinMaxColorsMaxDist
    320 
    321 Finds the two RGB colors in a 4x4 block furthest apart. Also finds the two alpha values 
    322 furthest apart.
    323 
    324 params: colorBlock	- 4*4 input tile, 4 bytes per pixel
    325 paramO:	minColor	- 4 byte min color
    326 paramO:	maxColor	- 4 byte max color
    327 ========================
    328 */
    329 void idDxtEncoder::GetMinMaxColorsMaxDist( const byte *colorBlock, byte *minColor, byte *maxColor ) const {
    330 	int maxDistC = -1;
    331 	int maxDistA = -1;
    332 
    333 	for ( int i = 0; i < 64 - 4; i += 4 ) {
    334 		for ( int j = i + 4; j < 64; j += 4 ) {
    335 			int dc = ColorDistance( &colorBlock[i], &colorBlock[j] );
    336 			if ( dc > maxDistC ) {
    337 				maxDistC = dc;
    338 				memcpy( minColor, colorBlock+i, 3 );
    339 				memcpy( maxColor, colorBlock+j, 3 );
    340 			}
    341 			int da = AlphaDistance( colorBlock[i+3], colorBlock[j+3] );
    342 			if ( da > maxDistA ) {
    343 				maxDistA = da;
    344 				minColor[3] = colorBlock[i+3];
    345 				maxColor[3] = colorBlock[j+3];
    346 			}
    347 		}
    348 	}
    349 	if ( maxColor[0] < minColor[0] ) {
    350 		SwapColors( minColor, maxColor );
    351 	}
    352 }
    353 
    354 /*
    355 ========================
    356 idDxtEncoder::GetMinMaxColorsLuminance
    357 
    358 Finds the two RGB colors in a 4x4 block furthest apart based on luminance. Also finds the two 
    359 alpha values furthest apart.
    360 
    361 params: colorBlock	- 4*4 input tile, 4 bytes per pixel
    362 paramO:	minColor	- 4 byte min color
    363 paramO:	maxColor	- 4 byte max color
    364 ========================
    365 */
    366 void idDxtEncoder::GetMinMaxColorsLuminance( const byte *colorBlock, byte *minColor, byte *maxColor ) const {
    367 	int maxLumC = 0, minLumC = 256 * 4;
    368 	int maxAlpha = 0, minAlpha = 256 * 4;
    369 
    370 	for ( int i = 0; i < 16; i++ ) {
    371 		int luminance = colorBlock[i*4+0] + colorBlock[i*4+1] * 2 + colorBlock[i*4+2];
    372 		if ( luminance > maxLumC ) {
    373 			maxLumC = luminance;
    374 			memcpy( maxColor, colorBlock+i*4, 3 );
    375 		}
    376 		if ( luminance < minLumC ) {
    377 			minLumC = luminance;
    378 			memcpy( minColor, colorBlock+i*4, 3 );
    379 		}
    380 		int alpha = colorBlock[i*4+3];
    381 		if ( alpha > maxAlpha ) {
    382 			maxAlpha = alpha;
    383 			maxColor[3] = (byte)alpha;
    384 		}
    385 		if ( alpha < minAlpha ) {
    386 			minAlpha = alpha;
    387 			minColor[3] = (byte)alpha;
    388 		}
    389 	}
    390 	if ( maxColor[0] < minColor[0] ) {
    391 		SwapColors( minColor, maxColor );
    392 	}
    393 }
    394 
    395 /*
    396 ========================
    397 idDxtEncoder::GetSquareAlphaError
    398 
    399 params:	colorBlock	- 16 pixel block for which to find color indexes
    400 paramO:	minAlpha	- Min alpha found
    401 paramO:	maxAlpha	- Max alpha found
    402 return: 4 byte color index block
    403 ========================
    404 */
    405 int idDxtEncoder::GetSquareAlphaError( const byte *colorBlock, const int alphaOffset, const byte minAlpha, const byte maxAlpha, int lastError ) const {
    406 	int i, j;
    407 	byte alphas[8];
    408 
    409 	alphas[0] = maxAlpha;
    410 	alphas[1] = minAlpha;
    411 
    412 	if ( maxAlpha > minAlpha ) {
    413 		alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
    414 		alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
    415 		alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
    416 		alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
    417 		alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
    418 		alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
    419 	} else {
    420 		alphas[2] = ( 4 * alphas[0] + 1 * alphas[1] ) / 5;
    421 		alphas[3] = ( 3 * alphas[0] + 2 * alphas[1] ) / 5;
    422 		alphas[4] = ( 2 * alphas[0] + 3 * alphas[1] ) / 5;
    423 		alphas[5] = ( 1 * alphas[0] + 4 * alphas[1] ) / 5;
    424 		alphas[6] = 0;
    425 		alphas[7] = 255;
    426 	}
    427 
    428 	int error = 0;
    429 	for ( i = 0; i < 16; i++ ) {
    430 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
    431 		byte a = colorBlock[i*4+alphaOffset];
    432 		for ( j = 0; j < 8; j++ ) {
    433 			unsigned int dist = AlphaDistance( a, alphas[j] );
    434 			if ( dist < minDist ) {
    435 				minDist = dist;
    436 			}
    437 		}
    438 		error += minDist;
    439 
    440 		if ( error >= lastError ) {
    441 			return error;
    442 		}
    443 	}
    444 
    445 	return error;
    446 }
    447 
    448 /*
    449 ========================
    450 idDxtEncoder::GetMinMaxAlphaHQ
    451 
    452 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
    453 paramO:	minColor		- 4 byte min color found
    454 paramO:	maxColor		- 4 byte max color found
    455 ========================
    456 */
    457 int idDxtEncoder::GetMinMaxAlphaHQ( const byte *colorBlock, const int alphaOffset, byte *minColor, byte *maxColor ) const {
    458 	int i, j;
    459 	byte alphaMin, alphaMax;
    460 	int error, bestError = MAX_TYPE( int );
    461 
    462 	alphaMin = 255;
    463 	alphaMax = 0;
    464 
    465 	// get alpha min / max
    466 	for ( i = 0; i < 16; i++ ) {
    467 		if ( colorBlock[i*4+alphaOffset] < alphaMin ) {
    468 			alphaMin = colorBlock[i*4+alphaOffset];
    469 		}
    470 		if ( colorBlock[i*4+alphaOffset] > alphaMax ) {
    471 			alphaMax = colorBlock[i*4+alphaOffset];
    472 		}
    473 	}
    474 
    475 	const int ALPHA_EXPAND = 32;
    476 
    477 	alphaMin = ( alphaMin <= ALPHA_EXPAND ) ? 0 : alphaMin - ALPHA_EXPAND;
    478 	alphaMax = ( alphaMax >= 255 - ALPHA_EXPAND ) ? 255 : alphaMax + ALPHA_EXPAND;
    479 
    480 	for ( i = alphaMin; i <= alphaMax; i++ ) {
    481 		for ( j = alphaMax; j >= i; j-- ) {
    482 
    483 			error = GetSquareAlphaError( colorBlock, alphaOffset, (byte)i, (byte)j, bestError );
    484 			if ( error < bestError ) {
    485 				bestError = error;
    486 				minColor[alphaOffset] = (byte)i;
    487 				maxColor[alphaOffset] = (byte)j;
    488 			}
    489 
    490 			error = GetSquareAlphaError( colorBlock, alphaOffset, (byte)j, (byte)i, bestError );
    491 			if ( error < bestError ) {
    492 				bestError = error;
    493 				minColor[alphaOffset] = (byte)i;
    494 				maxColor[alphaOffset] = (byte)j;
    495 			}
    496 		}
    497 	}
    498 
    499 	return bestError;
    500 }
    501 
    502 /*
    503 ========================
    504 idDxtEncoder::GetSquareColorsError
    505 
    506 params:	colorBlock	- 16 pixel block for which to find color indexes
    507 paramO:	color0		- 4 byte min color found
    508 paramO:	color1		- 4 byte max color found
    509 return: 4 byte color index block
    510 ========================
    511 */
    512 int idDxtEncoder::GetSquareColorsError( const byte *colorBlock, const unsigned short color0, const unsigned short color1, int lastError ) const {
    513 	int i, j;
    514 	byte colors[4][4];
    515 
    516 	ColorFrom565( color0, colors[0] );
    517 	ColorFrom565( color1, colors[1] );
    518 
    519 	if ( color0 > color1 ) {
    520 		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
    521 		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
    522 		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
    523 		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
    524 		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
    525 		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
    526 	} else {
    527 		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
    528 		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
    529 		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
    530 		colors[3][0] = 0;
    531 		colors[3][1] = 0;
    532 		colors[3][2] = 0;
    533 	}
    534 
    535 	int error = 0;
    536 	for ( i = 0; i < 16; i++ ) {
    537 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
    538 		for ( j = 0; j < 4; j++ ) {
    539 			unsigned int dist = ColorDistance( &colorBlock[i*4], &colors[j][0] );
    540 			if ( dist < minDist ) {
    541 				minDist = dist;
    542 			}
    543 		}
    544 		// accumulated error
    545 		error += minDist;
    546 
    547 		if ( error > lastError ) {
    548 			return error;
    549 		}
    550 	}
    551 	return error;
    552 }
    553 
    554 /*
    555 ========================
    556 idDxtEncoder::GetSquareNormalYError
    557 
    558 params:	colorBlock	- 16 pixel block for which to find color indexes
    559 paramO:	color0		- 4 byte min color found
    560 paramO:	color1		- 4 byte max color found
    561 return: 4 byte color index block
    562 ========================
    563 */
    564 int idDxtEncoder::GetSquareNormalYError( const byte *colorBlock, const unsigned short color0, const unsigned short color1, int lastError, int scale ) const {
    565 	int i, j;
    566 	byte colors[4][4];
    567 
    568 	ColorFrom565( color0, colors[0] );
    569 	ColorFrom565( color1, colors[1] );
    570 
    571 	if ( color0 > color1 ) {
    572 		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
    573 		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
    574 		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
    575 		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
    576 		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
    577 		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
    578 	} else {
    579 		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
    580 		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
    581 		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
    582 		colors[3][0] = 0;
    583 		colors[3][1] = 0;
    584 		colors[3][2] = 0;
    585 	}
    586 
    587 	int error = 0;
    588 	for ( i = 0; i < 16; i++ ) {
    589 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
    590 		for ( j = 0; j < 4; j++ ) {
    591 			float r = (float) colorBlock[i*4+1] / scale;
    592 			float s = (float) colors[j][1] / scale;
    593 			unsigned int dist = idMath::Ftoi( ( r - s ) * ( r - s ) );
    594 			if ( dist < minDist ) {
    595 				minDist = dist;
    596 			}
    597 		}
    598 		// accumulated error
    599 		error += minDist;
    600 
    601 		if ( error > lastError ) {
    602 			return error;
    603 		}
    604 	}
    605 	return error;
    606 }
    607 
    608 /*
    609 ========================
    610 idDxtEncoder::GetMinMaxColorsHQ
    611 
    612 Uses an exhaustive search to find the two RGB colors that produce the least error when used to 
    613 compress the 4x4 block. Also finds the minimum and maximum alpha values.
    614 
    615 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
    616 paramO:	minColor	- 4 byte min color found
    617 paramO:	maxColor	- 4 byte max color found
    618 ========================
    619 */
    620 int idDxtEncoder::GetMinMaxColorsHQ( const byte *colorBlock, byte *minColor, byte *maxColor, bool noBlack ) const {
    621 	int i;
    622 	int i0, i1, i2, j0, j1, j2;
    623 	unsigned short minColor565, maxColor565, bestMinColor565, bestMaxColor565;
    624 	byte bboxMin[3], bboxMax[3], minAxisDist[3];
    625 	int error, bestError = MAX_TYPE( int );
    626 
    627 	bboxMin[0] = bboxMin[1] = bboxMin[2] = 255;
    628 	bboxMax[0] = bboxMax[1] = bboxMax[2] = 0;
    629 
    630 	// get color bbox
    631 	for ( i = 0; i < 16; i++ ) {
    632 		if ( colorBlock[i*4+0] < bboxMin[0] ) {
    633 			bboxMin[0] = colorBlock[i*4+0];
    634 		}
    635 		if ( colorBlock[i*4+1] < bboxMin[1] ) {
    636 			bboxMin[1] = colorBlock[i*4+1];
    637 		}
    638 		if ( colorBlock[i*4+2] < bboxMin[2] ) {
    639 			bboxMin[2] = colorBlock[i*4+2];
    640 		}
    641 		if ( colorBlock[i*4+0] > bboxMax[0] ) {
    642 			bboxMax[0] = colorBlock[i*4+0];
    643 		}
    644 		if ( colorBlock[i*4+1] > bboxMax[1] ) {
    645 			bboxMax[1] = colorBlock[i*4+1];
    646 		}
    647 		if ( colorBlock[i*4+2] > bboxMax[2] ) {
    648 			bboxMax[2] = colorBlock[i*4+2];
    649 		}
    650 	}
    651 
    652 	// decrease range for 565 encoding
    653 	bboxMin[0] >>= 3;
    654 	bboxMin[1] >>= 2;
    655 	bboxMin[2] >>= 3;
    656 	bboxMax[0] >>= 3;
    657 	bboxMax[1] >>= 2;
    658 	bboxMax[2] >>= 3;
    659 
    660 	// get the minimum distance the end points of the line must be apart along each axis
    661 	for ( i = 0; i < 3; i++ ) {
    662 		minAxisDist[i] = ( bboxMax[i] - bboxMin[i] );
    663 		if ( minAxisDist[i] >= 16 ) {
    664 			minAxisDist[i] = minAxisDist[i] * 3 / 4;
    665 		} else if ( minAxisDist[i] >= 8 ) {
    666 			minAxisDist[i] = minAxisDist[i] * 2 / 4;
    667 		} else if ( minAxisDist[i] >= 4 ) {
    668 			minAxisDist[i] = minAxisDist[i] * 1 / 4;
    669 		} else {
    670 			minAxisDist[i] = 0;
    671 		}
    672 	}
    673 
    674 	// expand the bounding box
    675 	const int C565_BBOX_EXPAND = 1;
    676 
    677 	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
    678 	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
    679 	bboxMin[2] = ( bboxMin[2] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[2] - C565_BBOX_EXPAND;
    680 	bboxMax[0] = ( bboxMax[0] >= (255>>3)-C565_BBOX_EXPAND ) ? (255>>3) : bboxMax[0] + C565_BBOX_EXPAND;
    681 	bboxMax[1] = ( bboxMax[1] >= (255>>2)-C565_BBOX_EXPAND ) ? (255>>2) : bboxMax[1] + C565_BBOX_EXPAND;
    682 	bboxMax[2] = ( bboxMax[2] >= (255>>3)-C565_BBOX_EXPAND ) ? (255>>3) : bboxMax[2] + C565_BBOX_EXPAND;
    683 
    684 	bestMinColor565 = 0;
    685 	bestMaxColor565 = 0;
    686 
    687 	for ( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ ) {
    688 		for ( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- ) {
    689 			if ( abs( i0 - j0 ) < minAxisDist[0] ) {
    690 				continue;
    691 			}
    692 
    693 			for ( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ ) {
    694 				for ( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- ) {
    695 					if ( abs( i1 - j1 ) < minAxisDist[1] ) {
    696 						continue;
    697 					}
    698 
    699 					for ( i2 = bboxMin[2]; i2 <= bboxMax[2]; i2++ ) {
    700 						for ( j2 = bboxMax[2]; j2 >= bboxMin[2]; j2-- ) {
    701 							if ( abs( i2 - j2 ) < minAxisDist[2] ) {
    702 								continue;
    703 							}
    704 
    705 							minColor565 = (unsigned short)( ( i0 << 11 ) | ( i1 << 5 ) | ( i2 << 0 ) ); 
    706 							maxColor565 = (unsigned short)( ( j0 << 11 ) | ( j1 << 5 ) | ( j2 << 0 ) );
    707 
    708 							if ( !noBlack ) {
    709 								error = GetSquareColorsError( colorBlock, maxColor565, minColor565, bestError );
    710 								if ( error < bestError ) {
    711 									bestError = error;
    712 									bestMinColor565 = minColor565;
    713 									bestMaxColor565 = maxColor565;
    714 								}
    715 							} else {
    716 								if ( minColor565 <= maxColor565 ) {
    717 									SwapValues( minColor565, maxColor565 );
    718 								}
    719 							}
    720 
    721 							error = GetSquareColorsError( colorBlock, minColor565, maxColor565, bestError );
    722 							if ( error < bestError ) {
    723 								bestError = error;
    724 								bestMinColor565 = minColor565;
    725 								bestMaxColor565 = maxColor565;
    726 							}
    727 						}
    728 					}
    729 				}
    730 			}
    731 		}
    732 	}
    733 
    734 	ColorFrom565( bestMinColor565, minColor );
    735 	ColorFrom565( bestMaxColor565, maxColor );
    736 
    737 	return bestError;
    738 }
    739 
    740 /*
    741 ========================
    742 idDxtEncoder::GetSquareCTX1Error
    743 
    744 params:	colorBlock	- 16 pixel block for which to find color indexes
    745 paramO:	color0		- Min color found
    746 paramO:	color1		- Max color found
    747 return: 4 byte color index block
    748 ========================
    749 */
    750 int idDxtEncoder::GetSquareCTX1Error( const byte *colorBlock, const byte *color0, const byte *color1, int lastError ) const {
    751 	int i, j;
    752 	byte colors[4][4];
    753 
    754 	colors[0][0] = color0[0];
    755 	colors[0][1] = color0[1];
    756 	colors[1][0] = color1[0];
    757 	colors[1][1] = color1[1];
    758 
    759 	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
    760 	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
    761 	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
    762 	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
    763 
    764 	int error = 0;
    765 	for ( i = 0; i < 16; i++ ) {
    766 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
    767 		for ( j = 0; j < 4; j++ ) {
    768 			unsigned int dist = CTX1Distance( &colorBlock[i*4], &colors[j][0] );
    769 			if ( dist < minDist ) {
    770 				minDist = dist;
    771 			}
    772 		}
    773 		// accumulated error
    774 		error += minDist;
    775 
    776 		if ( error > lastError ) {
    777 			return error;
    778 		}
    779 	}
    780 	return error;
    781 }
    782 
    783 /*
    784 ========================
    785 idDxtEncoder::GetMinMaxCTX1HQ
    786 
    787 Uses an exhaustive search to find the two RGB colors that produce the least error when used to 
    788 compress the 4x4 block. Also finds the minimum and maximum alpha values.
    789 
    790 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
    791 paramO:	minColor	- 4 byte Min color found
    792 paramO:	maxColor	- 4 byte Max color found
    793 ========================
    794 */
    795 int idDxtEncoder::GetMinMaxCTX1HQ( const byte *colorBlock, byte *minColor, byte *maxColor ) const {
    796 	int i;
    797 	int i0, i1, j0, j1;
    798 	byte curMinColor[2], curMaxColor[2];
    799 	byte bboxMin[2], bboxMax[2], minAxisDist[2];
    800 	int error, bestError = MAX_TYPE( int );
    801 
    802 	bboxMin[0] = bboxMin[1] = 255;
    803 	bboxMax[0] = bboxMax[1] = 0;
    804 
    805 	// get color bbox
    806 	for ( i = 0; i < 16; i++ ) {
    807 		if ( colorBlock[i*4+0] < bboxMin[0] ) {
    808 			bboxMin[0] = colorBlock[i*4+0];
    809 		}
    810 		if ( colorBlock[i*4+1] < bboxMin[1] ) {
    811 			bboxMin[1] = colorBlock[i*4+1];
    812 		}
    813 		if ( colorBlock[i*4+0] > bboxMax[0] ) {
    814 			bboxMax[0] = colorBlock[i*4+0];
    815 		}
    816 		if ( colorBlock[i*4+1] > bboxMax[1] ) {
    817 			bboxMax[1] = colorBlock[i*4+1];
    818 		}
    819 	}
    820 
    821 	// get the minimum distance the end points of the line must be apart along each axis
    822 	for ( i = 0; i < 2; i++ ) {
    823 		minAxisDist[i] = ( bboxMax[i] - bboxMin[i] );
    824 		if ( minAxisDist[i] >= 64 ) {
    825 			minAxisDist[i] = minAxisDist[i] * 3 / 4;
    826 		} else if ( minAxisDist[i] >= 32 ) {
    827 			minAxisDist[i] = minAxisDist[i] * 2 / 4;
    828 		} else if ( minAxisDist[i] >= 16 ) {
    829 			minAxisDist[i] = minAxisDist[i] * 1 / 4;
    830 		} else {
    831 			minAxisDist[i] = 0;
    832 		}
    833 	}
    834 
    835 	// expand the bounding box
    836 	const int CXT1_BBOX_EXPAND = 6;
    837 
    838 	bboxMin[0] = ( bboxMin[0] <= CXT1_BBOX_EXPAND ) ? 0 : bboxMin[0] - CXT1_BBOX_EXPAND;
    839 	bboxMin[1] = ( bboxMin[1] <= CXT1_BBOX_EXPAND ) ? 0 : bboxMin[1] - CXT1_BBOX_EXPAND;
    840 	bboxMax[0] = ( bboxMax[0] >= 255 - CXT1_BBOX_EXPAND ) ? 255 : bboxMax[0] + CXT1_BBOX_EXPAND;
    841 	bboxMax[1] = ( bboxMax[1] >= 255 - CXT1_BBOX_EXPAND ) ? 255 : bboxMax[1] + CXT1_BBOX_EXPAND;
    842 
    843 	for ( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ ) {
    844 		for ( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- ) {
    845 			if ( abs( i0 - j0 ) < minAxisDist[0] ) {
    846 				continue;
    847 			}
    848 
    849 			for ( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ ) {
    850 				for ( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- ) {
    851 					if ( abs( i1 - j1 ) < minAxisDist[1] ) {
    852 						continue;
    853 					}
    854 
    855 					curMinColor[0] = (byte)i0;
    856 					curMinColor[1] = (byte)i1;
    857 
    858 					curMaxColor[0] = (byte)j0;
    859 					curMaxColor[1] = (byte)j1;
    860 
    861 					error = GetSquareCTX1Error( colorBlock, curMinColor, curMaxColor, bestError );
    862 					if ( error < bestError ) {
    863 						bestError = error;
    864 						memcpy( minColor, curMinColor, 2 );
    865 						memcpy( maxColor, curMaxColor, 2 );
    866 					}
    867 				}
    868 			}
    869 		}
    870 	}
    871 
    872 	return bestError;
    873 }
    874 
    875 /*
    876 ========================
    877 idDxtEncoder::GetMinMaxNormalYHQ
    878 
    879 Uses an exhaustive search to find the two RGB colors that produce the least error when used to 
    880 compress the 4x4 block. Also finds the minimum and maximum alpha values.
    881 
    882 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
    883 paramO:	minColor	- 4 byte Min color found
    884 paramO:	maxColor	- 4 byte Max color found
    885 ========================
    886 */
    887 int idDxtEncoder::GetMinMaxNormalYHQ( const byte *colorBlock, byte *minColor, byte *maxColor, bool noBlack, int scale ) const {
    888 	unsigned short bestMinColor565, bestMaxColor565;
    889 	byte bboxMin[3], bboxMax[3];
    890 	int error, bestError = MAX_TYPE( int );
    891 
    892 	bboxMin[1] = 255;
    893 	bboxMax[1] = 0;
    894 
    895 	// get color bbox
    896 	for ( int i = 0; i < 16; i++ ) {
    897 		if ( colorBlock[i*4+1] < bboxMin[1] ) {
    898 			bboxMin[1] = colorBlock[i*4+1];
    899 		}
    900 		if ( colorBlock[i*4+1] > bboxMax[1] ) {
    901 			bboxMax[1] = colorBlock[i*4+1];
    902 		}
    903 	}
    904 
    905 	// decrease range for 565 encoding
    906 	bboxMin[1] >>= 2;
    907 	bboxMax[1] >>= 2;
    908 
    909 	// expand the bounding box
    910 	const int C565_BBOX_EXPAND = 1;
    911 
    912 	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
    913 	bboxMax[1] = ( bboxMax[1] >= (255>>2)-C565_BBOX_EXPAND ) ? (255>>2) : bboxMax[1] + C565_BBOX_EXPAND;
    914 
    915 	bestMinColor565 = 0;
    916 	bestMaxColor565 = 0;
    917 
    918 	for ( int i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ ) {
    919 		for ( int j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- ) {
    920 			if ( abs( i1 - j1 ) < 0 ) {
    921 				continue;
    922 			}
    923 
    924 			unsigned short minColor565 = (unsigned short)i1 << 5;
    925 			unsigned short maxColor565 = (unsigned short)j1 << 5;
    926 
    927 			if ( !noBlack ) {
    928 				error = GetSquareNormalYError( colorBlock, maxColor565, minColor565, bestError, scale );
    929 				if ( error < bestError ) {
    930 					bestError = error;
    931 					bestMinColor565 = minColor565;
    932 					bestMaxColor565 = maxColor565;
    933 				}
    934 			} else {
    935 				if ( minColor565 <= maxColor565 ) {
    936 					SwapValues( minColor565, maxColor565 );
    937 				}
    938 			}
    939 
    940 			error = GetSquareNormalYError( colorBlock, minColor565, maxColor565, bestError, scale );
    941 			if ( error < bestError ) {
    942 				bestError = error;
    943 				bestMinColor565 = minColor565;
    944 				bestMaxColor565 = maxColor565;
    945 			}
    946 		}
    947 	}
    948 
    949 	ColorFrom565( bestMinColor565, minColor );
    950 	ColorFrom565( bestMaxColor565, maxColor );
    951 
    952 	int bias = colorBlock[0*4+0];
    953 	int size = colorBlock[0*4+2];
    954 
    955 	minColor[0] = maxColor[0] = (byte)bias;
    956 	minColor[2] = maxColor[2] = (byte)size;
    957 
    958 	return bestError;
    959 }
    960 
#if defined( ID_WIN_X86_ASM )
// Constants for the inline SSE2 paths of NormalDistanceDXT1 / NormalDistanceDXT5.
// Each table is 16-byte aligned and holds one value per lane.

// maps a [0, 255] component to [0, 2]; the code subtracts one afterwards to reach [-1, 1]
ALIGN16( static float SIMD_SSE2_float_scale[4] ) = { 2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f };
// inverse of the scale above: maps [0, 2] back to [0, 255]
ALIGN16( static float SIMD_SSE2_float_descale[4] ) = { 255.0f / 2.0f, 255.0f / 2.0f, 255.0f / 2.0f, 255.0f / 2.0f };
// lower clamp bound used before converting back to integers
ALIGN16( static float SIMD_SSE2_float_zero[4] ) = { 0.0f, 0.0f, 0.0f, 0.0f };
ALIGN16( static float SIMD_SSE2_float_one[4] ) = { 1.0f, 1.0f, 1.0f, 1.0f };
// added before the truncating float-to-int conversion so the result is rounded
ALIGN16( static float SIMD_SSE2_float_half[4] ) = { 0.5f, 0.5f, 0.5f, 0.5f };
// upper clamp bound used before converting back to integers
ALIGN16( static float SIMD_SSE2_float_255[4] ) = { 255.0f, 255.0f, 255.0f, 255.0f };
// the two constants of the refinement step applied to the rsqrtps estimate y of 1/sqrt(x):
// ( x * y * y - 3 ) * ( -0.5 * y )
ALIGN16( static float SIMD_SP_rsqrt_c0[4] ) = { 3.0f, 3.0f, 3.0f, 3.0f };
ALIGN16( static float SIMD_SP_rsqrt_c1[4] ) = { -0.5f, -0.5f, -0.5f, -0.5f };
// keeps the first three lanes and clears the fourth
ALIGN16( static dword SIMD_SSE2_dword_maskFirstThree[4] ) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 };
// keeps the low 16 bits of the first three lanes and clears everything else
ALIGN16( static dword SIMD_SSE2_dword_maskWords[4] ) = { 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000000 };
// builds the 8 bit shuffle immediate: two bits per destination lane, x in the lowest bits
#define R_SHUFFLE_PS( x, y, z, w )	(( (w) & 3 ) << 6 | ( (z) & 3 ) << 4 | ( (y) & 3 ) << 2 | ( (x) & 3 ))
#endif
    974 
    975 /*
    976 ========================
    977 NormalDistanceDXT1
    978 ========================
    979 */
    980 int NormalDistanceDXT1( const int *vector, const int *normalized ) {
    981 #if defined( ID_WIN_X86_ASM )
    982 	int result;
    983 	__asm {
    984 		mov			esi, vector
    985 		mov			edi, normalized
    986 		cvtdq2ps	xmm0, [esi]
    987 		mulps		xmm0, SIMD_SSE2_float_scale
    988 		subps		xmm0, SIMD_SSE2_float_one
    989 		pand		xmm0, SIMD_SSE2_dword_maskFirstThree
    990 		movaps		xmm1, xmm0
    991 		mulps		xmm1, xmm1
    992 		pshufd		xmm2, xmm1, R_SHUFFLE_PS( 2, 3, 0, 1 )
    993 		addps		xmm2, xmm1
    994 		pshufd		xmm1, xmm2, R_SHUFFLE_PS( 1, 0, 1, 0 )
    995 		addps		xmm2, xmm1
    996 
    997 		rsqrtps		xmm1, xmm2
    998 		mulps		xmm2, xmm1
    999 		mulps		xmm2, xmm1
   1000 		subps		xmm2, SIMD_SP_rsqrt_c0
   1001 		mulps		xmm1, SIMD_SP_rsqrt_c1
   1002 		mulps		xmm2, xmm1
   1003 
   1004 		mulps		xmm0, xmm2
   1005 		addps		xmm0, SIMD_SSE2_float_one
   1006 		mulps		xmm0, SIMD_SSE2_float_descale
   1007 		addps		xmm0, SIMD_SSE2_float_half
   1008 		maxps		xmm0, SIMD_SSE2_float_zero
   1009 		minps		xmm0, SIMD_SSE2_float_255
   1010 		cvttps2dq	xmm0, xmm0
   1011 		psubd		xmm0, [edi]
   1012 		pand		xmm0, SIMD_SSE2_dword_maskWords
   1013 		pmullw		xmm0, xmm0
   1014 		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 2, 3, 0, 1 )
   1015 		paddd		xmm0, xmm1
   1016 		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 1, 0, 1, 0 )
   1017 		paddd		xmm0, xmm1
   1018 		movd		result, xmm0
   1019 	}
   1020 	return result;
   1021 #else
   1022 	float floatNormal[3];
   1023 	byte intNormal[4];
   1024 	floatNormal[0] = vector[0] * ( 2.0f / 255.0f ) - 1.0f;
   1025 	floatNormal[1] = vector[1] * ( 2.0f / 255.0f ) - 1.0f;
   1026 	floatNormal[2] = vector[2] * ( 2.0f / 255.0f ) - 1.0f;
   1027 	float rcplen = idMath::InvSqrt( floatNormal[0] * floatNormal[0] + floatNormal[1] * floatNormal[1] + floatNormal[2] * floatNormal[2] );
   1028 	floatNormal[0] *= rcplen;
   1029 	floatNormal[1] *= rcplen;
   1030 	floatNormal[2] *= rcplen;
   1031 	intNormal[0] = idMath::Ftob( ( floatNormal[0] + 1.0f ) * ( 255.0f / 2.0f ) + 0.5f );
   1032 	intNormal[1] = idMath::Ftob( ( floatNormal[1] + 1.0f ) * ( 255.0f / 2.0f ) + 0.5f );
   1033 	intNormal[2] = idMath::Ftob( ( floatNormal[2] + 1.0f ) * ( 255.0f / 2.0f ) + 0.5f );
   1034 	int result =	( ( intNormal[ 0 ] - normalized[ 0 ] ) * ( intNormal[ 0 ] - normalized[ 0 ] ) ) +
   1035 					( ( intNormal[ 1 ] - normalized[ 1 ] ) * ( intNormal[ 1 ] - normalized[ 1 ] ) ) +
   1036 					( ( intNormal[ 2 ] - normalized[ 2 ] ) * ( intNormal[ 2 ] - normalized[ 2 ] ) );
   1037 	return result;
   1038 #endif
   1039 }
   1040 
   1041 /*
   1042 ========================
   1043 NormalDistanceDXT5
   1044 ========================
   1045 */
   1046 int NormalDistanceDXT5( const int *vector, const int *normalized ) {
   1047 #if defined( ID_WIN_X86_ASM )
   1048 	int result;
   1049 	__asm {
   1050 		mov			esi, vector
   1051 		mov			edi, normalized
   1052 #if 0	// object-space
   1053 		pshufd		xmm0, [esi], R_SHUFFLE_PS( 0, 1, 3, 2 )
   1054 #else
   1055 		pshufd		xmm0, [esi], R_SHUFFLE_PS( 1, 2, 3, 0 )
   1056 #endif
   1057 		cvtdq2ps	xmm0, xmm0
   1058 		mulps		xmm0, SIMD_SSE2_float_scale
   1059 		subps		xmm0, SIMD_SSE2_float_one
   1060 		pand		xmm0, SIMD_SSE2_dword_maskFirstThree
   1061 		movaps		xmm1, xmm0
   1062 		mulps		xmm1, xmm1
   1063 		pshufd		xmm2, xmm1, R_SHUFFLE_PS( 2, 3, 0, 1 )
   1064 		addps		xmm2, xmm1
   1065 		pshufd		xmm1, xmm2, R_SHUFFLE_PS( 1, 0, 1, 0 )
   1066 		addps		xmm2, xmm1
   1067 
   1068 		rsqrtps		xmm1, xmm2
   1069 		mulps		xmm2, xmm1
   1070 		mulps		xmm2, xmm1
   1071 		subps		xmm2, SIMD_SP_rsqrt_c0
   1072 		mulps		xmm1, SIMD_SP_rsqrt_c1
   1073 		mulps		xmm2, xmm1
   1074 
   1075 		mulps		xmm0, xmm2
   1076 		addps		xmm0, SIMD_SSE2_float_one
   1077 		mulps		xmm0, SIMD_SSE2_float_descale
   1078 		addps		xmm0, SIMD_SSE2_float_half
   1079 		maxps		xmm0, SIMD_SSE2_float_zero
   1080 		minps		xmm0, SIMD_SSE2_float_255
   1081 		cvttps2dq	xmm0, xmm0
   1082 #if 0	// object-space
   1083 		pshufd		xmm3, [edi], R_SHUFFLE_PS( 0, 1, 3, 2 )
   1084 #else
   1085 		pshufd		xmm3, [edi], R_SHUFFLE_PS( 1, 2, 3, 0 )
   1086 #endif
   1087 		psubd		xmm0, xmm3
   1088 		pand		xmm0, SIMD_SSE2_dword_maskWords
   1089 		pmullw		xmm0, xmm0
   1090 		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 2, 3, 0, 1 )
   1091 		paddd		xmm0, xmm1
   1092 		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 1, 0, 1, 0 )
   1093 		paddd		xmm0, xmm1
   1094 		movd		result, xmm0
   1095 	}
   1096 	return result;
   1097 #else
   1098 #if 0	// object-space
   1099 	const int c0 = 0;
   1100 	const int c1 = 1;
   1101 	const int c2 = 3;
   1102 #else
   1103 	const int c0 = 1;
   1104 	const int c1 = 2;
   1105 	const int c2 = 3;
   1106 #endif
   1107 	float floatNormal[3];
   1108 	byte intNormal[4];
   1109 	floatNormal[0] = vector[c0] / 255.0f * 2.0f - 1.0f;
   1110 	floatNormal[1] = vector[c1] / 255.0f * 2.0f - 1.0f;
   1111 	floatNormal[2] = vector[c2] / 255.0f * 2.0f - 1.0f;
   1112 	float rcplen = idMath::InvSqrt( floatNormal[0] * floatNormal[0] + floatNormal[1] * floatNormal[1] + floatNormal[2] * floatNormal[2] );
   1113 	floatNormal[0] *= rcplen;
   1114 	floatNormal[1] *= rcplen;
   1115 	floatNormal[2] *= rcplen;
   1116 	intNormal[c0] = idMath::Ftob( ( floatNormal[0] + 1.0f ) / 2.0f * 255.0f + 0.5f );
   1117 	intNormal[c1] = idMath::Ftob( ( floatNormal[1] + 1.0f ) / 2.0f * 255.0f + 0.5f );
   1118 	intNormal[c2] = idMath::Ftob( ( floatNormal[2] + 1.0f ) / 2.0f * 255.0f + 0.5f );
   1119 	int result =	( ( intNormal[ c0 ] - normalized[ c0 ] ) * ( intNormal[ c0 ] - normalized[ c0 ] ) ) +
   1120 					( ( intNormal[ c1 ] - normalized[ c1 ] ) * ( intNormal[ c1 ] - normalized[ c1 ] ) ) +
   1121 					( ( intNormal[ c2 ] - normalized[ c2 ] ) * ( intNormal[ c2 ] - normalized[ c2 ] ) );
   1122 	return result;
   1123 #endif
   1124 }
   1125 
   1126 /*
   1127 ========================
   1128 idDxtEncoder::GetSquareNormalsDXT1Error
   1129 
   1130 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
   1131 paramO:	color0		- 4 byte Min color found
   1132 paramO:	color1		- 4 byte Max color found
   1133 return: 4 byte color index block
   1134 ========================
   1135 */
   1136 int idDxtEncoder::GetSquareNormalsDXT1Error( const int *colorBlock, const unsigned short color0, const unsigned short color1, int lastError, unsigned int &colorIndices ) const {
   1137 	byte byteColors[2][4];
   1138 	ALIGN16( int colors[4][4] );
   1139 
   1140 	ColorFrom565( color0, byteColors[0] );
   1141 	ColorFrom565( color1, byteColors[1] );
   1142 
   1143 	for ( int i = 0; i < 4; i++ ) {
   1144 		colors[0][i] = byteColors[0][i];
   1145 		colors[1][i] = byteColors[1][i];
   1146 	}
   1147 
   1148 	if ( color0 > color1 ) {
   1149 		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   1150 		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   1151 		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
   1152 		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   1153 		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   1154 		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
   1155 	} else {
   1156 		assert( color0 == color1 );
   1157 		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
   1158 		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
   1159 		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
   1160 		colors[3][0] = 0;
   1161 		colors[3][1] = 0;
   1162 		colors[3][2] = 0;
   1163 	}
   1164 
   1165 	int error = 0;
   1166 	int tempColorIndices[16];
   1167 	for ( int i = 0; i < 16; i++ ) {
   1168 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
   1169 
   1170 		for ( int j = 0; j < 4; j++ ) {
   1171 			unsigned int dist = NormalDistanceDXT1( &colors[j][0], &colorBlock[i*4] );
   1172 			if ( dist < minDist ) {
   1173 				minDist = dist;
   1174 				tempColorIndices[i] = j;
   1175 			}
   1176 		}
   1177 		// accumulated error
   1178 		error += minDist;
   1179 
   1180 		if ( error > lastError ) {
   1181 			return error;
   1182 		}
   1183 	}
   1184 
   1185 	colorIndices = 0;
   1186 	for ( int i = 0; i < 16; i++ ) {
   1187 		colorIndices |= ( tempColorIndices[i] << (unsigned int)( i << 1 ) );
   1188 	}
   1189 
   1190 	return error;
   1191 }
   1192 
   1193 /*
   1194 ========================
   1195 idDxtEncoder::GetMinMaxNormalsDXT1HQ
   1196 
   1197 Uses an exhaustive search to find the two RGB colors that produce the least error when used to 
   1198 compress the 4x4 block. Also finds the minimum and maximum alpha values.
   1199 
   1200 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
   1201 paramO:	minColor	- 4 byte Min color found
   1202 paramO:	maxColor	- 4 byte Max color found
   1203 ========================
   1204 */
   1205 int idDxtEncoder::GetMinMaxNormalsDXT1HQ( const byte *colorBlock, byte *minColor, byte *maxColor, unsigned int &colorIndices, bool noBlack ) const {
   1206 	int i;
   1207 	int i0, i1, i2, j0, j1, j2;
   1208 	unsigned short bestMinColor565 = 0;
   1209 	unsigned short bestMaxColor565 = 0;
   1210 	byte bboxMin[3], bboxMax[3], minAxisDist[3];
   1211 	int error, bestError = MAX_TYPE( int );
   1212 	unsigned int tempColorIndices;
   1213 	ALIGN16( int intColorBlock[16*4] );
   1214 
   1215 	bboxMin[0] = bboxMin[1] = bboxMin[2] = 128;
   1216 	bboxMax[0] = bboxMax[1] = bboxMax[2] = 128;
   1217 
   1218 	// get color bbox
   1219 	for ( i = 0; i < 16; i++ ) {
   1220 		if ( colorBlock[i*4+0] < bboxMin[0] ) {
   1221 			bboxMin[0] = colorBlock[i*4+0];
   1222 		}
   1223 		if ( colorBlock[i*4+1] < bboxMin[1] ) {
   1224 			bboxMin[1] = colorBlock[i*4+1];
   1225 		}
   1226 		if ( colorBlock[i*4+2] < bboxMin[2] ) {
   1227 			bboxMin[2] = colorBlock[i*4+2];
   1228 		}
   1229 		if ( colorBlock[i*4+0] > bboxMax[0] ) {
   1230 			bboxMax[0] = colorBlock[i*4+0];
   1231 		}
   1232 		if ( colorBlock[i*4+1] > bboxMax[1] ) {
   1233 			bboxMax[1] = colorBlock[i*4+1];
   1234 		}
   1235 		if ( colorBlock[i*4+2] > bboxMax[2] ) {
   1236 			bboxMax[2] = colorBlock[i*4+2];
   1237 		}
   1238 	}
   1239 
   1240 	for ( int i = 0; i < 64; i++ ) {
   1241 		intColorBlock[i] = colorBlock[i];
   1242 	}
   1243 
   1244 	// decrease range for 565 encoding
   1245 	bboxMin[0] >>= 3;
   1246 	bboxMin[1] >>= 2;
   1247 	bboxMin[2] >>= 3;
   1248 	bboxMax[0] >>= 3;
   1249 	bboxMax[1] >>= 2;
   1250 	bboxMax[2] >>= 3;
   1251 
   1252 	// get the minimum distance the end points of the line must be apart along each axis
   1253 	for ( i = 0; i < 3; i++ ) {
   1254 		minAxisDist[i] = 0;
   1255 	}
   1256 
   1257 	// expand the bounding box
   1258 	const int C565_BBOX_EXPAND = 2;
   1259 
   1260 	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
   1261 	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
   1262 	bboxMin[2] = ( bboxMin[2] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[2] - C565_BBOX_EXPAND;
   1263 	bboxMax[0] = ( bboxMax[0] >= (255>>3)-C565_BBOX_EXPAND ) ? (255>>3) : bboxMax[0] + C565_BBOX_EXPAND;
   1264 	bboxMax[1] = ( bboxMax[1] >= (255>>2)-C565_BBOX_EXPAND ) ? (255>>2) : bboxMax[1] + C565_BBOX_EXPAND;
   1265 	bboxMax[2] = ( bboxMax[2] >= (255>>3)-C565_BBOX_EXPAND ) ? (255>>3) : bboxMax[2] + C565_BBOX_EXPAND;
   1266 
   1267 	for ( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ ) {
   1268 		for ( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- ) {
   1269 			if ( abs( i0 - j0 ) < minAxisDist[0] ) {
   1270 				continue;
   1271 			}
   1272 
   1273 			for ( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ ) {
   1274 				for ( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- ) {
   1275 					if ( abs( i1 - j1 ) < minAxisDist[1] ) {
   1276 						continue;
   1277 					}
   1278 
   1279 					for ( i2 = bboxMin[2]; i2 <= bboxMax[2]; i2++ ) {
   1280 						for ( j2 = bboxMax[2]; j2 >= bboxMin[2]; j2-- ) {
   1281 							if ( abs( i2 - j2 ) < minAxisDist[2] ) {
   1282 								continue;
   1283 							}
   1284 
   1285 							unsigned short minColor565 = (unsigned short)( ( i0 << 11 ) | ( i1 << 5 ) | ( i2 << 0 ) );
   1286 							unsigned short maxColor565 = (unsigned short)( ( j0 << 11 ) | ( j1 << 5 ) | ( j2 << 0 ) );
   1287 
   1288 							if ( !noBlack ) {
   1289 								error = GetSquareNormalsDXT1Error( intColorBlock, maxColor565, minColor565, bestError, tempColorIndices );
   1290 								if ( error < bestError ) {
   1291 									bestError = error;
   1292 									bestMinColor565 = minColor565;
   1293 									bestMaxColor565 = maxColor565;
   1294 									colorIndices = tempColorIndices;
   1295 								}
   1296 							} else {
   1297 								if ( minColor565 <= maxColor565 ) {
   1298 									SwapValues( minColor565, maxColor565 );
   1299 								}
   1300 							}
   1301 
   1302 							error = GetSquareNormalsDXT1Error( intColorBlock, minColor565, maxColor565, bestError, tempColorIndices );
   1303 							if ( error < bestError ) {
   1304 								bestError = error;
   1305 								bestMinColor565 = minColor565;
   1306 								bestMaxColor565 = maxColor565;
   1307 								colorIndices = tempColorIndices;
   1308 							}
   1309 						}
   1310 					}
   1311 				}
   1312 			}
   1313 		}
   1314 	}
   1315 
   1316 	ColorFrom565( bestMinColor565, minColor );
   1317 	ColorFrom565( bestMaxColor565, maxColor );
   1318 
   1319 	return bestError;
   1320 }
   1321 
   1322 /*
   1323 ========================
   1324 idDxtEncoder::GetSquareNormalsDXT5Error
   1325 
   1326 params:	normalBlock	- 16 pixel block for which to find normal indexes
   1327 paramO:	minNormal	- Min normal found
   1328 paramO:	maxNormal	- Max normal found
   1329 ========================
   1330 */
   1331 int idDxtEncoder::GetSquareNormalsDXT5Error( const int *normalBlock, const byte *minNormal, const byte *maxNormal, int lastError, unsigned int &colorIndices, byte *alphaIndices ) const {
   1332 	byte alphas[8];
   1333 	byte colors[4][4];
   1334 
   1335 	unsigned short smin = ColorTo565( minNormal );
   1336 	unsigned short smax = ColorTo565( maxNormal );
   1337 
   1338 	ColorFrom565( smax, colors[0] );
   1339 	ColorFrom565( smin, colors[1] );
   1340 
   1341 	if ( smax > smin ) {
   1342 		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   1343 		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   1344 		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
   1345 		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   1346 		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   1347 		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
   1348 	} else {
   1349 		assert( smax == smin );
   1350 		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
   1351 		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
   1352 		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
   1353 		colors[3][0] = 0;
   1354 		colors[3][1] = 0;
   1355 		colors[3][2] = 0;
   1356 	}
   1357 
   1358 	alphas[0] = maxNormal[3];
   1359 	alphas[1] = minNormal[3];
   1360 
   1361 	if ( maxNormal[3] > minNormal[3] ) {
   1362 		alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
   1363 		alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
   1364 		alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
   1365 		alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
   1366 		alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
   1367 		alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
   1368 	} else {
   1369 		alphas[2] = ( 4 * alphas[0] + 1 * alphas[1] ) / 5;
   1370 		alphas[3] = ( 3 * alphas[0] + 2 * alphas[1] ) / 5;
   1371 		alphas[4] = ( 2 * alphas[0] + 3 * alphas[1] ) / 5;
   1372 		alphas[5] = ( 1 * alphas[0] + 4 * alphas[1] ) / 5;
   1373 		alphas[6] = 0;
   1374 		alphas[7] = 255;
   1375 	}
   1376 
   1377 	int error = 0;
   1378 	int tempColorIndices[16];
   1379 	int tempAlphaIndices[16];
   1380 	for ( int i = 0; i < 16; i++ ) {
   1381 		ALIGN16( int normal[4] );
   1382 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
   1383 
   1384 		for ( int j = 0; j < 4; j++ ) {
   1385 			normal[0] = colors[j][0];
   1386 			normal[1] = colors[j][1];
   1387 			normal[2] = colors[j][2];
   1388 
   1389 			for ( int k = 0; k < 8; k++ ) {
   1390 				normal[3] = alphas[k];
   1391 				unsigned int dist = NormalDistanceDXT5( normal, &normalBlock[i*4] );
   1392 				if ( dist < minDist ) {
   1393 					minDist = dist;
   1394 					tempColorIndices[i] = j;
   1395 					tempAlphaIndices[i] = k;
   1396 				}
   1397 			}
   1398 		}
   1399 		error += minDist;
   1400 
   1401 		if ( error >= lastError ) {
   1402 			return error;
   1403 		}
   1404 	}
   1405 
   1406 	alphaIndices[0] = byte( (tempAlphaIndices[ 0] >> 0) | (tempAlphaIndices[ 1] << 3) | (tempAlphaIndices[ 2] << 6) );
   1407 	alphaIndices[1] = byte( (tempAlphaIndices[ 2] >> 2) | (tempAlphaIndices[ 3] << 1) | (tempAlphaIndices[ 4] << 4) | (tempAlphaIndices[ 5] << 7) );
   1408 	alphaIndices[2] = byte( (tempAlphaIndices[ 5] >> 1) | (tempAlphaIndices[ 6] << 2) | (tempAlphaIndices[ 7] << 5) );
   1409 
   1410 	alphaIndices[3] = byte( (tempAlphaIndices[ 8] >> 0) | (tempAlphaIndices[ 9] << 3) | (tempAlphaIndices[10] << 6) );
   1411 	alphaIndices[4] = byte( (tempAlphaIndices[10] >> 2) | (tempAlphaIndices[11] << 1) | (tempAlphaIndices[12] << 4) | (tempAlphaIndices[13] << 7) );
   1412 	alphaIndices[5] = byte( (tempAlphaIndices[13] >> 1) | (tempAlphaIndices[14] << 2) | (tempAlphaIndices[15] << 5) );
   1413 
   1414 	colorIndices = 0;
   1415 	for ( int i = 0; i < 16; i++ ) {
   1416 		colorIndices |= ( tempColorIndices[i] << (unsigned int)( i << 1 ) );
   1417 	}
   1418 
   1419 	return error;
   1420 }
   1421 
   1422 /*
   1423 ========================
   1424 idDxtEncoder::GetMinMaxNormalsDXT5HQ
   1425 
   1426 Uses an exhaustive search to find the two RGB colors that produce the least error when used to 
   1427 compress the 4x4 block. Also finds the minimum and maximum alpha values.
   1428 
   1429 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
   1430 paramO:	minColor	- 4 byte Min color found
   1431 paramO:	maxColor	- 4 byte Max color found
   1432 ========================
   1433 */
   1434 int idDxtEncoder::GetMinMaxNormalsDXT5HQ( const byte *colorBlock, byte *minColor, byte *maxColor, unsigned int &colorIndices, byte *alphaIndices ) const {
   1435 	int i;
   1436 	int i0, i1, i3, j0, j1, j3;
   1437 	byte bboxMin[4], bboxMax[4], minAxisDist[4];
   1438 	byte tmin[4], tmax[4];
   1439 	int error, bestError = MAX_TYPE( int );
   1440 	unsigned int tempColorIndices;
   1441 	byte tempAlphaIndices[6];
   1442 	ALIGN16( int intColorBlock[16*4] );
   1443 
   1444 	bboxMin[0] = bboxMin[1] = bboxMin[2] = bboxMin[3] = 255;
   1445 	bboxMax[0] = bboxMax[1] = bboxMax[2] = bboxMax[3] = 0;
   1446 
   1447 	// get color bbox
   1448 	for ( i = 0; i < 16; i++ ) {
   1449 		if ( colorBlock[i*4+0] < bboxMin[0] ) {
   1450 			bboxMin[0] = colorBlock[i*4+0];
   1451 		}
   1452 		if ( colorBlock[i*4+1] < bboxMin[1] ) {
   1453 			bboxMin[1] = colorBlock[i*4+1];
   1454 		}
   1455 		if ( colorBlock[i*4+2] < bboxMin[2] ) {
   1456 			bboxMin[2] = colorBlock[i*4+2];
   1457 		}
   1458 		if ( colorBlock[i*4+3] < bboxMin[3] ) {
   1459 			bboxMin[3] = colorBlock[i*4+3];
   1460 		}
   1461 		if ( colorBlock[i*4+0] > bboxMax[0] ) {
   1462 			bboxMax[0] = colorBlock[i*4+0];
   1463 		}
   1464 		if ( colorBlock[i*4+1] > bboxMax[1] ) {
   1465 			bboxMax[1] = colorBlock[i*4+1];
   1466 		}
   1467 		if ( colorBlock[i*4+2] > bboxMax[2] ) {
   1468 			bboxMax[2] = colorBlock[i*4+2];
   1469 		}
   1470 		if ( colorBlock[i*4+3] > bboxMax[3] ) {
   1471 			bboxMax[3] = colorBlock[i*4+3];
   1472 		}
   1473 	}
   1474 
   1475 	for ( int i = 0; i < 64; i++ ) {
   1476 		intColorBlock[i] = colorBlock[i];
   1477 	}
   1478 
   1479 	// decrease range for 565 encoding
   1480 	bboxMin[0] >>= 3;
   1481 	bboxMin[1] >>= 2;
   1482 	bboxMax[0] >>= 3;
   1483 	bboxMax[1] >>= 2;
   1484 
   1485 	// get the minimum distance the end points of the line must be apart along each axis
   1486 	for ( i = 0; i < 4; i++ ) {
   1487 		minAxisDist[i] = 0;
   1488 	}
   1489 
   1490 	// expand the bounding box
   1491 	const int C565_BBOX_EXPAND = 2;
   1492 	const int ALPHA_BBOX_EXPAND = 32;
   1493 
   1494 	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
   1495 	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
   1496 	bboxMin[3] = ( bboxMin[3] <= ALPHA_BBOX_EXPAND ) ? 0 : bboxMin[3] - ALPHA_BBOX_EXPAND;
   1497 	bboxMax[0] = ( bboxMax[0] >= (255>>3)-C565_BBOX_EXPAND ) ? (255>>3) : bboxMax[0] + C565_BBOX_EXPAND;
   1498 	bboxMax[1] = ( bboxMax[1] >= (255>>2)-C565_BBOX_EXPAND ) ? (255>>2) : bboxMax[1] + C565_BBOX_EXPAND;
   1499 	bboxMax[3] = ( bboxMax[3] >= (255)-ALPHA_BBOX_EXPAND ) ? (255) : bboxMax[3] + ALPHA_BBOX_EXPAND;
   1500 
   1501 	for ( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ ) {
   1502 		for ( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- ) {
   1503 			if ( abs( i0 - j0 ) < minAxisDist[0] ) {
   1504 				continue;
   1505 			}
   1506 
   1507 			for ( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ ) {
   1508 				for ( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- ) {
   1509 					if ( abs( i1 - j1 ) < minAxisDist[1] ) {
   1510 						continue;
   1511 					}
   1512 
   1513 					tmin[0] = (byte)j0 << 3;
   1514 					tmin[1] = (byte)j1 << 2;
   1515 					tmin[2] = 0;
   1516 
   1517 					tmax[0] = (byte)i0 << 3;
   1518 					tmax[1] = (byte)i1 << 2;
   1519 					tmax[2] = 0;
   1520 
   1521 					for ( i3 = bboxMin[3]; i3 <= bboxMax[3]; i3++ ) {
   1522 						for ( j3 = bboxMax[3]; j3 >= bboxMin[3]; j3-- ) {
   1523 							if ( abs( i3 - j3 ) < minAxisDist[3] ) {
   1524 								continue;
   1525 							}
   1526 
   1527 							tmin[3] = (byte)j3;
   1528 							tmax[3] = (byte)i3;
   1529 
   1530 							error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
   1531 							if ( error < bestError ) {
   1532 								bestError = error;
   1533 								memcpy( minColor, tmin, 4 );
   1534 								memcpy( maxColor, tmax, 4 );
   1535 								colorIndices = tempColorIndices;
   1536 								memcpy( alphaIndices, tempAlphaIndices, 6 );
   1537 							}
   1538 
   1539 							tmin[3] = (byte)i3;
   1540 							tmax[3] = (byte)j3;
   1541 
   1542 							error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
   1543 							if ( error < bestError ) {
   1544 								bestError = error;
   1545 								memcpy( minColor, tmin, 4 );
   1546 								memcpy( maxColor, tmax, 4 );
   1547 								colorIndices = tempColorIndices;
   1548 								memcpy( alphaIndices, tempAlphaIndices, 6 );
   1549 							}
   1550 						}
   1551 					}
   1552 				}
   1553 			}
   1554 		}
   1555 	}
   1556 
   1557 	return bestError;
   1558 }
   1559 
   1560 /*
   1561 ========================
   1562 idDxtEncoder::GetMinMaxNormalsDXT5HQFast
   1563 
   1564 Uses an exhaustive search to find the two RGB colors that produce the least error when used to 
   1565 compress the 4x4 block. Also finds the minimum and maximum alpha values.
   1566 
   1567 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
   1568 paramO:	minColor	- 4 byte Min color found
   1569 paramO:	maxColor	- 4 byte Max color found
   1570 ========================
   1571 */
   1572 int idDxtEncoder::GetMinMaxNormalsDXT5HQFast( const byte *colorBlock, byte *minColor, byte *maxColor, unsigned int &colorIndices, byte *alphaIndices ) const {
   1573 	int i0, i1, i2, i3, j0, j1, j2, j3;
   1574 	byte bboxMin[4], bboxMax[4], minAxisDist[4];
   1575 	byte tmin[4], tmax[4];
   1576 	int error, bestError = MAX_TYPE( int );
   1577 	unsigned int tempColorIndices;
   1578 	byte tempAlphaIndices[6];
   1579 	ALIGN16( int intColorBlock[16*4] );
   1580 
   1581 	bboxMin[0] = bboxMin[1] = bboxMin[2] = bboxMin[3] = 255;
   1582 	bboxMax[0] = bboxMax[1] = bboxMax[2] = bboxMax[3] = 0;
   1583 
   1584 	// get color bbox
   1585 	for ( int i = 0; i < 16; i++ ) {
   1586 		if ( colorBlock[i*4+0] < bboxMin[0] ) {
   1587 			bboxMin[0] = colorBlock[i*4+0];
   1588 		}
   1589 		if ( colorBlock[i*4+1] < bboxMin[1] ) {
   1590 			bboxMin[1] = colorBlock[i*4+1];
   1591 		}
   1592 		if ( colorBlock[i*4+2] < bboxMin[2] ) {
   1593 			bboxMin[2] = colorBlock[i*4+2];
   1594 		}
   1595 		if ( colorBlock[i*4+3] < bboxMin[3] ) {
   1596 			bboxMin[3] = colorBlock[i*4+3];
   1597 		}
   1598 		if ( colorBlock[i*4+0] > bboxMax[0] ) {
   1599 			bboxMax[0] = colorBlock[i*4+0];
   1600 		}
   1601 		if ( colorBlock[i*4+1] > bboxMax[1] ) {
   1602 			bboxMax[1] = colorBlock[i*4+1];
   1603 		}
   1604 		if ( colorBlock[i*4+2] > bboxMax[2] ) {
   1605 			bboxMax[2] = colorBlock[i*4+2];
   1606 		}
   1607 		if ( colorBlock[i*4+3] > bboxMax[3] ) {
   1608 			bboxMax[3] = colorBlock[i*4+3];
   1609 		}
   1610 	}
   1611 
   1612 	for ( int i = 0; i < 64; i++ ) {
   1613 		intColorBlock[i] = colorBlock[i];
   1614 	}
   1615 
   1616 	// decrease range for 565 encoding
   1617 	bboxMin[0] >>= 3;
   1618 	bboxMin[1] >>= 2;
   1619 	bboxMin[2] >>= 3;
   1620 	bboxMax[0] >>= 3;
   1621 	bboxMax[1] >>= 2;
   1622 	bboxMax[2] >>= 3;
   1623 
   1624 	bboxMin[3] = 0;
   1625 	bboxMax[3] = 255;
   1626 
   1627 	// get the minimum distance the end points of the line must be apart along each axis
   1628 	for ( int i = 0; i < 4; i++ ) {
   1629 		minAxisDist[i] = 0;
   1630 	}
   1631 
   1632 	// expand the bounding box
   1633 	const int C565_BBOX_EXPAND = 1;
   1634 	const int ALPHA_BBOX_EXPAND = 128;
   1635 
   1636 #if 0 // object-space
   1637 	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
   1638 	bboxMax[0] = ( bboxMax[0] >= (255>>3)-C565_BBOX_EXPAND ) ? (255>>3) : bboxMax[0] + C565_BBOX_EXPAND;
   1639 	bboxMin[2] = 0;
   1640 	bboxMax[2] = 0;
   1641 #else
   1642 	bboxMin[0] = 0;
   1643 	bboxMax[0] = 0;
   1644 	bboxMin[2] = ( bboxMin[2] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[2] - C565_BBOX_EXPAND;
   1645 	bboxMax[2] = ( bboxMax[2] >= (255>>2)-C565_BBOX_EXPAND ) ? (255>>2) : bboxMax[2] + C565_BBOX_EXPAND;
   1646 #endif
   1647 
   1648 	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
   1649 	bboxMax[1] = ( bboxMax[1] >= (255>>2)-C565_BBOX_EXPAND ) ? (255>>2) : bboxMax[1] + C565_BBOX_EXPAND;
   1650 
   1651 	bboxMin[3] = ( bboxMin[3] <= ALPHA_BBOX_EXPAND ) ? 0 : bboxMin[3] - ALPHA_BBOX_EXPAND;
   1652 	bboxMax[3] = ( bboxMax[3] >= (255)-ALPHA_BBOX_EXPAND ) ? (255) : bboxMax[3] + ALPHA_BBOX_EXPAND;
   1653 
   1654 	for ( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ ) {
   1655 		for ( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- ) {
   1656 			if ( abs( i0 - j0 ) < minAxisDist[0] ) {
   1657 				continue;
   1658 			}
   1659 
   1660 			for ( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ ) {
   1661 				for ( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- ) {
   1662 					if ( abs( i1 - j1 ) < minAxisDist[1] ) {
   1663 						continue;
   1664 					}
   1665 
   1666 					for ( i2 = bboxMin[2]; i2 <= bboxMax[2]; i2++ ) {
   1667 						for ( j2 = bboxMax[2]; j2 >= bboxMin[2]; j2-- ) {
   1668 							if ( abs( i2 - j2 ) < minAxisDist[2] ) {
   1669 								continue;
   1670 							}
   1671 
   1672 							unsigned short minColor565 = (unsigned short)( ( i0 << 11 ) | ( i1 << 5 ) | i2 );
   1673 							unsigned short maxColor565 = (unsigned short)( ( j0 << 11 ) | ( j1 << 5 ) | j2 );
   1674 
   1675 							if ( minColor565 > maxColor565 ) {
   1676 								SwapValues( minColor565, maxColor565 );
   1677 							}
   1678 
   1679 							error = GetSquareNormalsDXT1Error( intColorBlock, maxColor565, minColor565, bestError, tempColorIndices );
   1680 							if ( error < bestError ) {
   1681 								bestError = error;
   1682 								ColorFrom565( minColor565, minColor );
   1683 								ColorFrom565( maxColor565, maxColor );
   1684 								colorIndices = tempColorIndices;
   1685 							}
   1686 						}
   1687 					}
   1688 				}
   1689 			}
   1690 		}
   1691 	}
   1692 
   1693 	bestError = MAX_TYPE( int );
   1694 
   1695 	memcpy( tmin, minColor, 4 );
   1696 	memcpy( tmax, maxColor, 4 );
   1697 
   1698 	for ( i3 = bboxMin[3]; i3 <= bboxMax[3]; i3++ ) {
   1699 		for ( j3 = bboxMax[3]; j3 >= bboxMin[3]; j3-- ) {
   1700 			if ( abs( i3 - j3 ) < minAxisDist[3] ) {
   1701 				continue;
   1702 			}
   1703 
   1704 			tmin[3] = (byte)j3;
   1705 			tmax[3] = (byte)i3;
   1706 
   1707 			error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
   1708 			if ( error < bestError ) {
   1709 				bestError = error;
   1710 				memcpy( minColor, tmin, 4 );
   1711 				memcpy( maxColor, tmax, 4 );
   1712 				colorIndices = tempColorIndices;
   1713 				memcpy( alphaIndices, tempAlphaIndices, 6 );
   1714 			}
   1715 
   1716 			tmin[3] = (byte)i3;
   1717 			tmax[3] = (byte)j3;
   1718 
   1719 			error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
   1720 			if ( error < bestError ) {
   1721 				bestError = error;
   1722 				memcpy( minColor, tmin, 4 );
   1723 				memcpy( maxColor, tmax, 4 );
   1724 				colorIndices = tempColorIndices;
   1725 				memcpy( alphaIndices, tempAlphaIndices, 6 );
   1726 			}
   1727 		}
   1728 	}
   1729 
   1730 	return bestError;
   1731 }
   1732 
   1733 /*
   1734 ========================
   1735 idDxtEncoder::FindColorIndices
   1736 
   1737 params:	colorBlock	- 16 pixel block for which find color indexes
   1738 paramO:	color0		- Min color found
   1739 paramO:	color1		- Max color found
   1740 return: 4 byte color index block
   1741 ========================
   1742 */
   1743 int idDxtEncoder::FindColorIndices( const byte *colorBlock, const unsigned short color0, const unsigned short color1, unsigned int &result ) const {
   1744 	int i, j;
   1745 	unsigned int indexes[16];
   1746 	byte colors[4][4];
   1747 
   1748 	ColorFrom565( color0, colors[0] );
   1749 	ColorFrom565( color1, colors[1] );
   1750 
   1751 	if ( color0 > color1 ) {
   1752 		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   1753 		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   1754 		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
   1755 		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   1756 		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   1757 		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
   1758 	} else {
   1759 		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
   1760 		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
   1761 		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
   1762 		colors[3][0] = 0;
   1763 		colors[3][1] = 0;
   1764 		colors[3][2] = 0;
   1765 	}
   1766 
   1767 	int error = 0;
   1768 	for ( i = 0; i < 16; i++ ) {
   1769 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
   1770 		for ( j = 0; j < 4; j++ ) {
   1771 			unsigned int dist = ColorDistance( &colorBlock[i*4], &colors[j][0] );
   1772 			if ( dist < minDist ) {
   1773 				minDist = dist;
   1774 				indexes[i] = j;
   1775 			}
   1776 		}
   1777 		// accumulated error
   1778 		error += minDist;
   1779 	}
   1780 
   1781 	result = 0;
   1782 	for ( i = 0; i < 16; i++ ) {
   1783 		result |= ( indexes[i] << (unsigned int)( i << 1 ) );
   1784 	}
   1785 
   1786 	return error;
   1787 }
   1788 
   1789 /*
   1790 ========================
   1791 idDxtEncoder::FindAlphaIndices
   1792 
   1793 params:	colorBlock	- 16 pixel block for which find alpha indexes
   1794 paramO:	alpha0		- Min alpha found
   1795 paramO:	alpha1		- Max alpha found
   1796 params:	rindexes	- 6 byte alpha index block
   1797 return: error metric for this compression
   1798 ========================
   1799 */
   1800 int idDxtEncoder::FindAlphaIndices( const byte *colorBlock, const int alphaOffset, const byte alpha0, const byte alpha1, byte *rindexes ) const {
   1801 	int i, j;
   1802 	unsigned int indexes[16];
   1803 	byte alphas[8];
   1804 
   1805 	alphas[0] = alpha0;
   1806 	alphas[1] = alpha1;
   1807 	if ( alpha0 > alpha1 ) {
   1808 		alphas[2] = ( 6 * alpha0 + 1 * alpha1 ) / 7;
   1809 		alphas[3] = ( 5 * alpha0 + 2 * alpha1 ) / 7;
   1810 		alphas[4] = ( 4 * alpha0 + 3 * alpha1 ) / 7;
   1811 		alphas[5] = ( 3 * alpha0 + 4 * alpha1 ) / 7;
   1812 		alphas[6] = ( 2 * alpha0 + 5 * alpha1 ) / 7;
   1813 		alphas[7] = ( 1 * alpha0 + 6 * alpha1 ) / 7;
   1814 	} else {
   1815 		alphas[2] = ( 4 * alpha0 + 1 * alpha1 ) / 5;
   1816 		alphas[3] = ( 3 * alpha0 + 2 * alpha1 ) / 5;
   1817 		alphas[4] = ( 2 * alpha0 + 3 * alpha1 ) / 5;
   1818 		alphas[5] = ( 1 * alpha0 + 4 * alpha1 ) / 5;
   1819 		alphas[6] = 0;
   1820 		alphas[7] = 255;
   1821 	}
   1822 
   1823 	int error = 0;
   1824 	for ( i = 0; i < 16; i++ ) {
   1825 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
   1826 		byte a = colorBlock[i*4+alphaOffset];
   1827 		for ( j = 0; j < 8; j++ ) {
   1828 			unsigned int dist = AlphaDistance( a, alphas[j] );
   1829 			if ( dist < minDist ) {
   1830 				minDist = dist;
   1831 				indexes[i] = j;
   1832 			}
   1833 		}
   1834 		error += minDist;
   1835 	}
   1836 
   1837 	rindexes[0] = byte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6) );
   1838 	rindexes[1] = byte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7) );
   1839 	rindexes[2] = byte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5) );
   1840 
   1841 	rindexes[3] = byte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6) );
   1842 	rindexes[4] = byte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7) );
   1843 	rindexes[5] = byte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5) );
   1844 
   1845 	return error;
   1846 }
   1847 
   1848 /*
   1849 ========================
   1850 idDxtEncoder::FindCTX1Indices
   1851 
   1852 params:	colorBlock	- 16 pixel block for which find color indexes
   1853 paramO:	color0		- Min color found
   1854 paramO:	color1		- Max color found
   1855 return: 4 byte color index block
   1856 ========================
   1857 */
   1858 int idDxtEncoder::FindCTX1Indices( const byte *colorBlock, const byte *color0, const byte *color1, unsigned int &result ) const {
   1859 	int i, j;
   1860 	unsigned int indexes[16];
   1861 	byte colors[4][4];
   1862 
   1863 	colors[0][0] = color1[0];
   1864 	colors[0][1] = color1[1];
   1865 	colors[1][0] = color0[0];
   1866 	colors[1][1] = color0[1];
   1867 
   1868 	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   1869 	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   1870 	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   1871 	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   1872 
   1873 	int error = 0;
   1874 	for ( i = 0; i < 16; i++ ) {
   1875 		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
   1876 		for ( j = 0; j < 4; j++ ) {
   1877 			unsigned int dist = CTX1Distance( &colorBlock[i*4], &colors[j][0] );
   1878 			if ( dist < minDist ) {
   1879 				minDist = dist;
   1880 				indexes[i] = j;
   1881 			}
   1882 		}
   1883 		// accumulated error
   1884 		error += minDist;
   1885 	}
   1886 
   1887 	result = 0;
   1888 	for ( i = 0; i < 16; i++ ) {
   1889 		result |= ( indexes[i] << (unsigned int)( i << 1 ) );
   1890 	}
   1891 
   1892 	return error;
   1893 }
   1894 
   1895 /*
   1896 ========================
   1897 idDxtEncoder::CompressImageDXT1HQ
   1898 
   1899 params:	inBuf		- image to compress
   1900 paramO:	outBuf		- result of compression
   1901 params:	width		- width of image
   1902 params:	height		- height of image
   1903 ========================
   1904 */
   1905 void idDxtEncoder::CompressImageDXT1HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   1906 	ALIGN16( byte block[64] );
   1907 	unsigned int colorIndices1;
   1908 	unsigned int colorIndices2;
   1909 	byte col1[4];
   1910 	byte col2[4];
   1911 	int error1;
   1912 	int error2;
   1913 
   1914 	this->width = width;
   1915 	this->height = height;
   1916 	this->outData = outBuf;
   1917 
   1918 	if ( width > 4 && ( width & 3 ) != 0 ) {
   1919 		return;
   1920 	}
   1921 	if ( height > 4 && ( height & 3 ) != 0 ) {
   1922 		return;
   1923 	}
   1924 
   1925 	if ( width < 4 || height < 4 ) {
   1926 		WriteTinyColorDXT1( inBuf, width, height );
   1927 		return;
   1928 	}
   1929 
   1930 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   1931 		for ( int i = 0; i < width; i += 4 ) {
   1932 
   1933 			ExtractBlock( inBuf + i * 4, width, block );
   1934 
   1935 			GetMinMaxColorsHQ( block, col1, col2, false );
   1936 
   1937 			// Write out color data. Try and find minimum error for the two encoding methods.
   1938 			unsigned short scol1 = ColorTo565( col1 );
   1939 			unsigned short scol2 = ColorTo565( col2 );
   1940 
   1941 			error1 = FindColorIndices( block, scol1, scol2, colorIndices1 );
   1942 			error2 = FindColorIndices( block, scol2, scol1, colorIndices2 );
   1943 
   1944 			if ( error1 < error2 ) {
   1945 
   1946 				EmitUShort( scol1 );
   1947 				EmitUShort( scol2 );
   1948 				EmitUInt( colorIndices1 );
   1949 
   1950 			} else {
   1951 
   1952 				EmitUShort( scol2 );
   1953 				EmitUShort( scol1 );
   1954 				EmitUInt( colorIndices2 );
   1955 			}
   1956 
   1957 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   1958 		}
   1959 		outData += dstPadding;
   1960 		inBuf += srcPadding;
   1961 	}
   1962 
   1963 	//idLib::Printf( "\r100%%\n" );
   1964 }
   1965 
   1966 /*
   1967 ========================
   1968 idDxtEncoder::CompressImageDXT5HQ
   1969 
   1970 params:	inBuf		- image to compress
   1971 paramO:	outBuf		- result of compression
   1972 params:	width		- width of image
   1973 params:	height		- height of image
   1974 ========================
   1975 */
   1976 void idDxtEncoder::CompressImageDXT5HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   1977 	ALIGN16( byte block[64] );
   1978 	byte alphaIndices1[6];
   1979 	byte alphaIndices2[6];
   1980 	unsigned int colorIndices;
   1981 	byte col1[4];
   1982 	byte col2[4];
   1983 	int error1;
   1984 	int error2;
   1985 
   1986 	this->width = width;
   1987 	this->height = height;
   1988 	this->outData = outBuf;
   1989 
   1990 	if ( width > 4 && ( width & 3 ) != 0 ) {
   1991 		return;
   1992 	}
   1993 	if ( height > 4 && ( height & 3 ) != 0 ) {
   1994 		return;
   1995 	}
   1996 
   1997 	if ( width < 4 || height < 4 ) {
   1998 		WriteTinyColorDXT5( inBuf, width, height );
   1999 		return;
   2000 	}
   2001 
   2002 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2003 		for ( int i = 0; i < width; i += 4 ) {
   2004 
   2005 			ExtractBlock( inBuf + i * 4, width, block );
   2006 
   2007 			GetMinMaxColorsHQ( block, col1, col2, true );
   2008 			GetMinMaxAlphaHQ( block, 3, col1, col2 );
   2009 
   2010 			// Write out alpha data. Try and find minimum error for the two encoding methods.
   2011 			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
   2012 			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
   2013 
   2014 			if ( error1 < error2 ) {
   2015 
   2016 				EmitByte( col1[3] );
   2017 				EmitByte( col2[3] );
   2018 				EmitByte( alphaIndices1[0] );
   2019 				EmitByte( alphaIndices1[1] );
   2020 				EmitByte( alphaIndices1[2] );
   2021 				EmitByte( alphaIndices1[3] );
   2022 				EmitByte( alphaIndices1[4] );
   2023 				EmitByte( alphaIndices1[5] );
   2024 
   2025 			} else {
   2026 
   2027 				EmitByte( col2[3] );
   2028 				EmitByte( col1[3] );
   2029 				EmitByte( alphaIndices2[0] );
   2030 				EmitByte( alphaIndices2[1] );
   2031 				EmitByte( alphaIndices2[2] );
   2032 				EmitByte( alphaIndices2[3] );
   2033 				EmitByte( alphaIndices2[4] );
   2034 				EmitByte( alphaIndices2[5] );
   2035 			}
   2036 
   2037 #ifdef NVIDIA_7X_HARDWARE_BUG_FIX
   2038 			NV4XHardwareBugFix( col2, col1 );
   2039 #endif
   2040 
   2041 			// Write out color data. Always take the path with 4 interpolated values.
   2042 			unsigned short scol1 = ColorTo565( col1 );
   2043 			unsigned short scol2 = ColorTo565( col2 );
   2044 
   2045 			EmitUShort( scol1 );
   2046 			EmitUShort( scol2 );
   2047 
   2048 			FindColorIndices( block, scol1, scol2, colorIndices );
   2049 			EmitUInt( colorIndices );
   2050 
   2051 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2052 		}
   2053 		outData += dstPadding;
   2054 		inBuf += srcPadding;
   2055 	}
   2056 
   2057 	//idLib::Printf( "\r100%%\n" );
   2058 }
   2059 
   2060 /*
   2061 ========================
   2062 idDxtEncoder::CompressImageCTX1HQ
   2063 
   2064 params:	inBuf		- image to compress
   2065 paramO:	outBuf		- result of compression
   2066 params:	width		- width of image
   2067 params:	height		- height of image
   2068 ========================
   2069 */
   2070 void idDxtEncoder::CompressImageCTX1HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2071 	ALIGN16( byte block[64] );
   2072 	unsigned int colorIndices;
   2073 	byte col1[4];
   2074 	byte col2[4];
   2075 
   2076 	this->width = width;
   2077 	this->height = height;
   2078 	this->outData = outBuf;
   2079 
   2080 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2081 		return;
   2082 	}
   2083 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2084 		return;
   2085 	}
   2086 
   2087 	if ( width < 4 || height < 4 ) {
   2088 		WriteTinyColorCTX1DXT5A( inBuf, width, height );
   2089 		return;
   2090 	}
   2091 
   2092 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2093 		for ( int i = 0; i < width; i += 4 ) {
   2094 
   2095 			ExtractBlock( inBuf + i * 4, width, block );
   2096 
   2097 			GetMinMaxCTX1HQ( block, col1, col2 );
   2098 
   2099 			EmitByte( col2[0] );
   2100 			EmitByte( col2[1] );
   2101 			EmitByte( col1[0] );
   2102 			EmitByte( col1[1] );
   2103 
   2104 			FindCTX1Indices( block, col1, col2, colorIndices );
   2105 			EmitUInt( colorIndices );
   2106 
   2107 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2108 		}
   2109 		outData += dstPadding;
   2110 		inBuf += srcPadding;
   2111 	}
   2112 
   2113 	//idLib::Printf( "\r100%%\n" );
   2114 }
   2115 
   2116 /*
   2117 ========================
   2118 idDxtEncoder::ScaleYCoCg
   2119 
   2120 params:	colorBlock	- 16 pixel block for which find color indexes
   2121 ========================
   2122 */
   2123 void idDxtEncoder::ScaleYCoCg( byte *colorBlock ) const {
   2124 	ALIGN16( byte minColor[4] );
   2125 	ALIGN16( byte maxColor[4] );
   2126 
   2127 	minColor[0] = minColor[1] = minColor[2] = minColor[3] = 255;
   2128 	maxColor[0] = maxColor[1] = maxColor[2] = maxColor[3] = 0;
   2129 
   2130 	for ( int i = 0; i < 16; i++ ) {
   2131 		if ( colorBlock[i*4+0] < minColor[0] ) {
   2132 			minColor[0] = colorBlock[i*4+0];
   2133 		}
   2134 		if ( colorBlock[i*4+1] < minColor[1] ) {
   2135 			minColor[1] = colorBlock[i*4+1];
   2136 		}
   2137 		if ( colorBlock[i*4+0] > maxColor[0] ) {
   2138 			maxColor[0] = colorBlock[i*4+0];
   2139 		}
   2140 		if ( colorBlock[i*4+1] > maxColor[1] ) {
   2141 			maxColor[1] = colorBlock[i*4+1];
   2142 		}
   2143 	}
   2144 
   2145 	int m0 = abs( minColor[0] - 128 );
   2146 	int m1 = abs( minColor[1] - 128 );
   2147 	int m2 = abs( maxColor[0] - 128 );
   2148 	int m3 = abs( maxColor[1] - 128 );
   2149 
   2150 	if ( m1 > m0 ) m0 = m1;
   2151 	if ( m3 > m2 ) m2 = m3;
   2152 	if ( m2 > m0 ) m0 = m2;
   2153 
   2154 	const int s0 = 128 / 2 - 1;
   2155 	const int s1 = 128 / 4 - 1;
   2156 
   2157 	int scale = 1 + ( m0 <= s0 ) + 2 * ( m0 <= s1 );
   2158 
   2159 	for ( int i = 0; i < 16; i++ ) {
   2160 		colorBlock[i*4+0] = byte( ( colorBlock[i*4+0] - 128 ) * scale + 128 );
   2161 		colorBlock[i*4+1] = byte( ( colorBlock[i*4+1] - 128 ) * scale + 128 );
   2162 		colorBlock[i*4+2] = byte( ( scale - 1 ) << 3 );
   2163 	}
   2164 }
   2165 
   2166 /*
   2167 ========================
   2168 idDxtEncoder::CompressYCoCgDXT5HQ
   2169 
   2170 params:	inBuf		- image to compress
   2171 paramO:	outBuf		- result of compression
   2172 params:	width		- width of image
   2173 params:	height		- height of image
   2174 ========================
   2175 */
   2176 void idDxtEncoder::CompressYCoCgDXT5HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2177 	ALIGN16( byte block[64] );
   2178 	byte alphaIndices1[6];
   2179 	byte alphaIndices2[6];
   2180 	unsigned int colorIndices;
   2181 	byte col1[4];
   2182 	byte col2[4];
   2183 	int error1;
   2184 	int error2;
   2185 
   2186 	assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
   2187 
   2188 	this->width = width;
   2189 	this->height = height;
   2190 	this->outData = outBuf;
   2191 
   2192 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2193 		return;
   2194 	}
   2195 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2196 		return;
   2197 	}
   2198 
   2199 	if ( width < 4 || height < 4 ) {
   2200 		WriteTinyColorDXT5( inBuf, width, height );
   2201 		return;
   2202 	}
   2203 
   2204 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2205 		for ( int i = 0; i < width; i += 4 ) {
   2206 
   2207 			ExtractBlock( inBuf + i * 4, width, block );
   2208 			ScaleYCoCg( block );
   2209 
   2210 			GetMinMaxColorsHQ( block, col1, col2, true );
   2211 			GetMinMaxAlphaHQ( block, 3, col1, col2 );
   2212 
   2213 			// Write out alpha data. Try and find minimum error for the two encoding methods.
   2214 			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
   2215 			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
   2216 
   2217 			if ( error1 < error2 ) {
   2218 
   2219 				EmitByte( col1[3] );
   2220 				EmitByte( col2[3] );
   2221 				EmitByte( alphaIndices1[0] );
   2222 				EmitByte( alphaIndices1[1] );
   2223 				EmitByte( alphaIndices1[2] );
   2224 				EmitByte( alphaIndices1[3] );
   2225 				EmitByte( alphaIndices1[4] );
   2226 				EmitByte( alphaIndices1[5] );
   2227 
   2228 			} else {
   2229 
   2230 				EmitByte( col2[3] );
   2231 				EmitByte( col1[3] );
   2232 				EmitByte( alphaIndices2[0] );
   2233 				EmitByte( alphaIndices2[1] );
   2234 				EmitByte( alphaIndices2[2] );
   2235 				EmitByte( alphaIndices2[3] );
   2236 				EmitByte( alphaIndices2[4] );
   2237 				EmitByte( alphaIndices2[5] );
   2238 			}
   2239 
   2240 #ifdef NVIDIA_7X_HARDWARE_BUG_FIX
   2241 			NV4XHardwareBugFix( col2, col1 );
   2242 #endif
   2243 
   2244 			// Write out color data. Always take the path with 4 interpolated values.
   2245 			unsigned short scol1 = ColorTo565( col1 );
   2246 			unsigned short scol2 = ColorTo565( col2 );
   2247 
   2248 			EmitUShort( scol1 );
   2249 			EmitUShort( scol2 );
   2250 
   2251 			FindColorIndices( block, scol1, scol2, colorIndices );
   2252 			EmitUInt( colorIndices );
   2253 
   2254 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2255 		}
   2256 		outData += dstPadding;
   2257 		inBuf += srcPadding;
   2258 	}
   2259 
   2260 	//idLib::Printf( "\r100%%\n" );
   2261 }
   2262 
   2263 /*
   2264 ========================
   2265 idDxtEncoder::CompressYCoCgCTX1DXT5AHQ
   2266 
   2267 params:	inBuf		- image to compress
   2268 paramO:	outBuf		- result of compression
   2269 params:	width		- width of image
   2270 params:	height		- height of image
   2271 ========================
   2272 */
   2273 void idDxtEncoder::CompressYCoCgCTX1DXT5AHQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2274 	ALIGN16( byte block[64] );
   2275 	byte alphaIndices1[6];
   2276 	byte alphaIndices2[6];
   2277 	unsigned int colorIndices;
   2278 	byte col1[4];
   2279 	byte col2[4];
   2280 	int error1;
   2281 	int error2;
   2282 
   2283 	assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
   2284 
   2285 	this->width = width;
   2286 	this->height = height;
   2287 	this->outData = outBuf;
   2288 
   2289 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2290 		return;
   2291 	}
   2292 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2293 		return;
   2294 	}
   2295 
   2296 	if ( width < 4 || height < 4 ) {
   2297 		WriteTinyColorCTX1DXT5A( inBuf, width, height );
   2298 		return;
   2299 	}
   2300 
   2301 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2302 		for ( int i = 0; i < width; i += 4 ) {
   2303 
   2304 			ExtractBlock( inBuf + i * 4, width, block );
   2305 
   2306 			GetMinMaxAlphaHQ( block, 3, col1, col2 );
   2307 
   2308 			// Write out alpha data. Try and find minimum error for the two encoding methods.
   2309 			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
   2310 			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
   2311 
   2312 			if ( error1 < error2 ) {
   2313 
   2314 				EmitByte( col1[3] );
   2315 				EmitByte( col2[3] );
   2316 				EmitByte( alphaIndices1[0] );
   2317 				EmitByte( alphaIndices1[1] );
   2318 				EmitByte( alphaIndices1[2] );
   2319 				EmitByte( alphaIndices1[3] );
   2320 				EmitByte( alphaIndices1[4] );
   2321 				EmitByte( alphaIndices1[5] );
   2322 
   2323 			} else {
   2324 
   2325 				EmitByte( col2[3] );
   2326 				EmitByte( col1[3] );
   2327 				EmitByte( alphaIndices2[0] );
   2328 				EmitByte( alphaIndices2[1] );
   2329 				EmitByte( alphaIndices2[2] );
   2330 				EmitByte( alphaIndices2[3] );
   2331 				EmitByte( alphaIndices2[4] );
   2332 				EmitByte( alphaIndices2[5] );
   2333 			}
   2334 
   2335 			GetMinMaxCTX1HQ( block, col1, col2 );
   2336 
   2337 			EmitByte( col2[0] );
   2338 			EmitByte( col2[1] );
   2339 			EmitByte( col1[0] );
   2340 			EmitByte( col1[1] );
   2341 
   2342 			FindCTX1Indices( block, col1, col2, colorIndices );
   2343 			EmitUInt( colorIndices );
   2344 
   2345 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2346 		}
   2347 		outData += dstPadding;
   2348 		inBuf += srcPadding;
   2349 	}
   2350 
   2351 	//idLib::Printf( "\r100%%\n" );
   2352 }
   2353 
   2354 /*
   2355 ========================
   2356 idDxtEncoder::RotateNormalsDXT1
   2357 ========================
   2358 */
   2359 void idDxtEncoder::RotateNormalsDXT1( byte *block ) const {
   2360 	byte rotatedBlock[64];
   2361 	byte col1[4];
   2362 	byte col2[4];
   2363 	int bestError = MAX_TYPE( int );
   2364 	int bestRotation = 0;
   2365 
   2366 	for ( int i = 0; i < 32; i += 1 ) {
   2367 		int r = ( i << 3 ) | ( i >> 2 );
   2368 		float angle = ( r / 255.0f ) * idMath::PI;
   2369 		float s = sin( angle );
   2370 		float c = cos( angle );
   2371 
   2372 		for ( int j = 0; j < 16; j++ ) {
   2373 			float x = block[j*4+0] / 255.0f * 2.0f - 1.0f;
   2374 			float y = block[j*4+1] / 255.0f * 2.0f - 1.0f;
   2375 			float rx = c * x - s * y;
   2376 			float ry = s * x + c * y;
   2377 			rotatedBlock[j*4+0] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
   2378 			rotatedBlock[j*4+1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
   2379 		}
   2380 
   2381 		int error = GetMinMaxColorsHQ( rotatedBlock, col1, col2, true );
   2382 		if ( error < bestError ) {
   2383 			bestError = error;
   2384 			bestRotation = r;
   2385 		}
   2386 	}
   2387 
   2388 	float angle = ( bestRotation / 255.0f ) * idMath::PI;
   2389 	float s = sin( angle );
   2390 	float c = cos( angle );
   2391 
   2392 	for ( int j = 0; j < 16; j++ ) {
   2393 		float x = block[j*4+0] / 255.0f * 2.0f - 1.0f;
   2394 		float y = block[j*4+1] / 255.0f * 2.0f - 1.0f;
   2395 		float rx = c * x - s * y;
   2396 		float ry = s * x + c * y;
   2397 		block[j*4+0] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
   2398 		block[j*4+1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
   2399 		block[j*4+2] = (byte)bestRotation;
   2400 	}
   2401 }
   2402 
   2403 /*
   2404 ========================
   2405 idDxtEncoder::CompressNormalMapDXT1HQ
   2406 
   2407 params:	inBuf		- image to compress
   2408 paramO:	outBuf		- result of compression
   2409 params:	width		- width of image
   2410 params:	height		- height of image
   2411 ========================
   2412 */
   2413 void idDxtEncoder::CompressNormalMapDXT1HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2414 	ALIGN16( byte block[64] );
   2415 	unsigned int colorIndices;
   2416 	byte col1[4];
   2417 	byte col2[4];
   2418 
   2419 	this->width = width;
   2420 	this->height = height;
   2421 	this->outData = outBuf;
   2422 
   2423 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2424 		return;
   2425 	}
   2426 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2427 		return;
   2428 	}
   2429 
   2430 	if ( width < 4 || height < 4 ) {
   2431 		WriteTinyColorDXT1( inBuf, width, height );
   2432 		return;
   2433 	}
   2434 
   2435 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2436 		for ( int i = 0; i < width; i += 4 ) {
   2437 
   2438 			ExtractBlock( inBuf + i * 4, width, block );
   2439 
   2440 			for ( int k = 0; k < 16; k++ ) {
   2441 				block[k*4+2] = 0;
   2442 			}
   2443 
   2444 			GetMinMaxColorsHQ( block, col1, col2, true );
   2445 
   2446 			// Write out color data. Always take the path with 4 interpolated values.
   2447 			unsigned short scol1 = ColorTo565( col1 );
   2448 			unsigned short scol2 = ColorTo565( col2 );
   2449 
   2450 			EmitUShort( scol1 );
   2451 			EmitUShort( scol2 );
   2452 
   2453 			FindColorIndices( block, scol1, scol2, colorIndices );
   2454 			EmitUInt( colorIndices );
   2455 
   2456 			//idLib::Printf( "\r%3d%%", ( j * width + i * 4 ) * 100 / ( width * height ) );
   2457 		}
   2458 		outData += dstPadding;
   2459 		inBuf += srcPadding;
   2460 	}
   2461 
   2462 	//idLib::Printf( "\r100%%\n" );
   2463 }
   2464 
   2465 /*
   2466 ========================
   2467 idDxtEncoder::CompressNormalMapDXT1RenormalizeHQ
   2468 
   2469 params:	inBuf		- image to compress
   2470 paramO:	outBuf		- result of compression
   2471 params:	width		- width of image
   2472 params:	height		- height of image
   2473 ========================
   2474 */
   2475 void idDxtEncoder::CompressNormalMapDXT1RenormalizeHQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2476 	ALIGN16( byte block[64] );
   2477 	unsigned int colorIndices;
   2478 	byte col1[4];
   2479 	byte col2[4];
   2480 
   2481 	this->width = width;
   2482 	this->height = height;
   2483 	this->outData = outBuf;
   2484 
   2485 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2486 		return;
   2487 	}
   2488 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2489 		return;
   2490 	}
   2491 
   2492 	if ( width < 4 || height < 4 ) {
   2493 		WriteTinyColorDXT1( inBuf, width, height );
   2494 		return;
   2495 	}
   2496 
   2497 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2498 		for ( int i = 0; i < width; i += 4 ) {
   2499 
   2500 			ExtractBlock( inBuf + i * 4, width, block );
   2501 
   2502 			// clear alpha channel
   2503 			for ( int k = 0; k < 16; k++ ) {
   2504 				block[k*4+3] = 0;
   2505 			}
   2506 
   2507 			GetMinMaxNormalsDXT1HQ( block, col1, col2, colorIndices, true );
   2508 
   2509 			// Write out color data. Always take the path with 4 interpolated values.
   2510 			unsigned short scol1 = ColorTo565( col1 );
   2511 			unsigned short scol2 = ColorTo565( col2 );
   2512 
   2513 			EmitUShort( scol1 );
   2514 			EmitUShort( scol2 );
   2515 			EmitUInt( colorIndices );
   2516 
   2517 			////idLib::Printf( "\r%3d%%", ( j * width + i * 4 ) * 100 / ( width * height ) );
   2518 		}
   2519 		outData += dstPadding;
   2520 		inBuf += srcPadding;
   2521 	}
   2522 
   2523 	////idLib::Printf( "\r100%%\n" );
   2524 }
   2525 
   2526 #define USE_SCALE		1
   2527 #define USE_BIAS		1
   2528 
   2529 static int c_blocks;
   2530 static int c_scaled;
   2531 static int c_scaled2x;
   2532 static int c_scaled4x;
   2533 static int c_differentBias;
   2534 static int c_biasHelped;
   2535 
   2536 /*
   2537 ========================
   2538 idDxtEncoder::BiasScaleNormalY
   2539 
   2540 	* scale2x = 33%
   2541 	* scale4x = 23%
   2542 	* bias + scale2x = 30%
   2543 	* bias + scale4x = 55%
   2544 ========================
   2545 */
   2546 void idDxtEncoder::BiasScaleNormalY( byte *colorBlock ) const {
   2547 
   2548 	byte minColor = 255;
   2549 	byte maxColor = 0;
   2550 
   2551 	for ( int i = 0; i < 16; i++ ) {
   2552 		if ( colorBlock[i*4+1] < minColor ) {
   2553 			minColor = colorBlock[i*4+1];
   2554 		}
   2555 		if ( colorBlock[i*4+1] > maxColor ) {
   2556 			maxColor = colorBlock[i*4+1];
   2557 		}
   2558 	}
   2559 
   2560 	int bestBias = 128;
   2561 	int bestRange = Max( abs( minColor - bestBias ), abs( maxColor - bestBias ) );
   2562 #if USE_BIAS
   2563 	for ( int i = 0; i < 32; i++ ) {
   2564 		int bias = ( ( i << 3 ) | ( i >> 2 ) ) - 4;
   2565 		int range = Max( abs( minColor - bias ), abs( maxColor - bias ) );
   2566 		if ( range < bestRange ) {
   2567 			bestRange = range;
   2568 			bestBias = bias;
   2569 		}
   2570 	}
   2571 #endif
   2572 
   2573 	const int s0 = 128 / 2 - 1;
   2574 	const int s1 = 128 / 4 - 1;
   2575 
   2576 #if USE_SCALE
   2577 	int scale = 1 + ( bestRange <= s0 ) + 2 * ( bestRange <= s1 );
   2578 #else
   2579 	int scale = 1;
   2580 #endif
   2581 
   2582 	if ( scale == 1 ) {
   2583 		bestBias = 128;
   2584 	} else {
   2585 		c_scaled++;
   2586 		if ( scale == 2 ) c_scaled2x++;
   2587 		if ( scale == 4 ) c_scaled4x++;
   2588 		if ( bestBias != 128 ) {
   2589 			c_differentBias++;
   2590 			int r = Max( abs( minColor - 128 ), abs( maxColor - 128 ) );
   2591 			int s = 1 + ( r <= s0 ) + 2 * ( r <= s1 );
   2592 			if ( scale > s ) {
   2593 				c_biasHelped++;
   2594 			}
   2595 		}
   2596 	}
   2597 
   2598 	c_blocks++;
   2599 
   2600 	for ( int i = 0; i < 16; i++ ) {
   2601 		colorBlock[i*4+0] = byte( bestBias + 4 );
   2602 		colorBlock[i*4+1] = byte( ( colorBlock[i*4+1] - bestBias ) * scale + 128 );
   2603 		colorBlock[i*4+2] = byte( ( scale - 1 ) << 3 );
   2604 	}
   2605 }
   2606 
   2607 /*
   2608 ========================
   2609 idDxtEncoder::RotateNormalsDXT5
   2610 ========================
   2611 */
   2612 void idDxtEncoder::RotateNormalsDXT5( byte *block ) const {
   2613 	byte rotatedBlock[64];
   2614 	byte col1[4];
   2615 	byte col2[4];
   2616 	int bestError = MAX_TYPE( int );
   2617 	int bestRotation = 0;
   2618 	int bestScale = 1;
   2619 
   2620 	for ( int i = 0; i < 32; i += 1 ) {
   2621 		int r = ( i << 3 ) | ( i >> 2 );
   2622 		float angle = ( r / 255.0f ) * idMath::PI;
   2623 		float s = sin( angle );
   2624 		float c = cos( angle );
   2625 
   2626 		for ( int j = 0; j < 16; j++ ) {
   2627 			float x = block[j*4+3] / 255.0f * 2.0f - 1.0f;
   2628 			float y = block[j*4+1] / 255.0f * 2.0f - 1.0f;
   2629 			float rx = c * x - s * y;
   2630 			float ry = s * x + c * y;
   2631 			rotatedBlock[j*4+3] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
   2632 			rotatedBlock[j*4+1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
   2633 		}
   2634 
   2635 #if USE_SCALE
   2636 		byte minColor = 255;
   2637 		byte maxColor = 0;
   2638 
   2639 		for ( int j = 0; j < 16; j++ ) {
   2640 			if ( rotatedBlock[j*4+1] < minColor ) {
   2641 				minColor = rotatedBlock[j*4+1];
   2642 			}
   2643 			if ( rotatedBlock[j*4+1] > maxColor ) {
   2644 				maxColor = rotatedBlock[j*4+1];
   2645 			}
   2646 		}
   2647 
   2648 		const int s0 = 128 / 2 - 1;
   2649 		const int s1 = 128 / 4 - 1;
   2650 
   2651 		int range = Max( abs( minColor - 128 ), abs( maxColor - 128 ) );
   2652 		int scale = 1 + ( range <= s0 ) + 2 * ( range <= s1 );
   2653 
   2654 		for ( int j = 0; j < 16; j++ ) {
   2655 			rotatedBlock[j*4+1] = byte( ( rotatedBlock[j*4+1] - 128 ) * scale + 128 );
   2656 		}
   2657 #endif
   2658 
   2659 		int errorY = GetMinMaxNormalYHQ( rotatedBlock, col1, col2, true, scale );
   2660 		int errorX = GetMinMaxAlphaHQ( rotatedBlock, 3, col1, col2 );
   2661 		int error = errorX + errorY;
   2662 		if ( error < bestError ) {
   2663 			bestError = error;
   2664 			bestRotation = r;
   2665 			bestScale = scale;
   2666 		}
   2667 	}
   2668 
   2669 	float angle = ( bestRotation / 255.0f ) * idMath::PI;
   2670 	float s = sin( angle );
   2671 	float c = cos( angle );
   2672 
   2673 	for ( int j = 0; j < 16; j++ ) {
   2674 		float x = block[j*4+3] / 255.0f * 2.0f - 1.0f;
   2675 		float y = block[j*4+1] / 255.0f * 2.0f - 1.0f;
   2676 		float rx = c * x - s * y;
   2677 		float ry = s * x + c * y;
   2678 		block[j*4+0] = (byte)bestRotation;
   2679 		block[j*4+1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
   2680 		block[j*4+3] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
   2681 
   2682 #if USE_SCALE
   2683 		block[j*4+1] = byte( ( block[j*4+1] - 128 ) * bestScale + 128 );
   2684 		block[j*4+2] = byte( ( bestScale - 1 ) << 3 );
   2685 #endif
   2686 	}
   2687 }
   2688 
   2689 /*
   2690 ========================
   2691 idDxtEncoder::CompressNormalMapDXT5HQ
   2692 
   2693 params:	inBuf		- image to compress
   2694 paramO:	outBuf		- result of compression
   2695 params:	width		- width of image
   2696 params:	height		- height of image
   2697 ========================
   2698 */
   2699 void idDxtEncoder::CompressNormalMapDXT5HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2700 	ALIGN16( byte block[64] );
   2701 	byte alphaIndices1[6];
   2702 	byte alphaIndices2[6];
   2703 	unsigned int colorIndices;
   2704 	byte col1[4];
   2705 	byte col2[4];
   2706 	int error1;
   2707 	int error2;
   2708 
   2709 	this->width = width;
   2710 	this->height = height;
   2711 	this->outData = outBuf;
   2712 
   2713 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2714 		return;
   2715 	}
   2716 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2717 		return;
   2718 	}
   2719 
   2720 	if ( width < 4 || height < 4 ) {
   2721 		WriteTinyColorDXT5( inBuf, width, height );
   2722 		return;
   2723 	}
   2724 
   2725 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2726 		for ( int i = 0; i < width; i += 4 ) {
   2727 
   2728 			ExtractBlock( inBuf + i * 4, width, block );
   2729 
   2730 			// swizzle components
   2731 			for ( int k = 0; k < 16; k++ ) {
   2732 				block[k*4+3] = block[k*4+0];
   2733 				block[k*4+0] = 0;
   2734 				block[k*4+2] = 0;
   2735 			}
   2736 
   2737 			//BiasScaleNormalY( block );
   2738 			//RotateNormalsDXT5( block );
   2739 
   2740 			GetMinMaxNormalYHQ( block, col1, col2, true, 1 );
   2741 			GetMinMaxAlphaHQ( block, 3, col1, col2 );
   2742 
   2743 			// Write out alpha data. Try and find minimum error for the two encoding methods.
   2744 			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
   2745 			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
   2746 
   2747 			if ( error1 < error2 ) {
   2748 
   2749 				EmitByte( col1[3] );
   2750 				EmitByte( col2[3] );
   2751 				EmitByte( alphaIndices1[0] );
   2752 				EmitByte( alphaIndices1[1] );
   2753 				EmitByte( alphaIndices1[2] );
   2754 				EmitByte( alphaIndices1[3] );
   2755 				EmitByte( alphaIndices1[4] );
   2756 				EmitByte( alphaIndices1[5] );
   2757 
   2758 			} else {
   2759 
   2760 				EmitByte( col2[3] );
   2761 				EmitByte( col1[3] );
   2762 				EmitByte( alphaIndices2[0] );
   2763 				EmitByte( alphaIndices2[1] );
   2764 				EmitByte( alphaIndices2[2] );
   2765 				EmitByte( alphaIndices2[3] );
   2766 				EmitByte( alphaIndices2[4] );
   2767 				EmitByte( alphaIndices2[5] );
   2768 			}
   2769 
   2770 #ifdef NVIDIA_7X_HARDWARE_BUG_FIX
   2771 			NV4XHardwareBugFix( col2, col1 );
   2772 #endif
   2773 
   2774 			// Write out color data. Always take the path with 4 interpolated values.
   2775 			unsigned short scol1 = ColorTo565( col1 );
   2776 			unsigned short scol2 = ColorTo565( col2 );
   2777 
   2778 			EmitUShort( scol1 );
   2779 			EmitUShort( scol2 );
   2780 
   2781 			FindColorIndices( block, scol1, scol2, colorIndices );
   2782 			EmitUInt( colorIndices );
   2783 
   2784 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2785 		}
   2786 		outData += dstPadding;
   2787 		inBuf += srcPadding;
   2788 	}
   2789 
   2790 	//idLib::Printf( "\r100%%\n" );
   2791 }
   2792 
   2793 /*
   2794 ========================
   2795 idDxtEncoder::CompressNormalMapDXT5RenormalizeHQ
   2796 
   2797 params:	inBuf		- image to compress
   2798 paramO:	outBuf		- result of compression
   2799 params:	width		- width of image
   2800 params:	height		- height of image
   2801 ========================
   2802 */
   2803 void idDxtEncoder::CompressNormalMapDXT5RenormalizeHQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2804 	ALIGN16( byte block[64] );
   2805 	unsigned int colorIndices;
   2806 	byte alphaIndices[6];
   2807 	byte col1[4];
   2808 	byte col2[4];
   2809 
   2810 	this->width = width;
   2811 	this->height = height;
   2812 	this->outData = outBuf;
   2813 
   2814 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2815 		return;
   2816 	}
   2817 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2818 		return;
   2819 	}
   2820 
   2821 	if ( width < 4 || height < 4 ) {
   2822 		WriteTinyColorDXT5( inBuf, width, height );
   2823 		return;
   2824 	}
   2825 
   2826 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2827 		for ( int i = 0; i < width; i += 4 ) {
   2828 
   2829 			ExtractBlock( inBuf + i * 4, width, block );
   2830 
   2831 			// swizzle components
   2832 			for ( int k = 0; k < 16; k++ ) {
   2833 #if 0 // object-space
   2834 				block[k*4+3] = block[k*4+2];
   2835 				block[k*4+2] = 0;
   2836 #else
   2837 				block[k*4+3] = block[k*4+0];
   2838 				block[k*4+0] = 0;
   2839 #endif
   2840 			}
   2841 
   2842 			GetMinMaxNormalsDXT5HQFast( block, col1, col2, colorIndices, alphaIndices );
   2843 
   2844 			EmitByte( col2[3] );
   2845 			EmitByte( col1[3] );
   2846 			EmitByte( alphaIndices[0] );
   2847 			EmitByte( alphaIndices[1] );
   2848 			EmitByte( alphaIndices[2] );
   2849 			EmitByte( alphaIndices[3] );
   2850 			EmitByte( alphaIndices[4] );
   2851 			EmitByte( alphaIndices[5] );
   2852 
   2853 			unsigned short scol1 = ColorTo565( col1 );
   2854 			unsigned short scol2 = ColorTo565( col2 );
   2855 
   2856 			EmitUShort( scol2 );
   2857 			EmitUShort( scol1 );
   2858 			EmitUInt( colorIndices );
   2859 
   2860 			////idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2861 		}
   2862 		outData += dstPadding;
   2863 		inBuf += srcPadding;
   2864 	}
   2865 
   2866 	////idLib::Printf( "\r100%%\n" );
   2867 }
   2868 
   2869 /*
   2870 ========================
   2871 idDxtEncoder::CompressNormalMapDXN2HQ
   2872 
   2873 params:	inBuf		- image to compress
   2874 paramO:	outBuf		- result of compression
   2875 params:	width		- width of image
   2876 params:	height		- height of image
   2877 ========================
   2878 */
   2879 void idDxtEncoder::CompressNormalMapDXN2HQ( const byte *inBuf, byte *outBuf, int width, int height ) {
   2880 	ALIGN16( byte block[64] );
   2881 	byte alphaIndices1[6];
   2882 	byte alphaIndices2[6];
   2883 	byte col1[4];
   2884 	byte col2[4];
   2885 	int error1;
   2886 	int error2;
   2887 
   2888 	this->width = width;
   2889 	this->height = height;
   2890 	this->outData = outBuf;
   2891 
   2892 	if ( width > 4 && ( width & 3 ) != 0 ) {
   2893 		return;
   2894 	}
   2895 	if ( height > 4 && ( height & 3 ) != 0 ) {
   2896 		return;
   2897 	}
   2898 
   2899 	if ( width < 4 || height < 4 ) {
   2900 		WriteTinyColorDXT5( inBuf, width, height );
   2901 		return;
   2902 	}
   2903 
   2904 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   2905 		for ( int i = 0; i < width; i += 4 ) {
   2906 
   2907 			ExtractBlock( inBuf + i * 4, width, block );
   2908 
   2909 			for ( int k = 0; k < 2; k++ ) {
   2910 				GetMinMaxAlphaHQ( block, k, col1, col2 );
   2911 
   2912 				// Write out alpha data. Try and find minimum error for the two encoding methods.
   2913 				error1 = FindAlphaIndices( block, k, col1[k], col2[k], alphaIndices1 );
   2914 				error2 = FindAlphaIndices( block, k, col2[k], col1[k], alphaIndices2 );
   2915 
   2916 				if ( error1 < error2 ) {
   2917 
   2918 					EmitByte( col1[k] );
   2919 					EmitByte( col2[k] );
   2920 					EmitByte( alphaIndices1[0] );
   2921 					EmitByte( alphaIndices1[1] );
   2922 					EmitByte( alphaIndices1[2] );
   2923 					EmitByte( alphaIndices1[3] );
   2924 					EmitByte( alphaIndices1[4] );
   2925 					EmitByte( alphaIndices1[5] );
   2926 
   2927 				} else {
   2928 
   2929 					EmitByte( col2[k] );
   2930 					EmitByte( col1[k] );
   2931 					EmitByte( alphaIndices2[0] );
   2932 					EmitByte( alphaIndices2[1] );
   2933 					EmitByte( alphaIndices2[2] );
   2934 					EmitByte( alphaIndices2[3] );
   2935 					EmitByte( alphaIndices2[4] );
   2936 					EmitByte( alphaIndices2[5] );
   2937 				}
   2938 			}
   2939 
   2940 			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
   2941 		}
   2942 		outData += dstPadding;
   2943 		inBuf += srcPadding;
   2944 	}
   2945 
   2946 	//idLib::Printf( "\r100%%\n" );
   2947 }
   2948 
   2949 /*
   2950 ========================
   2951 idDxtEncoder::GetMinMaxBBox
   2952 
   2953 Takes the extents of the bounding box of the colors in the 4x4 block in RGB space.
   2954 Also finds the minimum and maximum alpha values.
   2955 
   2956 params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
   2957 paramO:	minColor	- 4 byte Min color found
   2958 paramO:	maxColor	- 4 byte Max color found
   2959 ========================
   2960 */
   2961 ID_INLINE void idDxtEncoder::GetMinMaxBBox( const byte *colorBlock, byte *minColor, byte *maxColor ) const {
   2962 
   2963 	minColor[0] = minColor[1] = minColor[2] = minColor[3] = 255;
   2964 	maxColor[0] = maxColor[1] = maxColor[2] = maxColor[3] = 0;
   2965 
   2966 	for ( int i = 0; i < 16; i++ ) {
   2967 		if ( colorBlock[i*4+0] < minColor[0] ) {
   2968 			minColor[0] = colorBlock[i*4+0];
   2969 		}
   2970 		if ( colorBlock[i*4+1] < minColor[1] ) {
   2971 			minColor[1] = colorBlock[i*4+1];
   2972 		}
   2973 		if ( colorBlock[i*4+2] < minColor[2] ) {
   2974 			minColor[2] = colorBlock[i*4+2];
   2975 		}
   2976 		if ( colorBlock[i*4+3] < minColor[3] ) {
   2977 			minColor[3] = colorBlock[i*4+3];
   2978 		}
   2979 		if ( colorBlock[i*4+0] > maxColor[0] ) {
   2980 			maxColor[0] = colorBlock[i*4+0];
   2981 		}
   2982 		if ( colorBlock[i*4+1] > maxColor[1] ) {
   2983 			maxColor[1] = colorBlock[i*4+1];
   2984 		}
   2985 		if ( colorBlock[i*4+2] > maxColor[2] ) {
   2986 			maxColor[2] = colorBlock[i*4+2];
   2987 		}
   2988 		if ( colorBlock[i*4+3] > maxColor[3] ) {
   2989 			maxColor[3] = colorBlock[i*4+3];
   2990 		}
   2991 	}
   2992 }
   2993 
   2994 /*
   2995 ========================
   2996 idDxtEncoder::InsetColorsBBox
   2997 ========================
   2998 */
   2999 ID_INLINE void idDxtEncoder::InsetColorsBBox( byte *minColor, byte *maxColor ) const {
   3000 	byte inset[4];
   3001 
   3002 	inset[0] = ( maxColor[0] - minColor[0] ) >> INSET_COLOR_SHIFT;
   3003 	inset[1] = ( maxColor[1] - minColor[1] ) >> INSET_COLOR_SHIFT;
   3004 	inset[2] = ( maxColor[2] - minColor[2] ) >> INSET_COLOR_SHIFT;
   3005 	inset[3] = ( maxColor[3] - minColor[3] ) >> INSET_ALPHA_SHIFT;
   3006 
   3007 	minColor[0] = ( minColor[0] + inset[0] <= 255 ) ? minColor[0] + inset[0] : 255;
   3008 	minColor[1] = ( minColor[1] + inset[1] <= 255 ) ? minColor[1] + inset[1] : 255;
   3009 	minColor[2] = ( minColor[2] + inset[2] <= 255 ) ? minColor[2] + inset[2] : 255;
   3010 	minColor[3] = ( minColor[3] + inset[3] <= 255 ) ? minColor[3] + inset[3] : 255;
   3011 
   3012 	maxColor[0] = ( maxColor[0] >= inset[0] ) ? maxColor[0] - inset[0] : 0;
   3013 	maxColor[1] = ( maxColor[1] >= inset[1] ) ? maxColor[1] - inset[1] : 0;
   3014 	maxColor[2] = ( maxColor[2] >= inset[2] ) ? maxColor[2] - inset[2] : 0;
   3015 	maxColor[3] = ( maxColor[3] >= inset[3] ) ? maxColor[3] - inset[3] : 0;
   3016 }
   3017 
   3018 /*
   3019 ========================
   3020 idDxtEncoder::SelectColorsDiagonal
   3021 ========================
   3022 */
   3023 void idDxtEncoder::SelectColorsDiagonal( const byte *colorBlock, byte *minColor, byte *maxColor ) const {
   3024 
   3025 	byte mid0 = byte( ( (int) minColor[0] + maxColor[0] + 1 ) >> 1 );
   3026 	byte mid1 = byte( ( (int) minColor[1] + maxColor[1] + 1 ) >> 1 );
   3027 	byte mid2 = byte( ( (int) minColor[2] + maxColor[2] + 1 ) >> 1 );
   3028 
   3029 #if 0
   3030 
   3031 	// using the covariance is the best way to select the diagonal
   3032 	int side0 = 0;
   3033 	int side1 = 0;
   3034 	for ( int i = 0; i < 16; i++ ) {
   3035 		int b0 = colorBlock[i*4+0] - mid0;
   3036 		int b1 = colorBlock[i*4+1] - mid1;
   3037 		int b2 = colorBlock[i*4+2] - mid2;
   3038 		side0 += ( b0 * b1 );
   3039 		side1 += ( b1 * b2 );
   3040 	}
   3041 	byte mask0 = -( side0 < 0 );
   3042 	byte mask1 = -( side1 < 0 );
   3043 
   3044 #else
   3045 
   3046 	// calculating the covariance of just the sign bits is much faster and gives almost the same result
   3047 	int side0 = 0;
   3048 	int side1 = 0;
   3049 	for ( int i = 0; i < 16; i++ ) {
   3050 		byte b0 = colorBlock[i*4+0] >= mid0;
   3051 		byte b1 = colorBlock[i*4+1] >= mid1;
   3052 		byte b2 = colorBlock[i*4+2] >= mid2;
   3053 		side0 += ( b0 ^ b1 );
   3054 		side1 += ( b1 ^ b2 );
   3055 	}
   3056 	byte mask0 = -( side0 > 8 );
   3057 	byte mask1 = -( side1 > 8 );
   3058 
   3059 #endif
   3060 
   3061 	byte c0 = minColor[0];
   3062 	byte c1 = maxColor[0];
   3063 	byte c2 = minColor[2];
   3064 	byte c3 = maxColor[2];
   3065 
   3066 	c0 ^= c1;
   3067 	mask0 &= c0;
   3068 	c1 ^= mask0;
   3069 	c0 ^= c1;
   3070 
   3071 	c2 ^= c3;
   3072 	mask1 &= c2;
   3073 	c3 ^= mask1;
   3074 	c2 ^= c3;
   3075 
   3076 	minColor[0] = c0;
   3077 	maxColor[0] = c1;
   3078 	minColor[2] = c2;
   3079 	maxColor[2] = c3;
   3080 
   3081 	if ( ColorTo565( minColor ) > ColorTo565( maxColor ) ) {
   3082 		SwapValues( minColor[0], maxColor[0] );
   3083 		SwapValues( minColor[1], maxColor[1] );
   3084 		SwapValues( minColor[2], maxColor[2] );
   3085 	}
   3086 }
   3087 
   3088 /*
   3089 ========================
   3090 idDxtEncoder::EmitColorIndices
   3091 
   3092 params:	colorBlock	- 16 pixel block for which find color indexes
   3093 paramO:	minColor	- Min color found
   3094 paramO:	maxColor	- Max color found
   3095 return: 4 byte color index block
   3096 ========================
   3097 */
   3098 void idDxtEncoder::EmitColorIndices( const byte *colorBlock, const byte *minColor, const byte *maxColor ) {
   3099 #if 1
   3100 
   3101 	ALIGN16( uint16 colors[4][4] );
   3102 	unsigned int result = 0;
   3103 
   3104 	colors[0][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
   3105 	colors[0][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );
   3106 	colors[0][2] = ( maxColor[2] & C565_5_MASK ) | ( maxColor[2] >> 5 );
   3107 	colors[0][3] = 0;
   3108 	colors[1][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
   3109 	colors[1][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );
   3110 	colors[1][2] = ( minColor[2] & C565_5_MASK ) | ( minColor[2] >> 5 );
   3111 	colors[1][3] = 0;
   3112 	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   3113 	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   3114 	colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
   3115 	colors[2][3] = 0;
   3116 	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   3117 	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   3118 	colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
   3119 	colors[3][3] = 0;
   3120 
   3121 	// uses sum of absolute differences instead of squared distance to find the best match
   3122 	for ( int i = 15; i >= 0; i-- ) {
   3123 		int c0, c1, c2, c3, m, d0, d1, d2, d3;
   3124 
   3125 		c0 = colorBlock[i*4+0];
   3126 		c1 = colorBlock[i*4+1];
   3127 		c2 = colorBlock[i*4+2];
   3128 		c3 = colorBlock[i*4+3];
   3129 
   3130 		m = colors[0][0] - c0;
   3131 		d0 = abs( m );
   3132 		m = colors[1][0] - c0;
   3133 		d1 = abs( m );
   3134 		m = colors[2][0] - c0;
   3135 		d2 = abs( m );
   3136 		m = colors[3][0] - c0;
   3137 		d3 = abs( m );
   3138 
   3139 		m = colors[0][1] - c1;
   3140 		d0 += abs( m );
   3141 		m = colors[1][1] - c1;
   3142 		d1 += abs( m );
   3143 		m = colors[2][1] - c1;
   3144 		d2 += abs( m );
   3145 		m = colors[3][1] - c1;
   3146 		d3 += abs( m );
   3147 
   3148 		m = colors[0][2] - c2;
   3149 		d0 += abs( m );
   3150 		m = colors[1][2] - c2;
   3151 		d1 += abs( m );
   3152 		m = colors[2][2] - c2;
   3153 		d2 += abs( m );
   3154 		m = colors[3][2] - c2;
   3155 		d3 += abs( m );
   3156 
   3157 #if 0
   3158 		int b0 = d0 > d2;
   3159 		int b1 = d1 > d3;
   3160 		int b2 = d0 > d3;
   3161 		int b3 = d1 > d2;
   3162 		int b4 = d0 > d1;
   3163 		int b5 = d2 > d3;
   3164 
   3165 		result |= ( ( !b3 & b4 ) | ( b2 & b5 ) | ( ( ( b0 & b3 ) | ( b1 & b2 ) ) << 1 ) ) << ( i << 1 );
   3166 #else
   3167 		bool b0 = d0 > d3;
   3168 		bool b1 = d1 > d2;
   3169 		bool b2 = d0 > d2;
   3170 		bool b3 = d1 > d3;
   3171 		bool b4 = d2 > d3;
   3172 
   3173 		int x0 = b1 & b2;
   3174 		int x1 = b0 & b3;
   3175 		int x2 = b0 & b4;
   3176 
   3177 		result |= ( x2 | ( ( x0 | x1 ) << 1 ) ) << ( i << 1 );
   3178 #endif
   3179 	}
   3180 
   3181 	EmitUInt( result );
   3182 
   3183 #elif 1
   3184 
   3185 	byte colors[4][4];
   3186 	unsigned int indexes[16];
   3187 
   3188 	colors[0][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 6 );
   3189 	colors[0][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 5 );
   3190 	colors[0][2] = ( maxColor[2] & C565_5_MASK ) | ( maxColor[2] >> 6 );
   3191 	colors[0][3] = 0;
   3192 	colors[1][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 6 );
   3193 	colors[1][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 5 );
   3194 	colors[1][2] = ( minColor[2] & C565_5_MASK ) | ( minColor[2] >> 6 );
   3195 	colors[1][3] = 0;
   3196 	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   3197 	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   3198 	colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
   3199 	colors[2][3] = 0;
   3200 	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   3201 	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   3202 	colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
   3203 	colors[3][3] = 0;
   3204 
   3205 	for ( int i = 0; i < 16; i++ ) {
   3206 		int c0, c1, c2, m, d, minDist;
   3207 
   3208 		c0 = colorBlock[i*4+0];
   3209 		c1 = colorBlock[i*4+1];
   3210 		c2 = colorBlock[i*4+2];
   3211 
   3212 		m = colors[0][0] - c0;
   3213 		d = m * m;
   3214 		m = colors[0][1] - c1;
   3215 		d += m * m;
   3216 		m = colors[0][2] - c2;
   3217 		d += m * m;
   3218 
   3219 		minDist = d;
   3220 		indexes[i] = 0;
   3221 
   3222 		m = colors[1][0] - c0;
   3223 		d = m * m;
   3224 		m = colors[1][1] - c1;
   3225 		d += m * m;
   3226 		m = colors[1][2] - c2;
   3227 		d += m * m;
   3228 
   3229 		if ( d < minDist ) {
   3230 			minDist = d;
   3231 			indexes[i] = 1;
   3232 		}
   3233 
   3234 		m = colors[2][0] - c0;
   3235 		d = m * m;
   3236 		m = colors[2][1] - c1;
   3237 		d += m * m;
   3238 		m = colors[2][2] - c2;
   3239 		d += m * m;
   3240 
   3241 		if ( d < minDist ) {
   3242 			minDist = d;
   3243 			indexes[i] = 2;
   3244 		}
   3245 
   3246 		m = colors[3][0] - c0;
   3247 		d = m * m;
   3248 		m = colors[3][1] - c1;
   3249 		d += m * m;
   3250 		m = colors[3][2] - c2;
   3251 		d += m * m;
   3252 
   3253 		if ( d < minDist ) {
   3254 			minDist = d;
   3255 			indexes[i] = 3;
   3256 		}
   3257 	}
   3258 
   3259 	unsigned int result = 0;
   3260 	for ( int i = 0; i < 16; i++ ) {
   3261 		result |= ( indexes[i] << (unsigned int)( i << 1 ) );
   3262 	}
   3263 
   3264 	EmitUInt( result );
   3265 
   3266 #else
   3267 
   3268 	byte colors[4][4];
   3269 	unsigned int indexes[16];
   3270 
   3271 	colors[0][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 6 );
   3272 	colors[0][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 5 );
   3273 	colors[0][2] = ( maxColor[2] & C565_5_MASK ) | ( maxColor[2] >> 6 );
   3274 	colors[0][3] = 0;
   3275 	colors[1][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 6 );
   3276 	colors[1][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 5 );
   3277 	colors[1][2] = ( minColor[2] & C565_5_MASK ) | ( minColor[2] >> 6 );
   3278 	colors[1][3] = 0;
   3279 	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   3280 	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   3281 	colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
   3282 	colors[2][3] = 0;
   3283 	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   3284 	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   3285 	colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
   3286 	colors[3][3] = 0;
   3287 
   3288 	for ( int i = 0; i < 16; i++ ) {
   3289 		unsigned int minDist = (255*255)*4;
   3290 		for ( int j = 0; j < 4; j++ ) {
   3291 			unsigned int dist = ColorDistance( &colorBlock[i*4], &colors[j][0] );
   3292 			if ( dist < minDist ) {
   3293 				minDist = dist;
   3294 				indexes[i] = j;
   3295 			}
   3296 		}
   3297 	}
   3298 
   3299 	unsigned int result = 0;
   3300 	for ( int i = 0; i < 16; i++ ) {
   3301 		result |= ( indexes[i] << (unsigned int)( i << 1 ) );
   3302 	}
   3303 
   3304 	EmitUInt( result );
   3305 
   3306 #endif
   3307 }
   3308 
   3309 /*
   3310 ========================
   3311 idDxtEncoder::EmitColorAlphaIndices
   3312 
   3313 params:	colorBlock	- 16 pixel block for which find color indexes
   3314 paramO:	minColor	- Min color found
   3315 paramO:	maxColor	- Max color found
   3316 return: 4 byte color index block
   3317 ========================
   3318 */
   3319 void idDxtEncoder::EmitColorAlphaIndices( const byte *colorBlock, const byte *minColor, const byte *maxColor ) {
   3320 	ALIGN16( uint16 colors[4][4] );
   3321 	unsigned int result = 0;
   3322 
   3323 	colors[0][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
   3324 	colors[0][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );
   3325 	colors[0][2] = ( minColor[2] & C565_5_MASK ) | ( minColor[2] >> 5 );
   3326 	colors[0][3] = 255;
   3327 	colors[1][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
   3328 	colors[1][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );
   3329 	colors[1][2] = ( maxColor[2] & C565_5_MASK ) | ( maxColor[2] >> 5 );
   3330 	colors[1][3] = 255;
   3331 	colors[2][0] = ( colors[0][0] + colors[1][0] ) / 2;
   3332 	colors[2][1] = ( colors[0][1] + colors[1][1] ) / 2;
   3333 	colors[2][2] = ( colors[0][2] + colors[1][2] ) / 2;
   3334 	colors[2][3] = 255;
   3335 	colors[3][0] = 0;
   3336 	colors[3][1] = 0;
   3337 	colors[3][2] = 0;
   3338 	colors[3][3] = 0;
   3339 
   3340 	// uses sum of absolute differences instead of squared distance to find the best match
   3341 	for ( int i = 15; i >= 0; i-- ) {
   3342 		int c0, c1, c2, c3, m, d0, d1, d2;
   3343 
   3344 		c0 = colorBlock[i*4+0];
   3345 		c1 = colorBlock[i*4+1];
   3346 		c2 = colorBlock[i*4+2];
   3347 		c3 = colorBlock[i*4+3];
   3348 
   3349 		m = colors[0][0] - c0;
   3350 		d0 = abs( m );
   3351 		m = colors[1][0] - c0;
   3352 		d1 = abs( m );
   3353 		m = colors[2][0] - c0;
   3354 		d2 = abs( m );
   3355 
   3356 		m = colors[0][1] - c1;
   3357 		d0 += abs( m );
   3358 		m = colors[1][1] - c1;
   3359 		d1 += abs( m );
   3360 		m = colors[2][1] - c1;
   3361 		d2 += abs( m );
   3362 
   3363 		m = colors[0][2] - c2;
   3364 		d0 += abs( m );
   3365 		m = colors[1][2] - c2;
   3366 		d1 += abs( m );
   3367 		m = colors[2][2] - c2;
   3368 		d2 += abs( m );
   3369 
   3370 		unsigned int b0 = d2 > d0;
   3371 		unsigned int b1 = d2 > d1;
   3372 		unsigned int b2 = d1 > d0;
   3373 		unsigned int b3 = c3 < 128;
   3374 
   3375 		result |= ( ( ( b0 & b1 | b3 ) << 1 ) | ( b2 ^ b1 | b3 ) ) << ( i << 1 );
   3376 	}
   3377 
   3378 	EmitUInt( result );
   3379 }
   3380 
   3381 /*
   3382 ========================
   3383 idDxtEncoder::EmitCTX1Indices
   3384 
   3385 params:	colorBlock	- 16 pixel block for which find color indexes
   3386 paramO:	minColor	- Min color found
   3387 paramO:	maxColor	- Max color found
   3388 return: 4 byte color index block
   3389 ========================
   3390 */
   3391 void idDxtEncoder::EmitCTX1Indices( const byte *colorBlock, const byte *minColor, const byte *maxColor ) {
   3392 	ALIGN16( uint16 colors[4][2] );
   3393 	unsigned int result = 0;
   3394 
   3395 	colors[0][0] = maxColor[0];
   3396 	colors[0][1] = maxColor[1];
   3397 	colors[1][0] = minColor[0];
   3398 	colors[1][1] = minColor[1];
   3399 
   3400 	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
   3401 	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
   3402 	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
   3403 	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
   3404 
   3405 	for ( int i = 15; i >= 0; i-- ) {
   3406 		int c0, c1, m, d0, d1, d2, d3;
   3407 
   3408 		c0 = colorBlock[i*4+0];
   3409 		c1 = colorBlock[i*4+1];
   3410 
   3411 		m = colors[0][0] - c0;
   3412 		d0 = abs( m );
   3413 		m = colors[1][0] - c0;
   3414 		d1 = abs( m );
   3415 		m = colors[2][0] - c0;
   3416 		d2 = abs( m );
   3417 		m = colors[3][0] - c0;
   3418 		d3 = abs( m );
   3419 
   3420 		m = colors[0][1] - c1;
   3421 		d0 += abs( m );
   3422 		m = colors[1][1] - c1;
   3423 		d1 += abs( m );
   3424 		m = colors[2][1] - c1;
   3425 		d2 += abs( m );
   3426 		m = colors[3][1] - c1;
   3427 		d3 += abs( m );
   3428 
   3429 		bool b0 = d0 > d3;
   3430 		bool b1 = d1 > d2;
   3431 		bool b2 = d0 > d2;
   3432 		bool b3 = d1 > d3;
   3433 		bool b4 = d2 > d3;
   3434 
   3435 		int x0 = b1 & b2;
   3436 		int x1 = b0 & b3;
   3437 		int x2 = b0 & b4;
   3438 
   3439 		result |= ( x2 | ( ( x0 | x1 ) << 1 ) ) << ( i << 1 );
   3440 	}
   3441 
   3442 	EmitUInt( result );
   3443 }
   3444 
   3445 /*
   3446 ========================
   3447 idDxtEncoder::EmitAlphaIndices
   3448 
   3449 params:	colorBlock	- 16 pixel block for which find alpha indexes
   3450 paramO:	minAlpha	- Min alpha found
   3451 paramO:	maxAlpha	- Max alpha found
   3452 ========================
   3453 */
   3454 void idDxtEncoder::EmitAlphaIndices( const byte *colorBlock, const int offset, const byte minAlpha, const byte maxAlpha ) {
   3455 
   3456 	assert( maxAlpha >= minAlpha );
   3457 
   3458 	const int ALPHA_RANGE = 7;
   3459 
   3460 #if 1
   3461 
   3462 	byte ab1, ab2, ab3, ab4, ab5, ab6, ab7;
   3463 	ALIGN16( byte indexes[16] );
   3464 
   3465 	ab1 = ( 13 * maxAlpha +  1 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3466 	ab2 = ( 11 * maxAlpha +  3 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3467 	ab3 = (  9 * maxAlpha +  5 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3468 	ab4 = (  7 * maxAlpha +  7 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3469 	ab5 = (  5 * maxAlpha +  9 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3470 	ab6 = (  3 * maxAlpha + 11 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3471 	ab7 = (  1 * maxAlpha + 13 * minAlpha + ALPHA_RANGE ) / (ALPHA_RANGE*2);
   3472 
   3473 	colorBlock += offset;
   3474 
   3475 	for ( int i = 0; i < 16; i++ ) {
   3476 		byte a = colorBlock[i*4];
   3477 		int b1 = ( a >= ab1 );
   3478 		int b2 = ( a >= ab2 );
   3479 		int b3 = ( a >= ab3 );
   3480 		int b4 = ( a >= ab4 );
   3481 		int b5 = ( a >= ab5 );
   3482 		int b6 = ( a >= ab6 );
   3483 		int b7 = ( a >= ab7 );
   3484 		int index = ( 8 - b1 - b2 - b3 - b4 - b5 - b6 - b7 ) & 7;
   3485 		indexes[i] = byte( index ^ ( 2 > index ) );
   3486 	}
   3487 
   3488 	EmitByte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6) );
   3489 	EmitByte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7) );
   3490 	EmitByte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5) );
   3491 
   3492 	EmitByte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6) );
   3493 	EmitByte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7) );
   3494 	EmitByte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5) );
   3495 
   3496 #elif 0
   3497 
   3498 	ALIGN16( byte indexes[16] );
   3499 	byte delta = maxAlpha - minAlpha;
   3500 	byte half = delta >> 1;
   3501 	byte bias = delta / ( 2 * ALPHA_RANGE );
   3502 	byte bottom = minAlpha + bias;
   3503 	byte top = maxAlpha - bias;
   3504 
   3505 	colorBlock += offset;
   3506 
   3507 	for ( int i = 0; i < 16; i++ ) {
   3508 		byte a = colorBlock[i*4];
   3509 		if ( a <= bottom ) {
   3510 			indexes[i] = 1;
   3511 		} else if ( a >= top ) {
   3512 			indexes[i] = 0;
   3513 		} else {
   3514 			indexes[i] = (ALPHA_RANGE+1) + ( ( minAlpha - a ) * ALPHA_RANGE - half ) / delta;
   3515 		}
   3516 	}
   3517 
   3518 	EmitByte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6) );
   3519 	EmitByte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7) );
   3520 	EmitByte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5) );
   3521 
   3522 	EmitByte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6) );
   3523 	EmitByte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7) );
   3524 	EmitByte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5) );
   3525 
   3526 #elif 0
   3527 
   3528 	ALIGN16( byte indexes[16] );
   3529 	byte delta = maxAlpha - minAlpha;
   3530 	byte half = delta >> 1;
   3531 	byte bias = delta / ( 2 * ALPHA_RANGE );
   3532 	byte bottom = minAlpha + bias;
   3533 	byte top = maxAlpha - bias;
   3534 
   3535 	colorBlock += offset;
   3536 
   3537 	for ( int i = 0; i < 16; i++ ) {
   3538 		byte a = colorBlock[i*4];
   3539 		int index = (ALPHA_RANGE+1) + ( ( minAlpha - a ) * ALPHA_RANGE - half ) / delta;
   3540 		int c0 = a > bottom;
   3541 		int c1 = a < top;
   3542 		indexes[i] = ( index & -( c0 & c1 ) ) | ( c0 ^ 1 );
   3543 	}
   3544 
   3545 	EmitByte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6) );
   3546 	EmitByte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7) );
   3547 	EmitByte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5) );
   3548 
   3549 	EmitByte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6) );
   3550 	EmitByte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7) );
   3551 	EmitByte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5) );
   3552 
   3553 #else
   3554 
   3555 	ALIGN16( byte indexes[16] );
   3556 	ALIGN16( byte alphas[8] );
   3557 
   3558 	alphas[0] = maxAlpha;
   3559 	alphas[1] = minAlpha;
   3560 	alphas[2] = ( 6 * maxAlpha + 1 * minAlpha ) / ALPHA_RANGE;
   3561 	alphas[3] = ( 5 * maxAlpha + 2 * minAlpha ) / ALPHA_RANGE;
   3562 	alphas[4] = ( 4 * maxAlpha + 3 * minAlpha ) / ALPHA_RANGE;
   3563 	alphas[5] = ( 3 * maxAlpha + 4 * minAlpha ) / ALPHA_RANGE;
   3564 	alphas[6] = ( 2 * maxAlpha + 5 * minAlpha ) / ALPHA_RANGE;
   3565 	alphas[7] = ( 1 * maxAlpha + 6 * minAlpha ) / ALPHA_RANGE;
   3566 
   3567 	colorBlock += offset;
   3568 
   3569 	for ( int i = 0; i < 16; i++ ) {
   3570 		int minDist = INT_MAX;
   3571 		byte a = colorBlock[i*4];
   3572 		for ( int j = 0; j < 8; j++ ) {
   3573 			int dist = abs( a - alphas[j] );
   3574 			if ( dist < minDist ) {
   3575 				minDist = dist;
   3576 				indexes[i] = j;
   3577 			}
   3578 		}
   3579 	}
   3580 
   3581 	EmitByte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6) );
   3582 	EmitByte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7) );
   3583 	EmitByte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5) );
   3584 
   3585 	EmitByte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6) );
   3586 	EmitByte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7) );
   3587 	EmitByte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5) );
   3588 
   3589 #endif
   3590 }
   3591 
   3592 /*
   3593 ========================
   3594 idDxtEncoder::CompressImageDXT1Fast_Generic
   3595 
   3596 params:	inBuf		- image to compress
   3597 paramO:	outBuf		- result of compression
   3598 params:	width		- width of image
   3599 params:	height		- height of image
   3600 ========================
   3601 */
   3602 void idDxtEncoder::CompressImageDXT1Fast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   3603 	ALIGN16( byte block[64] );
   3604 	ALIGN16( byte minColor[4] );
   3605 	ALIGN16( byte maxColor[4] );
   3606 
   3607 	assert( width >= 4 && ( width & 3 ) == 0 );
   3608 	assert( height >= 4 && ( height & 3 ) == 0 );
   3609 
   3610 	this->width = width;
   3611 	this->height = height;
   3612 	this->outData = outBuf;
   3613 
   3614 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   3615 		for ( int i = 0; i < width; i += 4 ) {
   3616 
   3617 			ExtractBlock( inBuf + i * 4, width, block );
   3618 
   3619 			GetMinMaxBBox( block, minColor, maxColor );
   3620 			//SelectColorsDiagonal( block, minColor, maxColor );
   3621 			InsetColorsBBox( minColor, maxColor );
   3622 
   3623 			EmitUShort( ColorTo565( maxColor ) );
   3624 			EmitUShort( ColorTo565( minColor ) );
   3625 
   3626 			EmitColorIndices( block, minColor, maxColor );
   3627 		}
   3628 		outData += dstPadding;
   3629 		inBuf += srcPadding;
   3630 	}
   3631 }
   3632 
   3633 /*
   3634 ========================
   3635 idDxtEncoder::CompressImageDXT1AlphaFast_Generic
   3636 
   3637 params:	inBuf		- image to compress
   3638 paramO:	outBuf		- result of compression
   3639 params:	width		- width of image
   3640 params:	height		- height of image
   3641 ========================
   3642 */
   3643 void idDxtEncoder::CompressImageDXT1AlphaFast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   3644 	ALIGN16( byte block[64] );
   3645 	ALIGN16( byte minColor[4] );
   3646 	ALIGN16( byte maxColor[4] );
   3647 
   3648 	assert( width >= 4 && ( width & 3 ) == 0 );
   3649 	assert( height >= 4 && ( height & 3 ) == 0 );
   3650 
   3651 	this->width = width;
   3652 	this->height = height;
   3653 	this->outData = outBuf;
   3654 
   3655 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   3656 		for ( int i = 0; i < width; i += 4 ) {
   3657 
   3658 			ExtractBlock( inBuf + i * 4, width, block );
   3659 
   3660 			GetMinMaxBBox( block, minColor, maxColor );
   3661 			byte minAlpha = minColor[3];
   3662 			//SelectColorsDiagonal( block, minColor, maxColor );
   3663 			InsetColorsBBox( minColor, maxColor );
   3664 
   3665 			if ( minAlpha >= 128 ) {
   3666 				EmitUShort( ColorTo565( maxColor ) );
   3667 				EmitUShort( ColorTo565( minColor ) );
   3668 				EmitColorIndices( block, minColor, maxColor );
   3669 			} else {
   3670 				EmitUShort( ColorTo565( minColor ) );
   3671 				EmitUShort( ColorTo565( maxColor ) );
   3672 				EmitColorAlphaIndices( block, minColor, maxColor );
   3673 			}
   3674 		}
   3675 		outData += dstPadding;
   3676 		inBuf += srcPadding;
   3677 	}
   3678 }
   3679 
   3680 /*
   3681 ========================
   3682 idDxtEncoder::CompressImageDXT5Fast_Generic
   3683 
   3684 params:	inBuf		- image to compress
   3685 paramO:	outBuf		- result of compression
   3686 params:	width		- width of image
   3687 params:	height		- height of image
   3688 ========================
   3689 */
   3690 void idDxtEncoder::CompressImageDXT5Fast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   3691 	ALIGN16( byte block[64] );
   3692 	ALIGN16( byte minColor[4] );
   3693 	ALIGN16( byte maxColor[4] );
   3694 
   3695 	assert( width >= 4 && ( width & 3 ) == 0 );
   3696 	assert( height >= 4 && ( height & 3 ) == 0 );
   3697 
   3698 	this->width = width;
   3699 	this->height = height;
   3700 	this->outData = outBuf;
   3701 
   3702 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   3703 		for ( int i = 0; i < width; i += 4 ) {
   3704 
   3705 			ExtractBlock( inBuf + i * 4, width, block );
   3706 
   3707 			GetMinMaxBBox( block, minColor, maxColor );
   3708 			//SelectColorsDiagonal( block, minColor, maxColor );
   3709 			InsetColorsBBox( minColor, maxColor );
   3710 
   3711 			EmitByte( maxColor[3] );
   3712 			EmitByte( minColor[3] );
   3713 
   3714 			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
   3715 
   3716 #ifdef NVIDIA_7X_HARDWARE_BUG_FIX
   3717 			// the colors are already always guaranteed to be sorted properly
   3718 #endif
   3719 
   3720 			EmitUShort( ColorTo565( maxColor ) );
   3721 			EmitUShort( ColorTo565( minColor ) );
   3722 
   3723 			EmitColorIndices( block, minColor, maxColor );
   3724 		}
   3725 		outData += dstPadding;
   3726 		inBuf += srcPadding;
   3727 	}
   3728 }
   3729 
   3730 /*
   3731 ========================
   3732 idDxtEncoder::ScaleYCoCg
   3733 ========================
   3734 */
   3735 void idDxtEncoder::ScaleYCoCg( byte *colorBlock, byte *minColor, byte *maxColor ) const {
   3736 	int m0 = abs( minColor[0] - 128 );
   3737 	int m1 = abs( minColor[1] - 128 );
   3738 	int m2 = abs( maxColor[0] - 128 );
   3739 	int m3 = abs( maxColor[1] - 128 );
   3740 
   3741 	if ( m1 > m0 ) m0 = m1;
   3742 	if ( m3 > m2 ) m2 = m3;
   3743 	if ( m2 > m0 ) m0 = m2;
   3744 
   3745 	const int s0 = 128 / 2 - 1;
   3746 	const int s1 = 128 / 4 - 1;
   3747 
   3748 	int mask0 = -( m0 <= s0 );
   3749 	int mask1 = -( m0 <= s1 );
   3750 	int scale = 1 + ( 1 & mask0 ) + ( 2 & mask1 );
   3751 
   3752 	minColor[0] = byte( ( minColor[0] - 128 ) * scale + 128 );
   3753 	minColor[1] = byte( ( minColor[1] - 128 ) * scale + 128 );
   3754 	minColor[2] = byte( ( scale - 1 ) << 3 );
   3755 	maxColor[0] = byte( ( maxColor[0] - 128 ) * scale + 128 );
   3756 	maxColor[1] = byte( ( maxColor[1] - 128 ) * scale + 128 );
   3757 	maxColor[2] = byte( ( scale - 1 ) << 3 );
   3758 
   3759 	for ( int i = 0; i < 16; i++ ) {
   3760 		colorBlock[i*4+0] = byte( ( colorBlock[i*4+0] - 128 ) * scale + 128 );
   3761 		colorBlock[i*4+1] = byte( ( colorBlock[i*4+1] - 128 ) * scale + 128 );
   3762 	}
   3763 }
   3764 
/*
========================
idDxtEncoder::InsetYCoCgBBox

Pulls the bounding box end points towards each other for components 0, 1 and 3.
Component 2 is not touched by the active code path.

The function holds four alternative implementations selected by the preprocessor.
Only the last one ( #elif 1 ) is compiled; the first three are disabled.

params:	minColor	- low end point of the bounding box, modified in place
params:	maxColor	- high end point of the bounding box, modified in place
========================
*/
ID_INLINE void idDxtEncoder::InsetYCoCgBBox( byte *minColor, byte *maxColor ) const {

#if 0

	// disabled: byte arithmetic, inset = range >> shift with explicit saturation

	byte inset[4];

	inset[0] = ( maxColor[0] - minColor[0] ) >> INSET_COLOR_SHIFT;
	inset[1] = ( maxColor[1] - minColor[1] ) >> INSET_COLOR_SHIFT;
	inset[3] = ( maxColor[3] - minColor[3] ) >> INSET_ALPHA_SHIFT;

	minColor[0] = ( minColor[0] + inset[0] <= 255 ) ? minColor[0] + inset[0] : 255;
	minColor[1] = ( minColor[1] + inset[1] <= 255 ) ? minColor[1] + inset[1] : 255;
	minColor[3] = ( minColor[3] + inset[3] <= 255 ) ? minColor[3] + inset[3] : 255;

	maxColor[0] = ( maxColor[0] >= inset[0] ) ? maxColor[0] - inset[0] : 0;
	maxColor[1] = ( maxColor[1] >= inset[1] ) ? maxColor[1] - inset[1] : 0;
	maxColor[3] = ( maxColor[3] >= inset[3] ) ? maxColor[3] - inset[3] : 0;

	minColor[0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
	minColor[1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );

	maxColor[0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
	maxColor[1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );

#elif 0

	// disabled: floating point version working on values normalized to [0, 1]

	float inset[4];
	float minf[4];
	float maxf[4];

	for ( int i = 0; i < 4; i++ ) {
		minf[i] = minColor[i] / 255.0f;
		maxf[i] = maxColor[i] / 255.0f;
	}

	inset[0] = ( maxf[0] - minf[0] ) / 16.0f;
	inset[1] = ( maxf[1] - minf[1] ) / 16.0f;
	inset[2] = ( maxf[2] - minf[2] ) / 16.0f;
	inset[3] = ( maxf[3] - minf[3] ) / 32.0f;

	for ( int i = 0; i < 4; i++ ) {
		minf[i] = ( minf[i] + inset[i] <= 1.0f ) ? minf[i] + inset[i] : 1.0f;
		maxf[i] = ( maxf[i] >= inset[i] ) ? maxf[i] - inset[i] : 0;
	}

	// quantize to 5 and 6 bits: floor for the low end point, ceil for the high end point
	minColor[0] = ((int)floor( minf[0] * 31 )) & ( ( 1 << 5 ) - 1 );
	minColor[1] = ((int)floor( minf[1] * 63 )) & ( ( 1 << 6 ) - 1 );

	maxColor[0] = ((int)ceil( maxf[0] * 31 )) & ( ( 1 << 5 ) - 1 );
	maxColor[1] = ((int)ceil( maxf[1] * 63 )) & ( ( 1 << 6 ) - 1 );

	// expand back to 8 bits by replicating the upper bits into the lower bits
	minColor[0] = ( minColor[0] << 3 ) | ( minColor[0] >> 2 );
	minColor[1] = ( minColor[1] << 2 ) | ( minColor[1] >> 4 );

	maxColor[0] = ( maxColor[0] << 3 ) | ( maxColor[0] >> 2 );
	maxColor[1] = ( maxColor[1] << 2 ) | ( maxColor[1] >> 4 );

	minColor[3] = (int)floor( minf[3] * 255.0f );
	maxColor[3] = (int)ceil( maxf[3] * 255.0f );

#elif 0

	// disabled: integer version that quantizes components 0 and 1 to 5 and 6 bits
	// with explicit clamping and then expands them back to 8 bits

	int inset[4];
	int mini[4];
	int maxi[4];

	inset[0] = ( maxColor[0] - minColor[0] );
	inset[1] = ( maxColor[1] - minColor[1] );
	inset[3] = ( maxColor[3] - minColor[3] );

	mini[0] = ( minColor[0] << INSET_COLOR_SHIFT ) + inset[0];
	mini[1] = ( minColor[1] << INSET_COLOR_SHIFT ) + inset[1];
	mini[3] = ( minColor[3] << INSET_ALPHA_SHIFT ) + inset[3];

	maxi[0] = ( maxColor[0] << INSET_COLOR_SHIFT ) - inset[0];
	maxi[1] = ( maxColor[1] << INSET_COLOR_SHIFT ) - inset[1];
	maxi[3] = ( maxColor[3] << INSET_ALPHA_SHIFT ) - inset[3];

	mini[0] = ( mini[0] - ((1<<(3))-1) ) >> (INSET_COLOR_SHIFT+3);
	mini[1] = ( mini[1] - ((1<<(3))-1) ) >> (INSET_COLOR_SHIFT+2);
	mini[3] = ( mini[3] - ((1<<(2))-1) ) >> (INSET_ALPHA_SHIFT+0);

	maxi[0] = ( maxi[0] + ((1<<(3))-1) ) >> (INSET_COLOR_SHIFT+3);
	maxi[1] = ( maxi[1] + ((1<<(3))-1) ) >> (INSET_COLOR_SHIFT+2);
	maxi[3] = ( maxi[3] + ((1<<(2))-1) ) >> (INSET_ALPHA_SHIFT+0);

	if ( mini[0] < 0 ) mini[0] = 0;
	if ( mini[1] < 0 ) mini[1] = 0;
	if ( mini[3] < 0 ) mini[3] = 0;

	if ( maxi[0] > 31 ) maxi[0] = 31;
	if ( maxi[1] > 63 ) maxi[1] = 63;
	if ( maxi[3] > 255 ) maxi[3] = 255;

	minColor[0] = ( mini[0] << 3 ) | ( mini[0] >> 2 );
	minColor[1] = ( mini[1] << 2 ) | ( mini[1] >> 4 );
	minColor[3] = mini[3];

	maxColor[0] = ( maxi[0] << 3 ) | ( maxi[0] >> 2 );
	maxColor[1] = ( maxi[1] << 2 ) | ( maxi[1] >> 4 );
	maxColor[3] = maxi[3];

#elif 1

	// active implementation

	int inset[4];
	int mini[4];
	int maxi[4];

	// inset is the range of the component minus a bias of ( 1 << ( shift - 1 ) ) - 1
	inset[0] = ( maxColor[0] - minColor[0] ) - ((1<<(INSET_COLOR_SHIFT-1))-1);
	inset[1] = ( maxColor[1] - minColor[1] ) - ((1<<(INSET_COLOR_SHIFT-1))-1);
	inset[3] = ( maxColor[3] - minColor[3] ) - ((1<<(INSET_ALPHA_SHIFT-1))-1);

	// work in fixed point with 'shift' fractional bits: add the inset to the low end point ...
	mini[0] = ( ( minColor[0] << INSET_COLOR_SHIFT ) + inset[0] ) >> INSET_COLOR_SHIFT;
	mini[1] = ( ( minColor[1] << INSET_COLOR_SHIFT ) + inset[1] ) >> INSET_COLOR_SHIFT;
	mini[3] = ( ( minColor[3] << INSET_ALPHA_SHIFT ) + inset[3] ) >> INSET_ALPHA_SHIFT;

	// ... and subtract it from the high end point
	maxi[0] = ( ( maxColor[0] << INSET_COLOR_SHIFT ) - inset[0] ) >> INSET_COLOR_SHIFT;
	maxi[1] = ( ( maxColor[1] << INSET_COLOR_SHIFT ) - inset[1] ) >> INSET_COLOR_SHIFT;
	maxi[3] = ( ( maxColor[3] << INSET_ALPHA_SHIFT ) - inset[3] ) >> INSET_ALPHA_SHIFT;

	// the inset can be negative for small ranges, so clamp to [0, 255]
	mini[0] = ( mini[0] >= 0 ) ? mini[0] : 0;
	mini[1] = ( mini[1] >= 0 ) ? mini[1] : 0;
	mini[3] = ( mini[3] >= 0 ) ? mini[3] : 0;

	maxi[0] = ( maxi[0] <= 255 ) ? maxi[0] : 255;
	maxi[1] = ( maxi[1] <= 255 ) ? maxi[1] : 255;
	maxi[3] = ( maxi[3] <= 255 ) ? maxi[3] : 255;

	// NOTE(review): C565_5_MASK / C565_6_MASK are defined elsewhere; presumably they keep the
	// top 5 / 6 bits so that the result matches what a 565 round trip reproduces - confirm
	minColor[0] = byte( ( mini[0] & C565_5_MASK ) | ( mini[0] >> 5 ) );
	minColor[1] = byte( ( mini[1] & C565_6_MASK ) | ( mini[1] >> 6 ) );
	minColor[3] = byte( mini[3] );

	maxColor[0] = byte( ( maxi[0] & C565_5_MASK ) | ( maxi[0] >> 5 ) );
	maxColor[1] = byte( ( maxi[1] & C565_6_MASK ) | ( maxi[1] >> 6 ) );
	maxColor[3] = byte( maxi[3] );

#endif
}
   3908 
   3909 /*
   3910 ========================
   3911 idDxtEncoder::InsetYCoCgAlpaBBox
   3912 ========================
   3913 */
   3914 ID_INLINE void idDxtEncoder::InsetYCoCgAlpaBBox( byte *minColor, byte *maxColor ) const {
   3915 	int inset[4];
   3916 	int mini[4];
   3917 	int maxi[4];
   3918 
   3919 	inset[0] = ( maxColor[0] - minColor[0] ) - ((1<<(INSET_COLOR_SHIFT-1))-1);
   3920 	inset[1] = ( maxColor[1] - minColor[1] ) - ((1<<(INSET_COLOR_SHIFT-1))-1);
   3921 	inset[2] = ( maxColor[2] - minColor[2] ) - ((1<<(INSET_COLOR_SHIFT-1))-1);
   3922 	inset[3] = ( maxColor[3] - minColor[3] ) - ((1<<(INSET_ALPHA_SHIFT-1))-1);
   3923 
   3924 	mini[0] = ( ( minColor[0] << INSET_COLOR_SHIFT ) + inset[0] ) >> INSET_COLOR_SHIFT;
   3925 	mini[1] = ( ( minColor[1] << INSET_COLOR_SHIFT ) + inset[1] ) >> INSET_COLOR_SHIFT;
   3926 	mini[2] = ( ( minColor[2] << INSET_COLOR_SHIFT ) + inset[2] ) >> INSET_COLOR_SHIFT;
   3927 	mini[3] = ( ( minColor[3] << INSET_ALPHA_SHIFT ) + inset[3] ) >> INSET_ALPHA_SHIFT;
   3928 
   3929 	maxi[0] = ( ( maxColor[0] << INSET_COLOR_SHIFT ) - inset[0] ) >> INSET_COLOR_SHIFT;
   3930 	maxi[1] = ( ( maxColor[1] << INSET_COLOR_SHIFT ) - inset[1] ) >> INSET_COLOR_SHIFT;
   3931 	maxi[2] = ( ( maxColor[2] << INSET_COLOR_SHIFT ) - inset[2] ) >> INSET_COLOR_SHIFT;
   3932 	maxi[3] = ( ( maxColor[3] << INSET_ALPHA_SHIFT ) - inset[3] ) >> INSET_ALPHA_SHIFT;
   3933 
   3934 	mini[0] = ( mini[0] >= 0 ) ? mini[0] : 0;
   3935 	mini[1] = ( mini[1] >= 0 ) ? mini[1] : 0;
   3936 	mini[2] = ( mini[2] >= 0 ) ? mini[2] : 0;
   3937 	mini[3] = ( mini[3] >= 0 ) ? mini[3] : 0;
   3938 
   3939 	maxi[0] = ( maxi[0] <= 255 ) ? maxi[0] : 255;
   3940 	maxi[1] = ( maxi[1] <= 255 ) ? maxi[1] : 255;
   3941 	maxi[2] = ( maxi[2] <= 255 ) ? maxi[2] : 255;
   3942 	maxi[3] = ( maxi[3] <= 255 ) ? maxi[3] : 255;
   3943 
   3944 	minColor[0] = byte( ( mini[0] & C565_5_MASK ) | ( mini[0] >> 5 ) );
   3945 	minColor[1] = byte( ( mini[1] & C565_6_MASK ) | ( mini[1] >> 6 ) );
   3946 	minColor[2] = byte( ( mini[2] & C565_5_MASK ) | ( mini[2] >> 5 ) );
   3947 	minColor[3] = byte( mini[3] );
   3948 
   3949 	maxColor[0] = byte( ( maxi[0] & C565_5_MASK ) | ( maxi[0] >> 5 ) );
   3950 	maxColor[1] = byte( ( maxi[1] & C565_6_MASK ) | ( maxi[1] >> 6 ) );
   3951 	maxColor[2] = byte( ( maxi[2] & C565_5_MASK ) | ( maxi[2] >> 5 ) );
   3952 	maxColor[3] = byte( maxi[3] );
   3953 }
   3954 
   3955 /*
   3956 ========================
   3957 idDxtEncoder::SelectYCoCgDiagonal
   3958 ========================
   3959 */
   3960 void idDxtEncoder::SelectYCoCgDiagonal( const byte *colorBlock, byte *minColor, byte *maxColor ) const {
   3961 	byte side = 0;
   3962 
   3963 	byte mid0 = byte( ( (int) minColor[0] + maxColor[0] + 1 ) >> 1 );
   3964 	byte mid1 = byte( ( (int) minColor[1] + maxColor[1] + 1 ) >> 1 );
   3965 
   3966 	for ( int i = 0; i < 16; i++ ) {
   3967 		byte b0 = colorBlock[i*4+0] >= mid0;
   3968 		byte b1 = colorBlock[i*4+1] >= mid1;
   3969 		side += ( b0 ^ b1 );
   3970 	}
   3971 
   3972 	byte mask = -( side > 8 );
   3973 
   3974 #if defined NVIDIA_7X_HARDWARE_BUG_FIX
   3975 	mask &= -( minColor[0] != maxColor[0] );
   3976 #endif
   3977 
   3978 	byte c0 = minColor[1];
   3979 	byte c1 = maxColor[1];
   3980 
   3981 	c0 ^= c1;
   3982 	mask &= c0;
   3983 	c1 ^= mask;
   3984 	c0 ^= c1;
   3985 
   3986 	minColor[1] = c0;
   3987 	maxColor[1] = c1;
   3988 }
   3989 
   3990 /*
   3991 ========================
   3992 idDxtEncoder::CompressYCoCgDXT5Fast_Generic
   3993 
   3994 params:	inBuf		- image to compress
   3995 paramO:	outBuf		- result of compression
   3996 params:	width		- width of image
   3997 params:	height		- height of image
   3998 ========================
   3999 */
   4000 void idDxtEncoder::CompressYCoCgDXT5Fast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   4001 	ALIGN16( byte block[64] );
   4002 	ALIGN16( byte minColor[4] );
   4003 	ALIGN16( byte maxColor[4] );
   4004 
   4005 	//assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
   4006 
   4007 	assert( width >= 4 && ( width & 3 ) == 0 );
   4008 	assert( height >= 4 && ( height & 3 ) == 0 );
   4009 
   4010 	this->width = width;
   4011 	this->height = height;
   4012 	this->outData = outBuf;
   4013 
   4014 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   4015 		for ( int i = 0; i < width; i += 4 ) {
   4016 
   4017 			ExtractBlock( inBuf + i * 4, width, block );
   4018 
   4019 			GetMinMaxBBox( block, minColor, maxColor );
   4020 			ScaleYCoCg( block, minColor, maxColor );
   4021 			InsetYCoCgBBox( minColor, maxColor );
   4022 			SelectYCoCgDiagonal( block, minColor, maxColor );
   4023 
   4024 			EmitByte( maxColor[3] );
   4025 			EmitByte( minColor[3] );
   4026 
   4027 			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
   4028 
   4029 #ifdef NVIDIA_7X_HARDWARE_BUG_FIX
   4030 			// the colors are already sorted when selecting the diagonal
   4031 #endif
   4032 
   4033 			EmitUShort( ColorTo565( maxColor ) );
   4034 			EmitUShort( ColorTo565( minColor ) );
   4035 
   4036 			EmitColorIndices( block, minColor, maxColor );
   4037 		}
   4038 		outData += dstPadding;
   4039 		inBuf += srcPadding;
   4040 	}
   4041 }
   4042 
   4043 /*
   4044 ========================
   4045 idDxtEncoder::CompressYCoCgAlphaDXT5Fast
   4046 
   4047 params:	inBuf		- image to compress
   4048 paramO:	outBuf		- result of compression
   4049 params:	width		- width of image
   4050 params:	height		- height of image
   4051 ========================
   4052 */
   4053 void idDxtEncoder::CompressYCoCgAlphaDXT5Fast( const byte *inBuf, byte *outBuf, int width, int height ) {
   4054 	ALIGN16( byte block[64] );
   4055 	ALIGN16( byte minColor[4] );
   4056 	ALIGN16( byte maxColor[4] );
   4057 
   4058 	assert( width >= 4 && ( width & 3 ) == 0 );
   4059 	assert( height >= 4 && ( height & 3 ) == 0 );
   4060 
   4061 	this->width = width;
   4062 	this->height = height;
   4063 	this->outData = outBuf;
   4064 
   4065 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   4066 		for ( int i = 0; i < width; i += 4 ) {
   4067 
   4068 			ExtractBlock( inBuf + i * 4, width, block );
   4069 
   4070 			// scale down the chroma of texels that are close to gray with low luminance
   4071 			for ( int k = 0; k < 16; k++ ) {
   4072 				if ( abs( block[k*4+0] - 132 ) <= 8 &&
   4073 						abs( block[k*4+2] - 132 ) <= 8 &&
   4074 							block[k*4+3] < 96 ) {
   4075 					block[k*4+0] = ( block[k*4+0] - 132 ) / 2 + 132;
   4076 					block[k*4+2] = ( block[k*4+2] - 132 ) / 2 + 132;
   4077 				}
   4078 			}
   4079 
   4080 			GetMinMaxBBox( block, minColor, maxColor );
   4081 			InsetYCoCgAlpaBBox( minColor, maxColor );
   4082 			SelectColorsDiagonal( block, minColor, maxColor );
   4083 
   4084 			EmitByte( maxColor[3] );
   4085 			EmitByte( minColor[3] );
   4086 
   4087 			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
   4088 
   4089 #ifdef NVIDIA_7X_HARDWARE_BUG_FIX
   4090 			// the colors are already sorted when selecting the diagonal
   4091 #endif
   4092 
   4093 			EmitUShort( ColorTo565( maxColor ) );
   4094 			EmitUShort( ColorTo565( minColor ) );
   4095 
   4096 			EmitColorIndices( block, minColor, maxColor );
   4097 		}
   4098 		outData += dstPadding;
   4099 		inBuf += srcPadding;
   4100 	}
   4101 }
   4102 
   4103 /*
   4104 ========================
   4105 idDxtEncoder::CompressYCoCgCTX1DXT5AFast_Generic
   4106 
   4107 params:	inBuf		- image to compress
   4108 paramO:	outBuf		- result of compression
   4109 params:	width		- width of image
   4110 params:	height		- height of image
   4111 ========================
   4112 */
   4113 void idDxtEncoder::CompressYCoCgCTX1DXT5AFast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   4114 	ALIGN16( byte block[64] );
   4115 	ALIGN16( byte minColor[4] );
   4116 	ALIGN16( byte maxColor[4] );
   4117 
   4118 	assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
   4119 
   4120 	assert( width >= 4 && ( width & 3 ) == 0 );
   4121 	assert( height >= 4 && ( height & 3 ) == 0 );
   4122 
   4123 	this->width = width;
   4124 	this->height = height;
   4125 	this->outData = outBuf;
   4126 
   4127 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   4128 		for ( int i = 0; i < width; i += 4 ) {
   4129 
   4130 			ExtractBlock( inBuf + i * 4, width, block );
   4131 
   4132 			GetMinMaxBBox( block, minColor, maxColor );
   4133 			SelectYCoCgDiagonal( block, minColor, maxColor );
   4134 			InsetColorsBBox( minColor, maxColor );
   4135 
   4136 			EmitByte( maxColor[3] );
   4137 			EmitByte( minColor[3] );
   4138 
   4139 			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
   4140 
   4141 			EmitByte( maxColor[0] );
   4142 			EmitByte( maxColor[1] );
   4143 			EmitByte( minColor[0] );
   4144 			EmitByte( minColor[1] );
   4145 
   4146 			EmitCTX1Indices( block, minColor, maxColor );
   4147 		}
   4148 		outData += dstPadding;
   4149 		inBuf += srcPadding;
   4150 	}
   4151 }
   4152 
   4153 /*
   4154 ========================
   4155 idDxtEncoder::EmitGreenIndices
   4156 
   4157 params:	block		- block for which to find green indices
   4158 paramO:	minGreen	- Min green found
   4159 paramO:	maxGreen	- Max green found
   4160 ========================
   4161 */
   4162 void idDxtEncoder::EmitGreenIndices( const byte *block, const int offset, const byte minGreen, const byte maxGreen ) {
   4163 
   4164 	assert( maxGreen >= minGreen );
   4165 
   4166 	const int COLOR_RANGE = 3;
   4167 
   4168 #if 1
   4169 
   4170 	byte yb1 = ( 5 * maxGreen + 1 * minGreen + COLOR_RANGE ) / ( 2 * COLOR_RANGE );
   4171 	byte yb2 = ( 3 * maxGreen + 3 * minGreen + COLOR_RANGE ) / ( 2 * COLOR_RANGE );
   4172 	byte yb3 = ( 1 * maxGreen + 5 * minGreen + COLOR_RANGE ) / ( 2 * COLOR_RANGE );
   4173 
   4174 	unsigned int result = 0;
   4175 
   4176 	block += offset;
   4177 
   4178 	for ( int i = 15; i >= 0; i-- ) {
   4179 		result <<= 2;
   4180 		byte y = block[i*4];
   4181 		int b1 = ( y >= yb1 );
   4182 		int b2 = ( y >= yb2 );
   4183 		int b3 = ( y >= yb3 );
   4184 		int index = ( 4 - b1 - b2 - b3 ) & 3;
   4185 		index ^= ( 2 > index );
   4186 		result |= index;
   4187 	}
   4188 
   4189 	EmitUInt( result );
   4190 
   4191 #else
   4192 
   4193 	byte green[4];
   4194 
   4195 	green[0] = maxGreen;
   4196 	green[1] = minGreen;
   4197 	green[2] = ( 2 * green[0] + 1 * green[1] ) / 3;
   4198 	green[3] = ( 1 * green[0] + 2 * green[1] ) / 3;
   4199 
   4200 	unsigned int result = 0;
   4201 
   4202 	block += offset;
   4203 
   4204 	for ( int i = 15; i >= 0; i-- ) {
   4205 		result <<= 2;
   4206 		byte y = block[i*4];
   4207 		int minDist = INT_MAX;
   4208 		int index;
   4209 		for ( int j = 0; j < 4; j++ ) {
   4210 			int dist = abs( y - green[j] );
   4211 			if ( dist < minDist ) {
   4212 				minDist = dist;
   4213 				index = j;
   4214 			}
   4215 		}
   4216 		result |= index;
   4217 	}
   4218 
   4219 	EmitUInt( result );
   4220 
   4221 #endif
   4222 }
   4223 
   4224 /*
   4225 ========================
   4226 idDxtEncoder::InsetNormalsBBoxDXT5
   4227 ========================
   4228 */
   4229 void idDxtEncoder::InsetNormalsBBoxDXT5( byte *minNormal, byte *maxNormal ) const {
   4230 	int inset[4];
   4231 	int mini[4];
   4232 	int maxi[4];
   4233 
   4234 	inset[3] = ( maxNormal[3] - minNormal[3] ) - ((1<<(INSET_ALPHA_SHIFT-1))-1);
   4235 	inset[1] = ( maxNormal[1] - minNormal[1] ) - ((1<<(INSET_COLOR_SHIFT-1))-1);
   4236 
   4237 	mini[3] = ( ( minNormal[3] << INSET_ALPHA_SHIFT ) + inset[3] ) >> INSET_ALPHA_SHIFT;
   4238 	mini[1] = ( ( minNormal[1] << INSET_COLOR_SHIFT ) + inset[1] ) >> INSET_COLOR_SHIFT;
   4239 
   4240 	maxi[3] = ( ( maxNormal[3] << INSET_ALPHA_SHIFT ) - inset[3] ) >> INSET_ALPHA_SHIFT;
   4241 	maxi[1] = ( ( maxNormal[1] << INSET_COLOR_SHIFT ) - inset[1] ) >> INSET_COLOR_SHIFT;
   4242 
   4243 	mini[3] = ( mini[3] >= 0 ) ? mini[3] : 0;
   4244 	mini[1] = ( mini[1] >= 0 ) ? mini[1] : 0;
   4245 
   4246 	maxi[3] = ( maxi[3] <= 255 ) ? maxi[3] : 255;
   4247 	maxi[1] = ( maxi[1] <= 255 ) ? maxi[1] : 255;
   4248 
   4249 	minNormal[3] = byte( mini[3] );
   4250 	minNormal[1] = byte( ( mini[1] & C565_6_MASK ) | ( mini[1] >> 6 ) );
   4251 
   4252 	maxNormal[3] = byte( maxi[3] );
   4253 	maxNormal[1] = byte( ( maxi[1] & C565_6_MASK ) | ( maxi[1] >> 6 ) );
   4254 }
   4255 
   4256 /*
   4257 ========================
   4258 idDxtEncoder::InsetNormalsBBox3Dc
   4259 ========================
   4260 */
   4261 void idDxtEncoder::InsetNormalsBBox3Dc( byte *minNormal, byte *maxNormal ) const {
   4262     int inset[4];
   4263     int mini[4];
   4264     int maxi[4];
   4265 
   4266     inset[0] = ( maxNormal[0] - minNormal[0] ) - ((1<<(INSET_ALPHA_SHIFT-1))-1);
   4267     inset[1] = ( maxNormal[1] - minNormal[1] ) - ((1<<(INSET_ALPHA_SHIFT-1))-1);
   4268 
   4269     mini[0] = ( ( minNormal[0] << INSET_ALPHA_SHIFT ) + inset[0] ) >> INSET_ALPHA_SHIFT;
   4270     mini[1] = ( ( minNormal[1] << INSET_ALPHA_SHIFT ) + inset[1] ) >> INSET_ALPHA_SHIFT;
   4271 
   4272     maxi[0] = ( ( maxNormal[0] << INSET_ALPHA_SHIFT ) - inset[0] ) >> INSET_ALPHA_SHIFT;
   4273     maxi[1] = ( ( maxNormal[1] << INSET_ALPHA_SHIFT ) - inset[1] ) >> INSET_ALPHA_SHIFT;
   4274 
   4275     mini[0] = ( mini[0] >= 0 ) ? mini[0] : 0;
   4276     mini[1] = ( mini[1] >= 0 ) ? mini[1] : 0;
   4277 
   4278     maxi[0] = ( maxi[0] <= 255 ) ? maxi[0] : 255;
   4279     maxi[1] = ( maxi[1] <= 255 ) ? maxi[1] : 255;
   4280 
   4281     minNormal[0] = (byte)mini[0];
   4282     minNormal[1] = (byte)mini[1];
   4283 
   4284     maxNormal[0] = (byte)maxi[0];
   4285     maxNormal[1] = (byte)maxi[1];
   4286 }
   4287 
   4288 /*
   4289 ========================
   4290 idDxtEncoder::CompressNormalMapDXT5Fast_Generic
   4291 
   4292 params:	inBuf		- image to compress
   4293 paramO:	outBuf		- result of compression
   4294 params:	width		- width of image
   4295 params:	height		- height of image
   4296 ========================
   4297 */
   4298 void idDxtEncoder::CompressNormalMapDXT5Fast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   4299 	ALIGN16( byte block[64] );
   4300 	ALIGN16( byte normal1[4] );
   4301 	ALIGN16( byte normal2[4] );
   4302 
   4303 	assert( width >= 4 && ( width & 3 ) == 0 );
   4304 	assert( height >= 4 && ( height & 3 ) == 0 );
   4305 
   4306 	this->width = width;
   4307 	this->height = height;
   4308 	this->outData = outBuf;
   4309 
   4310 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   4311 		for ( int i = 0; i < width; i += 4 ) {
   4312 
   4313 			ExtractBlock( inBuf + i * 4, width, block );
   4314 
   4315 			GetMinMaxBBox( block, normal1, normal2 );
   4316 			InsetNormalsBBoxDXT5( normal1, normal2 );
   4317 
   4318 			// Write out Nx into alpha channel.
   4319 			EmitByte( normal2[3] );
   4320 			EmitByte( normal1[3] );
   4321 			EmitAlphaIndices( block, 3, normal1[3], normal2[3] );
   4322 
   4323 			// Write out Ny into green channel.
   4324 			EmitUShort( ColorTo565( block[0], normal2[1], block[2] ) );
   4325 			EmitUShort( ColorTo565( block[0], normal1[1], block[2] ) );
   4326 			EmitGreenIndices( block, 1, normal1[1], normal2[1] );
   4327 		}
   4328 		outData += dstPadding;
   4329 		inBuf += srcPadding;
   4330 	}
   4331 }
   4332 
   4333 /*
   4334 ========================
   4335 idDxtEncoder::CompressImageDXN1Fast_Generic
   4336 
   4337 params:	inBuf		- image to compress
   4338 paramO:	outBuf		- result of compression
   4339 params:	width		- width of image
   4340 params:	height		- height of image
   4341 ========================
   4342 */
   4343 void idDxtEncoder::CompressImageDXN1Fast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   4344 	ALIGN16( byte block[64] );
   4345 	ALIGN16( byte min[4] );
   4346 	ALIGN16( byte max[4] );
   4347 
   4348 	assert( width >= 4 && ( width & 3 ) == 0 );
   4349 	assert( height >= 4 && ( height & 3 ) == 0 );
   4350 
   4351 	this->width = width;
   4352 	this->height = height;
   4353 	this->outData = outBuf;
   4354 
   4355 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   4356 		for ( int i = 0; i < width; i += 4 ) {
   4357 
   4358 			ExtractBlock( inBuf + i * 4, width, block );
   4359 
   4360 			GetMinMaxBBox( block, min, max );
   4361 			InsetNormalsBBox3Dc( min, max );
   4362 
   4363 			// Write out an alpha channel.
   4364 			EmitByte( max[0] );
   4365 			EmitByte( min[0] );
   4366 			EmitAlphaIndices( block, 0, min[0], max[0] );
   4367 		}
   4368 		outData += dstPadding;
   4369 		inBuf += srcPadding;
   4370 	}
   4371 }
   4372 
   4373 /*
   4374 ========================
   4375 idDxtEncoder::CompressNormalMapDXN2Fast_Generic
   4376 
   4377 params:	inBuf		- image to compress
   4378 paramO:	outBuf		- result of compression
   4379 params:	width		- width of image
   4380 params:	height		- height of image
   4381 ========================
   4382 */
   4383 void idDxtEncoder::CompressNormalMapDXN2Fast_Generic( const byte *inBuf, byte *outBuf, int width, int height ) {
   4384 	ALIGN16( byte block[64] );
   4385 	ALIGN16( byte normal1[4] );
   4386 	ALIGN16( byte normal2[4] );
   4387 
   4388 	assert( width >= 4 && ( width & 3 ) == 0 );
   4389 	assert( height >= 4 && ( height & 3 ) == 0 );
   4390 
   4391 	this->width = width;
   4392 	this->height = height;
   4393 	this->outData = outBuf;
   4394 
   4395 	for ( int j = 0; j < height; j += 4, inBuf += width * 4*4 ) {
   4396 		for ( int i = 0; i < width; i += 4 ) {
   4397 
   4398 			ExtractBlock( inBuf + i * 4, width, block );
   4399 
   4400 			GetMinMaxBBox( block, normal1, normal2 );
   4401 			InsetNormalsBBox3Dc( normal1, normal2 );
   4402 
   4403 			// Write out Nx as an alpha channel.
   4404 			EmitByte( normal2[0] );
   4405 			EmitByte( normal1[0] );
   4406 			EmitAlphaIndices( block, 0, normal1[0], normal2[0] );
   4407 
   4408 			// Write out Ny as an alpha channel.
   4409 			EmitByte( normal2[1] );
   4410 			EmitByte( normal1[1] );
   4411 			EmitAlphaIndices( block, 1, normal1[1], normal2[1] );
   4412 		}
   4413 		outData += dstPadding;
   4414 		inBuf += srcPadding;
   4415 	}
   4416 }
   4417 
   4418 /*
   4419 ========================
   4420 idDxtEncoder::DecodeDXNAlphaValues
   4421 ========================
   4422 */
   4423 void idDxtEncoder::DecodeDXNAlphaValues( const byte *inBuf, byte *values ) {
   4424 	int i;
   4425 	unsigned int indices;
   4426 	byte alphas[8];
   4427 
   4428 	if ( inBuf[0] <= inBuf[1] ) {
   4429 		alphas[0] = inBuf[0];
   4430 		alphas[1] = inBuf[1];
   4431 		alphas[2] = ( 4 * alphas[0] + 1 * alphas[1] ) / 5;
   4432 		alphas[3] = ( 3 * alphas[0] + 2 * alphas[1] ) / 5;
   4433 		alphas[4] = ( 2 * alphas[0] + 3 * alphas[1] ) / 5;
   4434 		alphas[5] = ( 1 * alphas[0] + 4 * alphas[1] ) / 5;
   4435 		alphas[6] = 0;
   4436 		alphas[7] = 255;
   4437 	} else {
   4438 		alphas[0] = inBuf[0];
   4439 		alphas[1] = inBuf[1];
   4440 		alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
   4441 		alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
   4442 		alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
   4443 		alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
   4444 		alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
   4445 		alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
   4446 	}
   4447 
   4448 	indices = (int)inBuf[2] | ( (int)inBuf[3] << 8 ) | ( (int)inBuf[4] << 16 );
   4449 	for ( i = 0; i < 8; i++ ) {
   4450 		values[i] = alphas[indices & 7];
   4451 		indices >>= 3;
   4452 	}
   4453 
   4454 	indices = (int)inBuf[5] | ( (int)inBuf[6] << 8 ) | ( (int)inBuf[7] << 16 );
   4455 	for ( i = 8; i < 16; i++ ) {
   4456 		values[i] = alphas[indices & 7];
   4457 		indices >>= 3;
   4458 	}
   4459 }
   4460 
   4461 /*
   4462 ========================
   4463 idDxtEncoder::EncodeNormalRGBIndices
   4464 
   4465 params:	values	- 16 normal block for which to find normal Y indices
   4466 paramO:	min		- Min grayscale value
   4467 paramO:	max		- Max grayscale value
   4468 ========================
   4469 */
   4470 void idDxtEncoder::EncodeNormalRGBIndices( byte *outBuf, const byte min, const byte max, const byte *values ) {
   4471 
   4472 	const int COLOR_RANGE = 3;
   4473 
   4474 	byte maskedMin, maskedMax, mid, yb1, yb2, yb3;
   4475 
   4476 	maskedMax = max & C565_6_MASK;
   4477 	maskedMin = min & C565_6_MASK;
   4478 	mid = ( maskedMax - maskedMin ) / ( 2 * COLOR_RANGE );
   4479 
   4480 	yb1 = maskedMax - mid;
   4481 	yb2 = ( 2 * maskedMax + 1 * maskedMin ) / COLOR_RANGE - mid;
   4482 	yb3 = ( 1 * maskedMax + 2 * maskedMin ) / COLOR_RANGE - mid;
   4483 
   4484 	unsigned int result = 0;
   4485 
   4486 	for ( int i = 15; i >= 0; i-- ) {
   4487 		result <<= 2;
   4488 		byte y = values[i];
   4489 		int b1 = ( y >= yb1 );
   4490 		int b2 = ( y >= yb2 );
   4491 		int b3 = ( y >= yb3 );
   4492 		int index = ( 4 - b1 - b2 - b3 ) & 3;
   4493 		index ^= ( 2 > index );
   4494 		result |= index;
   4495 	}
   4496 
   4497 	unsigned short maskedMax5 = (max & C565_5_MASK) >> 3;
   4498 	unsigned short maskedMin5 = (min & C565_5_MASK) >> 3;
   4499 	
   4500 	unsigned short smax = (maskedMax5 << 11) | (maskedMax << 3) | maskedMax5;
   4501 	unsigned short smin = (maskedMin5 << 11) | (maskedMin << 3) | maskedMin5;
   4502 
   4503 	outBuf[0] = byte( ( smax >> 0 ) & 0xFF );
   4504 	outBuf[1] = byte( ( smax >> 8 ) & 0xFF );
   4505 	outBuf[2] = byte( ( smin >> 0 ) & 0xFF );
   4506 	outBuf[3] = byte( ( smin >> 8 ) & 0xFF );
   4507 
   4508 	outBuf[4] = byte( ( result >>  0 ) & 0xFF );
   4509 	outBuf[5] = byte( ( result >>  8 ) & 0xFF );
   4510 	outBuf[6] = byte( ( result >> 16 ) & 0xFF );
   4511 	outBuf[7] = byte( ( result >> 24 ) & 0xFF );
   4512 }
   4513 
   4514 /*
   4515 ========================
   4516 idDxtEncoder::ConvertNormalMapDXN2_DXT5
   4517 
   4518 params:	inBuf		- normal map compressed in DXN2 format
   4519 paramO:	outBuf		- result of compression in DXT5 format
   4520 params:	width		- width of image
   4521 params:	height		- height of image
   4522 ========================
   4523 */
   4524 void idDxtEncoder::ConvertNormalMapDXN2_DXT5( const byte *inBuf, byte *outBuf, int width, int height ) {
   4525 	ALIGN16( byte values[16] );
   4526 
   4527 	this->width = width;
   4528 	this->height = height;
   4529 	this->outData = outBuf;
   4530 
   4531 	if ( width > 4 && ( width & 3 ) != 0 ) {
   4532 		return;
   4533 	}
   4534 	if ( height > 4 && ( height & 3 ) != 0 ) {
   4535 		return;
   4536 	}
   4537 
   4538 	if ( width < 4 || height < 4 ) {
   4539 		assert( 0 );
   4540 		return;
   4541 	}
   4542 
   4543 	for ( int j = 0; j < height; j += 4 ) {
   4544 		for ( int i = 0; i < width; i += 4, inBuf += 16, outBuf += 16 ) {
   4545 
   4546 			// decode normal Y stored as a DXT5 alpha channel
   4547 			DecodeDXNAlphaValues( inBuf + 0, values );
   4548 
   4549 			// copy normal X
   4550 			memcpy( outBuf + 0, inBuf + 8, 8 );
   4551 
   4552 			// get the min/max Y
   4553 			byte minNormalY = 255;
   4554 			byte maxNormalY = 0;
   4555 			for ( int i = 0; i < 16; i++ ) {
   4556 				if ( values[i] < minNormalY ) {
   4557 					minNormalY = values[i];
   4558 				}
   4559 				if ( values[i] > maxNormalY ) {
   4560 					maxNormalY = values[i];
   4561 				}
   4562 			}
   4563 
   4564 			// encode normal Y into DXT5 color channels
   4565 			EncodeNormalRGBIndices( outBuf + 8, minNormalY, maxNormalY, values );
   4566 		}
   4567 		outData += dstPadding;
   4568 		inBuf += srcPadding;
   4569 	}
   4570 }
   4571 
   4572 /*
   4573 ========================
   4574 idDxtEncoder::DecodeNormalYValues
   4575 ========================
   4576 */
   4577 void idDxtEncoder::DecodeNormalYValues( const byte *inBuf, byte &min, byte &max, byte *values ) {
   4578 	int i;
   4579 	unsigned int indexes;
   4580 	unsigned short normal0, normal1;
   4581 	byte normalsY[4];
   4582 
   4583 	normal0 = inBuf[0] | (inBuf[1] << 8);
   4584 	normal1 = inBuf[2] | (inBuf[3] << 8);
   4585 
   4586 	assert( normal0 >= normal1 );
   4587 
   4588 	normalsY[0] = GreenFrom565( normal0 );
   4589 	normalsY[1] = GreenFrom565( normal1 );
   4590 	normalsY[2] = ( 2 * normalsY[0] + 1 * normalsY[1] ) / 3;
   4591 	normalsY[3] = ( 1 * normalsY[0] + 2 * normalsY[1] ) / 3;
   4592 
   4593 	indexes = (unsigned int)inBuf[4] | ((unsigned int)inBuf[5]<<8) | ((unsigned int)inBuf[6]<<16) | ((unsigned int)inBuf[7]<<24);
   4594 	for ( i = 0; i < 16; i++ ) {
   4595 		values[i] = normalsY[indexes & 3];
   4596 		indexes >>= 2;
   4597 	}
   4598 
   4599 	max = normalsY[0];
   4600 	min = normalsY[1];
   4601 }
   4602 
   4603 /*
   4604 ========================
   4605 idDxtEncoder::EncodeDXNAlphaValues
   4606 ========================
   4607 */
   4608 void idDxtEncoder::EncodeDXNAlphaValues( byte *outBuf, const byte min, const byte max, const byte *values ) {
   4609 	int i;
   4610 	byte alphas[8];
   4611 	int j;
   4612 	unsigned int indexes[16];
   4613 
   4614 	alphas[0] = max;
   4615 	alphas[1] = min;
   4616 	alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
   4617 	alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
   4618 	alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
   4619 	alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
   4620 	alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
   4621 	alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
   4622 
   4623 	int error = 0;
   4624 	for ( i = 0; i < 16; i++ ) {
   4625 		int minDist = MAX_TYPE( int );
   4626 		byte a = values[i];
   4627 		for ( j = 0; j < 8; j++ ) {
   4628 			int dist = AlphaDistance( a, alphas[j] );
   4629 			if ( dist < minDist ) {
   4630 				minDist = dist;
   4631 				indexes[i] = j;
   4632 			}
   4633 		}
   4634 		error += minDist;
   4635 	}
   4636 
   4637 	outBuf[0] = max;
   4638 	outBuf[1] = min;
   4639 
   4640 	outBuf[2] = byte( (indexes[ 0] >> 0) | (indexes[ 1] << 3) | (indexes[ 2] << 6) );
   4641 	outBuf[3] = byte( (indexes[ 2] >> 2) | (indexes[ 3] << 1) | (indexes[ 4] << 4) | (indexes[ 5] << 7) );
   4642 	outBuf[4] = byte( (indexes[ 5] >> 1) | (indexes[ 6] << 2) | (indexes[ 7] << 5) );
   4643 
   4644 	outBuf[5] = byte( (indexes[ 8] >> 0) | (indexes[ 9] << 3) | (indexes[10] << 6) );
   4645 	outBuf[6] = byte( (indexes[10] >> 2) | (indexes[11] << 1) | (indexes[12] << 4) | (indexes[13] << 7) );
   4646 	outBuf[7] = byte( (indexes[13] >> 1) | (indexes[14] << 2) | (indexes[15] << 5) );
   4647 }
   4648 
   4649 /*
   4650 ========================
   4651 idDxtEncoder::ConvertNormalMapDXT5_DXN2
   4652 
   4653 params:	inBuf		- image to compress
   4654 paramO:	outBuf		- result of compression
   4655 params:	width		- width of image
   4656 params:	height		- height of image
   4657 ========================
   4658 */
   4659 void idDxtEncoder::ConvertNormalMapDXT5_DXN2( const byte *inBuf, byte *outBuf, int width, int height ) {
   4660 	ALIGN16( byte values[16] );
   4661 	byte minNormalY, maxNormalY;
   4662 
   4663 	this->width = width;
   4664 	this->height = height;
   4665 	this->outData = outBuf;
   4666 
   4667 	if ( width > 4 && ( width & 3 ) != 0 ) {
   4668 		return;
   4669 	}
   4670 	if ( height > 4 && ( height & 3 ) != 0 ) {
   4671 		return;
   4672 	}
   4673 
   4674 	if ( width < 4 || height < 4 ) {
   4675 		assert( 0 );
   4676 		return;
   4677 	}
   4678 
   4679 	for ( int j = 0; j < height; j += 4 ) {
   4680 		for ( int i = 0; i < width; i += 4, inBuf += 16, outBuf += 16 ) {
   4681 
   4682 			// decode normal Y stored as a DXT5 alpha channel
   4683 			DecodeNormalYValues( inBuf + 8, minNormalY, maxNormalY, values );
   4684 
   4685 			memcpy( outBuf + 8, inBuf + 0, 8 );
   4686 
   4687 			// encode normal Y into DXT5 green channel
   4688 			EncodeDXNAlphaValues( outBuf + 0, minNormalY, maxNormalY, values );
   4689 		}
   4690 		outData += dstPadding;
   4691 		inBuf += srcPadding;
   4692 	}
   4693 }
   4694 
   4695 /*
   4696 ========================
   4697 idDxtEncoder::ConvertImageDXN1_DXT1
   4698 
   4699 params:	inBuf		- normal map compressed in DXN1 format
   4700 paramO:	outBuf		- result of compression in DXT1 format
   4701 params:	width		- width of image
   4702 params:	height		- height of image
   4703 ========================
   4704 */
   4705 void idDxtEncoder::ConvertImageDXN1_DXT1( const byte *inBuf, byte *outBuf, int width, int height ) {
   4706 	ALIGN16( byte values[16] );
   4707 
   4708 	this->width = width;
   4709 	this->height = height;
   4710 	this->outData = outBuf;
   4711 
   4712 	if ( width > 4 && ( width & 3 ) != 0 ) {
   4713 		return;
   4714 	}
   4715 	if ( height > 4 && ( height & 3 ) != 0 ) {
   4716 		return;
   4717 	}
   4718 
   4719 	if ( width < 4 || height < 4 ) {
   4720 		assert( 0 );
   4721 		return;
   4722 	}
   4723 
   4724 	for ( int j = 0; j < height; j += 4 ) {
   4725 		for ( int i = 0; i < width; i += 4, inBuf += 8, outBuf += 8 ) {
   4726 
   4727 			// decode single channel stored as a DXT5 alpha channel
   4728 			DecodeDXNAlphaValues( inBuf + 0, values );
   4729 
   4730 			// get the min/max
   4731 			byte min = 255;
   4732 			byte max = 0;
   4733 			for ( int i = 0; i < 16; i++ ) {
   4734 				if ( values[i] < min ) {
   4735 					min = values[i];
   4736 				}
   4737 				if ( values[i] > max ) {
   4738 					max = values[i];
   4739 				}
   4740 			}
   4741 
   4742 			// encode single channel into DXT1
   4743 			EncodeNormalRGBIndices( outBuf + 0, min, max, values );
   4744 		}
   4745 		outData += dstPadding;
   4746 		inBuf += srcPadding;
   4747 	}
   4748 }