LZW.CPP - CnC_Remastered_Collection - Command and Conquer: Red Alert

LZW.CPP (16866B)
      1 //
      2 // Copyright 2020 Electronic Arts Inc.
      3 //
      4 // TiberianDawn.DLL and RedAlert.dll and corresponding source code is free 
      5 // software: you can redistribute it and/or modify it under the terms of 
      6 // the GNU General Public License as published by the Free Software Foundation, 
      7 // either version 3 of the License, or (at your option) any later version.
      8 
      9 // TiberianDawn.DLL and RedAlert.dll and corresponding source code is distributed 
     10 // in the hope that it will be useful, but with permitted additional restrictions 
     11 // under Section 7 of the GPL. See the GNU General Public License in LICENSE.TXT 
     12 // distributed with this program. You should have received a copy of the 
     13 // GNU General Public License along with permitted additional restrictions 
     14 // with this program. If not, see https://github.com/electronicarts/CnC_Remastered_Collection
     15 
     16 /* $Header: /CounterStrike/LZW.CPP 1     3/03/97 10:25a Joe_bostic $ */
     17 /***********************************************************************************************
     18  ***              C O N F I D E N T I A L  ---  W E S T W O O D  S T U D I O S               ***
     19  ***********************************************************************************************
     20  *                                                                                             *
     21  *                 Project Name : Command & Conquer                                            *
     22  *                                                                                             *
     23  *                    File Name : LZW.CPP                                                      *
     24  *                                                                                             *
     25  *                   Programmer : Joe L. Bostic                                                *
     26  *                                                                                             *
     27  *                   Start Date : 08/28/96                                                     *
     28  *                                                                                             *
     29  *                  Last Update : August 28, 1996 [JLB]                                        *
     30  *                                                                                             *
     31  *---------------------------------------------------------------------------------------------*
     32  * Functions:                                                                                  *
     33  *   Find_Child_Node -- Find a matching dictionary entry.                                      *
     34  * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
     35 
     36 #include <stdio.h>
     37 #include <stdlib.h>
     38 #include <string.h>
     39 
     40 #include	"xstraw.h"
     41 #include	"xpipe.h"
     42 #include	"buff.h"
     43 #include	"lzw.h"
     44 
     45 
     46 LZWEngine::LZWEngine(void)
     47 {
     48 	Reset();
     49 }
     50 
     51 
     52 void LZWEngine::Reset(void)
     53 {
     54 	for (int i = 0; i < TABLE_SIZE; i++) {
     55 		dict[i].Make_Unused();
     56 	}
     57 }
     58 
     59 int LZWEngine::Compress(Buffer const & input, Buffer const & output)
     60 {
     61 	BufferStraw instraw(input);
     62 	BufferPipe outpipe(output);
     63 
     64 	int outcount = 0;
     65 	CodeType string_code = END_OF_STREAM;
     66 	CodeType next_code = FIRST_CODE;
     67 
     68 	string_code = 0;
     69 	if (instraw.Get(&string_code, sizeof(char)) == 0) {
     70 		string_code = END_OF_STREAM;
     71 	}
     72 
     73 	for (;;) {
     74 
     75 		/*
     76 		**	Fetch a character from the source data stream. If exhausted,
     77 		**	then break out of the process loop so that the final code
     78 		**	can be written out.
     79 		*/
     80 		unsigned char character;
     81 		if (instraw.Get(&character, sizeof(character)) == 0) break;
     82 
     83 		/*
     84 		**	See if there is a match for the current code and current
     85 		**	character. A match indicates that there is already a
     86 		**	dictionary entry that fully represents the character
     87 		**	sequence.
     88 		*/
     89 		int index = Find_Child_Node(string_code, character);
     90 
     91 		/*
     92 		**	If a code match was found, then set the current code
     93 		**	value to this code value that represents the concatenation
     94 		**	of the previous code value and the current character.
     95 		*/
     96 		if (index != -1 && dict[index].CodeValue != -1) {
     97 			string_code = dict[index].CodeValue;
     98 		} else {
     99 
    100 			/*
    101 			**	Since no exact match was found, then create a new code
    102 			**	entry that represents the current code and character
    103 			**	value concatenated. This presumes there is room in the
    104 			**	code table.
    105 			*/
    106 			if (index != -1 && next_code <= MAX_CODE) {
    107 				dict[index] = CodeClass(next_code, string_code, character);
    108 				next_code++;
    109 			}
    110 
    111 			/*
    112 			**	Output the code to the compression stream and reset the
    113 			**	current code value to match the current character. This
    114 			**	has the effect of clearing out the current character
    115 			**	sequence scan in preparation for building a new one. It
    116 			**	also ensures that the character will be written out.
    117 			*/
    118 			outcount += outpipe.Put(&string_code, sizeof(string_code));
    119 			string_code = character;
    120 		}
    121 	}
    122 
    123 	outcount += outpipe.Put(&string_code, sizeof(string_code));
    124 	if (string_code != END_OF_STREAM) {
    125 		string_code = END_OF_STREAM;
    126 		outcount += outpipe.Put(&string_code, sizeof(string_code));
    127 	}
    128 
    129 	return(outcount);
    130 }
    131 
    132 
    133 int LZWEngine::Uncompress(Buffer const & input, Buffer const & output)
    134 {
    135 	int outcount = 0;
    136 	BufferStraw instraw(input);
    137 	BufferPipe outpipe(output);
    138 
    139 	CodeType old_code;
    140 	if (instraw.Get(&old_code, sizeof(old_code)) == 0) {
    141 		return(outcount);
    142 	}
    143 
    144 	unsigned char character = (unsigned char)old_code;
    145 	outcount += outpipe.Put(&character, sizeof(character));
    146 
    147 	unsigned int count;
    148 	CodeType new_code;
    149 	CodeType next_code = FIRST_CODE;
    150 	for (;;) {
    151 		if (instraw.Get(&new_code, sizeof(new_code)) == 0) break;
    152 
    153 		if (new_code == END_OF_STREAM) break;
    154 
    155 		/*
    156 		** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
    157 		** case which generates an undefined code.  It handles it by decoding
    158 		** the last code, and adding a single character to the end of the decode string.
    159 		*/
    160 		if (new_code >= next_code) {
    161 			decode_stack[0] = character;
    162 			count = 1;
    163 			count += Decode_String(&decode_stack[1], old_code);
    164 		} else {
    165 			count = Decode_String(decode_stack, new_code);
    166 		}
    167 
    168 		character = decode_stack[count-1];
    169 		while (count > 0) {
    170 			--count;
    171 			outcount += outpipe.Put(&decode_stack[count], sizeof(decode_stack[0]));
    172 		}
    173 
    174 		/*
    175 		**	Add the new code sequence to the dictionary (presuming there is still
    176 		**	room).
    177 		*/
    178 		if (next_code <= MAX_CODE) {
    179 			dict[next_code] = CodeClass(next_code, old_code, character);
    180 			next_code++;
    181 		}
    182 		old_code = new_code;
    183 	}
    184 
    185 	return(outcount);
    186 }
    187 
    188 
    189 int LZWEngine::Make_LZW_Hash(CodeType code, char character)
    190 {
    191 	return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
    192 }
    193 
    194 
    195 int LZWEngine::Find_Child_Node(CodeType parent_code, char child_character)
    196 {
    197 	/*
    198 	**	Fetch the first try index for the code and character.
    199 	*/
    200 	int hash_index = Make_LZW_Hash(parent_code, child_character);
    201 
    202 	/*
    203 	**	Base the hash-miss-try-again-displacement value on the current
    204 	**	index. [Shouldn't the value be some large prime number???].
    205 	*/
    206 	int offset = 1;
    207 	if (hash_index != 0) {
    208 		offset = TABLE_SIZE - hash_index;
    209 	}
    210 
    211 	/*
    212 	**	Keep offsetting through the dictionary until an exact match is
    213 	**	found for the code and character specified.
    214 	*/
    215 	int initial = hash_index;
    216 	while (!dict[hash_index].Is_Matching(parent_code, child_character)) {
    217 
    218 		/*
    219 		**	Stop searching if an unused index is found since this means that
    220 		**	a match doesn't exist in the table at all.
    221 		*/
    222 		if (dict[hash_index].Is_Unused()) break;
    223 
    224 		/*
    225 		**	Bump the hash index to another value such that sequential bumps
    226 		**	will not result in the same index value until all of the table
    227 		**	has been scanned.
    228 		*/
    229 		hash_index -= offset;
    230 		if (hash_index < 0) {
    231 			hash_index += TABLE_SIZE;
    232 		}
    233 
    234 		/*
    235 		**	If the entire table has been scanned and no match or unused
    236 		**	entry was found, then return a special value indicating this
    237 		**	condition.
    238 		*/
    239 		if (initial == hash_index) {
    240 			hash_index = -1;
    241 			break;
    242 		}
    243 	}
    244 	return(hash_index);
    245 }
    246 
    247 
    248 int LZWEngine::Decode_String(char * ptr, CodeType code)
    249 {
    250 	int count = 0;
    251 	while (code > 255) {
    252 		*ptr++ = dict[code].CharValue;
    253 		count++;
    254 		code = dict[code].ParentCode;
    255 	}
    256 	*ptr = (char)code;
    257 	count++;
    258 	return(count);
    259 }
    260 
    261 
    262 int LZW_Uncompress(Buffer const & inbuff, Buffer const & outbuff)
    263 {
    264 	LZWEngine lzw;
    265 	return(lzw.Uncompress(inbuff, outbuff));
    266 }
    267 
    268 
    269 int LZW_Compress(Buffer const & inbuff, Buffer const & outbuff)
    270 {
    271 	LZWEngine lzw;
    272 	return(lzw.Compress(inbuff, outbuff));
    273 }
    274 
    275 
    276 
    277 
    278 
    279 #ifdef NEVER
    280 
    281 
    282 /*
    283  * Constants used throughout the program.  BITS defines how many bits
    284  * will be in a code.  TABLE_SIZE defines the size of the dictionary
    285  * table.
    286  */
    287 #define BITS                       12
    288 #define MAX_CODE                   ( ( 1 << BITS ) - 1 )
    289 #define TABLE_SIZE                 5021
    290 #define END_OF_STREAM              256
    291 #define FIRST_CODE                 257
    292 #define UNUSED                     -1
    293 
    294 typedef unsigned short CodeType;
    295 
    296 /*
    297  * This data structure defines the dictionary.  Each entry in the dictionary
    298  * has a code value.  This is the code emitted by the compressor.  Each
    299  * code is actually made up of two pieces:  a parent_code, and a
    300  * character.  Code values of less than 256 are actually plain
    301  * text codes.
    302  */
    303 struct CodeClass
    304 {
    305 	CodeType CodeValue;
    306 	CodeType ParentCode;
    307 	char CharValue;
    308 
    309 	CodeClass(void) {}
    310 	CodeClass(CodeType code, CodeType parent, char c) : CodeValue(code), ParentCode(parent), CharValue(c) {}
    311 
    312 	void Make_Unused(void) {CodeValue = UNUSED;}
    313 	bool Is_Unused(void) const {return(CodeValue == UNUSED);}
    314 	bool Is_Matching(CodeType code, char c) const {return(ParentCode == code && CharValue == c);}
    315 };
    316 CodeClass dict[TABLE_SIZE];
    317 
    318 char decode_stack[TABLE_SIZE];
    319 
    320 inline int Make_LZW_Hash(CodeType code, char character)
    321 {
    322 	return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
    323 }
    324 
    325 
    326 /***********************************************************************************************
    327  * Find_Child_Node -- Find a matching dictionary entry.                                        *
    328  *                                                                                             *
    329  *    This hashing routine is responsible for finding the table location                       *
    330  *    for a string/character combination.  The table index is created                          *
    331  *    by using an exclusive OR combination of the prefix and character.                        *
    332  *    This code also has to check for collisions, and handles them by                          *
    333  *    jumping around in the table.                                                             *
    334  *                                                                                             *
    335  * INPUT:   parent_code -- The code of the parent string sequence.                             *
    336  *                                                                                             *
    337  *          character   -- The current character.                                              *
    338  *                                                                                             *
    339  * OUTPUT:  Returns with the index for the matching dictionary entry. If no matching index     *
    340  *          could be found, then it returns with the index to an unused dictionary entry. If   *
    341  *          there are also no unused entries in the dictionary, then -1 is returned.           *
    342  *                                                                                             *
    343  * WARNINGS:   none                                                                            *
    344  *                                                                                             *
    345  * HISTORY:                                                                                    *
    346  *   08/28/1996 JLB : Created.                                                                 *
    347  *=============================================================================================*/
    348 static int Find_Child_Node(CodeType parent_code, char child_character)
    349 {
    350 	/*
    351 	**	Fetch the first try index for the code and character.
    352 	*/
    353 	int hash_index = Make_LZW_Hash(parent_code, child_character);
    354 
    355 	/*
    356 	**	Base the hash-miss-try-again-displacement value on the current
    357 	**	index. [Shouldn't the value be some large prime number???].
    358 	*/
    359 	int offset = 1;
    360 	if (hash_index != 0) {
    361 		offset = TABLE_SIZE - hash_index;
    362 	}
    363 
    364 	/*
    365 	**	Keep offsetting through the dictionary until an exact match is
    366 	**	found for the code and character specified.
    367 	*/
    368 	int initial = hash_index;
    369 	while (!dict[hash_index].Is_Matching(parent_code, child_character)) {
    370 
    371 		/*
    372 		**	Stop searching if an unused index is found since this means that
    373 		**	a match doesn't exist in the table at all.
    374 		*/
    375 		if (dict[hash_index].Is_Unused()) break;
    376 
    377 		/*
    378 		**	Bump the hash index to another value such that sequential bumps
    379 		**	will not result in the same index value until all of the table
    380 		**	has been scanned.
    381 		*/
    382 		hash_index -= offset;
    383 		if (hash_index < 0) {
    384 			hash_index += TABLE_SIZE;
    385 		}
    386 
    387 		/*
    388 		**	If the entire table has been scanned and no match or unused
    389 		**	entry was found, then return a special value indicating this
    390 		**	condition.
    391 		*/
    392 		if (initial == hash_index) {
    393 			hash_index = -1;
    394 			break;
    395 		}
    396 	}
    397 	return(hash_index);
    398 }
    399 
    400 
    401 /*
    402  * This routine decodes a string from the dictionary, and stores it
    403  * in the decode_stack data structure.  It returns a count to the
    404  * calling program of how many characters were placed in the stack.
    405  */
    406 static int Decode_String(char * ptr, CodeType code)
    407 {
    408 	int count = 0;
    409 	while (code > 255) {
    410 		*ptr++ = dict[code].CharValue;
    411 		count++;
    412 		code = dict[code].ParentCode;
    413 	}
    414 	*ptr = (char)code;
    415 	count++;
    416 	return(count);
    417 }
    418 
    419 
    420 /*
    421  * The compressor is short and simple.  It reads in new symbols one
    422  * at a time from the input file.  It then  checks to see if the
    423  * combination of the current symbol and the current code are already
    424  * defined in the dictionary.  If they are not, they are added to the
    425  * dictionary, and we start over with a new one symbol code.  If they
    426  * are, the code for the combination of the code and character becomes
    427  * our new code.
    428  */
    429 
    430 int LZW_Compress(Buffer & inbuff, Buffer & outbuff)
    431 {
    432 	BufferStraw input(inbuff);
    433 	BufferPipe output(outbuff);
    434 
    435 	for (int i = 0; i < TABLE_SIZE; i++) {
    436 		dict[i].Make_Unused();
    437 //		dict[i].code_value = UNUSED;
    438 	}
    439 
    440 	int outcount = 0;
    441 	CodeType string_code = END_OF_STREAM;
    442 	CodeType next_code = FIRST_CODE;
    443 	for (;;) {
    444 		char character;
    445 
    446 		if (input.Get(&character, sizeof(character)) == 0) break;
    447 
    448 		int index = Find_Child_Node(string_code, character);
    449 
    450 		if (index == -1) {
    451 			string_code = character;
    452 			outcount += output.Put(&string_code, sizeof(string_code));
    453 		} else {
    454 
    455 			if (dict[index].CodeValue != -1) {
    456 				string_code = dict[ index ].CodeValue;
    457 			} else {
    458 				if (next_code <= MAX_CODE) {
    459 					dict[index] = CodeClass(next_code++, string_code, character);
    460 				}
    461 				outcount += output.Put(&string_code, sizeof(string_code));
    462 				string_code = character;
    463 			}
    464 		}
    465 	}
    466 
    467 	outcount += output.Put(&string_code, sizeof(string_code));
    468 	string_code = END_OF_STREAM;
    469 	outcount += output.Put(&string_code, sizeof(string_code));
    470 
    471 	return(outcount);
    472 }
    473 
    474 
    475 /*
    476  * The file expander operates much like the encoder.  It has to
    477  * read in codes, the convert the codes to a string of characters.
    478  * The only catch in the whole operation occurs when the encoder
    479  * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence.  When this
    480  * occurs, the encoder outputs a code that is not presently defined
    481  * in the table.  This is handled as an exception.
    482  */
    483 int LZW_Uncompress(Buffer & inbuff, Buffer & outbuff)
    484 {
    485 	int outcount = 0;
    486 	BufferStraw input(inbuff);
    487 	BufferPipe output(outbuff);
    488 
    489 	CodeType old_code;
    490 	if (input.Get(&old_code, sizeof(old_code)) == 0) {
    491 		return(outcount);
    492 	}
    493 
    494 	char character = (char)old_code;
    495 	outcount += output.Put(&character, sizeof(character));
    496 
    497 	unsigned int count;
    498 	CodeType new_code;
    499 	CodeType next_code = FIRST_CODE;
    500 	for (;;) {
    501 		if (input.Get(&new_code, sizeof(new_code)) == 0) break;
    502 
    503 		/*
    504 		** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
    505 		** case which generates an undefined code.  It handles it by decoding
    506 		** the last code, and adding a single character to the end of the decode string.
    507 		*/
    508 		if (new_code >= next_code) {
    509 			decode_stack[0] = character;
    510 			count = 1;
    511 			count += Decode_String(&decode_stack[1], old_code);
    512 		} else {
    513 			count = Decode_String(decode_stack, new_code);
    514 		}
    515 
    516 		character = decode_stack[count-1];
    517 		while (count > 0) {
    518 			--count;
    519 			outcount += output.Put(&decode_stack[count], sizeof(decode_stack[0]));
    520 		}
    521 
    522 		/*
    523 		**	Add the new code sequence to the dictionary (presuming there is still
    524 		**	room).
    525 		*/
    526 		if (next_code <= MAX_CODE) {
    527 			dict[next_code] = CodeClass(next_code, old_code, character);
    528 			next_code++;
    529 		}
    530 		old_code = new_code;
    531 	}
    532 
    533 	return(outcount);
    534 }
    535 
    536 #endif
	CnC_Remastered_Collection Command and Conquer: Red Alert
	Log \| Files \| Refs \| README \| LICENSE