LZW.CPP (16866B)
1 // 2 // Copyright 2020 Electronic Arts Inc. 3 // 4 // TiberianDawn.DLL and RedAlert.dll and corresponding source code is free 5 // software: you can redistribute it and/or modify it under the terms of 6 // the GNU General Public License as published by the Free Software Foundation, 7 // either version 3 of the License, or (at your option) any later version. 8 9 // TiberianDawn.DLL and RedAlert.dll and corresponding source code is distributed 10 // in the hope that it will be useful, but with permitted additional restrictions 11 // under Section 7 of the GPL. See the GNU General Public License in LICENSE.TXT 12 // distributed with this program. You should have received a copy of the 13 // GNU General Public License along with permitted additional restrictions 14 // with this program. If not, see https://github.com/electronicarts/CnC_Remastered_Collection 15 16 /* $Header: /CounterStrike/LZW.CPP 1 3/03/97 10:25a Joe_bostic $ */ 17 /*********************************************************************************************** 18 *** C O N F I D E N T I A L --- W E S T W O O D S T U D I O S *** 19 *********************************************************************************************** 20 * * 21 * Project Name : Command & Conquer * 22 * * 23 * File Name : LZW.CPP * 24 * * 25 * Programmer : Joe L. Bostic * 26 * * 27 * Start Date : 08/28/96 * 28 * * 29 * Last Update : August 28, 1996 [JLB] * 30 * * 31 *---------------------------------------------------------------------------------------------* 32 * Functions: * 33 * Find_Child_Node -- Find a matching dictionary entry. * 34 * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 #include "xstraw.h" 41 #include "xpipe.h" 42 #include "buff.h" 43 #include "lzw.h" 44 45 46 LZWEngine::LZWEngine(void) 47 { 48 Reset(); 49 } 50 51 52 void LZWEngine::Reset(void) 53 { 54 for (int i = 0; i < TABLE_SIZE; i++) { 55 dict[i].Make_Unused(); 56 } 57 } 58 59 int LZWEngine::Compress(Buffer const & input, Buffer const & output) 60 { 61 BufferStraw instraw(input); 62 BufferPipe outpipe(output); 63 64 int outcount = 0; 65 CodeType string_code = END_OF_STREAM; 66 CodeType next_code = FIRST_CODE; 67 68 string_code = 0; 69 if (instraw.Get(&string_code, sizeof(char)) == 0) { 70 string_code = END_OF_STREAM; 71 } 72 73 for (;;) { 74 75 /* 76 ** Fetch a character from the source data stream. If exhausted, 77 ** then break out of the process loop so that the final code 78 ** can be written out. 79 */ 80 unsigned char character; 81 if (instraw.Get(&character, sizeof(character)) == 0) break; 82 83 /* 84 ** See if there is a match for the current code and current 85 ** character. A match indicates that there is already a 86 ** dictionary entry that fully represents the character 87 ** sequence. 88 */ 89 int index = Find_Child_Node(string_code, character); 90 91 /* 92 ** If a code match was found, then set the current code 93 ** value to this code value that represents the concatenation 94 ** of the previous code value and the current character. 95 */ 96 if (index != -1 && dict[index].CodeValue != -1) { 97 string_code = dict[index].CodeValue; 98 } else { 99 100 /* 101 ** Since no exact match was found, then create a new code 102 ** entry that represents the current code and character 103 ** value concatenated. This presumes there is room in the 104 ** code table. 105 */ 106 if (index != -1 && next_code <= MAX_CODE) { 107 dict[index] = CodeClass(next_code, string_code, character); 108 next_code++; 109 } 110 111 /* 112 ** Output the code to the compression stream and reset the 113 ** current code value to match the current character. This 114 ** has the effect of clearing out the current character 115 ** sequence scan in preparation for building a new one. It 116 ** also ensures that the character will be written out. 117 */ 118 outcount += outpipe.Put(&string_code, sizeof(string_code)); 119 string_code = character; 120 } 121 } 122 123 outcount += outpipe.Put(&string_code, sizeof(string_code)); 124 if (string_code != END_OF_STREAM) { 125 string_code = END_OF_STREAM; 126 outcount += outpipe.Put(&string_code, sizeof(string_code)); 127 } 128 129 return(outcount); 130 } 131 132 133 int LZWEngine::Uncompress(Buffer const & input, Buffer const & output) 134 { 135 int outcount = 0; 136 BufferStraw instraw(input); 137 BufferPipe outpipe(output); 138 139 CodeType old_code; 140 if (instraw.Get(&old_code, sizeof(old_code)) == 0) { 141 return(outcount); 142 } 143 144 unsigned char character = (unsigned char)old_code; 145 outcount += outpipe.Put(&character, sizeof(character)); 146 147 unsigned int count; 148 CodeType new_code; 149 CodeType next_code = FIRST_CODE; 150 for (;;) { 151 if (instraw.Get(&new_code, sizeof(new_code)) == 0) break; 152 153 if (new_code == END_OF_STREAM) break; 154 155 /* 156 ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER 157 ** case which generates an undefined code. It handles it by decoding 158 ** the last code, and adding a single character to the end of the decode string. 159 */ 160 if (new_code >= next_code) { 161 decode_stack[0] = character; 162 count = 1; 163 count += Decode_String(&decode_stack[1], old_code); 164 } else { 165 count = Decode_String(decode_stack, new_code); 166 } 167 168 character = decode_stack[count-1]; 169 while (count > 0) { 170 --count; 171 outcount += outpipe.Put(&decode_stack[count], sizeof(decode_stack[0])); 172 } 173 174 /* 175 ** Add the new code sequence to the dictionary (presuming there is still 176 ** room). 177 */ 178 if (next_code <= MAX_CODE) { 179 dict[next_code] = CodeClass(next_code, old_code, character); 180 next_code++; 181 } 182 old_code = new_code; 183 } 184 185 return(outcount); 186 } 187 188 189 int LZWEngine::Make_LZW_Hash(CodeType code, char character) 190 { 191 return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code); 192 } 193 194 195 int LZWEngine::Find_Child_Node(CodeType parent_code, char child_character) 196 { 197 /* 198 ** Fetch the first try index for the code and character. 199 */ 200 int hash_index = Make_LZW_Hash(parent_code, child_character); 201 202 /* 203 ** Base the hash-miss-try-again-displacement value on the current 204 ** index. [Shouldn't the value be some large prime number???]. 205 */ 206 int offset = 1; 207 if (hash_index != 0) { 208 offset = TABLE_SIZE - hash_index; 209 } 210 211 /* 212 ** Keep offsetting through the dictionary until an exact match is 213 ** found for the code and character specified. 214 */ 215 int initial = hash_index; 216 while (!dict[hash_index].Is_Matching(parent_code, child_character)) { 217 218 /* 219 ** Stop searching if an unused index is found since this means that 220 ** a match doesn't exist in the table at all. 221 */ 222 if (dict[hash_index].Is_Unused()) break; 223 224 /* 225 ** Bump the hash index to another value such that sequential bumps 226 ** will not result in the same index value until all of the table 227 ** has been scanned. 228 */ 229 hash_index -= offset; 230 if (hash_index < 0) { 231 hash_index += TABLE_SIZE; 232 } 233 234 /* 235 ** If the entire table has been scanned and no match or unused 236 ** entry was found, then return a special value indicating this 237 ** condition. 238 */ 239 if (initial == hash_index) { 240 hash_index = -1; 241 break; 242 } 243 } 244 return(hash_index); 245 } 246 247 248 int LZWEngine::Decode_String(char * ptr, CodeType code) 249 { 250 int count = 0; 251 while (code > 255) { 252 *ptr++ = dict[code].CharValue; 253 count++; 254 code = dict[code].ParentCode; 255 } 256 *ptr = (char)code; 257 count++; 258 return(count); 259 } 260 261 262 int LZW_Uncompress(Buffer const & inbuff, Buffer const & outbuff) 263 { 264 LZWEngine lzw; 265 return(lzw.Uncompress(inbuff, outbuff)); 266 } 267 268 269 int LZW_Compress(Buffer const & inbuff, Buffer const & outbuff) 270 { 271 LZWEngine lzw; 272 return(lzw.Compress(inbuff, outbuff)); 273 } 274 275 276 277 278 279 #ifdef NEVER 280 281 282 /* 283 * Constants used throughout the program. BITS defines how many bits 284 * will be in a code. TABLE_SIZE defines the size of the dictionary 285 * table. 286 */ 287 #define BITS 12 288 #define MAX_CODE ( ( 1 << BITS ) - 1 ) 289 #define TABLE_SIZE 5021 290 #define END_OF_STREAM 256 291 #define FIRST_CODE 257 292 #define UNUSED -1 293 294 typedef unsigned short CodeType; 295 296 /* 297 * This data structure defines the dictionary. Each entry in the dictionary 298 * has a code value. This is the code emitted by the compressor. Each 299 * code is actually made up of two pieces: a parent_code, and a 300 * character. Code values of less than 256 are actually plain 301 * text codes. 302 */ 303 struct CodeClass 304 { 305 CodeType CodeValue; 306 CodeType ParentCode; 307 char CharValue; 308 309 CodeClass(void) {} 310 CodeClass(CodeType code, CodeType parent, char c) : CodeValue(code), ParentCode(parent), CharValue(c) {} 311 312 void Make_Unused(void) {CodeValue = UNUSED;} 313 bool Is_Unused(void) const {return(CodeValue == UNUSED);} 314 bool Is_Matching(CodeType code, char c) const {return(ParentCode == code && CharValue == c);} 315 }; 316 CodeClass dict[TABLE_SIZE]; 317 318 char decode_stack[TABLE_SIZE]; 319 320 inline int Make_LZW_Hash(CodeType code, char character) 321 { 322 return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code); 323 } 324 325 326 /*********************************************************************************************** 327 * Find_Child_Node -- Find a matching dictionary entry. * 328 * * 329 * This hashing routine is responsible for finding the table location * 330 * for a string/character combination. The table index is created * 331 * by using an exclusive OR combination of the prefix and character. * 332 * This code also has to check for collisions, and handles them by * 333 * jumping around in the table. * 334 * * 335 * INPUT: parent_code -- The code of the parent string sequence. * 336 * * 337 * character -- The current character. * 338 * * 339 * OUTPUT: Returns with the index for the matching dictionary entry. If no matching index * 340 * could be found, then it returns with the index to an unused dictionary entry. If * 341 * there are also no unused entries in the dictionary, then -1 is returned. * 342 * * 343 * WARNINGS: none * 344 * * 345 * HISTORY: * 346 * 08/28/1996 JLB : Created. * 347 *=============================================================================================*/ 348 static int Find_Child_Node(CodeType parent_code, char child_character) 349 { 350 /* 351 ** Fetch the first try index for the code and character. 352 */ 353 int hash_index = Make_LZW_Hash(parent_code, child_character); 354 355 /* 356 ** Base the hash-miss-try-again-displacement value on the current 357 ** index. [Shouldn't the value be some large prime number???]. 358 */ 359 int offset = 1; 360 if (hash_index != 0) { 361 offset = TABLE_SIZE - hash_index; 362 } 363 364 /* 365 ** Keep offsetting through the dictionary until an exact match is 366 ** found for the code and character specified. 367 */ 368 int initial = hash_index; 369 while (!dict[hash_index].Is_Matching(parent_code, child_character)) { 370 371 /* 372 ** Stop searching if an unused index is found since this means that 373 ** a match doesn't exist in the table at all. 374 */ 375 if (dict[hash_index].Is_Unused()) break; 376 377 /* 378 ** Bump the hash index to another value such that sequential bumps 379 ** will not result in the same index value until all of the table 380 ** has been scanned. 381 */ 382 hash_index -= offset; 383 if (hash_index < 0) { 384 hash_index += TABLE_SIZE; 385 } 386 387 /* 388 ** If the entire table has been scanned and no match or unused 389 ** entry was found, then return a special value indicating this 390 ** condition. 391 */ 392 if (initial == hash_index) { 393 hash_index = -1; 394 break; 395 } 396 } 397 return(hash_index); 398 } 399 400 401 /* 402 * This routine decodes a string from the dictionary, and stores it 403 * in the decode_stack data structure. It returns a count to the 404 * calling program of how many characters were placed in the stack. 405 */ 406 static int Decode_String(char * ptr, CodeType code) 407 { 408 int count = 0; 409 while (code > 255) { 410 *ptr++ = dict[code].CharValue; 411 count++; 412 code = dict[code].ParentCode; 413 } 414 *ptr = (char)code; 415 count++; 416 return(count); 417 } 418 419 420 /* 421 * The compressor is short and simple. It reads in new symbols one 422 * at a time from the input file. It then checks to see if the 423 * combination of the current symbol and the current code are already 424 * defined in the dictionary. If they are not, they are added to the 425 * dictionary, and we start over with a new one symbol code. If they 426 * are, the code for the combination of the code and character becomes 427 * our new code. 428 */ 429 430 int LZW_Compress(Buffer & inbuff, Buffer & outbuff) 431 { 432 BufferStraw input(inbuff); 433 BufferPipe output(outbuff); 434 435 for (int i = 0; i < TABLE_SIZE; i++) { 436 dict[i].Make_Unused(); 437 // dict[i].code_value = UNUSED; 438 } 439 440 int outcount = 0; 441 CodeType string_code = END_OF_STREAM; 442 CodeType next_code = FIRST_CODE; 443 for (;;) { 444 char character; 445 446 if (input.Get(&character, sizeof(character)) == 0) break; 447 448 int index = Find_Child_Node(string_code, character); 449 450 if (index == -1) { 451 string_code = character; 452 outcount += output.Put(&string_code, sizeof(string_code)); 453 } else { 454 455 if (dict[index].CodeValue != -1) { 456 string_code = dict[ index ].CodeValue; 457 } else { 458 if (next_code <= MAX_CODE) { 459 dict[index] = CodeClass(next_code++, string_code, character); 460 } 461 outcount += output.Put(&string_code, sizeof(string_code)); 462 string_code = character; 463 } 464 } 465 } 466 467 outcount += output.Put(&string_code, sizeof(string_code)); 468 string_code = END_OF_STREAM; 469 outcount += output.Put(&string_code, sizeof(string_code)); 470 471 return(outcount); 472 } 473 474 475 /* 476 * The file expander operates much like the encoder. It has to 477 * read in codes, the convert the codes to a string of characters. 478 * The only catch in the whole operation occurs when the encoder 479 * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this 480 * occurs, the encoder outputs a code that is not presently defined 481 * in the table. This is handled as an exception. 482 */ 483 int LZW_Uncompress(Buffer & inbuff, Buffer & outbuff) 484 { 485 int outcount = 0; 486 BufferStraw input(inbuff); 487 BufferPipe output(outbuff); 488 489 CodeType old_code; 490 if (input.Get(&old_code, sizeof(old_code)) == 0) { 491 return(outcount); 492 } 493 494 char character = (char)old_code; 495 outcount += output.Put(&character, sizeof(character)); 496 497 unsigned int count; 498 CodeType new_code; 499 CodeType next_code = FIRST_CODE; 500 for (;;) { 501 if (input.Get(&new_code, sizeof(new_code)) == 0) break; 502 503 /* 504 ** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER 505 ** case which generates an undefined code. It handles it by decoding 506 ** the last code, and adding a single character to the end of the decode string. 507 */ 508 if (new_code >= next_code) { 509 decode_stack[0] = character; 510 count = 1; 511 count += Decode_String(&decode_stack[1], old_code); 512 } else { 513 count = Decode_String(decode_stack, new_code); 514 } 515 516 character = decode_stack[count-1]; 517 while (count > 0) { 518 --count; 519 outcount += output.Put(&decode_stack[count], sizeof(decode_stack[0])); 520 } 521 522 /* 523 ** Add the new code sequence to the dictionary (presuming there is still 524 ** room). 525 */ 526 if (next_code <= MAX_CODE) { 527 dict[next_code] = CodeClass(next_code, old_code, character); 528 next_code++; 529 } 530 old_code = new_code; 531 } 532 533 return(outcount); 534 } 535 536 #endif