libmio0.c (15234B)
1 #include <stdio.h> 2 #include <string.h> 3 #include <stdlib.h> 4 #if defined(_WIN32) || defined(_WIN64) 5 #include <io.h> 6 #include <fcntl.h> 7 #endif 8 9 #include "libmio0.h" 10 #include "utils.h" 11 12 // defines 13 14 #define MIO0_VERSION "0.1" 15 16 #define GET_BIT(buf, bit) ((buf)[(bit) / 8] & (1 << (7 - ((bit) % 8)))) 17 18 // types 19 typedef struct 20 { 21 int *indexes; 22 int allocated; 23 int count; 24 int start; 25 } lookback; 26 27 // functions 28 #define LOOKBACK_COUNT 256 29 #define LOOKBACK_INIT_SIZE 128 30 static lookback *lookback_init(void) 31 { 32 lookback *lb = malloc(LOOKBACK_COUNT * sizeof(*lb)); 33 for (int i = 0; i < LOOKBACK_COUNT; i++) { 34 lb[i].allocated = LOOKBACK_INIT_SIZE; 35 lb[i].indexes = malloc(lb[i].allocated * sizeof(*lb[i].indexes)); 36 lb[i].count = 0; 37 lb[i].start = 0; 38 } 39 return lb; 40 } 41 42 static void lookback_free(lookback *lb) 43 { 44 for (int i = 0; i < LOOKBACK_COUNT; i++) { 45 free(lb[i].indexes); 46 } 47 free(lb); 48 } 49 50 static inline void lookback_push(lookback *lkbk, unsigned char val, int index) 51 { 52 lookback *lb = &lkbk[val]; 53 if (lb->count == lb->allocated) { 54 lb->allocated *= 4; 55 lb->indexes = realloc(lb->indexes, lb->allocated * sizeof(*lb->indexes)); 56 } 57 lb->indexes[lb->count++] = index; 58 } 59 60 static void PUT_BIT(unsigned char *buf, int bit, int val) 61 { 62 unsigned char mask = 1 << (7 - (bit % 8)); 63 unsigned int offset = bit / 8; 64 buf[offset] = (buf[offset] & ~(mask)) | (val ? mask : 0); 65 } 66 67 // used to find longest matching stream in buffer 68 // buf: buffer 69 // start_offset: offset in buf to look back from 70 // max_search: max number of bytes to find 71 // found_offset: returned offset found (0 if none found) 72 // returns max length of matching stream (0 if none found) 73 static int find_longest(const unsigned char *buf, int start_offset, int max_search, int *found_offset, lookback *lkbk) 74 { 75 int best_length = 0; 76 int best_offset = 0; 77 int cur_length; 78 int search_len; 79 int farthest, off, i; 80 int lb_idx; 81 const unsigned char first = buf[start_offset]; 82 lookback *lb = &lkbk[first]; 83 84 // buf 85 // | off start max 86 // V |+i-> |+i-> | 87 // |--------------raw-data-----------------| 88 // |+i-> | |+i-> 89 // +cur_length 90 91 // check at most the past 4096 values 92 farthest = MAX(start_offset - 4096, 0); 93 // find starting index 94 for (lb_idx = lb->start; lb_idx < lb->count && lb->indexes[lb_idx] < farthest; lb_idx++) {} 95 lb->start = lb_idx; 96 for ( ; lb_idx < lb->count && lb->indexes[lb_idx] < start_offset; lb_idx++) { 97 off = lb->indexes[lb_idx]; 98 // check at most requested max or up until start 99 search_len = MIN(max_search, start_offset - off); 100 for (i = 0; i < search_len; i++) { 101 if (buf[start_offset + i] != buf[off + i]) { 102 break; 103 } 104 } 105 cur_length = i; 106 // if matched up until start, continue matching in already matched parts 107 if (cur_length == search_len) { 108 // check at most requested max less current length 109 search_len = max_search - cur_length; 110 for (i = 0; i < search_len; i++) { 111 if (buf[start_offset + cur_length + i] != buf[off + i]) { 112 break; 113 } 114 } 115 cur_length += i; 116 } 117 if (cur_length > best_length) { 118 best_offset = start_offset - off; 119 best_length = cur_length; 120 } 121 } 122 123 // return best reverse offset and length (may be 0) 124 *found_offset = best_offset; 125 return best_length; 126 } 127 128 // decode MIO0 header 129 // returns 1 if valid header, 0 otherwise 130 int mio0_decode_header(const unsigned char *buf, mio0_header_t *head) 131 { 132 if (!memcmp(buf, "MIO0", 4)) { 133 head->dest_size = read_u32_be(&buf[4]); 134 head->comp_offset = read_u32_be(&buf[8]); 135 head->uncomp_offset = read_u32_be(&buf[12]); 136 return 1; 137 } 138 return 0; 139 } 140 141 void mio0_encode_header(unsigned char *buf, const mio0_header_t *head) 142 { 143 memcpy(buf, "MIO0", 4); 144 write_u32_be(&buf[4], head->dest_size); 145 write_u32_be(&buf[8], head->comp_offset); 146 write_u32_be(&buf[12], head->uncomp_offset); 147 } 148 149 int mio0_decode(const unsigned char *in, unsigned char *out, unsigned int *end) 150 { 151 mio0_header_t head; 152 unsigned int bytes_written = 0; 153 int bit_idx = 0; 154 int comp_idx = 0; 155 int uncomp_idx = 0; 156 int valid; 157 158 // extract header 159 valid = mio0_decode_header(in, &head); 160 // verify MIO0 header 161 if (!valid) { 162 return -2; 163 } 164 165 // decode data 166 while (bytes_written < head.dest_size) { 167 if (GET_BIT(&in[MIO0_HEADER_LENGTH], bit_idx)) { 168 // 1 - pull uncompressed data 169 out[bytes_written] = in[head.uncomp_offset + uncomp_idx]; 170 bytes_written++; 171 uncomp_idx++; 172 } else { 173 // 0 - read compressed data 174 int idx; 175 int length; 176 int i; 177 const unsigned char *vals = &in[head.comp_offset + comp_idx]; 178 comp_idx += 2; 179 length = ((vals[0] & 0xF0) >> 4) + 3; 180 idx = ((vals[0] & 0x0F) << 8) + vals[1] + 1; 181 for (i = 0; i < length; i++) { 182 out[bytes_written] = out[bytes_written - idx]; 183 bytes_written++; 184 } 185 } 186 bit_idx++; 187 } 188 189 if (end) { 190 *end = head.uncomp_offset + uncomp_idx; 191 } 192 193 return bytes_written; 194 } 195 196 int mio0_encode(const unsigned char *in, unsigned int length, unsigned char *out) 197 { 198 unsigned char *bit_buf; 199 unsigned char *comp_buf; 200 unsigned char *uncomp_buf; 201 unsigned int bit_length; 202 unsigned int comp_offset; 203 unsigned int uncomp_offset; 204 unsigned int bytes_proc = 0; 205 int bytes_written; 206 int bit_idx = 0; 207 int comp_idx = 0; 208 int uncomp_idx = 0; 209 lookback *lookbacks; 210 211 // initialize lookback buffer 212 lookbacks = lookback_init(); 213 214 // allocate some temporary buffers worst case size 215 bit_buf = malloc((length + 7) / 8); // 1-bit/byte 216 comp_buf = malloc(length); // 16-bits/2bytes 217 uncomp_buf = malloc(length); // all uncompressed 218 memset(bit_buf, 0, (length + 7) / 8); 219 220 // encode data 221 // special case for first byte 222 lookback_push(lookbacks, in[0], 0); 223 uncomp_buf[uncomp_idx] = in[0]; 224 uncomp_idx += 1; 225 bytes_proc += 1; 226 PUT_BIT(bit_buf, bit_idx++, 1); 227 while (bytes_proc < length) { 228 int offset; 229 int max_length = MIN(length - bytes_proc, 18); 230 int longest_match = find_longest(in, bytes_proc, max_length, &offset, lookbacks); 231 // push current byte before checking next longer match 232 lookback_push(lookbacks, in[bytes_proc], bytes_proc); 233 if (longest_match > 2) { 234 int lookahead_offset; 235 // lookahead to next byte to see if longer match 236 int lookahead_length = MIN(length - bytes_proc - 1, 18); 237 int lookahead_match = find_longest(in, bytes_proc + 1, lookahead_length, &lookahead_offset, lookbacks); 238 // better match found, use uncompressed + lookahead compressed 239 if ((longest_match + 1) < lookahead_match) { 240 // uncompressed byte 241 uncomp_buf[uncomp_idx] = in[bytes_proc]; 242 uncomp_idx++; 243 PUT_BIT(bit_buf, bit_idx, 1); 244 bytes_proc++; 245 longest_match = lookahead_match; 246 offset = lookahead_offset; 247 bit_idx++; 248 lookback_push(lookbacks, in[bytes_proc], bytes_proc); 249 } 250 // first byte already pushed above 251 for (int i = 1; i < longest_match; i++) { 252 lookback_push(lookbacks, in[bytes_proc + i], bytes_proc + i); 253 } 254 // compressed block 255 comp_buf[comp_idx] = (((longest_match - 3) & 0x0F) << 4) | 256 (((offset - 1) >> 8) & 0x0F); 257 comp_buf[comp_idx + 1] = (offset - 1) & 0xFF; 258 comp_idx += 2; 259 PUT_BIT(bit_buf, bit_idx, 0); 260 bytes_proc += longest_match; 261 } else { 262 // uncompressed byte 263 uncomp_buf[uncomp_idx] = in[bytes_proc]; 264 uncomp_idx++; 265 PUT_BIT(bit_buf, bit_idx, 1); 266 bytes_proc++; 267 } 268 bit_idx++; 269 } 270 271 // compute final sizes and offsets 272 // +7 so int division accounts for all bits 273 bit_length = ((bit_idx + 7) / 8); 274 // compressed data after control bits and aligned to 4-byte boundary 275 comp_offset = ALIGN(MIO0_HEADER_LENGTH + bit_length, 4); 276 uncomp_offset = comp_offset + comp_idx; 277 bytes_written = uncomp_offset + uncomp_idx; 278 279 // output header 280 memcpy(out, "MIO0", 4); 281 write_u32_be(&out[4], length); 282 write_u32_be(&out[8], comp_offset); 283 write_u32_be(&out[12], uncomp_offset); 284 // output data 285 memcpy(&out[MIO0_HEADER_LENGTH], bit_buf, bit_length); 286 memcpy(&out[comp_offset], comp_buf, comp_idx); 287 memcpy(&out[uncomp_offset], uncomp_buf, uncomp_idx); 288 289 // free allocated buffers 290 free(bit_buf); 291 free(comp_buf); 292 free(uncomp_buf); 293 lookback_free(lookbacks); 294 295 return bytes_written; 296 } 297 298 static FILE *mio0_open_out_file(const char *out_file) { 299 if (strcmp(out_file, "-") == 0) { 300 #if defined(_WIN32) || defined(_WIN64) 301 _setmode(_fileno(stdout), _O_BINARY); 302 #endif 303 return stdout; 304 } else { 305 return fopen(out_file, "wb"); 306 } 307 } 308 309 int mio0_decode_file(const char *in_file, unsigned long offset, const char *out_file) 310 { 311 mio0_header_t head; 312 FILE *in; 313 FILE *out; 314 unsigned char *in_buf = NULL; 315 unsigned char *out_buf = NULL; 316 long file_size; 317 int ret_val = 0; 318 size_t bytes_read; 319 int bytes_decoded; 320 int bytes_written; 321 int valid; 322 323 in = fopen(in_file, "rb"); 324 if (in == NULL) { 325 return 1; 326 } 327 328 // allocate buffer to read from offset to end of file 329 fseek(in, 0, SEEK_END); 330 file_size = ftell(in); 331 in_buf = malloc(file_size - offset); 332 fseek(in, offset, SEEK_SET); 333 334 // read bytes 335 bytes_read = fread(in_buf, 1, file_size - offset, in); 336 if (bytes_read != file_size - offset) { 337 ret_val = 2; 338 goto free_all; 339 } 340 341 // verify header 342 valid = mio0_decode_header(in_buf, &head); 343 if (!valid) { 344 ret_val = 3; 345 goto free_all; 346 } 347 out_buf = malloc(head.dest_size); 348 349 // decompress MIO0 encoded data 350 bytes_decoded = mio0_decode(in_buf, out_buf, NULL); 351 if (bytes_decoded < 0) { 352 ret_val = 3; 353 goto free_all; 354 } 355 356 // open output file 357 out = mio0_open_out_file(out_file); 358 if (out == NULL) { 359 ret_val = 4; 360 goto free_all; 361 } 362 363 // write data to file 364 bytes_written = fwrite(out_buf, 1, bytes_decoded, out); 365 if (bytes_written != bytes_decoded) { 366 ret_val = 5; 367 } 368 369 // clean up 370 if (out != stdout) { 371 fclose(out); 372 } 373 free_all: 374 if (out_buf) { 375 free(out_buf); 376 } 377 if (in_buf) { 378 free(in_buf); 379 } 380 fclose(in); 381 382 return ret_val; 383 } 384 385 int mio0_encode_file(const char *in_file, const char *out_file) 386 { 387 FILE *in; 388 FILE *out; 389 unsigned char *in_buf = NULL; 390 unsigned char *out_buf = NULL; 391 size_t file_size; 392 size_t bytes_read; 393 int bytes_encoded; 394 int bytes_written; 395 int ret_val = 0; 396 397 in = fopen(in_file, "rb"); 398 if (in == NULL) { 399 return 1; 400 } 401 402 // allocate buffer to read entire contents of files 403 fseek(in, 0, SEEK_END); 404 file_size = ftell(in); 405 fseek(in, 0, SEEK_SET); 406 in_buf = malloc(file_size); 407 408 // read bytes 409 bytes_read = fread(in_buf, 1, file_size, in); 410 if (bytes_read != file_size) { 411 ret_val = 2; 412 goto free_all; 413 } 414 415 // allocate worst case length 416 out_buf = malloc(MIO0_HEADER_LENGTH + ((file_size+7)/8) + file_size); 417 418 // compress data in MIO0 format 419 bytes_encoded = mio0_encode(in_buf, file_size, out_buf); 420 421 // open output file 422 out = mio0_open_out_file(out_file); 423 if (out == NULL) { 424 ret_val = 4; 425 goto free_all; 426 } 427 428 // write data to file 429 bytes_written = fwrite(out_buf, 1, bytes_encoded, out); 430 if (bytes_written != bytes_encoded) { 431 ret_val = 5; 432 } 433 434 // clean up 435 if (out != stdout) { 436 fclose(out); 437 } 438 free_all: 439 if (out_buf) { 440 free(out_buf); 441 } 442 if (in_buf) { 443 free(in_buf); 444 } 445 fclose(in); 446 447 return ret_val; 448 } 449 450 // mio0 standalone executable 451 #ifdef MIO0_STANDALONE 452 typedef struct 453 { 454 char *in_filename; 455 char *out_filename; 456 unsigned int offset; 457 int compress; 458 } arg_config; 459 460 static arg_config default_config = 461 { 462 NULL, 463 NULL, 464 0, 465 1 466 }; 467 468 static void print_usage(void) 469 { 470 ERROR("Usage: mio0 [-c / -d] [-o OFFSET] FILE [OUTPUT]\n" 471 "\n" 472 "mio0 v" MIO0_VERSION ": MIO0 compression and decompression tool\n" 473 "\n" 474 "Optional arguments:\n" 475 " -c compress raw data into MIO0 (default: compress)\n" 476 " -d decompress MIO0 into raw data\n" 477 " -o OFFSET starting offset in FILE (default: 0)\n" 478 "\n" 479 "File arguments:\n" 480 " FILE input file\n" 481 " [OUTPUT] output file (default: FILE.out), \"-\" for stdout\n"); 482 exit(1); 483 } 484 485 // parse command line arguments 486 static void parse_arguments(int argc, char *argv[], arg_config *config) 487 { 488 int i; 489 int file_count = 0; 490 if (argc < 2) { 491 print_usage(); 492 exit(1); 493 } 494 for (i = 1; i < argc; i++) { 495 if (argv[i][0] == '-' && argv[i][1] != '\0') { 496 switch (argv[i][1]) { 497 case 'c': 498 config->compress = 1; 499 break; 500 case 'd': 501 config->compress = 0; 502 break; 503 case 'o': 504 if (++i >= argc) { 505 print_usage(); 506 } 507 config->offset = strtoul(argv[i], NULL, 0); 508 break; 509 default: 510 print_usage(); 511 break; 512 } 513 } else { 514 switch (file_count) { 515 case 0: 516 config->in_filename = argv[i]; 517 break; 518 case 1: 519 config->out_filename = argv[i]; 520 break; 521 default: // too many 522 print_usage(); 523 break; 524 } 525 file_count++; 526 } 527 } 528 if (file_count < 1) { 529 print_usage(); 530 } 531 } 532 533 int main(int argc, char *argv[]) 534 { 535 char out_filename[FILENAME_MAX]; 536 arg_config config; 537 int ret_val; 538 539 // get configuration from arguments 540 config = default_config; 541 parse_arguments(argc, argv, &config); 542 if (config.out_filename == NULL) { 543 config.out_filename = out_filename; 544 sprintf(config.out_filename, "%s.out", config.in_filename); 545 } 546 547 // operation 548 if (config.compress) { 549 ret_val = mio0_encode_file(config.in_filename, config.out_filename); 550 } else { 551 ret_val = mio0_decode_file(config.in_filename, config.offset, config.out_filename); 552 } 553 554 switch (ret_val) { 555 case 1: 556 ERROR("Error opening input file \"%s\"\n", config.in_filename); 557 break; 558 case 2: 559 ERROR("Error reading from input file \"%s\"\n", config.in_filename); 560 break; 561 case 3: 562 ERROR("Error decoding MIO0 data. Wrong offset (0x%X)?\n", config.offset); 563 break; 564 case 4: 565 ERROR("Error opening output file \"%s\"\n", config.out_filename); 566 break; 567 case 5: 568 ERROR("Error writing bytes to output file \"%s\"\n", config.out_filename); 569 break; 570 } 571 572 return ret_val; 573 } 574 #endif // MIO0_STANDALONE 575