sm64

A Super Mario 64 decompilation
Log | Files | Refs | README | LICENSE

libmio0.c (15234B)


      1 #include <stdio.h>
      2 #include <string.h>
      3 #include <stdlib.h>
      4 #if defined(_WIN32) || defined(_WIN64)
      5 #include <io.h>
      6 #include <fcntl.h>
      7 #endif
      8 
      9 #include "libmio0.h"
     10 #include "utils.h"
     11 
     12 // defines
     13 
     14 #define MIO0_VERSION "0.1"
     15 
     16 #define GET_BIT(buf, bit) ((buf)[(bit) / 8] & (1 << (7 - ((bit) % 8))))
     17 
     18 // types
     19 typedef struct
     20 {
     21    int *indexes;
     22    int allocated;
     23    int count;
     24    int start;
     25 } lookback;
     26 
     27 // functions
     28 #define LOOKBACK_COUNT 256
     29 #define LOOKBACK_INIT_SIZE 128
     30 static lookback *lookback_init(void)
     31 {
     32    lookback *lb = malloc(LOOKBACK_COUNT * sizeof(*lb));
     33    for (int i = 0; i < LOOKBACK_COUNT; i++) {
     34       lb[i].allocated = LOOKBACK_INIT_SIZE;
     35       lb[i].indexes = malloc(lb[i].allocated * sizeof(*lb[i].indexes));
     36       lb[i].count = 0;
     37       lb[i].start = 0;
     38    }
     39    return lb;
     40 }
     41 
     42 static void lookback_free(lookback *lb)
     43 {
     44    for (int i = 0; i < LOOKBACK_COUNT; i++) {
     45       free(lb[i].indexes);
     46    }
     47    free(lb);
     48 }
     49 
     50 static inline void lookback_push(lookback *lkbk, unsigned char val, int index)
     51 {
     52    lookback *lb = &lkbk[val];
     53    if (lb->count == lb->allocated) {
     54       lb->allocated *= 4;
     55       lb->indexes = realloc(lb->indexes, lb->allocated * sizeof(*lb->indexes));
     56    }
     57    lb->indexes[lb->count++] = index;
     58 }
     59 
     60 static void PUT_BIT(unsigned char *buf, int bit, int val)
     61 {
     62    unsigned char mask = 1 << (7 - (bit % 8));
     63    unsigned int offset = bit / 8;
     64    buf[offset] = (buf[offset] & ~(mask)) | (val ? mask : 0);
     65 }
     66 
     67 // used to find longest matching stream in buffer
     68 // buf: buffer
     69 // start_offset: offset in buf to look back from
     70 // max_search: max number of bytes to find
     71 // found_offset: returned offset found (0 if none found)
     72 // returns max length of matching stream (0 if none found)
     73 static int find_longest(const unsigned char *buf, int start_offset, int max_search, int *found_offset, lookback *lkbk)
     74 {
     75    int best_length = 0;
     76    int best_offset = 0;
     77    int cur_length;
     78    int search_len;
     79    int farthest, off, i;
     80    int lb_idx;
     81    const unsigned char first = buf[start_offset];
     82    lookback *lb = &lkbk[first];
     83 
     84    // buf
     85    //  |    off        start                  max
     86    //  V     |+i->       |+i->                 |
     87    //  |--------------raw-data-----------------|
     88    //        |+i->       |      |+i->
     89    //                       +cur_length
     90 
     91    // check at most the past 4096 values
     92    farthest = MAX(start_offset - 4096, 0);
     93    // find starting index
     94    for (lb_idx = lb->start; lb_idx < lb->count && lb->indexes[lb_idx] < farthest; lb_idx++) {}
     95    lb->start = lb_idx;
     96    for ( ; lb_idx < lb->count && lb->indexes[lb_idx] < start_offset; lb_idx++) {
     97       off = lb->indexes[lb_idx];
     98       // check at most requested max or up until start
     99       search_len = MIN(max_search, start_offset - off);
    100       for (i = 0; i < search_len; i++) {
    101          if (buf[start_offset + i] != buf[off + i]) {
    102             break;
    103          }
    104       }
    105       cur_length = i;
    106       // if matched up until start, continue matching in already matched parts
    107       if (cur_length == search_len) {
    108          // check at most requested max less current length
    109          search_len = max_search - cur_length;
    110          for (i = 0; i < search_len; i++) {
    111             if (buf[start_offset + cur_length + i] != buf[off + i]) {
    112                break;
    113             }
    114          }
    115          cur_length += i;
    116       }
    117       if (cur_length > best_length) {
    118          best_offset = start_offset - off;
    119          best_length = cur_length;
    120       }
    121    }
    122 
    123    // return best reverse offset and length (may be 0)
    124    *found_offset = best_offset;
    125    return best_length;
    126 }
    127 
    128 // decode MIO0 header
    129 // returns 1 if valid header, 0 otherwise
    130 int mio0_decode_header(const unsigned char *buf, mio0_header_t *head)
    131 {
    132    if (!memcmp(buf, "MIO0", 4)) {
    133       head->dest_size = read_u32_be(&buf[4]);
    134       head->comp_offset = read_u32_be(&buf[8]);
    135       head->uncomp_offset = read_u32_be(&buf[12]);
    136       return 1;
    137    }
    138    return 0;
    139 }
    140 
    141 void mio0_encode_header(unsigned char *buf, const mio0_header_t *head)
    142 {
    143    memcpy(buf, "MIO0", 4);
    144    write_u32_be(&buf[4], head->dest_size);
    145    write_u32_be(&buf[8], head->comp_offset);
    146    write_u32_be(&buf[12], head->uncomp_offset);
    147 }
    148 
    149 int mio0_decode(const unsigned char *in, unsigned char *out, unsigned int *end)
    150 {
    151    mio0_header_t head;
    152    unsigned int bytes_written = 0;
    153    int bit_idx = 0;
    154    int comp_idx = 0;
    155    int uncomp_idx = 0;
    156    int valid;
    157 
    158    // extract header
    159    valid = mio0_decode_header(in, &head);
    160    // verify MIO0 header
    161    if (!valid) {
    162       return -2;
    163    }
    164 
    165    // decode data
    166    while (bytes_written < head.dest_size) {
    167       if (GET_BIT(&in[MIO0_HEADER_LENGTH], bit_idx)) {
    168          // 1 - pull uncompressed data
    169          out[bytes_written] = in[head.uncomp_offset + uncomp_idx];
    170          bytes_written++;
    171          uncomp_idx++;
    172       } else {
    173          // 0 - read compressed data
    174          int idx;
    175          int length;
    176          int i;
    177          const unsigned char *vals = &in[head.comp_offset + comp_idx];
    178          comp_idx += 2;
    179          length = ((vals[0] & 0xF0) >> 4) + 3;
    180          idx = ((vals[0] & 0x0F) << 8) + vals[1] + 1;
    181          for (i = 0; i < length; i++) {
    182             out[bytes_written] = out[bytes_written - idx];
    183             bytes_written++;
    184          }
    185       }
    186       bit_idx++;
    187    }
    188 
    189    if (end) {
    190       *end = head.uncomp_offset + uncomp_idx;
    191    }
    192 
    193    return bytes_written;
    194 }
    195 
    196 int mio0_encode(const unsigned char *in, unsigned int length, unsigned char *out)
    197 {
    198    unsigned char *bit_buf;
    199    unsigned char *comp_buf;
    200    unsigned char *uncomp_buf;
    201    unsigned int bit_length;
    202    unsigned int comp_offset;
    203    unsigned int uncomp_offset;
    204    unsigned int bytes_proc = 0;
    205    int bytes_written;
    206    int bit_idx = 0;
    207    int comp_idx = 0;
    208    int uncomp_idx = 0;
    209    lookback *lookbacks;
    210 
    211    // initialize lookback buffer
    212    lookbacks = lookback_init();
    213 
    214    // allocate some temporary buffers worst case size
    215    bit_buf = malloc((length + 7) / 8); // 1-bit/byte
    216    comp_buf = malloc(length); // 16-bits/2bytes
    217    uncomp_buf = malloc(length); // all uncompressed
    218    memset(bit_buf, 0, (length + 7) / 8);
    219 
    220    // encode data
    221    // special case for first byte
    222    lookback_push(lookbacks, in[0], 0);
    223    uncomp_buf[uncomp_idx] = in[0];
    224    uncomp_idx += 1;
    225    bytes_proc += 1;
    226    PUT_BIT(bit_buf, bit_idx++, 1);
    227    while (bytes_proc < length) {
    228       int offset;
    229       int max_length = MIN(length - bytes_proc, 18);
    230       int longest_match = find_longest(in, bytes_proc, max_length, &offset, lookbacks);
    231       // push current byte before checking next longer match
    232       lookback_push(lookbacks, in[bytes_proc], bytes_proc);
    233       if (longest_match > 2) {
    234          int lookahead_offset;
    235          // lookahead to next byte to see if longer match
    236          int lookahead_length = MIN(length - bytes_proc - 1, 18);
    237          int lookahead_match = find_longest(in, bytes_proc + 1, lookahead_length, &lookahead_offset, lookbacks);
    238          // better match found, use uncompressed + lookahead compressed
    239          if ((longest_match + 1) < lookahead_match) {
    240             // uncompressed byte
    241             uncomp_buf[uncomp_idx] = in[bytes_proc];
    242             uncomp_idx++;
    243             PUT_BIT(bit_buf, bit_idx, 1);
    244             bytes_proc++;
    245             longest_match = lookahead_match;
    246             offset = lookahead_offset;
    247             bit_idx++;
    248             lookback_push(lookbacks, in[bytes_proc], bytes_proc);
    249          }
    250          // first byte already pushed above
    251          for (int i = 1; i < longest_match; i++) {
    252             lookback_push(lookbacks, in[bytes_proc + i], bytes_proc + i);
    253          }
    254          // compressed block
    255          comp_buf[comp_idx] = (((longest_match - 3) & 0x0F) << 4) |
    256                               (((offset - 1) >> 8) & 0x0F);
    257          comp_buf[comp_idx + 1] = (offset - 1) & 0xFF;
    258          comp_idx += 2;
    259          PUT_BIT(bit_buf, bit_idx, 0);
    260          bytes_proc += longest_match;
    261       } else {
    262          // uncompressed byte
    263          uncomp_buf[uncomp_idx] = in[bytes_proc];
    264          uncomp_idx++;
    265          PUT_BIT(bit_buf, bit_idx, 1);
    266          bytes_proc++;
    267       }
    268       bit_idx++;
    269    }
    270 
    271    // compute final sizes and offsets
    272    // +7 so int division accounts for all bits
    273    bit_length = ((bit_idx + 7) / 8);
    274    // compressed data after control bits and aligned to 4-byte boundary
    275    comp_offset = ALIGN(MIO0_HEADER_LENGTH + bit_length, 4);
    276    uncomp_offset = comp_offset + comp_idx;
    277    bytes_written = uncomp_offset + uncomp_idx;
    278 
    279    // output header
    280    memcpy(out, "MIO0", 4);
    281    write_u32_be(&out[4], length);
    282    write_u32_be(&out[8], comp_offset);
    283    write_u32_be(&out[12], uncomp_offset);
    284    // output data
    285    memcpy(&out[MIO0_HEADER_LENGTH], bit_buf, bit_length);
    286    memcpy(&out[comp_offset], comp_buf, comp_idx);
    287    memcpy(&out[uncomp_offset], uncomp_buf, uncomp_idx);
    288 
    289    // free allocated buffers
    290    free(bit_buf);
    291    free(comp_buf);
    292    free(uncomp_buf);
    293    lookback_free(lookbacks);
    294 
    295    return bytes_written;
    296 }
    297 
    298 static FILE *mio0_open_out_file(const char *out_file) {
    299    if (strcmp(out_file, "-") == 0) {
    300 #if defined(_WIN32) || defined(_WIN64)
    301       _setmode(_fileno(stdout), _O_BINARY);
    302 #endif
    303       return stdout;
    304    } else {
    305       return fopen(out_file, "wb");
    306    }
    307 }
    308 
    309 int mio0_decode_file(const char *in_file, unsigned long offset, const char *out_file)
    310 {
    311    mio0_header_t head;
    312    FILE *in;
    313    FILE *out;
    314    unsigned char *in_buf = NULL;
    315    unsigned char *out_buf = NULL;
    316    long file_size;
    317    int ret_val = 0;
    318    size_t bytes_read;
    319    int bytes_decoded;
    320    int bytes_written;
    321    int valid;
    322 
    323    in = fopen(in_file, "rb");
    324    if (in == NULL) {
    325       return 1;
    326    }
    327 
    328    // allocate buffer to read from offset to end of file
    329    fseek(in, 0, SEEK_END);
    330    file_size = ftell(in);
    331    in_buf = malloc(file_size - offset);
    332    fseek(in, offset, SEEK_SET);
    333 
    334    // read bytes
    335    bytes_read = fread(in_buf, 1, file_size - offset, in);
    336    if (bytes_read != file_size - offset) {
    337       ret_val = 2;
    338       goto free_all;
    339    }
    340 
    341    // verify header
    342    valid = mio0_decode_header(in_buf, &head);
    343    if (!valid) {
    344       ret_val = 3;
    345       goto free_all;
    346    }
    347    out_buf = malloc(head.dest_size);
    348 
    349    // decompress MIO0 encoded data
    350    bytes_decoded = mio0_decode(in_buf, out_buf, NULL);
    351    if (bytes_decoded < 0) {
    352       ret_val = 3;
    353       goto free_all;
    354    }
    355 
    356    // open output file
    357    out = mio0_open_out_file(out_file);
    358    if (out == NULL) {
    359       ret_val = 4;
    360       goto free_all;
    361    }
    362 
    363    // write data to file
    364    bytes_written = fwrite(out_buf, 1, bytes_decoded, out);
    365    if (bytes_written != bytes_decoded) {
    366       ret_val = 5;
    367    }
    368 
    369    // clean up
    370    if (out != stdout) {
    371       fclose(out);
    372    }
    373 free_all:
    374    if (out_buf) {
    375       free(out_buf);
    376    }
    377    if (in_buf) {
    378       free(in_buf);
    379    }
    380    fclose(in);
    381 
    382    return ret_val;
    383 }
    384 
    385 int mio0_encode_file(const char *in_file, const char *out_file)
    386 {
    387    FILE *in;
    388    FILE *out;
    389    unsigned char *in_buf = NULL;
    390    unsigned char *out_buf = NULL;
    391    size_t file_size;
    392    size_t bytes_read;
    393    int bytes_encoded;
    394    int bytes_written;
    395    int ret_val = 0;
    396 
    397    in = fopen(in_file, "rb");
    398    if (in == NULL) {
    399       return 1;
    400    }
    401 
    402    // allocate buffer to read entire contents of files
    403    fseek(in, 0, SEEK_END);
    404    file_size = ftell(in);
    405    fseek(in, 0, SEEK_SET);
    406    in_buf = malloc(file_size);
    407 
    408    // read bytes
    409    bytes_read = fread(in_buf, 1, file_size, in);
    410    if (bytes_read != file_size) {
    411       ret_val = 2;
    412       goto free_all;
    413    }
    414 
    415    // allocate worst case length
    416    out_buf = malloc(MIO0_HEADER_LENGTH + ((file_size+7)/8) + file_size);
    417 
    418    // compress data in MIO0 format
    419    bytes_encoded = mio0_encode(in_buf, file_size, out_buf);
    420 
    421    // open output file
    422    out = mio0_open_out_file(out_file);
    423    if (out == NULL) {
    424       ret_val = 4;
    425       goto free_all;
    426    }
    427 
    428    // write data to file
    429    bytes_written = fwrite(out_buf, 1, bytes_encoded, out);
    430    if (bytes_written != bytes_encoded) {
    431       ret_val = 5;
    432    }
    433 
    434    // clean up
    435    if (out != stdout) {
    436       fclose(out);
    437    }
    438 free_all:
    439    if (out_buf) {
    440       free(out_buf);
    441    }
    442    if (in_buf) {
    443       free(in_buf);
    444    }
    445    fclose(in);
    446 
    447    return ret_val;
    448 }
    449 
    450 // mio0 standalone executable
    451 #ifdef MIO0_STANDALONE
    452 typedef struct
    453 {
    454    char *in_filename;
    455    char *out_filename;
    456    unsigned int offset;
    457    int compress;
    458 } arg_config;
    459 
    460 static arg_config default_config =
    461 {
    462    NULL,
    463    NULL,
    464    0,
    465    1
    466 };
    467 
    468 static void print_usage(void)
    469 {
    470    ERROR("Usage: mio0 [-c / -d] [-o OFFSET] FILE [OUTPUT]\n"
    471          "\n"
    472          "mio0 v" MIO0_VERSION ": MIO0 compression and decompression tool\n"
    473          "\n"
    474          "Optional arguments:\n"
    475          " -c           compress raw data into MIO0 (default: compress)\n"
    476          " -d           decompress MIO0 into raw data\n"
    477          " -o OFFSET    starting offset in FILE (default: 0)\n"
    478          "\n"
    479          "File arguments:\n"
    480          " FILE        input file\n"
    481          " [OUTPUT]    output file (default: FILE.out), \"-\" for stdout\n");
    482    exit(1);
    483 }
    484 
    485 // parse command line arguments
    486 static void parse_arguments(int argc, char *argv[], arg_config *config)
    487 {
    488    int i;
    489    int file_count = 0;
    490    if (argc < 2) {
    491       print_usage();
    492       exit(1);
    493    }
    494    for (i = 1; i < argc; i++) {
    495       if (argv[i][0] == '-' && argv[i][1] != '\0') {
    496          switch (argv[i][1]) {
    497             case 'c':
    498                config->compress = 1;
    499                break;
    500             case 'd':
    501                config->compress = 0;
    502                break;
    503             case 'o':
    504                if (++i >= argc) {
    505                   print_usage();
    506                }
    507                config->offset = strtoul(argv[i], NULL, 0);
    508                break;
    509             default:
    510                print_usage();
    511                break;
    512          }
    513       } else {
    514          switch (file_count) {
    515             case 0:
    516                config->in_filename = argv[i];
    517                break;
    518             case 1:
    519                config->out_filename = argv[i];
    520                break;
    521             default: // too many
    522                print_usage();
    523                break;
    524          }
    525          file_count++;
    526       }
    527    }
    528    if (file_count < 1) {
    529       print_usage();
    530    }
    531 }
    532 
    533 int main(int argc, char *argv[])
    534 {
    535    char out_filename[FILENAME_MAX];
    536    arg_config config;
    537    int ret_val;
    538 
    539    // get configuration from arguments
    540    config = default_config;
    541    parse_arguments(argc, argv, &config);
    542    if (config.out_filename == NULL) {
    543       config.out_filename = out_filename;
    544       sprintf(config.out_filename, "%s.out", config.in_filename);
    545    }
    546 
    547    // operation
    548    if (config.compress) {
    549       ret_val = mio0_encode_file(config.in_filename, config.out_filename);
    550    } else {
    551       ret_val = mio0_decode_file(config.in_filename, config.offset, config.out_filename);
    552    }
    553 
    554    switch (ret_val) {
    555       case 1:
    556          ERROR("Error opening input file \"%s\"\n", config.in_filename);
    557          break;
    558       case 2:
    559          ERROR("Error reading from input file \"%s\"\n", config.in_filename);
    560          break;
    561       case 3:
    562          ERROR("Error decoding MIO0 data. Wrong offset (0x%X)?\n", config.offset);
    563          break;
    564       case 4:
    565          ERROR("Error opening output file \"%s\"\n", config.out_filename);
    566          break;
    567       case 5:
    568          ERROR("Error writing bytes to output file \"%s\"\n", config.out_filename);
    569          break;
    570    }
    571 
    572    return ret_val;
    573 }
    574 #endif // MIO0_STANDALONE
    575