sm64

A Super Mario 64 decompilation
Log | Files | Refs | README | LICENSE

extract_data_for_mio.c (10865B)


      1 #include <string.h>
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <stdint.h>
      5 #include <stdbool.h>
      6 
      7 #define EI_DATA 5
      8 #define EI_NIDENT 16
      9 
     10 #define STT_NOTYPE  0
     11 #define STT_OBJECT  1
     12 #define STT_FUNC    2
     13 #define STT_SECTION 3
     14 #define STT_FILE    4
     15 #define STT_COMMON  5
     16 #define STT_TLS     6
     17 
     18 #define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf)
     19 
     20 typedef uint32_t Elf32_Addr;
     21 typedef uint32_t Elf32_Off;
     22 
     23 typedef struct {
     24     unsigned char e_ident[EI_NIDENT];
     25     uint16_t      e_type;
     26     uint16_t      e_machine;
     27     uint32_t      e_version;
     28     Elf32_Addr    e_entry;
     29     Elf32_Off     e_phoff;
     30     Elf32_Off     e_shoff;
     31     uint32_t      e_flags;
     32     uint16_t      e_ehsize;
     33     uint16_t      e_phentsize;
     34     uint16_t      e_phnum;
     35     uint16_t      e_shentsize;
     36     uint16_t      e_shnum;
     37     uint16_t      e_shstrndx;
     38 } Elf32_Ehdr;
     39 
     40 typedef struct {
     41     uint32_t   sh_name;
     42     uint32_t   sh_type;
     43     uint32_t   sh_flags;
     44     Elf32_Addr sh_addr;
     45     Elf32_Off  sh_offset;
     46     uint32_t   sh_size;
     47     uint32_t   sh_link;
     48     uint32_t   sh_info;
     49     uint32_t   sh_addralign;
     50     uint32_t   sh_entsize;
     51 } Elf32_Shdr;
     52 
     53 typedef struct {
     54     uint32_t      st_name;
     55     Elf32_Addr    st_value;
     56     uint32_t      st_size;
     57     unsigned char st_info;
     58     unsigned char st_other;
     59     uint16_t      st_shndx;
     60 } Elf32_Sym;
     61 
     62 typedef struct {
     63     uint16_t magic; //To verify validity of the table
     64     uint16_t vstamp; //Version stamp
     65     uint32_t ilineMax; //Number of line number entries
     66     uint32_t cbLine; //Number of bytes for line number entries
     67     uint32_t cbLineOffset; //Index to start of line numbers
     68     uint32_t idnMax; //Max index into dense numbers
     69     uint32_t cbDnOffset; //Index to start dense numbers
     70     uint32_t ipdMax; //Number of procedures
     71     uint32_t cbPdOffset; //Index to procedure descriptors
     72     uint32_t isymMax; //Number of local symbols
     73     uint32_t cbSymOffset; //Index to start of local symbols
     74     uint32_t ioptMax; //Maximum index into optimization entries
     75     uint32_t cbOptOffset; //Index to start of optimization entries
     76     uint32_t iauxMax; //Number of auxiliary symbols
     77     uint32_t cbAuxOffset; //Index to the start of auxiliary symbols
     78     uint32_t issMax; //Max index into local strings
     79     uint32_t cbSsOffset; //Index to start of local strings
     80     uint32_t issExtMax; //Max index into external strings
     81     uint32_t cbSsExtOffset; //Index to the start of external strings
     82     uint32_t ifdMax; //Number of file descriptors
     83     uint32_t cbFdOffset; //Index to file descriptor
     84     uint32_t crfd; //Number of relative file descriptors
     85     uint32_t cbRfdOffset; //Index to relative file descriptors
     86     uint32_t iextMax; //Maximum index into external symbols
     87     uint32_t cbExtOffset; //Index to the start of external symbols.
     88 } SymbolicHeader;
     89 
     90 typedef struct {
     91     uint32_t adr; // Memory address of start of file
     92     uint32_t rss; // Source file name
     93     uint32_t issBase; // Start of local strings
     94     uint32_t cbSs; // Number of bytes in local strings
     95     uint32_t isymBase; // Start of local symbol entries
     96     uint32_t csym; // Count of local symbol entries
     97     uint32_t ilineBase; // Start of line number entries
     98     uint32_t cline; // Count of line number entries
     99     uint32_t ioptBase; // Start of optimization symbol entries
    100     uint32_t copt; // Count of optimization symbol entries
    101     uint16_t ipdFirst; // Start of procedure descriptor table
    102     uint16_t cpd; // Count of procedures descriptors
    103     uint32_t iauxBase; // Start of auxiliary symbol entries
    104     uint32_t caux; // Count of auxiliary symbol entries
    105     uint32_t rfdBase; // Index into relative file descriptors
    106     uint32_t crfd; // Relative file descriptor count
    107     uint32_t flags;
    108     uint32_t cbLineOffset; // Byte offset from header or file ln's
    109     uint32_t cbLine;
    110 } FileDescriptorTable;
    111 
    112 typedef struct {
    113     uint32_t iss;
    114     uint32_t value;
    115     uint32_t st_sc_index;
    116 } LocalSymbolsEntry;
    117 
    118 typedef enum {
    119     stNil,
    120     stGlobal,
    121     stStatic,
    122     stParam,
    123     stLocal,
    124     stLabel,
    125     stProc,
    126     stBlock,
    127     stEnd,
    128     stMember,
    129     stTypedef,
    130     stFile,
    131     stStaticProc,
    132     stConstant
    133 } StConstants;
    134 
    135 uint32_t u32be(uint32_t val) {
    136 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    137     return __builtin_bswap32(val);
    138 #else
    139     return val;
    140 #endif
    141 }
    142 
    143 uint16_t u16be(uint16_t val) {
    144 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    145     return __builtin_bswap16(val);
    146 #else
    147     return val;
    148 #endif    
    149 }
    150 
    151 static bool elf_get_section_range(uint8_t *file, const char *searched_name, uint32_t *address, uint32_t *offset, uint32_t *size, uint32_t *section_index) {
    152     Elf32_Ehdr *ehdr = (Elf32_Ehdr *)file;
    153 
    154     for (int i = 0; i < u16be(ehdr->e_shnum); i++) {
    155         if (memcmp("\x7f" "ELF", ehdr->e_ident, 4) != 0) {
    156             fprintf(stderr, "Missing ELF magic\n");
    157             exit(1);
    158         }
    159         if (ehdr->e_ident[EI_DATA] != 2) {
    160             fprintf(stderr, "ELF file is not big-endian\n");
    161             exit(1);
    162         }
    163 
    164         Elf32_Shdr *shdr = (Elf32_Shdr *)(file + u32be(ehdr->e_shoff) + i * u16be(ehdr->e_shentsize));
    165         if (u16be(ehdr->e_shstrndx) >= u16be(ehdr->e_shnum)) {
    166             fprintf(stderr, "Invalid ELF file\n");
    167             exit(1);
    168         }
    169         Elf32_Shdr *str_shdr = (Elf32_Shdr *)(file + u32be(ehdr->e_shoff) + u16be(ehdr->e_shstrndx) * u16be(ehdr->e_shentsize));
    170         char *name = (char *)(file + u32be(str_shdr->sh_offset) + u32be(shdr->sh_name));
    171         if (memcmp(name, searched_name, strlen(searched_name)) == 0) {
    172             *address = u32be(shdr->sh_addr);
    173             *offset = u32be(shdr->sh_offset);
    174             *size = u32be(shdr->sh_size);
    175             *section_index = i;
    176             return true;
    177         }
    178     }
    179     return false;
    180 }
    181 
    182 int main(int argc, char *argv[]) {
    183     if (argc < 3) {
    184         fprintf(stderr, "Usage: %s INFILE OUTFILE\n", argv[0]);
    185         return 1;
    186     }
    187 
    188     FILE *in = fopen(argv[1], "rb");
    189     if (in == NULL) {
    190         perror("fopen couldn't open input file");
    191         exit(1);
    192     }
    193     fseek(in, 0, SEEK_END);
    194     size_t file_size = ftell(in);
    195     fseek(in, 0, SEEK_SET);
    196     uint8_t *file = malloc(file_size);
    197     if (fread(file, 1, file_size, in) != file_size) {
    198         fclose(in);
    199         fprintf(stderr, "Failed to read file: %s\n", argv[1]);
    200         exit(1);
    201     }
    202     fclose(in);
    203 
    204     uint32_t data_address, data_offset, data_size, data_index;
    205     if (!elf_get_section_range(file, ".data", &data_address, &data_offset, &data_size, &data_index)) {
    206         fprintf(stderr, "section .data not found\n");
    207         exit(1);
    208     }
    209 
    210     uint32_t rodata_address, rodata_offset, rodata_size, rodata_index;
    211     if (elf_get_section_range(file, ".rodata", &rodata_address, &rodata_offset, &rodata_size, &rodata_index)) {
    212         fprintf(stderr, ".rodata section found, please put everything in .data instead (non-const variables)\n");
    213         exit(1);
    214     }
    215 
    216     uint32_t symtab_address, symtab_offset, symtab_size, symtab_index;
    217     if (!elf_get_section_range(file, ".symtab", &symtab_address, &symtab_offset, &symtab_size, &symtab_index)) {
    218         fprintf(stderr, "section .symtab not found\n");
    219         exit(1);
    220     }
    221 
    222     uint32_t strtab_address, strtab_offset, strtab_size, strtab_index;
    223     if (!elf_get_section_range(file, ".strtab", &strtab_address, &strtab_offset, &strtab_size, &strtab_index)) {
    224         fprintf(stderr, "section .strtab not found\n");
    225         exit(1);
    226     }
    227 
    228     // IDO might pad the section to the nearest 16 byte boundary,
    229     // but the mio0 data should not include that. Therefore find
    230     // the "real" end by finding where the last symbol ends.
    231     uint32_t last_symbol_end = 0;
    232 
    233     for (uint32_t i = 0; i < symtab_size / sizeof(Elf32_Sym); i++) {
    234         Elf32_Sym *symbol = (Elf32_Sym *)(file + symtab_offset + i * sizeof(Elf32_Sym));
    235 #if DEBUG
    236         const char *name = "(null)";
    237         if (symbol->st_name != 0U) {
    238             name = (const char*)file + strtab_offset + u32be(symbol->st_name);
    239         }
    240         printf("%08x\t%08x\t%02x\t%02x\t%02x\t%s\n", u32be(symbol->st_value), u32be(symbol->st_size), symbol->st_info, symbol->st_other, u16be(symbol->st_shndx), name);
    241 #endif
    242         if (ELF_ST_TYPE(symbol->st_info) == STT_OBJECT && u16be(symbol->st_shndx) == data_index) {
    243             uint32_t symbol_end = u32be(symbol->st_value) + u32be(symbol->st_size);
    244             if (symbol_end > last_symbol_end) {
    245                 last_symbol_end = symbol_end;
    246             }
    247         }
    248     }
    249 
    250     uint32_t mdebug_address, mdebug_offset, mdebug_size, mdebug_index;
    251     if (elf_get_section_range(file, ".mdebug", &mdebug_address, &mdebug_offset, &mdebug_size, &mdebug_index)) {
    252         SymbolicHeader *symbolic_header = (SymbolicHeader *)(file + mdebug_offset);
    253 
    254         for (uint32_t i = 0; i < u32be(symbolic_header->ifdMax); i++) {
    255             FileDescriptorTable *fdt = (FileDescriptorTable *)(file + u32be(symbolic_header->cbFdOffset) + i * sizeof(FileDescriptorTable));
    256 
    257             for (uint32_t j = 0; j < u32be(fdt->csym); j++) {
    258                 LocalSymbolsEntry lse;
    259                 memcpy(&lse, file + u32be(symbolic_header->cbSymOffset) + (u32be(fdt->isymBase) + j) * sizeof(LocalSymbolsEntry), sizeof(LocalSymbolsEntry));
    260 
    261                 uint32_t value = u32be(lse.value);
    262                 uint32_t st_sc_index = u32be(lse.st_sc_index);
    263                 uint32_t st = (st_sc_index >> 26);
    264 #ifdef DEBUG
    265                 uint32_t sc = (st_sc_index >> 21) & 0x1f;
    266                 uint32_t index = st_sc_index & 0xfffff;
    267                 uint32_t iss = u32be(lse.iss);
    268                 const char *symbol_name = file + u32be(symbolic_header->cbSsOffset) + iss;
    269                 printf("%s %08x\n", symbol_name, value);
    270 #endif
    271 
    272                 if (st == stStatic || st == stGlobal) {
    273                     // Right now just assume length 8 since it's quite much work to extract the real size
    274                     uint32_t symbol_end = value + 8;
    275                     if (symbol_end > last_symbol_end) {
    276                         last_symbol_end = symbol_end;
    277                     }
    278                 }
    279             }
    280         }
    281     }
    282 
    283 #ifdef DEBUG
    284     printf("Last symbol end: %08x\n", last_symbol_end);
    285 #endif
    286 
    287     size_t new_size = last_symbol_end - data_address;
    288     if (new_size + 16 <= data_size) {
    289         // There seems to be more than 16 bytes padding or non-identified data, so abort and take the original size
    290         new_size = data_size;
    291     } else {
    292         // Make sure we don't cut off non-zero bytes
    293         for (size_t i = new_size; i < data_size; i++) {
    294             if (file[data_offset + i] != 0) {
    295                 // Must be some symbol missing, so abort and take the original size
    296                 new_size = data_size;
    297                 break;
    298             }
    299         }
    300     }
    301 
    302     FILE *out = fopen(argv[2], "wb");
    303     fwrite(file + data_offset, 1, new_size, out);
    304     fclose(out);
    305 
    306     free(file);
    307     return 0;
    308 }