extract_data_for_mio.c (10865B)
/*
 * Extracts the contents of the .data section of a big-endian 32-bit ELF file
 * into a raw binary file.
 *
 * Usage: extract_data_for_mio INFILE OUTFILE
 *
 * The tool may drop a short run of trailing zero bytes from the section: the
 * end of the last known symbol (from .symtab and, when present, .mdebug) is
 * taken as the "real" end of the data, provided fewer than 16 bytes would be
 * removed and all of them are zero. Otherwise the whole section is written.
 */
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>

#define EI_DATA 5
#define EI_NIDENT 16

/* Value of e_ident[EI_DATA] that this tool accepts (big-endian data). */
#define ELFDATA2MSB 2

#define STT_NOTYPE 0
#define STT_OBJECT 1
#define STT_FUNC 2
#define STT_SECTION 3
#define STT_FILE 4
#define STT_COMMON 5
#define STT_TLS 6

/* Symbol type is stored in the low four bits of st_info. */
#define ELF_ST_TYPE(x) (((unsigned int) (x)) & 0xf)

/* Upper bound (exclusive) on the number of trailing bytes that may be trimmed. */
#define MAX_TRIMMED_PADDING 16

/* Size assumed for .mdebug symbols, whose real size is not extracted. */
#define ASSUMED_MDEBUG_SYMBOL_SIZE 8

typedef uint32_t Elf32_Addr;
typedef uint32_t Elf32_Off;

/* ELF file header. All multi-byte fields are stored big-endian in the file. */
typedef struct {
    unsigned char e_ident[EI_NIDENT];
    uint16_t e_type;
    uint16_t e_machine;
    uint32_t e_version;
    Elf32_Addr e_entry;
    Elf32_Off e_phoff;
    Elf32_Off e_shoff;
    uint32_t e_flags;
    uint16_t e_ehsize;
    uint16_t e_phentsize;
    uint16_t e_phnum;
    uint16_t e_shentsize;
    uint16_t e_shnum;
    uint16_t e_shstrndx;
} Elf32_Ehdr;

/* ELF section header. */
typedef struct {
    uint32_t sh_name;
    uint32_t sh_type;
    uint32_t sh_flags;
    Elf32_Addr sh_addr;
    Elf32_Off sh_offset;
    uint32_t sh_size;
    uint32_t sh_link;
    uint32_t sh_info;
    uint32_t sh_addralign;
    uint32_t sh_entsize;
} Elf32_Shdr;

/* ELF symbol table entry. */
typedef struct {
    uint32_t st_name;
    Elf32_Addr st_value;
    uint32_t st_size;
    unsigned char st_info;
    unsigned char st_other;
    uint16_t st_shndx;
} Elf32_Sym;

/* Header found at the start of the .mdebug section. */
typedef struct {
    uint16_t magic;         // To verify validity of the table
    uint16_t vstamp;        // Version stamp
    uint32_t ilineMax;      // Number of line number entries
    uint32_t cbLine;        // Number of bytes for line number entries
    uint32_t cbLineOffset;  // Index to start of line numbers
    uint32_t idnMax;        // Max index into dense numbers
    uint32_t cbDnOffset;    // Index to start dense numbers
    uint32_t ipdMax;        // Number of procedures
    uint32_t cbPdOffset;    // Index to procedure descriptors
    uint32_t isymMax;       // Number of local symbols
    uint32_t cbSymOffset;   // Index to start of local symbols
    uint32_t ioptMax;       // Maximum index into optimization entries
    uint32_t cbOptOffset;   // Index to start of optimization entries
    uint32_t iauxMax;       // Number of auxiliary symbols
    uint32_t cbAuxOffset;   // Index to the start of auxiliary symbols
    uint32_t issMax;        // Max index into local strings
    uint32_t cbSsOffset;    // Index to start of local strings
    uint32_t issExtMax;     // Max index into external strings
    uint32_t cbSsExtOffset; // Index to the start of external strings
    uint32_t ifdMax;        // Number of file descriptors
    uint32_t cbFdOffset;    // Index to file descriptor
    uint32_t crfd;          // Number of relative file descriptors
    uint32_t cbRfdOffset;   // Index to relative file descriptors
    uint32_t iextMax;       // Maximum index into external symbols
    uint32_t cbExtOffset;   // Index to the start of external symbols.
} SymbolicHeader;

/* Per-source-file descriptor referenced by SymbolicHeader.cbFdOffset. */
typedef struct {
    uint32_t adr;          // Memory address of start of file
    uint32_t rss;          // Source file name
    uint32_t issBase;      // Start of local strings
    uint32_t cbSs;         // Number of bytes in local strings
    uint32_t isymBase;     // Start of local symbol entries
    uint32_t csym;         // Count of local symbol entries
    uint32_t ilineBase;    // Start of line number entries
    uint32_t cline;        // Count of line number entries
    uint32_t ioptBase;     // Start of optimization symbol entries
    uint32_t copt;         // Count of optimization symbol entries
    uint16_t ipdFirst;     // Start of procedure descriptor table
    uint16_t cpd;          // Count of procedures descriptors
    uint32_t iauxBase;     // Start of auxiliary symbol entries
    uint32_t caux;         // Count of auxiliary symbol entries
    uint32_t rfdBase;      // Index into relative file descriptors
    uint32_t crfd;         // Relative file descriptor count
    uint32_t flags;
    uint32_t cbLineOffset; // Byte offset from header or file ln's
    uint32_t cbLine;
} FileDescriptorTable;

/*
 * Local symbol entry. After byte-swapping, st_sc_index packs three fields:
 * st in bits 26..31, sc in bits 21..25 and index in bits 0..19.
 */
typedef struct {
    uint32_t iss;
    uint32_t value;
    uint32_t st_sc_index;
} LocalSymbolsEntry;

/* Values of the "st" field of a local symbol entry. */
typedef enum {
    stNil,
    stGlobal,
    stStatic,
    stParam,
    stLocal,
    stLabel,
    stProc,
    stBlock,
    stEnd,
    stMember,
    stTypedef,
    stFile,
    stStaticProc,
    stConstant
} StConstants;

/*
 * Converts a 32-bit value whose in-memory bytes are big-endian into the
 * host representation. Decoding byte by byte works on any host endianness
 * and needs no compiler-specific builtins.
 */
uint32_t u32be(uint32_t val) {
    uint8_t bytes[sizeof(val)];
    memcpy(bytes, &val, sizeof(val));
    return ((uint32_t)bytes[0] << 24) | ((uint32_t)bytes[1] << 16) |
           ((uint32_t)bytes[2] << 8) | (uint32_t)bytes[3];
}

/* 16-bit counterpart of u32be(). */
uint16_t u16be(uint16_t val) {
    uint8_t bytes[sizeof(val)];
    memcpy(bytes, &val, sizeof(val));
    return (uint16_t)(((unsigned int)bytes[0] << 8) | (unsigned int)bytes[1]);
}

/* Prints a message to stderr and terminates with exit status 1. */
static void die(const char *message) {
    fprintf(stderr, "%s\n", message);
    exit(1);
}

/* Returns true when [offset, offset + size) lies entirely inside the file. */
static bool range_in_file(uint64_t offset, uint64_t size, size_t file_size) {
    return offset <= file_size && size <= file_size - offset;
}

/*
 * Reads the whole file at path into a freshly allocated buffer.
 * Stores the length in *size_out; the caller owns (and frees) the buffer.
 * Exits with status 1 on any failure.
 */
static uint8_t *read_file(const char *path, size_t *size_out) {
    FILE *in = fopen(path, "rb");
    if (in == NULL) {
        perror("fopen couldn't open input file");
        exit(1);
    }

    long length = -1;
    if (fseek(in, 0, SEEK_END) == 0) {
        length = ftell(in);
    }
    if (length < 0 || fseek(in, 0, SEEK_SET) != 0) {
        fclose(in);
        fprintf(stderr, "Failed to read file: %s\n", path);
        exit(1);
    }

    size_t size = (size_t)length;
    // malloc(0) may return NULL, so always request at least one byte.
    uint8_t *data = malloc(size > 0 ? size : 1);
    if (data == NULL) {
        fclose(in);
        fprintf(stderr, "Out of memory while reading file: %s\n", path);
        exit(1);
    }
    if (fread(data, 1, size, in) != size) {
        fclose(in);
        fprintf(stderr, "Failed to read file: %s\n", path);
        exit(1);
    }
    fclose(in);

    *size_out = size;
    return data;
}

/*
 * Validates everything elf_get_section_range() relies on: the ELF magic,
 * big-endian data encoding, and that the section header table, the section
 * name string table and every section's name offset lie inside the file.
 * Exits with status 1 when the file is unusable.
 */
static void elf_validate(const uint8_t *file, size_t file_size) {
    if (file_size < 4 || memcmp("\x7f" "ELF", file, 4) != 0) {
        die("Missing ELF magic");
    }
    if (file_size < sizeof(Elf32_Ehdr)) {
        die("Invalid ELF file");
    }

    Elf32_Ehdr ehdr;
    memcpy(&ehdr, file, sizeof(ehdr));
    if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
        die("ELF file is not big-endian");
    }

    const uint16_t shnum = u16be(ehdr.e_shnum);
    if (shnum == 0) {
        // No sections at all: lookups simply report "not found".
        return;
    }

    const uint16_t shentsize = u16be(ehdr.e_shentsize);
    const uint16_t shstrndx = u16be(ehdr.e_shstrndx);
    const uint32_t shoff = u32be(ehdr.e_shoff);
    if (shstrndx >= shnum || shentsize < sizeof(Elf32_Shdr) ||
        !range_in_file(shoff, (uint64_t)shnum * shentsize, file_size)) {
        die("Invalid ELF file");
    }

    Elf32_Shdr str_shdr;
    memcpy(&str_shdr, file + shoff + (size_t)shstrndx * shentsize, sizeof(str_shdr));
    const uint32_t str_offset = u32be(str_shdr.sh_offset);
    const uint32_t str_size = u32be(str_shdr.sh_size);
    // The table must end in a NUL so that every name inside it is terminated.
    if (str_size == 0 || !range_in_file(str_offset, str_size, file_size) ||
        file[(size_t)str_offset + str_size - 1] != '\0') {
        die("Invalid ELF file");
    }

    for (uint32_t i = 0; i < shnum; i++) {
        Elf32_Shdr shdr;
        memcpy(&shdr, file + shoff + (size_t)i * shentsize, sizeof(shdr));
        if (u32be(shdr.sh_name) >= str_size) {
            die("Invalid ELF file");
        }
    }
}

/*
 * Finds the first section whose name starts with searched_name.
 *
 * file must already have passed elf_validate(). On success the section's
 * address, file offset, size and index are stored through the out pointers
 * and true is returned; otherwise the out parameters are left untouched and
 * false is returned.
 *
 * Note that the comparison is a prefix match, so ".data" also matches a
 * section called e.g. ".data.foo" if it comes first.
 */
static bool elf_get_section_range(uint8_t *file, const char *searched_name, uint32_t *address, uint32_t *offset, uint32_t *size, uint32_t *section_index) {
    Elf32_Ehdr ehdr;
    memcpy(&ehdr, file, sizeof(ehdr));

    const uint16_t shnum = u16be(ehdr.e_shnum);
    if (shnum == 0) {
        return false;
    }
    const uint16_t shentsize = u16be(ehdr.e_shentsize);
    const uint8_t *sh_table = file + u32be(ehdr.e_shoff);

    Elf32_Shdr str_shdr;
    memcpy(&str_shdr, sh_table + (size_t)u16be(ehdr.e_shstrndx) * shentsize, sizeof(str_shdr));
    const char *names = (const char *)(file + u32be(str_shdr.sh_offset));
    const size_t searched_len = strlen(searched_name);

    for (uint32_t i = 0; i < shnum; i++) {
        Elf32_Shdr shdr;
        memcpy(&shdr, sh_table + (size_t)i * shentsize, sizeof(shdr));

        // strncmp stops at the name's terminator, so a short name is never over-read.
        if (strncmp(names + u32be(shdr.sh_name), searched_name, searched_len) == 0) {
            *address = u32be(shdr.sh_addr);
            *offset = u32be(shdr.sh_offset);
            *size = u32be(shdr.sh_size);
            *section_index = i;
            return true;
        }
    }
    return false;
}

int main(int argc, char *argv[]) {
    if (argc < 3) {
        fprintf(stderr, "Usage: %s INFILE OUTFILE\n", argv[0]);
        return 1;
    }

    size_t file_size = 0;
    uint8_t *file = read_file(argv[1], &file_size);
    elf_validate(file, file_size);

    uint32_t data_address, data_offset, data_size, data_index;
    if (!elf_get_section_range(file, ".data", &data_address, &data_offset, &data_size, &data_index)) {
        fprintf(stderr, "section .data not found\n");
        exit(1);
    }

    uint32_t rodata_address, rodata_offset, rodata_size, rodata_index;
    if (elf_get_section_range(file, ".rodata", &rodata_address, &rodata_offset, &rodata_size, &rodata_index)) {
        fprintf(stderr, ".rodata section found, please put everything in .data instead (non-const variables)\n");
        exit(1);
    }

    uint32_t symtab_address, symtab_offset, symtab_size, symtab_index;
    if (!elf_get_section_range(file, ".symtab", &symtab_address, &symtab_offset, &symtab_size, &symtab_index)) {
        fprintf(stderr, "section .symtab not found\n");
        exit(1);
    }

    uint32_t strtab_address, strtab_offset, strtab_size, strtab_index;
    if (!elf_get_section_range(file, ".strtab", &strtab_address, &strtab_offset, &strtab_size, &strtab_index)) {
        fprintf(stderr, "section .strtab not found\n");
        exit(1);
    }

    // Everything below indexes into these sections, so reject truncated files.
    if (!range_in_file(data_offset, data_size, file_size) ||
        !range_in_file(symtab_offset, symtab_size, file_size) ||
        !range_in_file(strtab_offset, strtab_size, file_size)) {
        die("Invalid ELF file");
    }

    // IDO might pad the section to the nearest 16 byte boundary,
    // but the mio0 data should not include that. Therefore find
    // the "real" end by finding where the last symbol ends.
    uint32_t last_symbol_end = 0;

    const uint32_t symbol_count = symtab_size / sizeof(Elf32_Sym);
    for (uint32_t i = 0; i < symbol_count; i++) {
        Elf32_Sym symbol;
        memcpy(&symbol, file + symtab_offset + (size_t)i * sizeof(Elf32_Sym), sizeof(symbol));
#ifdef DEBUG
        const char *name = "(null)";
        if (symbol.st_name != 0U && u32be(symbol.st_name) < strtab_size) {
            name = (const char *)file + strtab_offset + u32be(symbol.st_name);
        }
        printf("%08x\t%08x\t%02x\t%02x\t%02x\t%s\n", (unsigned int)u32be(symbol.st_value), (unsigned int)u32be(symbol.st_size), symbol.st_info, symbol.st_other, u16be(symbol.st_shndx), name);
#endif
        // Only data objects that live in the .data section are relevant.
        if (ELF_ST_TYPE(symbol.st_info) == STT_OBJECT && u16be(symbol.st_shndx) == data_index) {
            uint32_t symbol_end = u32be(symbol.st_value) + u32be(symbol.st_size);
            if (symbol_end > last_symbol_end) {
                last_symbol_end = symbol_end;
            }
        }
    }

    uint32_t mdebug_address, mdebug_offset, mdebug_size, mdebug_index;
    if (elf_get_section_range(file, ".mdebug", &mdebug_address, &mdebug_offset, &mdebug_size, &mdebug_index)) {
        if (mdebug_size < sizeof(SymbolicHeader) || !range_in_file(mdebug_offset, mdebug_size, file_size)) {
            die("Invalid .mdebug section");
        }

        SymbolicHeader symbolic_header;
        memcpy(&symbolic_header, file + mdebug_offset, sizeof(symbolic_header));

        // The table offsets in the symbolic header are used relative to the start of the file.
        const uint32_t fd_count = u32be(symbolic_header.ifdMax);
        const uint32_t fd_offset = u32be(symbolic_header.cbFdOffset);
        const uint32_t sym_offset = u32be(symbolic_header.cbSymOffset);
        if (!range_in_file(fd_offset, (uint64_t)fd_count * sizeof(FileDescriptorTable), file_size)) {
            die("Invalid .mdebug section");
        }

        for (uint32_t i = 0; i < fd_count; i++) {
            FileDescriptorTable fdt;
            memcpy(&fdt, file + fd_offset + (size_t)i * sizeof(FileDescriptorTable), sizeof(fdt));

            const uint32_t sym_base = u32be(fdt.isymBase);
            const uint32_t sym_count = u32be(fdt.csym);
            if (!range_in_file(sym_offset, ((uint64_t)sym_base + sym_count) * sizeof(LocalSymbolsEntry), file_size)) {
                die("Invalid .mdebug section");
            }

            for (uint32_t j = 0; j < sym_count; j++) {
                LocalSymbolsEntry lse;
                memcpy(&lse, file + sym_offset + ((size_t)sym_base + j) * sizeof(LocalSymbolsEntry), sizeof(LocalSymbolsEntry));

                uint32_t value = u32be(lse.value);
                uint32_t st_sc_index = u32be(lse.st_sc_index);
                uint32_t st = (st_sc_index >> 26);
#ifdef DEBUG
                // Local string indices are relative to the owning file's issBase.
                uint64_t name_index = (uint64_t)u32be(fdt.issBase) + u32be(lse.iss);
                uint64_t name_offset = (uint64_t)u32be(symbolic_header.cbSsOffset) + name_index;
                const char *symbol_name = "(invalid)";
                if (name_index < u32be(symbolic_header.issMax) && name_offset < file_size) {
                    symbol_name = (const char *)file + name_offset;
                }
                printf("%s %08x\n", symbol_name, (unsigned int)value);
#endif

                if (st == stStatic || st == stGlobal) {
                    // Right now just assume length 8 since it's quite much work to extract the real size
                    uint32_t symbol_end = value + ASSUMED_MDEBUG_SYMBOL_SIZE;
                    if (symbol_end > last_symbol_end) {
                        last_symbol_end = symbol_end;
                    }
                }
            }
        }
    }

#ifdef DEBUG
    printf("Last symbol end: %08x\n", (unsigned int)last_symbol_end);
#endif

    // Default to the full section. Trim only when the computed end lies inside
    // the section; an end before the section start or past its end (possible
    // because of the assumed .mdebug symbol size) must never reach fwrite.
    size_t new_size = data_size;
    if (last_symbol_end >= data_address) {
        const uint32_t trimmed_size = last_symbol_end - data_address;
        // MAX_TRIMMED_PADDING or more leftover bytes means padding plus
        // non-identified data, so the original size is kept in that case.
        if (trimmed_size <= data_size && data_size - trimmed_size < MAX_TRIMMED_PADDING) {
            // Make sure we don't cut off non-zero bytes
            bool tail_is_zero = true;
            for (size_t i = trimmed_size; i < data_size; i++) {
                if (file[data_offset + i] != 0) {
                    // Must be some symbol missing, so keep the original size
                    tail_is_zero = false;
                    break;
                }
            }
            if (tail_is_zero) {
                new_size = trimmed_size;
            }
        }
    }

    FILE *out = fopen(argv[2], "wb");
    if (out == NULL) {
        perror("fopen couldn't open output file");
        exit(1);
    }
    if (fwrite(file + data_offset, 1, new_size, out) != new_size) {
        fclose(out);
        fprintf(stderr, "Failed to write file: %s\n", argv[2]);
        exit(1);
    }
    // fclose flushes buffered data, so a failure here means a short write.
    if (fclose(out) != 0) {
        fprintf(stderr, "Failed to write file: %s\n", argv[2]);
        exit(1);
    }

    free(file);
    return 0;
}