first-diff.py (9225B)
#!/usr/bin/env python3
"""Find the first difference(s) between the compiled ROM and the baserom.

This part of the script parses the command line, picks the game version,
optionally runs make, loads both ROM images and defines search_map(), which
translates an address into a symbol description using the linker map of the
built ROM.
"""
import os.path
import argparse
from subprocess import check_call

# TODO: -S argument for shifted ROMs

parser = argparse.ArgumentParser(
    description="find the first difference(s) between the compiled ROM and the baserom"
)
versionGroup = parser.add_mutually_exclusive_group()
versionGroup.add_argument(
    "-j",
    "--jp",
    help="use original Japanese version",
    action="store_const",
    const="jp",
    dest="version",
)
versionGroup.add_argument(
    "-u",
    "--us",
    help="use United States version",
    action="store_const",
    const="us",
    dest="version",
)
versionGroup.add_argument(
    "-e",
    "--eu",
    help="use European (PAL) version",
    action="store_const",
    const="eu",
    dest="version",
)
versionGroup.add_argument(
    "-s",
    "--sh",
    help="use Shindou (Rumble) version",
    action="store_const",
    const="sh",
    dest="version",
)
versionGroup.add_argument(
    "-c",
    "--cn",
    help="use iQue (Chinese) version",
    action="store_const",
    const="cn",
    dest="version",
)
parser.add_argument(
    "-m", "--make", help="run make before finding difference(s)", action="store_true"
)
parser.add_argument(
    "--count",
    type=int,
    default=1,
    help="find up to this many instruction difference(s)",
)
parser.add_argument(
    "-n", "--by-name", type=str, default="", help="perform a symbol or address lookup"
)
parser.add_argument(
    "-d", "--diff", action="store_true", help="run ./diff.py on the result"
)
args = parser.parse_args()
diff_count = args.count

version = args.version

if version is None:
    # No version flag given: use the version whose built ROM was modified
    # most recently, falling back to "us" when no build output is found.
    version = "us"
    best = 0
    # "cn" is included so that every version selectable by flag can also be
    # auto-detected.
    for ver in ["us", "jp", "eu", "sh", "cn"]:
        try:
            mtime = os.path.getmtime(f"build/{ver}/sm64.{ver}.z64")
        except OSError:
            # This version has not been built (or its ROM is unreadable).
            continue
        if mtime > best:
            best = mtime
            version = ver
    print("Assuming version " + version)

if args.make:
    check_call(["make", "-j4", "VERSION=" + version, "COMPARE=0"])

baseimg = f"baserom.{version}.z64"
basemap = f"sm64.{version}.map"

myimg = f"build/{version}/sm64.{version}.z64"
mymap = f"build/{version}/{basemap}"

# Prefer a reference map stored under expected/ when one exists.
if os.path.isfile("expected/" + mymap):
    basemap = "expected/" + mymap

required_files = [baseimg, myimg, mymap]
if not os.path.isfile(baseimg):
    print(baseimg + " must exist.")
    raise SystemExit(1)
if not os.path.isfile(myimg) or not os.path.isfile(mymap):
    print(
        myimg
        + " and "
        + mymap
        + " must exist. Try rerunning with --make to build them."
    )
    raise SystemExit(1)

with open(myimg, "rb") as rom_file:
    mybin = rom_file.read()
with open(baseimg, "rb") as rom_file:
    basebin = rom_file.read()

if len(mybin) != len(basebin):
    print("Modified ROM has different size...")
    raise SystemExit(1)

if mybin == basebin:
    print("No differences!")
    # A by-name lookup is still useful on a matching ROM, so only stop here
    # when no lookup was requested.
    if not args.by_name:
        raise SystemExit(0)


def search_map(rom_addr):
    """Describe the map entry that contains ``rom_addr``.

    The map file ``mymap`` is re-read on every call. Entries are visited in
    file order and the last entry seen before the first one that lies past
    the requested address is reported.

    Args:
        rom_addr: Address to look up. It is compared against ROM offsets;
            when bit 31 is set it is additionally compared against RAM
            addresses, so a RAM address can be passed as well.

    Returns:
        A string of the form ``"in <name> (ram 0x..., rom 0x..., <file>)"``,
        or ``"at end of rom?"`` when no entry lies past the address.
    """
    ram_offset = None
    last_ram = 0
    last_rom = 0
    last_fn = "<start of rom>"
    last_file = "<no file>"
    prev_line = ""
    with open(mymap) as f:
        for line in f:
            if "load address" in line:
                # Example:
                # ".boot           0x0000000004000000     0x1000 load address 0x0000000000000000"
                if "noload" in line or "noload" in prev_line:
                    # Sections marked noload are skipped until the next
                    # section header.
                    ram_offset = None
                    continue
                # Fixed columns: RAM address at 16..33, load address at 59..76.
                ram = int(line[16 : 16 + 18], 0)
                rom = int(line[59 : 59 + 18], 0)
                ram_offset = ram - rom
                continue
            prev_line = line

            if (
                ram_offset is None
                or "=" in line
                or "*fill*" in line
                or " 0x" not in line
            ):
                continue
            ram = int(line[16 : 16 + 18], 0)
            rom = ram - ram_offset
            fn = line.split()[-1]
            if "0x" in fn:
                # The last token is a number rather than a name; stop
                # tracking until the next section header.
                ram_offset = None
                continue
            if rom > rom_addr or (rom_addr & 0x80000000 and ram > rom_addr):
                return f"in {last_fn} (ram 0x{last_ram:08x}, rom 0x{last_rom:06x}, {last_file})"
            last_ram = ram
            last_rom = rom
            last_fn = fn
            if "/" in fn:
                # Names containing a path separator are treated as file names.
                last_file = fn
    return "at end of rom?"


def parse_map(fname):
    """Parse a linker map file into a symbol table.

    Args:
        fname: Path of the map file to read.

    Returns:
        A dict mapping each symbol name to a tuple
        ``(rom, file, prev_sym, ram)``: the symbol's ROM offset, the most
        recent file name seen before it (``"<no file>"`` if none), the name
        of the symbol recorded just before it (``None`` for the first one),
        and its RAM address. A name that occurs more than once keeps the
        values of its last occurrence.
    """
    ram_offset = None
    cur_file = "<no file>"
    syms = {}
    prev_sym = None
    prev_line = ""
    with open(fname) as f:
        for line in f:
            if "load address" in line:
                if "noload" in line or "noload" in prev_line:
                    # Sections marked noload are skipped until the next
                    # section header.
                    ram_offset = None
                    continue
                # Fixed columns: RAM address at 16..33, load address at 59..76.
                ram = int(line[16 : 16 + 18], 0)
                rom = int(line[59 : 59 + 18], 0)
                ram_offset = ram - rom
                continue
            prev_line = line

            if (
                ram_offset is None
                or "=" in line
                or "*fill*" in line
                or " 0x" not in line
            ):
                continue
            ram = int(line[16 : 16 + 18], 0)
            rom = ram - ram_offset
            fn = line.split()[-1]
            if "0x" in fn:
                # The last token is a number rather than a name; stop
                # tracking until the next section header.
                ram_offset = None
            elif "/" in fn:
                # Names containing a path separator are treated as file names.
                cur_file = fn
            else:
                syms[fn] = (rom, cur_file, prev_sym, ram)
                prev_sym = fn
    return syms


def map_diff():
    """Report the lowest-ROM symbol whose offset differs between the two maps.

    Compares the map of the built ROM (``mymap``) with the reference map
    (``basemap``), considering only symbols present in both.

    Returns:
        True if a moved symbol was found (a description is printed), False if
        every shared symbol has the same ROM offset in both maps.
    """
    map1 = parse_map(mymap)
    map2 = parse_map(basemap)
    min_rom = None
    found = None
    for sym, addr in map1.items():
        if sym not in map2:
            continue
        if addr[0] != map2[sym][0]:
            if min_rom is None or addr[0] < min_rom:
                min_rom = addr[0]
                found = (sym, addr[1], addr[2])
    if found is None:
        return False

    sym, sym_file, prev_sym = found
    print()
    if prev_sym is None:
        # The moved symbol is the first one in the map, so there is no
        # preceding symbol to point at.
        print(f"Map appears to have shifted just before {sym} ({sym_file})")
        return True
    print(
        f"Map appears to have shifted just before {sym} ({sym_file}) -- in {prev_sym}?"
    )
    if prev_sym not in map2:
        print()
        print(
            f"(Base map file {basemap} out of date due to renamed symbols, so result may be imprecise.)"
        )
    return True


def hexbytes(bs):
    """Format a bytes-like object as colon-separated two-digit hex values."""
    return ":".join("{:02x}".format(c) for c in bs)


# For convenience, allow `./first-diff.py -n <ROM addr | RAM addr | function name>`
# to do a symbol <-> address lookup. This should really be split out into a
# separate script...
if args.by_name:
    # Lookup mode: a numeric argument is resolved to a symbol description,
    # anything else is treated as a symbol name.
    try:
        addr = int(args.by_name, 0)
    except ValueError:
        m = parse_map(mymap)
        try:
            rom, _, _, ram = m[args.by_name]
        except KeyError:
            print("function", args.by_name, "not found")
        else:
            print(
                args.by_name,
                "is at position",
                hex(rom),
                "in ROM,",
                hex(ram),
                "in RAM",
            )
    else:
        # Kept outside the try so a ValueError raised while parsing the map
        # is not mistaken for "argument is not a number".
        print(args.by_name, "is", search_map(addr))
    raise SystemExit(0)

found_instr_diff = []  # ROM offsets of the recorded instruction differences
map_search_diff = []  # search_map() results already recorded, to skip repeats
diffs = 0
shift_cap = 1000  # stop counting differing words once this many were seen
# Words are compared 4 bytes at a time, starting at offset 24.
for i in range(24, len(mybin), 4):
    if diffs > shift_cap and len(found_instr_diff) >= diff_count:
        # Neither branch below can fire any more, so the rest of the ROM
        # cannot change the report.
        break
    # (mybin[i:i+4] != basebin[i:i+4], but that's slightly slower in CPython...)
    if diffs <= shift_cap and (
        mybin[i] != basebin[i]
        or mybin[i + 1] != basebin[i + 1]
        or mybin[i + 2] != basebin[i + 2]
        or mybin[i + 3] != basebin[i + 3]
    ):
        if diffs == 0:
            print(f"First difference at ROM addr {hex(i)}, {search_map(i)}")
            print(
                f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}"
            )
        diffs += 1
    # A difference in the top six bits of the first byte is counted as an
    # instruction difference (presumably the opcode field -- TODO confirm).
    if len(found_instr_diff) < diff_count and mybin[i] >> 2 != basebin[i] >> 2:
        location = search_map(i)
        # Record at most one difference per map location.
        if location not in map_search_diff:
            found_instr_diff.append(i)
            map_search_diff.append(location)
if diffs == 0:
    print("No differences!")
    if not args.by_name:
        raise SystemExit(0)
definite_shift = diffs > shift_cap
if not definite_shift:
    print(str(diffs) + " differing word(s).")

if diffs > 100:
    for i in found_instr_diff:
        print(f"Instruction difference at ROM addr {hex(i)}, {search_map(i)}")
        print(
            f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}"
        )
    if version == "sh":
        print("Shifted ROM, as expected.")
    else:
        if not os.path.isfile(basemap):
            if definite_shift:
                print("Tons of differences, must be a shifted ROM.")
            print(
                "To find ROM shifts, copy a clean .map file to "
                + basemap
                + " and rerun this script."
            )
            raise SystemExit(0)

        if not map_diff():
            print(f"No ROM shift{' (!?)' if definite_shift else ''}")
if args.diff:
    if not found_instr_diff:
        # Without a recorded instruction difference there is no symbol to
        # hand to diff.py; indexing the empty list would raise IndexError.
        print("No instruction difference found, so there is nothing to pass to ./diff.py.")
        raise SystemExit(1)
    diff_args = input("Call ./diff.py with which arguments? ") or "--"
    if diff_args[0] != "-":
        diff_args = "-" + diff_args
    if "w" in diff_args and args.make:
        diff_args += "m"  # To avoid warnings when passing -w, also pass -m as long as -m was passed to first-diff itself

    check_call(
        [
            "python3",
            "diff.py",
            f"-{version[0]}",
            diff_args,
            # Second word of "in <name> (...)" is the symbol name.
            search_map(found_instr_diff[0]).split()[1],
        ]
    )