sm64

A Super Mario 64 decompilation
Log | Files | Refs | README | LICENSE

first-diff.py (9225B)


      1 #!/usr/bin/env python3
      2 import os.path
      3 import argparse
      4 from subprocess import check_call
      5 
      6 # TODO: -S argument for shifted ROMs
      7 
      8 parser = argparse.ArgumentParser(
      9     description="find the first difference(s) between the compiled ROM and the baserom"
     10 )
     11 versionGroup = parser.add_mutually_exclusive_group()
     12 versionGroup.add_argument(
     13     "-j",
     14     "--jp",
     15     help="use original Japanese version",
     16     action="store_const",
     17     const="jp",
     18     dest="version",
     19 )
     20 versionGroup.add_argument(
     21     "-u",
     22     "--us",
     23     help="use United States version",
     24     action="store_const",
     25     const="us",
     26     dest="version",
     27 )
     28 versionGroup.add_argument(
     29     "-e",
     30     "--eu",
     31     help="use European (PAL) version",
     32     action="store_const",
     33     const="eu",
     34     dest="version",
     35 )
     36 versionGroup.add_argument(
     37     "-s",
     38     "--sh",
     39     help="use Shindou (Rumble) version",
     40     action="store_const",
     41     const="sh",
     42     dest="version",
     43 )
     44 versionGroup.add_argument(
     45     "-c",
     46     "--cn",
     47     help="use iQue (Chinese) version",
     48     action="store_const",
     49     const="cn",
     50     dest="version",
     51 )
     52 parser.add_argument(
     53     "-m", "--make", help="run make before finding difference(s)", action="store_true"
     54 )
     55 parser.add_argument(
     56     "--count",
     57     type=int,
     58     default=1,
     59     help="find up to this many instruction difference(s)",
     60 )
     61 parser.add_argument(
     62     "-n", "--by-name", type=str, default="", help="perform a symbol or address lookup"
     63 )
     64 parser.add_argument(
     65     "-d", "--diff", action="store_true", help="run ./diff.py on the result"
     66 )
     67 args = parser.parse_args()
     68 diff_count = args.count
     69 
     70 version = args.version
     71 
     72 if version is None:
     73     version = "us"
     74     best = 0
     75     for ver in ["us", "jp", "eu", "sh"]:
     76         try:
     77             mtime = os.path.getmtime(f"build/{ver}/sm64.{ver}.z64")
     78             if mtime > best:
     79                 best = mtime
     80                 version = ver
     81         except Exception:
     82             pass
     83     print("Assuming version " + version)
     84 
     85 if args.make:
     86     check_call(["make", "-j4", "VERSION=" + version, "COMPARE=0"])
     87 
     88 baseimg = f"baserom.{version}.z64"
     89 basemap = f"sm64.{version}.map"
     90 
     91 myimg = f"build/{version}/sm64.{version}.z64"
     92 mymap = f"build/{version}/{basemap}"
     93 
     94 if os.path.isfile("expected/" + mymap):
     95     basemap = "expected/" + mymap
     96 
     97 required_files = [baseimg, myimg, mymap]
     98 if not os.path.isfile(baseimg):
     99     print(baseimg + " must exist.")
    100     exit(1)
    101 if not os.path.isfile(myimg) or not os.path.isfile(mymap):
    102     print(
    103         myimg
    104         + " and "
    105         + mymap
    106         + " must exist. Try rerunning with --make to build them."
    107     )
    108     exit(1)
    109 
    110 mybin = open(myimg, "rb").read()
    111 basebin = open(baseimg, "rb").read()
    112 
    113 if len(mybin) != len(basebin):
    114     print("Modified ROM has different size...")
    115     exit(1)
    116 
    117 if mybin == basebin:
    118     print("No differences!")
    119     if not args.by_name:
    120         exit(0)
    121 
    122 
    123 def search_map(rom_addr):
    124     ram_offset = None
    125     last_ram = 0
    126     last_rom = 0
    127     last_fn = "<start of rom>"
    128     last_file = "<no file>"
    129     prev_line = ""
    130     with open(mymap) as f:
    131         for line in f:
    132             if "load address" in line:
    133                 # Example: ".boot           0x0000000004000000     0x1000 load address 0x0000000000000000"
    134                 if "noload" in line or "noload" in prev_line:
    135                     ram_offset = None
    136                     continue
    137                 ram = int(line[16 : 16 + 18], 0)
    138                 rom = int(line[59 : 59 + 18], 0)
    139                 ram_offset = ram - rom
    140                 continue
    141             prev_line = line
    142 
    143             if (
    144                 ram_offset is None
    145                 or "=" in line
    146                 or "*fill*" in line
    147                 or " 0x" not in line
    148             ):
    149                 continue
    150             ram = int(line[16 : 16 + 18], 0)
    151             rom = ram - ram_offset
    152             fn = line.split()[-1]
    153             if "0x" in fn:
    154                 ram_offset = None
    155                 continue
    156             if rom > rom_addr or (rom_addr & 0x80000000 and ram > rom_addr):
    157                 return f"in {last_fn} (ram 0x{last_ram:08x}, rom 0x{last_rom:06x}, {last_file})"
    158             last_ram = ram
    159             last_rom = rom
    160             last_fn = fn
    161             if "/" in fn:
    162                 last_file = fn
    163     return "at end of rom?"
    164 
    165 
    166 def parse_map(fname):
    167     ram_offset = None
    168     cur_file = "<no file>"
    169     syms = {}
    170     prev_sym = None
    171     prev_line = ""
    172     with open(fname) as f:
    173         for line in f:
    174             if "load address" in line:
    175                 if "noload" in line or "noload" in prev_line:
    176                     ram_offset = None
    177                     continue
    178                 ram = int(line[16 : 16 + 18], 0)
    179                 rom = int(line[59 : 59 + 18], 0)
    180                 ram_offset = ram - rom
    181                 continue
    182             prev_line = line
    183 
    184             if (
    185                 ram_offset is None
    186                 or "=" in line
    187                 or "*fill*" in line
    188                 or " 0x" not in line
    189             ):
    190                 continue
    191             ram = int(line[16 : 16 + 18], 0)
    192             rom = ram - ram_offset
    193             fn = line.split()[-1]
    194             if "0x" in fn:
    195                 ram_offset = None
    196             elif "/" in fn:
    197                 cur_file = fn
    198             else:
    199                 syms[fn] = (rom, cur_file, prev_sym, ram)
    200                 prev_sym = fn
    201     return syms
    202 
    203 
    204 def map_diff():
    205     map1 = parse_map(mymap)
    206     map2 = parse_map(basemap)
    207     min_ram = None
    208     found = None
    209     for sym, addr in map1.items():
    210         if sym not in map2:
    211             continue
    212         if addr[0] != map2[sym][0]:
    213             if min_ram is None or addr[0] < min_ram:
    214                 min_ram = addr[0]
    215                 found = (sym, addr[1], addr[2])
    216     if min_ram is None:
    217         return False
    218     else:
    219         print()
    220         print(
    221             f"Map appears to have shifted just before {found[0]} ({found[1]}) -- in {found[2]}?"
    222         )
    223         if found[2] is not None and found[2] not in map2:
    224             print()
    225             print(
    226                 f"(Base map file {basemap} out of date due to renamed symbols, so result may be imprecise.)"
    227             )
    228         return True
    229 
    230 
    231 def hexbytes(bs):
    232     return ":".join("{:02x}".format(c) for c in bs)
    233 
    234 
    235 # For convenience, allow `./first-diff.py <ROM addr | RAM addr | function name>`
    236 # to do a symbol <-> address lookup. This should really be split out into a
    237 # separate script...
    238 if args.by_name:
    239     try:
    240         addr = int(args.by_name, 0)
    241         print(args.by_name, "is", search_map(addr))
    242     except ValueError:
    243         m = parse_map(mymap)
    244         try:
    245             print(
    246                 args.by_name,
    247                 "is at position",
    248                 hex(m[args.by_name][0]),
    249                 "in ROM,",
    250                 hex(m[args.by_name][3]),
    251                 "in RAM",
    252             )
    253         except KeyError:
    254             print("function", args.by_name, "not found")
    255     exit()
    256 
    257 found_instr_diff = []
    258 map_search_diff = []
    259 diffs = 0
    260 shift_cap = 1000
    261 for i in range(24, len(mybin), 4):
    262     # (mybin[i:i+4] != basebin[i:i+4], but that's slightly slower in CPython...)
    263     if diffs <= shift_cap and (
    264         mybin[i] != basebin[i]
    265         or mybin[i + 1] != basebin[i + 1]
    266         or mybin[i + 2] != basebin[i + 2]
    267         or mybin[i + 3] != basebin[i + 3]
    268     ):
    269         if diffs == 0:
    270             print(f"First difference at ROM addr {hex(i)}, {search_map(i)}")
    271             print(
    272                 f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}"
    273             )
    274         diffs += 1
    275     if (
    276         len(found_instr_diff) < diff_count
    277         and mybin[i] >> 2 != basebin[i] >> 2
    278         and not search_map(i) in map_search_diff
    279     ):
    280         found_instr_diff.append(i)
    281         map_search_diff.append(search_map(i))
    282 if diffs == 0:
    283     print("No differences!")
    284     if not args.by_name:
    285         exit()
    286 definite_shift = diffs > shift_cap
    287 if not definite_shift:
    288     print(str(diffs) + " differing word(s).")
    289 
    290 if diffs > 100:
    291     if len(found_instr_diff) > 0:
    292         for i in found_instr_diff:
    293             print(f"Instruction difference at ROM addr {hex(i)}, {search_map(i)}")
    294             print(
    295                 f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}"
    296             )
    297     if version == "sh":
    298         print("Shifted ROM, as expected.")
    299     else:
    300         if not os.path.isfile(basemap):
    301             if definite_shift:
    302                 print("Tons of differences, must be a shifted ROM.")
    303             print(
    304                 "To find ROM shifts, copy a clean .map file to "
    305                 + basemap
    306                 + " and rerun this script."
    307             )
    308             exit()
    309 
    310         if not map_diff():
    311             print(f"No ROM shift{' (!?)' if definite_shift else ''}")
    312 if args.diff:
    313     diff_args = input("Call ./diff.py with which arguments? ") or "--"
    314     if diff_args[0] != "-":
    315         diff_args = "-" + diff_args
    316     if "w" in diff_args and args.make:
    317         diff_args += "m"  # To avoid warnings when passing -w, also pass -m as long as -m was passed to first-diff itself
    318 
    319     check_call(
    320         [
    321             "python3",
    322             "diff.py",
    323             f"-{version[0]}",
    324             diff_args,
    325             search_map(found_instr_diff[0]).split()[1],
    326         ]
    327     )