diff.py - sm64 - A Super Mario 64 decompilation

diff.py (48905B)
      1 #!/usr/bin/env python3
      2 # PYTHON_ARGCOMPLETE_OK
      3 import argparse
      4 import sys
      5 from typing import (
      6     Any,
      7     Dict,
      8     List,
      9     Match,
     10     NamedTuple,
     11     NoReturn,
     12     Optional,
     13     Set,
     14     Tuple,
     15     Union,
     16     Callable,
     17     Pattern,
     18 )
     19 
     20 
     21 def fail(msg: str) -> NoReturn:
     22     print(msg, file=sys.stderr)
     23     sys.exit(1)
     24 
     25 
     26 # Prefer to use diff_settings.py from the current working directory
     27 sys.path.insert(0, ".")
     28 try:
     29     import diff_settings
     30 except ModuleNotFoundError:
     31     fail("Unable to find diff_settings.py in the same directory.")
     32 sys.path.pop(0)
     33 
     34 # ==== COMMAND-LINE ====
     35 
     36 try:
     37     import argcomplete  # type: ignore
     38 except ModuleNotFoundError:
     39     argcomplete = None
     40 
# Top-level command-line parser; options are registered on it below and it is
# parsed once the late imports have been done.
parser = argparse.ArgumentParser(description="Diff MIPS or AArch64 assembly.")

# Keep a handle on the positional "start" argument so that a completion
# callback can be attached to it when argcomplete is available.
start_argument = parser.add_argument(
    "start",
    help="Function name or address to start diffing from.",
)
     47 
     48 if argcomplete:
     49 
     50     def complete_symbol(
     51         prefix: str, parsed_args: argparse.Namespace, **kwargs: object
     52     ) -> List[str]:
     53         if not prefix or prefix.startswith("-"):
     54             # skip reading the map file, which would
     55             # result in a lot of useless completions
     56             return []
     57         config: Dict[str, Any] = {}
     58         diff_settings.apply(config, parsed_args)  # type: ignore
     59         mapfile = config.get("mapfile")
     60         if not mapfile:
     61             return []
     62         completes = []
     63         with open(mapfile) as f:
     64             data = f.read()
     65             # assume symbols are prefixed by a space character
     66             search = f" {prefix}"
     67             pos = data.find(search)
     68             while pos != -1:
     69                 # skip the space character in the search string
     70                 pos += 1
     71                 # assume symbols are suffixed by either a space
     72                 # character or a (unix-style) line return
     73                 spacePos = data.find(" ", pos)
     74                 lineReturnPos = data.find("\n", pos)
     75                 if lineReturnPos == -1:
     76                     endPos = spacePos
     77                 elif spacePos == -1:
     78                     endPos = lineReturnPos
     79                 else:
     80                     endPos = min(spacePos, lineReturnPos)
     81                 if endPos == -1:
     82                     match = data[pos:]
     83                     pos = -1
     84                 else:
     85                     match = data[pos:endPos]
     86                     pos = data.find(search, endPos)
     87                 completes.append(match)
     88         return completes
     89 
     90     setattr(start_argument, "completer", complete_symbol)
     91 
# Optional second positional: explicit end address. When it is omitted the
# dump functions derive the end from the start address and the size limit.
parser.add_argument(
    "end",
    nargs="?",
    help="Address to end diff at.",
)

# ---- what to diff ----
parser.add_argument(
    "-o",
    dest="diff_obj",
    action="store_true",
    help="Diff .o files rather than a whole binary. This makes it possible to "
    "see symbol names. (Recommended)",
)
parser.add_argument(
    "--elf",
    dest="diff_elf_symbol",
    metavar="SYMBOL",
    help="Diff a given function in two ELFs, one being stripped and the other "
    "one non-stripped. Requires objdump from binutils 2.33+.",
)
parser.add_argument(
    "--source",
    action="store_true",
    help="Show source code (if possible). Only works with -o and -e.",
)
parser.add_argument(
    "--inlines",
    action="store_true",
    help="Show inline function calls (if possible). Only works with -o and -e.",
)
parser.add_argument(
    "--base-asm",
    dest="base_asm",
    metavar="FILE",
    help="Read assembly from given file instead of configured base img.",
)
parser.add_argument(
    "--write-asm",
    dest="write_asm",
    metavar="FILE",
    help="Write the current assembly output to file, e.g. for use with --base-asm.",
)
parser.add_argument(
    "-m",
    "--make",
    dest="make",
    action="store_true",
    help="Automatically run 'make' on the .o file or binary before diffing.",
)

# ---- how the disassembly is trimmed and normalized ----
parser.add_argument(
    "-l",
    "--skip-lines",
    dest="skip_lines",
    type=int,
    default=0,
    metavar="LINES",
    help="Skip the first N lines of output.",
)
parser.add_argument(
    "-f",
    "--stop-jr-ra",
    dest="stop_jrra",
    action="store_true",
    help="Stop disassembling at the first 'jr ra'. Some functions have multiple return points, so use with care!",
)
parser.add_argument(
    "-i",
    "--ignore-large-imms",
    dest="ignore_large_imms",
    action="store_true",
    help="Pretend all large enough immediates are the same.",
)
parser.add_argument(
    "-I",
    "--ignore-addr-diffs",
    action="store_true",
    help="Ignore address differences. Currently only affects AArch64.",
)
# store_false: show_branches defaults to True and -B turns it off.
parser.add_argument(
    "-B",
    "--no-show-branches",
    dest="show_branches",
    action="store_false",
    help="Don't visualize branches/branch targets.",
)
# Kept as a string here; it is evaluated to an integer after parsing.
parser.add_argument(
    "-S",
    "--base-shift",
    dest="base_shift",
    type=str,
    default="0",
    help="Diff position X in our img against position X + shift in the base img. "
    'Arithmetic is allowed, so e.g. |-S "0x1234 - 0x4321"| is a reasonable '
    "flag to pass if it is known that position 0x1234 in the base img syncs "
    "up with position 0x4321 in our img. Not supported together with -o.",
)

# ---- watch mode and three-way diffing ----
parser.add_argument(
    "-w",
    "--watch",
    dest="watch",
    action="store_true",
    help="Automatically update when source/object files change. "
    "Recommended in combination with -m.",
)
# Both three-way flags write to the same destination ("threeway"); the option
# strings literally contain "=prev" / "=base" and store a constant.
parser.add_argument(
    "-3",
    "--threeway=prev",
    dest="threeway",
    action="store_const",
    const="prev",
    help="Show a three-way diff between target asm, current asm, and asm "
    "prior to -w rebuild. Requires -w.",
)
parser.add_argument(
    "-b",
    "--threeway=base",
    dest="threeway",
    action="store_const",
    const="base",
    help="Show a three-way diff between target asm, current asm, and asm "
    "when diff.py was started. Requires -w.",
)

# ---- presentation and algorithm ----
parser.add_argument(
    "--width",
    dest="column_width",
    type=int,
    default=50,
    help="Sets the width of the left and right view column.",
)
parser.add_argument(
    "--algorithm",
    dest="algorithm",
    default="levenshtein",
    choices=["levenshtein", "difflib"],
    help="Diff algorithm to use. Levenshtein gives the minimum diff, while difflib "
    "aims for long sections of equal opcodes. Defaults to %(default)s.",
)
# Two spellings of the same option; the value is a line count.
parser.add_argument(
    "--max-size",
    "--max-lines",
    dest="max_lines",
    type=int,
    default=1024,
    help="The maximum length of the diff, in lines.",
)

# Project-specific flags, e.g. different versions/make arguments.
# The hook is optional: diff_settings only needs to define it if it wants to.
add_custom_arguments_fn = getattr(diff_settings, "add_custom_arguments", None)
if add_custom_arguments_fn:
    add_custom_arguments_fn(parser)

# Hook up shell completion only when the optional argcomplete module imported.
if argcomplete:
    argcomplete.autocomplete(parser)
    244 
    245 # ==== IMPORTS ====
    246 
    247 # (We do imports late to optimize auto-complete performance.)
    248 
    249 import re
    250 import os
    251 import ast
    252 import subprocess
    253 import difflib
    254 import string
    255 import itertools
    256 import threading
    257 import queue
    258 import time
    259 
    260 
# Error template shown when a third-party module cannot be imported; the single
# "{}" placeholder is filled with the name of the missing module.
MISSING_PREREQUISITES = (
    "Missing prerequisite python module {}. "
    "Run `python3 -m pip install --user colorama ansiwrap watchdog python-Levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)."
)
    265 
    266 try:
    267     from colorama import Fore, Style, Back  # type: ignore
    268     import ansiwrap  # type: ignore
    269     import watchdog  # type: ignore
    270 except ModuleNotFoundError as e:
    271     fail(MISSING_PREREQUISITES.format(e.name))
    272 
    273 # ==== CONFIG ====
    274 
# Parse the command line once; `args` is read as a module-level global below.
args = parser.parse_args()

# Set imgs, map file and make flags in a project-specific manner.
# diff_settings.apply() fills the dict in place; every key is optional here.
config: Dict[str, Any] = {}
diff_settings.apply(config, args)  # type: ignore

arch: str = config.get("arch", "mips")  # defaults to MIPS when unset
baseimg: Optional[str] = config.get("baseimg")
myimg: Optional[str] = config.get("myimg")
mapfile: Optional[str] = config.get("mapfile")
makeflags: List[str] = config.get("makeflags", [])
source_directories: Optional[List[str]] = config.get("source_directories")
objdump_executable: Optional[str] = config.get("objdump_executable")

# Size limits derived from --max-size/--max-lines. The byte limit is four times
# the line limit (presumably one 4-byte instruction per line - TODO confirm).
MAX_FUNCTION_SIZE_LINES: int = args.max_lines
MAX_FUNCTION_SIZE_BYTES: int = MAX_FUNCTION_SIZE_LINES * 4

# Palette of colorama foreground colors, used in rotation.
COLOR_ROTATION: List[str] = [
    Fore.MAGENTA,
    Fore.CYAN,
    Fore.GREEN,
    Fore.RED,
    Fore.LIGHTYELLOW_EX,
    Fore.LIGHTMAGENTA_EX,
    Fore.LIGHTCYAN_EX,
    Fore.LIGHTGREEN_EX,
    Fore.LIGHTBLACK_EX,
]

# External command lines (argv lists) for buffering and paging output.
BUFFER_CMD: List[str] = ["tail", "-c", str(10 ** 9)]
LESS_CMD: List[str] = ["less", "-SRic", "-#6"]

# NOTE(review): presumably seconds to wait before reacting to a file change,
# and the file extensions that watch mode monitors - confirm against the
# watcher code.
DEBOUNCE_DELAY: float = 0.1
FS_WATCH_EXTENSIONS: List[str] = [".c", ".h", ".s"]
    309 
    310 # ==== LOGIC ====
    311 
# An objdump invocation: (extra flags, file to disassemble, optional function
# name to restrict the output to).
ObjdumpCommand = Tuple[List[str], str, Optional[str]]

# Levenshtein is only required when it is the selected diff algorithm.
if args.algorithm == "levenshtein":
    try:
        import Levenshtein  # type: ignore
    except ModuleNotFoundError as e:
        fail(MISSING_PREREQUISITES.format(e.name))

# cxxfilt is only required when source display was requested.
if args.source:
    try:
        import cxxfilt  # type: ignore
    except ModuleNotFoundError as e:
        fail(MISSING_PREREQUISITES.format(e.name))

if args.threeway and not args.watch:
    fail("Threeway diffing requires -w.")

# No objdump configured: take the first candidate that runs `--version`
# successfully. A missing binary or a non-zero exit just moves on to the next.
if objdump_executable is None:
    for objdump_cand in ["mips-linux-gnu-objdump", "mips64-elf-objdump"]:
        try:
            subprocess.check_call(
                [objdump_cand, "--version"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            objdump_executable = objdump_cand
            break
        except subprocess.CalledProcessError:
            pass
        except FileNotFoundError:
            pass

# Also reached when the configured value is an empty string.
if not objdump_executable:
    fail(
        "Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist, or configure objdump_executable."
    )
    348 
    349 
    350 def maybe_eval_int(expr: str) -> Optional[int]:
    351     try:
    352         ret = ast.literal_eval(expr)
    353         if not isinstance(ret, int):
    354             raise Exception("not an integer")
    355         return ret
    356     except Exception:
    357         return None
    358 
    359 
    360 def eval_int(expr: str, emsg: str) -> int:
    361     ret = maybe_eval_int(expr)
    362     if ret is None:
    363         fail(emsg)
    364     return ret
    365 
    366 
    367 def eval_line_num(expr: str) -> int:
    368     return int(expr.strip().replace(":", ""), 16)
    369 
    370 
    371 def run_make(target: str) -> None:
    372     subprocess.check_call(["make"] + makeflags + [target])
    373 
    374 
    375 def run_make_capture_output(target: str) -> "subprocess.CompletedProcess[bytes]":
    376     return subprocess.run(
    377         ["make"] + makeflags + [target],
    378         stderr=subprocess.PIPE,
    379         stdout=subprocess.PIPE,
    380     )
    381 
    382 
    383 def restrict_to_function(dump: str, fn_name: str) -> str:
    384     out: List[str] = []
    385     search = f"<{fn_name}>:"
    386     found = False
    387     for line in dump.split("\n"):
    388         if found:
    389             if len(out) >= MAX_FUNCTION_SIZE_LINES:
    390                 break
    391             out.append(line)
    392         elif search in line:
    393             found = True
    394     return "\n".join(out)
    395 
    396 
    397 def maybe_get_objdump_source_flags() -> List[str]:
    398     if not args.source:
    399         return []
    400 
    401     flags = [
    402         "--source",
    403         "--source-comment=│ ",
    404         "-l",
    405     ]
    406 
    407     if args.inlines:
    408         flags.append("--inlines")
    409 
    410     return flags
    411 
    412 
    413 def run_objdump(cmd: ObjdumpCommand) -> str:
    414     flags, target, restrict = cmd
    415     assert objdump_executable, "checked previously"
    416     out = subprocess.check_output(
    417         [objdump_executable] + arch_flags + flags + [target], universal_newlines=True
    418     )
    419     if restrict is not None:
    420         return restrict_to_function(out, restrict)
    421     return out
    422 
    423 
# Integer value of --base-shift (-S); the program exits with the message below
# if the argument cannot be evaluated as an integer.
base_shift: int = eval_int(
    args.base_shift, "Failed to parse --base-shift (-S) argument as an integer."
)
    427 
    428 
    429 def search_map_file(fn_name: str) -> Tuple[Optional[str], Optional[int]]:
    430     if not mapfile:
    431         fail(f"No map file configured; cannot find function {fn_name}.")
    432 
    433     try:
    434         with open(mapfile) as f:
    435             lines = f.read().split("\n")
    436     except Exception:
    437         fail(f"Failed to open map file {mapfile} for reading.")
    438 
    439     try:
    440         cur_objfile = None
    441         ram_to_rom = None
    442         cands = []
    443         last_line = ""
    444         for line in lines:
    445             if line.startswith(" .text"):
    446                 cur_objfile = line.split()[3]
    447             if "load address" in line:
    448                 tokens = last_line.split() + line.split()
    449                 ram = int(tokens[1], 0)
    450                 rom = int(tokens[5], 0)
    451                 ram_to_rom = rom - ram
    452             if line.endswith(" " + fn_name):
    453                 ram = int(line.split()[0], 0)
    454                 if cur_objfile is not None and ram_to_rom is not None:
    455                     cands.append((cur_objfile, ram + ram_to_rom))
    456             last_line = line
    457     except Exception as e:
    458         import traceback
    459 
    460         traceback.print_exc()
    461         fail(f"Internal error while parsing map file")
    462 
    463     if len(cands) > 1:
    464         fail(f"Found multiple occurrences of function {fn_name} in map file.")
    465     if len(cands) == 1:
    466         return cands[0]
    467     return None, None
    468 
    469 
    470 def dump_elf() -> Tuple[str, ObjdumpCommand, ObjdumpCommand]:
    471     if not baseimg or not myimg:
    472         fail("Missing myimg/baseimg in config.")
    473     if base_shift:
    474         fail("--base-shift not compatible with -e")
    475 
    476     start_addr = eval_int(args.start, "Start address must be an integer expression.")
    477 
    478     if args.end is not None:
    479         end_addr = eval_int(args.end, "End address must be an integer expression.")
    480     else:
    481         end_addr = start_addr + MAX_FUNCTION_SIZE_BYTES
    482 
    483     flags1 = [
    484         f"--start-address={start_addr}",
    485         f"--stop-address={end_addr}",
    486     ]
    487 
    488     flags2 = [
    489         f"--disassemble={args.diff_elf_symbol}",
    490     ]
    491 
    492     objdump_flags = ["-drz", "-j", ".text"]
    493     return (
    494         myimg,
    495         (objdump_flags + flags1, baseimg, None),
    496         (objdump_flags + flags2 + maybe_get_objdump_source_flags(), myimg, None),
    497     )
    498 
    499 
    500 def dump_objfile() -> Tuple[str, ObjdumpCommand, ObjdumpCommand]:
    501     if base_shift:
    502         fail("--base-shift not compatible with -o")
    503     if args.end is not None:
    504         fail("end address not supported together with -o")
    505     if args.start.startswith("0"):
    506         fail("numerical start address not supported with -o; pass a function name")
    507 
    508     objfile, _ = search_map_file(args.start)
    509     if not objfile:
    510         fail("Not able to find .o file for function.")
    511 
    512     if args.make:
    513         run_make(objfile)
    514 
    515     if not os.path.isfile(objfile):
    516         fail(f"Not able to find .o file for function: {objfile} is not a file.")
    517 
    518     refobjfile = "expected/" + objfile
    519     if not os.path.isfile(refobjfile):
    520         fail(f'Please ensure an OK .o file exists at "{refobjfile}".')
    521 
    522     objdump_flags = ["-drz"]
    523     return (
    524         objfile,
    525         (objdump_flags, refobjfile, args.start),
    526         (objdump_flags + maybe_get_objdump_source_flags(), objfile, args.start),
    527     )
    528 
    529 
    530 def dump_binary() -> Tuple[str, ObjdumpCommand, ObjdumpCommand]:
    531     if not baseimg or not myimg:
    532         fail("Missing myimg/baseimg in config.")
    533     if args.make:
    534         run_make(myimg)
    535     start_addr = maybe_eval_int(args.start)
    536     if start_addr is None:
    537         _, start_addr = search_map_file(args.start)
    538         if start_addr is None:
    539             fail("Not able to find function in map file.")
    540     if args.end is not None:
    541         end_addr = eval_int(args.end, "End address must be an integer expression.")
    542     else:
    543         end_addr = start_addr + MAX_FUNCTION_SIZE_BYTES
    544     objdump_flags = ["-Dz", "-bbinary", "-EB"]
    545     flags1 = [
    546         f"--start-address={start_addr + base_shift}",
    547         f"--stop-address={end_addr + base_shift}",
    548     ]
    549     flags2 = [f"--start-address={start_addr}", f"--stop-address={end_addr}"]
    550     return (
    551         myimg,
    552         (objdump_flags + flags1, baseimg, None),
    553         (objdump_flags + flags2, myimg, None),
    554     )
    555 
    556 
    557 def ansi_ljust(s: str, width: int) -> str:
    558     """Like s.ljust(width), but accounting for ANSI colors."""
    559     needed: int = width - ansiwrap.ansilen(s)
    560     if needed > 0:
    561         return s + " " * needed
    562     else:
    563         return s
    564 
    565 
# Per-architecture tables. Both branches define the same set of names:
#   re_int       - runs of decimal digits (candidates for hex conversion)
#   re_comment   - objdump annotations that are stripped from each row
#   re_reg       - register names, replaced by "<reg>" in diff rows
#   re_sprel     - stack-pointer-relative operands, replaced by "addr(sp)"
#   re_large_imm - immediates collapsed to "<imm>" with --ignore-large-imms
#   re_imm       - immediates in general (NOTE(review): presumably consumed by
#                  normalize_imms, which is defined elsewhere - confirm)
#   forbidden    - characters that, adjacent to a number, stop hexify_int
#                  from converting it
#   arch_flags   - extra flags passed to every objdump invocation
#   branch_likely_instructions / branch_instructions /
#   instructions_with_address_immediates - mnemonic sets used by process()
if arch == "mips":
    re_int = re.compile(r"[0-9]+")
    re_comment = re.compile(r"<.*?>")
    re_reg = re.compile(
        r"\$?\b(a[0-3]|t[0-9]|s[0-8]|at|v[01]|f[12]?[0-9]|f3[01]|k[01]|fp|ra|zero)\b"
    )
    re_sprel = re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)")
    re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
    re_imm = re.compile(r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi)\([^)]*\)")
    forbidden = set(string.ascii_letters + "_")
    arch_flags = ["-m", "mips:4300"]
    # The row following one of these is replaced by "<delay-slot>" in the diff.
    branch_likely_instructions = {
        "beql",
        "bnel",
        "beqzl",
        "bnezl",
        "bgezl",
        "bgtzl",
        "blezl",
        "bltzl",
        "bc1tl",
        "bc1fl",
    }
    branch_instructions = branch_likely_instructions.union(
        {
            "b",
            "beq",
            "bne",
            "beqz",
            "bnez",
            "bgez",
            "bgtz",
            "blez",
            "bltz",
            "bc1t",
            "bc1f",
        }
    )
    # Branches plus jumps: their trailing operand is an address.
    instructions_with_address_immediates = branch_instructions.union({"jal", "j"})
elif arch == "aarch64":
    re_int = re.compile(r"[0-9]+")
    re_comment = re.compile(r"(<.*?>|//.*$)")
    # GPRs and FP registers: X0-X30, W0-W30, [DSHQ]0..31
    # The zero registers and SP should not be in this list.
    re_reg = re.compile(r"\$?\b([dshq][12]?[0-9]|[dshq]3[01]|[xw][12]?[0-9]|[xw]30)\b")
    re_sprel = re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b")
    re_large_imm = re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}")
    re_imm = re.compile(r"(?<!sp, )#-?(0x[0-9a-fA-F]+|[0-9]+)\b")
    arch_flags = []
    forbidden = set(string.ascii_letters + "_")
    # Empty for this architecture, so no row is ever marked "<delay-slot>".
    branch_likely_instructions = set()
    branch_instructions = {
        "bl",
        "b",
        "b.eq",
        "b.ne",
        "b.cs",
        "b.hs",
        "b.cc",
        "b.lo",
        "b.mi",
        "b.pl",
        "b.vs",
        "b.vc",
        "b.hi",
        "b.ls",
        "b.ge",
        "b.lt",
        "b.gt",
        "b.le",
        "cbz",
        "cbnz",
        "tbz",
        "tbnz",
    }
    instructions_with_address_immediates = branch_instructions.union({"adrp"})
else:
    fail("Unknown architecture.")
    644 
    645 
    646 def hexify_int(row: str, pat: Match[str]) -> str:
    647     full = pat.group(0)
    648     if len(full) <= 1:
    649         # leave one-digit ints alone
    650         return full
    651     start, end = pat.span()
    652     if start and row[start - 1] in forbidden:
    653         return full
    654     if end < len(row) and row[end] in forbidden:
    655         return full
    656     return hex(int(full))
    657 
    658 
    659 def parse_relocated_line(line: str) -> Tuple[str, str, str]:
    660     try:
    661         ind2 = line.rindex(",")
    662     except ValueError:
    663         ind2 = line.rindex("\t")
    664     before = line[: ind2 + 1]
    665     after = line[ind2 + 1 :]
    666     ind2 = after.find("(")
    667     if ind2 == -1:
    668         imm, after = after, ""
    669     else:
    670         imm, after = after[:ind2], after[ind2:]
    671     if imm == "0x0":
    672         imm = "0"
    673     return before, imm, after
    674 
    675 
    676 def process_mips_reloc(row: str, prev: str) -> str:
    677     before, imm, after = parse_relocated_line(prev)
    678     repl = row.split()[-1]
    679     if imm != "0":
    680         # MIPS uses relocations with addends embedded in the code as immediates.
    681         # If there is an immediate, show it as part of the relocation. Ideally
    682         # we'd show this addend in both %lo/%hi, but annoyingly objdump's output
    683         # doesn't include enough information to pair up %lo's and %hi's...
    684         # TODO: handle unambiguous cases where all addends for a symbol are the
    685         # same, or show "+???".
    686         mnemonic = prev.split()[0]
    687         if mnemonic in instructions_with_address_immediates and not imm.startswith(
    688             "0x"
    689         ):
    690             imm = "0x" + imm
    691         repl += "+" + imm if int(imm, 0) > 0 else imm
    692     if "R_MIPS_LO16" in row:
    693         repl = f"%lo({repl})"
    694     elif "R_MIPS_HI16" in row:
    695         # Ideally we'd pair up R_MIPS_LO16 and R_MIPS_HI16 to generate a
    696         # correct addend for each, but objdump doesn't give us the order of
    697         # the relocations, so we can't find the right LO16. :(
    698         repl = f"%hi({repl})"
    699     elif "R_MIPS_26" in row:
    700         # Function calls
    701         pass
    702     elif "R_MIPS_PC16" in row:
    703         # Branch to glabel. This gives confusing output, but there's not much
    704         # we can do here.
    705         pass
    706     else:
    707         assert False, f"unknown relocation type '{row}' for line '{prev}'"
    708     return before + repl + after
    709 
    710 
    711 def pad_mnemonic(line: str) -> str:
    712     if "\t" not in line:
    713         return line
    714     mn, args = line.split("\t", 1)
    715     return f"{mn:<7s} {args}"
    716 
    717 
class Line(NamedTuple):
    """One disassembled instruction row as produced by process()."""

    # Instruction mnemonic, or "<delay-slot>" for the row after a branch-likely.
    mnemonic: str
    # Row text used for diffing: registers, stack offsets and immediates have
    # been replaced by placeholders.
    diff_row: str
    # Instruction text after comment stripping and decimal-to-hex conversion;
    # rewritten afterwards if a MIPS relocation row follows the instruction.
    original: str
    # `original` passed through the DifferenceNormalizer.
    normalized_original: str
    # First tab-separated field of the objdump row, stripped of whitespace.
    line_num: str
    # For branch instructions, derived from the last comma-separated operand;
    # None for everything else.
    branch_target: Optional[str]
    # Source rows collected since the previous instruction (only with --source).
    source_lines: List[str]
    # Text matched by re_comment in the raw row, or None if nothing matched.
    comment: Optional[str]
    727 
    728 
    729 class DifferenceNormalizer:
    730     def normalize(self, mnemonic: str, row: str) -> str:
    731         """This should be called exactly once for each line."""
    732         row = self._normalize_arch_specific(mnemonic, row)
    733         if args.ignore_large_imms:
    734             row = re.sub(re_large_imm, "<imm>", row)
    735         return row
    736 
    737     def _normalize_arch_specific(self, mnemonic: str, row: str) -> str:
    738         return row
    739 
    740 
    741 class DifferenceNormalizerAArch64(DifferenceNormalizer):
    742     def __init__(self) -> None:
    743         super().__init__()
    744         self._adrp_pair_registers: Set[str] = set()
    745 
    746     def _normalize_arch_specific(self, mnemonic: str, row: str) -> str:
    747         if args.ignore_addr_diffs:
    748             row = self._normalize_adrp_differences(mnemonic, row)
    749             row = self._normalize_bl(mnemonic, row)
    750         return row
    751 
    752     def _normalize_bl(self, mnemonic: str, row: str) -> str:
    753         if mnemonic != "bl":
    754             return row
    755 
    756         row, _ = split_off_branch(row)
    757         return row
    758 
    759     def _normalize_adrp_differences(self, mnemonic: str, row: str) -> str:
    760         """Identifies ADRP + LDR/ADD pairs that are used to access the GOT and
    761         suppresses any immediate differences.
    762 
    763         Whenever an ADRP is seen, the destination register is added to the set of registers
    764         that are part of an ADRP + LDR/ADD pair. Registers are removed from the set as soon
    765         as they are used for an LDR or ADD instruction which completes the pair.
    766 
    767         This method is somewhat crude but should manage to detect most such pairs.
    768         """
    769         row_parts = row.split("\t", 1)
    770         if mnemonic == "adrp":
    771             self._adrp_pair_registers.add(row_parts[1].strip().split(",")[0])
    772             row, _ = split_off_branch(row)
    773         elif mnemonic == "ldr":
    774             for reg in self._adrp_pair_registers:
    775                 # ldr xxx, [reg]
    776                 # ldr xxx, [reg, <imm>]
    777                 if f", [{reg}" in row_parts[1]:
    778                     self._adrp_pair_registers.remove(reg)
    779                     return normalize_imms(row)
    780         elif mnemonic == "add":
    781             for reg in self._adrp_pair_registers:
    782                 # add reg, reg, <imm>
    783                 if row_parts[1].startswith(f"{reg}, {reg}, "):
    784                     self._adrp_pair_registers.remove(reg)
    785                     return normalize_imms(row)
    786 
    787         return row
    788 
    789 
    790 def make_difference_normalizer() -> DifferenceNormalizer:
    791     if arch == "aarch64":
    792         return DifferenceNormalizerAArch64()
    793     return DifferenceNormalizer()
    794 
    795 
def process(lines: List[str]) -> List[Line]:
    """Turn raw objdump output rows into a list of Line records.

    Each instruction row is split into its fields, stripped of comments, and
    stored twice: once readable (`original`) and once normalized for diffing
    (`diff_row`, with registers, stack offsets and immediates replaced by
    placeholders). Source rows (--source) are attached to the instruction
    that follows them, and MIPS relocation rows are folded into the
    instruction that precedes them. With --stop-jr-ra, processing ends one
    row after the first ``jr ra``.
    """
    normalizer = make_difference_normalizer()
    # Set after a branch-likely instruction: the next row becomes "<delay-slot>".
    skip_next = False
    # Source rows seen since the last instruction row.
    source_lines = []
    if not args.diff_obj:
        # Drop the first seven rows (presumably objdump's header - TODO
        # confirm) and a trailing empty row.
        lines = lines[7:]
        if lines and not lines[-1]:
            lines.pop()

    output: List[Line] = []
    stop_after_delay_slot = False
    for row in lines:
        # In object-file mode, skip label rows ("<name>:") and blank rows.
        if args.diff_obj and (">:" in row or not row):
            continue

        # With --source, any row that does not start with a space is treated
        # as a source row and held for the next instruction.
        if args.source and (row and row[0] != " "):
            source_lines.append(row)
            continue

        if "R_AARCH64_" in row:
            # TODO: handle relocation
            continue

        if "R_MIPS_" in row:
            # N.B. Don't transform the diff rows, they already ignore immediates
            # if output[-1].diff_row != "<delay-slot>":
            # output[-1] = output[-1].replace(diff_row=process_mips_reloc(row, output[-1].row_with_imm))
            # Only the readable text of the preceding instruction is patched.
            new_original = process_mips_reloc(row, output[-1].original)
            output[-1] = output[-1]._replace(original=new_original)
            continue

        # Remember the first comment match, then strip all comments.
        m_comment = re.search(re_comment, row)
        comment = m_comment[0] if m_comment else None
        row = re.sub(re_comment, "", row)
        row = row.rstrip()
        # Tab-separated fields: the first is kept as line_num, the second is
        # dropped, and the rest form the instruction text.
        tabs = row.split("\t")
        row = "\t".join(tabs[2:])
        line_num = tabs[0].strip()
        row_parts = row.split("\t", 1)
        mnemonic = row_parts[0].strip()
        # Convert decimal numbers to hex, except where the operand is an address.
        if mnemonic not in instructions_with_address_immediates:
            row = re.sub(re_int, lambda m: hexify_int(row, m), row)
        original = row
        normalized_original = normalizer.normalize(mnemonic, original)
        if skip_next:
            skip_next = False
            row = "<delay-slot>"
            mnemonic = "<delay-slot>"
        if mnemonic in branch_likely_instructions:
            skip_next = True
        row = re.sub(re_reg, "<reg>", row)
        row = re.sub(re_sprel, "addr(sp)", row)
        # NOTE(review): row_with_imm is not read again within this function.
        row_with_imm = row
        if mnemonic in instructions_with_address_immediates:
            row = row.strip()
            row, _ = split_off_branch(row)
            row += "<imm>"
        else:
            row = normalize_imms(row)

        branch_target = None
        if mnemonic in branch_instructions:
            # The target is the last comma-separated operand of the
            # un-normalized row.
            target = row_parts[1].strip().split(",")[-1]
            if mnemonic in branch_likely_instructions:
                # Parsed as hex and moved back by four.
                target = hex(int(target, 16) - 4)[2:]
            branch_target = target.strip()

        output.append(
            Line(
                mnemonic=mnemonic,
                diff_row=row,
                original=original,
                normalized_original=normalized_original,
                line_num=line_num,
                branch_target=branch_target,
                source_lines=source_lines,
                comment=comment,
            )
        )
        # Start a fresh list; the Line above keeps the one it was given.
        source_lines = []

        # After "jr ra" one more row is emitted, then the loop ends.
        if args.stop_jrra and mnemonic == "jr" and row_parts[1].strip() == "ra":
            stop_after_delay_slot = True
        elif stop_after_delay_slot:
            break

    return output
    883 
    884 
    885 def format_single_line_diff(line1: str, line2: str, column_width: int) -> str:
    886     return ansi_ljust(line1, column_width) + line2
    887 
    888 
    889 class SymbolColorer:
    890     symbol_colors: Dict[str, str]
    891 
    892     def __init__(self, base_index: int) -> None:
    893         self.color_index = base_index
    894         self.symbol_colors = {}
    895 
    896     def color_symbol(self, s: str, t: Optional[str] = None) -> str:
    897         try:
    898             color = self.symbol_colors[s]
    899         except:
    900             color = COLOR_ROTATION[self.color_index % len(COLOR_ROTATION)]
    901             self.color_index += 1
    902             self.symbol_colors[s] = color
    903         t = t or s
    904         return f"{color}{t}{Fore.RESET}"
    905 
    906 
    907 def normalize_imms(row: str) -> str:
    908     return re.sub(re_imm, "<imm>", row)
    909 
    910 
    911 def normalize_stack(row: str) -> str:
    912     return re.sub(re_sprel, "addr(sp)", row)
    913 
    914 
    915 def split_off_branch(line: str) -> Tuple[str, str]:
    916     parts = line.split(",")
    917     if len(parts) < 2:
    918         parts = line.split(None, 1)
    919     off = len(line) - len(parts[-1])
    920     return line[:off], line[off:]
    921 
    922 
    923 ColorFunction = Callable[[str], str]
    924 
    925 
    926 def color_fields(
    927     pat: Pattern[str],
    928     out1: str,
    929     out2: str,
    930     color1: ColorFunction,
    931     color2: Optional[ColorFunction] = None,
    932 ) -> Tuple[str, str]:
    933     diffs = [
    934         of.group() != nf.group()
    935         for (of, nf) in zip(pat.finditer(out1), pat.finditer(out2))
    936     ]
    937 
    938     it = iter(diffs)
    939 
    940     def maybe_color(color: ColorFunction, s: str) -> str:
    941         return color(s) if next(it, False) else f"{Style.RESET_ALL}{s}"
    942 
    943     out1 = pat.sub(lambda m: maybe_color(color1, m.group()), out1)
    944     it = iter(diffs)
    945     out2 = pat.sub(lambda m: maybe_color(color2 or color1, m.group()), out2)
    946 
    947     return out1, out2
    948 
    949 
    950 def color_branch_imms(br1: str, br2: str) -> Tuple[str, str]:
    951     if br1 != br2:
    952         br1 = f"{Fore.LIGHTBLUE_EX}{br1}{Style.RESET_ALL}"
    953         br2 = f"{Fore.LIGHTBLUE_EX}{br2}{Style.RESET_ALL}"
    954     return br1, br2
    955 
    956 
    957 def diff_sequences_difflib(
    958     seq1: List[str], seq2: List[str]
    959 ) -> List[Tuple[str, int, int, int, int]]:
    960     differ = difflib.SequenceMatcher(a=seq1, b=seq2, autojunk=False)
    961     return differ.get_opcodes()
    962 
    963 
    964 def diff_sequences(
    965     seq1: List[str], seq2: List[str]
    966 ) -> List[Tuple[str, int, int, int, int]]:
    967     if (
    968         args.algorithm != "levenshtein"
    969         or len(seq1) * len(seq2) > 4 * 10 ** 8
    970         or len(seq1) + len(seq2) >= 0x110000
    971     ):
    972         return diff_sequences_difflib(seq1, seq2)
    973 
    974     # The Levenshtein library assumes that we compare strings, not lists. Convert.
    975     # (Per the check above we know we have fewer than 0x110000 unique elements, so chr() works.)
    976     remapping: Dict[str, str] = {}
    977 
    978     def remap(seq: List[str]) -> str:
    979         seq = seq[:]
    980         for i in range(len(seq)):
    981             val = remapping.get(seq[i])
    982             if val is None:
    983                 val = chr(len(remapping))
    984                 remapping[seq[i]] = val
    985             seq[i] = val
    986         return "".join(seq)
    987 
    988     rem1 = remap(seq1)
    989     rem2 = remap(seq2)
    990     return Levenshtein.opcodes(rem1, rem2)  # type: ignore
    991 
    992 
    993 def diff_lines(
    994     lines1: List[Line],
    995     lines2: List[Line],
    996 ) -> List[Tuple[Optional[Line], Optional[Line]]]:
    997     ret = []
    998     for (tag, i1, i2, j1, j2) in diff_sequences(
    999         [line.mnemonic for line in lines1],
   1000         [line.mnemonic for line in lines2],
   1001     ):
   1002         for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
   1003             if tag == "replace":
   1004                 if line1 is None:
   1005                     tag = "insert"
   1006                 elif line2 is None:
   1007                     tag = "delete"
   1008             elif tag == "insert":
   1009                 assert line1 is None
   1010             elif tag == "delete":
   1011                 assert line2 is None
   1012             ret.append((line1, line2))
   1013 
   1014     return ret
   1015 
   1016 
   1017 class OutputLine:
   1018     base: Optional[str]
   1019     fmt2: str
   1020     key2: Optional[str]
   1021 
   1022     def __init__(self, base: Optional[str], fmt2: str, key2: Optional[str]) -> None:
   1023         self.base = base
   1024         self.fmt2 = fmt2
   1025         self.key2 = key2
   1026 
   1027     def __eq__(self, other: object) -> bool:
   1028         if not isinstance(other, OutputLine):
   1029             return NotImplemented
   1030         return self.key2 == other.key2
   1031 
   1032     def __hash__(self) -> int:
   1033         return hash(self.key2)
   1034 
   1035 
   1036 def do_diff(basedump: str, mydump: str) -> List[OutputLine]:
   1037     output: List[OutputLine] = []
   1038 
   1039     lines1 = process(basedump.split("\n"))
   1040     lines2 = process(mydump.split("\n"))
   1041 
   1042     sc1 = SymbolColorer(0)
   1043     sc2 = SymbolColorer(0)
   1044     sc3 = SymbolColorer(4)
   1045     sc4 = SymbolColorer(4)
   1046     sc5 = SymbolColorer(0)
   1047     sc6 = SymbolColorer(0)
   1048     bts1: Set[str] = set()
   1049     bts2: Set[str] = set()
   1050 
   1051     if args.show_branches:
   1052         for (lines, btset, sc) in [
   1053             (lines1, bts1, sc5),
   1054             (lines2, bts2, sc6),
   1055         ]:
   1056             for line in lines:
   1057                 bt = line.branch_target
   1058                 if bt is not None:
   1059                     btset.add(bt + ":")
   1060                     sc.color_symbol(bt + ":")
   1061 
   1062     for (line1, line2) in diff_lines(lines1, lines2):
   1063         line_color1 = line_color2 = sym_color = Fore.RESET
   1064         line_prefix = " "
   1065         if line1 and line2 and line1.diff_row == line2.diff_row:
   1066             if line1.normalized_original == line2.normalized_original:
   1067                 out1 = line1.original
   1068                 out2 = line2.original
   1069             elif line1.diff_row == "<delay-slot>":
   1070                 out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
   1071                 out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
   1072             else:
   1073                 mnemonic = line1.original.split()[0]
   1074                 out1, out2 = line1.original, line2.original
   1075                 branch1 = branch2 = ""
   1076                 if mnemonic in instructions_with_address_immediates:
   1077                     out1, branch1 = split_off_branch(line1.original)
   1078                     out2, branch2 = split_off_branch(line2.original)
   1079                 branchless1 = out1
   1080                 branchless2 = out2
   1081                 out1, out2 = color_fields(
   1082                     re_imm,
   1083                     out1,
   1084                     out2,
   1085                     lambda s: f"{Fore.LIGHTBLUE_EX}{s}{Style.RESET_ALL}",
   1086                 )
   1087 
   1088                 same_relative_target = False
   1089                 if line1.branch_target is not None and line2.branch_target is not None:
   1090                     relative_target1 = eval_line_num(
   1091                         line1.branch_target
   1092                     ) - eval_line_num(line1.line_num)
   1093                     relative_target2 = eval_line_num(
   1094                         line2.branch_target
   1095                     ) - eval_line_num(line2.line_num)
   1096                     same_relative_target = relative_target1 == relative_target2
   1097 
   1098                 if not same_relative_target:
   1099                     branch1, branch2 = color_branch_imms(branch1, branch2)
   1100 
   1101                 out1 += branch1
   1102                 out2 += branch2
   1103                 if normalize_imms(branchless1) == normalize_imms(branchless2):
   1104                     if not same_relative_target:
   1105                         # only imms differences
   1106                         sym_color = Fore.LIGHTBLUE_EX
   1107                         line_prefix = "i"
   1108                 else:
   1109                     out1, out2 = color_fields(
   1110                         re_sprel, out1, out2, sc3.color_symbol, sc4.color_symbol
   1111                     )
   1112                     if normalize_stack(branchless1) == normalize_stack(branchless2):
   1113                         # only stack differences (luckily stack and imm
   1114                         # differences can't be combined in MIPS, so we
   1115                         # don't have to think about that case)
   1116                         sym_color = Fore.YELLOW
   1117                         line_prefix = "s"
   1118                     else:
   1119                         # regs differences and maybe imms as well
   1120                         out1, out2 = color_fields(
   1121                             re_reg, out1, out2, sc1.color_symbol, sc2.color_symbol
   1122                         )
   1123                         line_color1 = line_color2 = sym_color = Fore.YELLOW
   1124                         line_prefix = "r"
   1125         elif line1 and line2:
   1126             line_prefix = "|"
   1127             line_color1 = Fore.LIGHTBLUE_EX
   1128             line_color2 = Fore.LIGHTBLUE_EX
   1129             sym_color = Fore.LIGHTBLUE_EX
   1130             out1 = line1.original
   1131             out2 = line2.original
   1132         elif line1:
   1133             line_prefix = "<"
   1134             line_color1 = sym_color = Fore.RED
   1135             out1 = line1.original
   1136             out2 = ""
   1137         elif line2:
   1138             line_prefix = ">"
   1139             line_color2 = sym_color = Fore.GREEN
   1140             out1 = ""
   1141             out2 = line2.original
   1142 
   1143         if args.source and line2 and line2.comment:
   1144             out2 += f" {line2.comment}"
   1145 
   1146         def format_part(
   1147             out: str,
   1148             line: Optional[Line],
   1149             line_color: str,
   1150             btset: Set[str],
   1151             sc: SymbolColorer,
   1152         ) -> Optional[str]:
   1153             if line is None:
   1154                 return None
   1155             in_arrow = "  "
   1156             out_arrow = ""
   1157             if args.show_branches:
   1158                 if line.line_num in btset:
   1159                     in_arrow = sc.color_symbol(line.line_num, "~>") + line_color
   1160                 if line.branch_target is not None:
   1161                     out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>")
   1162             out = pad_mnemonic(out)
   1163             return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}"
   1164 
   1165         part1 = format_part(out1, line1, line_color1, bts1, sc5)
   1166         part2 = format_part(out2, line2, line_color2, bts2, sc6)
   1167         key2 = line2.original if line2 else None
   1168 
   1169         mid = f"{sym_color}{line_prefix}"
   1170 
   1171         if line2:
   1172             for source_line in line2.source_lines:
   1173                 color = Style.DIM
   1174                 # File names and function names
   1175                 if source_line and source_line[0] != "│":
   1176                     color += Style.BRIGHT
   1177                     # Function names
   1178                     if source_line.endswith("():"):
   1179                         # Underline. Colorama does not provide this feature, unfortunately.
   1180                         color += "\u001b[4m"
   1181                         try:
   1182                             source_line = cxxfilt.demangle(
   1183                                 source_line[:-3], external_only=False
   1184                             )
   1185                         except:
   1186                             pass
   1187                 output.append(
   1188                     OutputLine(
   1189                         None,
   1190                         f"  {color}{source_line}{Style.RESET_ALL}",
   1191                         source_line,
   1192                     )
   1193                 )
   1194 
   1195         fmt2 = mid + " " + (part2 or "")
   1196         output.append(OutputLine(part1, fmt2, key2))
   1197 
   1198     return output
   1199 
   1200 
   1201 def chunk_diff(diff: List[OutputLine]) -> List[Union[List[OutputLine], OutputLine]]:
   1202     cur_right: List[OutputLine] = []
   1203     chunks: List[Union[List[OutputLine], OutputLine]] = []
   1204     for output_line in diff:
   1205         if output_line.base is not None:
   1206             chunks.append(cur_right)
   1207             chunks.append(output_line)
   1208             cur_right = []
   1209         else:
   1210             cur_right.append(output_line)
   1211     chunks.append(cur_right)
   1212     return chunks
   1213 
   1214 
   1215 def format_diff(
   1216     old_diff: List[OutputLine], new_diff: List[OutputLine]
   1217 ) -> Tuple[str, List[str]]:
   1218     old_chunks = chunk_diff(old_diff)
   1219     new_chunks = chunk_diff(new_diff)
   1220     output: List[Tuple[str, OutputLine, OutputLine]] = []
   1221     assert len(old_chunks) == len(new_chunks), "same target"
   1222     empty = OutputLine("", "", None)
   1223     for old_chunk, new_chunk in zip(old_chunks, new_chunks):
   1224         if isinstance(old_chunk, list):
   1225             assert isinstance(new_chunk, list)
   1226             if not old_chunk and not new_chunk:
   1227                 # Most of the time lines sync up without insertions/deletions,
   1228                 # and there's no interdiffing to be done.
   1229                 continue
   1230             differ = difflib.SequenceMatcher(a=old_chunk, b=new_chunk, autojunk=False)
   1231             for (tag, i1, i2, j1, j2) in differ.get_opcodes():
   1232                 if tag in ["equal", "replace"]:
   1233                     for i, j in zip(range(i1, i2), range(j1, j2)):
   1234                         output.append(("", old_chunk[i], new_chunk[j]))
   1235                 if tag in ["insert", "replace"]:
   1236                     for j in range(j1 + i2 - i1, j2):
   1237                         output.append(("", empty, new_chunk[j]))
   1238                 if tag in ["delete", "replace"]:
   1239                     for i in range(i1 + j2 - j1, i2):
   1240                         output.append(("", old_chunk[i], empty))
   1241         else:
   1242             assert isinstance(new_chunk, OutputLine)
   1243             assert new_chunk.base
   1244             # old_chunk.base and new_chunk.base have the same text since
   1245             # both diffs are based on the same target, but they might
   1246             # differ in color. Use the new version.
   1247             output.append((new_chunk.base, old_chunk, new_chunk))
   1248 
   1249     # TODO: status line, with e.g. approximate permuter score?
   1250     width = args.column_width
   1251     if args.threeway:
   1252         header_line = "TARGET".ljust(width) + "  CURRENT".ljust(width) + "  PREVIOUS"
   1253         diff_lines = [
   1254             ansi_ljust(base, width)
   1255             + ansi_ljust(new.fmt2, width)
   1256             + (old.fmt2 or "-" if old != new else "")
   1257             for (base, old, new) in output
   1258         ]
   1259     else:
   1260         header_line = ""
   1261         diff_lines = [
   1262             ansi_ljust(base, width) + new.fmt2
   1263             for (base, old, new) in output
   1264             if base or new.key2 is not None
   1265         ]
   1266     return header_line, diff_lines
   1267 
   1268 
   1269 def debounced_fs_watch(
   1270     targets: List[str],
   1271     outq: "queue.Queue[Optional[float]]",
   1272     debounce_delay: float,
   1273 ) -> None:
   1274     import watchdog.events  # type: ignore
   1275     import watchdog.observers  # type: ignore
   1276 
   1277     class WatchEventHandler(watchdog.events.FileSystemEventHandler):  # type: ignore
   1278         def __init__(
   1279             self, queue: "queue.Queue[float]", file_targets: List[str]
   1280         ) -> None:
   1281             self.queue = queue
   1282             self.file_targets = file_targets
   1283 
   1284         def on_modified(self, ev: object) -> None:
   1285             if isinstance(ev, watchdog.events.FileModifiedEvent):
   1286                 self.changed(ev.src_path)
   1287 
   1288         def on_moved(self, ev: object) -> None:
   1289             if isinstance(ev, watchdog.events.FileMovedEvent):
   1290                 self.changed(ev.dest_path)
   1291 
   1292         def should_notify(self, path: str) -> bool:
   1293             for target in self.file_targets:
   1294                 if path == target:
   1295                     return True
   1296             if args.make and any(
   1297                 path.endswith(suffix) for suffix in FS_WATCH_EXTENSIONS
   1298             ):
   1299                 return True
   1300             return False
   1301 
   1302         def changed(self, path: str) -> None:
   1303             if self.should_notify(path):
   1304                 self.queue.put(time.time())
   1305 
   1306     def debounce_thread() -> NoReturn:
   1307         listenq: "queue.Queue[float]" = queue.Queue()
   1308         file_targets: List[str] = []
   1309         event_handler = WatchEventHandler(listenq, file_targets)
   1310         observer = watchdog.observers.Observer()
   1311         observed = set()
   1312         for target in targets:
   1313             if os.path.isdir(target):
   1314                 observer.schedule(event_handler, target, recursive=True)
   1315             else:
   1316                 file_targets.append(target)
   1317                 target = os.path.dirname(target) or "."
   1318                 if target not in observed:
   1319                     observed.add(target)
   1320                     observer.schedule(event_handler, target)
   1321         observer.start()
   1322         while True:
   1323             t = listenq.get()
   1324             more = True
   1325             while more:
   1326                 delay = t + debounce_delay - time.time()
   1327                 if delay > 0:
   1328                     time.sleep(delay)
   1329                 # consume entire queue
   1330                 more = False
   1331                 try:
   1332                     while True:
   1333                         t = listenq.get(block=False)
   1334                         more = True
   1335                 except queue.Empty:
   1336                     pass
   1337             outq.put(t)
   1338 
   1339     th = threading.Thread(target=debounce_thread, daemon=True)
   1340     th.start()
   1341 
   1342 
   1343 class Display:
   1344     basedump: str
   1345     mydump: str
   1346     emsg: Optional[str]
   1347     last_diff_output: Optional[List[OutputLine]]
   1348     pending_update: Optional[Tuple[str, bool]]
   1349     ready_queue: "queue.Queue[None]"
   1350     watch_queue: "queue.Queue[Optional[float]]"
   1351     less_proc: "Optional[subprocess.Popen[bytes]]"
   1352 
   1353     def __init__(self, basedump: str, mydump: str) -> None:
   1354         self.basedump = basedump
   1355         self.mydump = mydump
   1356         self.emsg = None
   1357         self.last_diff_output = None
   1358 
   1359     def run_less(self) -> "Tuple[subprocess.Popen[bytes], subprocess.Popen[bytes]]":
   1360         if self.emsg is not None:
   1361             output = self.emsg
   1362         else:
   1363             diff_output = do_diff(self.basedump, self.mydump)
   1364             last_diff_output = self.last_diff_output or diff_output
   1365             if args.threeway != "base" or not self.last_diff_output:
   1366                 self.last_diff_output = diff_output
   1367             header, diff_lines = format_diff(last_diff_output, diff_output)
   1368             header_lines = [header] if header else []
   1369             output = "\n".join(header_lines + diff_lines[args.skip_lines :])
   1370 
   1371         # Pipe the output through 'tail' and only then to less, to ensure the
   1372         # write call doesn't block. ('tail' has to buffer all its input before
   1373         # it starts writing.) This also means we don't have to deal with pipe
   1374         # closure errors.
   1375         buffer_proc = subprocess.Popen(
   1376             BUFFER_CMD, stdin=subprocess.PIPE, stdout=subprocess.PIPE
   1377         )
   1378         less_proc = subprocess.Popen(LESS_CMD, stdin=buffer_proc.stdout)
   1379         assert buffer_proc.stdin
   1380         assert buffer_proc.stdout
   1381         buffer_proc.stdin.write(output.encode())
   1382         buffer_proc.stdin.close()
   1383         buffer_proc.stdout.close()
   1384         return (buffer_proc, less_proc)
   1385 
   1386     def run_sync(self) -> None:
   1387         proca, procb = self.run_less()
   1388         procb.wait()
   1389         proca.wait()
   1390 
   1391     def run_async(self, watch_queue: "queue.Queue[Optional[float]]") -> None:
   1392         self.watch_queue = watch_queue
   1393         self.ready_queue = queue.Queue()
   1394         self.pending_update = None
   1395         dthread = threading.Thread(target=self.display_thread)
   1396         dthread.start()
   1397         self.ready_queue.get()
   1398 
   1399     def display_thread(self) -> None:
   1400         proca, procb = self.run_less()
   1401         self.less_proc = procb
   1402         self.ready_queue.put(None)
   1403         while True:
   1404             ret = procb.wait()
   1405             proca.wait()
   1406             self.less_proc = None
   1407             if ret != 0:
   1408                 # fix the terminal
   1409                 os.system("tput reset")
   1410             if ret != 0 and self.pending_update is not None:
   1411                 # killed by program with the intent to refresh
   1412                 msg, error = self.pending_update
   1413                 self.pending_update = None
   1414                 if not error:
   1415                     self.mydump = msg
   1416                     self.emsg = None
   1417                 else:
   1418                     self.emsg = msg
   1419                 proca, procb = self.run_less()
   1420                 self.less_proc = procb
   1421                 self.ready_queue.put(None)
   1422             else:
   1423                 # terminated by user, or killed
   1424                 self.watch_queue.put(None)
   1425                 self.ready_queue.put(None)
   1426                 break
   1427 
   1428     def progress(self, msg: str) -> None:
   1429         # Write message to top-left corner
   1430         sys.stdout.write("\x1b7\x1b[1;1f{}\x1b8".format(msg + " "))
   1431         sys.stdout.flush()
   1432 
   1433     def update(self, text: str, error: bool) -> None:
   1434         if not error and not self.emsg and text == self.mydump:
   1435             self.progress("Unchanged. ")
   1436             return
   1437         self.pending_update = (text, error)
   1438         if not self.less_proc:
   1439             return
   1440         self.less_proc.kill()
   1441         self.ready_queue.get()
   1442 
   1443     def terminate(self) -> None:
   1444         if not self.less_proc:
   1445             return
   1446         self.less_proc.kill()
   1447         self.ready_queue.get()
   1448 
   1449 
   1450 def main() -> None:
   1451     if args.diff_elf_symbol:
   1452         make_target, basecmd, mycmd = dump_elf()
   1453     elif args.diff_obj:
   1454         make_target, basecmd, mycmd = dump_objfile()
   1455     else:
   1456         make_target, basecmd, mycmd = dump_binary()
   1457 
   1458     if args.write_asm is not None:
   1459         mydump = run_objdump(mycmd)
   1460         with open(args.write_asm, "w") as f:
   1461             f.write(mydump)
   1462         print(f"Wrote assembly to {args.write_asm}.")
   1463         sys.exit(0)
   1464 
   1465     if args.base_asm is not None:
   1466         with open(args.base_asm) as f:
   1467             basedump = f.read()
   1468     else:
   1469         basedump = run_objdump(basecmd)
   1470 
   1471     mydump = run_objdump(mycmd)
   1472 
   1473     display = Display(basedump, mydump)
   1474 
   1475     if not args.watch:
   1476         display.run_sync()
   1477     else:
   1478         if not args.make:
   1479             yn = input(
   1480                 "Warning: watch-mode (-w) enabled without auto-make (-m). "
   1481                 "You will have to run make manually. Ok? (Y/n) "
   1482             )
   1483             if yn.lower() == "n":
   1484                 return
   1485         if args.make:
   1486             watch_sources = None
   1487             watch_sources_for_target_fn = getattr(
   1488                 diff_settings, "watch_sources_for_target", None
   1489             )
   1490             if watch_sources_for_target_fn:
   1491                 watch_sources = watch_sources_for_target_fn(make_target)
   1492             watch_sources = watch_sources or source_directories
   1493             if not watch_sources:
   1494                 fail("Missing source_directories config, don't know what to watch.")
   1495         else:
   1496             watch_sources = [make_target]
   1497         q: "queue.Queue[Optional[float]]" = queue.Queue()
   1498         debounced_fs_watch(watch_sources, q, DEBOUNCE_DELAY)
   1499         display.run_async(q)
   1500         last_build = 0.0
   1501         try:
   1502             while True:
   1503                 t = q.get()
   1504                 if t is None:
   1505                     break
   1506                 if t < last_build:
   1507                     continue
   1508                 last_build = time.time()
   1509                 if args.make:
   1510                     display.progress("Building...")
   1511                     ret = run_make_capture_output(make_target)
   1512                     if ret.returncode != 0:
   1513                         display.update(
   1514                             ret.stderr.decode("utf-8-sig", "replace")
   1515                             or ret.stdout.decode("utf-8-sig", "replace"),
   1516                             error=True,
   1517                         )
   1518                         continue
   1519                 mydump = run_objdump(mycmd)
   1520                 display.update(mydump, error=False)
   1521         except KeyboardInterrupt:
   1522             display.terminate()
   1523 
   1524 
   1525 main()
	sm64 A Super Mario 64 decompilation
	Log \| Files \| Refs \| README \| LICENSE