sm64

A Super Mario 64 decompilation
Log | Files | Refs | README | LICENSE

gen_asset_list.cpp (12010B)


      1 // WARNING: THIS SCRIPT IS CURRENTLY BROKEN.
      2 // It doesn't handle skyboxes/cake images correctly.
      3 
      4 // Usage:
      5 // g++-8 -std=c++17 ./tools/gen_asset_list.cpp -lstdc++fs -O1 -Wall -o tools/gen_asset_list
      6 // ./tools/gen_asset_list
      7 
      8 #include <algorithm>
      9 #include <cassert>
     10 #include <cstdio>
     11 #include <filesystem>
     12 #include <fstream>
     13 #include <iostream>
     14 #include <map>
     15 #include <sstream>
     16 #include <string>
     17 #include <unordered_map>
     18 #include <vector>
     19 using namespace std;
     20 
     21 #define BSWAP32(x) ((((x) >> 24) & 0xff) | (((x) >> 8) & 0xff00) | (((x) << 8) & 0xff0000) | (((x) << 24) & 0xff000000U))
     22 #define BSWAP16(x) ((((x) >> 8) & 0xff) | (((x) << 8) & 0xff00))
     23 
     24 const char* OUTPUT_FILE = "assets.json";
     25 const size_t CHUNK_SIZE = 16;
     26 const vector<string> LANGS = {"jp", "us", "eu", "sh", "cn"};
     27 
     28 typedef uint8_t u8;
     29 typedef uint64_t u64;
     30 
// Where an asset was located while scanning a ROM (filled in by main()).
struct Pos {
    // Byte offset of the asset inside the buffer that was searched.
    size_t pos;
    // ROM offset of the MIO0 block whose decompressed contents were searched,
    // or 0 when the raw ROM image itself was searched.
    size_t mio0;
};
     35 
// Multiplier of the polynomial hash computed by hashString() and
// rollingHashes(); all hash arithmetic is unsigned 64-bit and wraps around.
const u64 C = 12318461241ULL;
     37 
     38 size_t findCutPos(const string& s) {
     39     size_t ind = s.find_first_not_of(s[0], 1);
     40     if (ind == string::npos) ind = 0;
     41     else ind--;
     42     if (ind + CHUNK_SIZE <= s.size())
     43         return ind;
     44     return s.size() - CHUNK_SIZE;
     45 }
     46 
     47 pair<size_t, u64> hashString(const string& inp) {
     48     size_t cutPos = findCutPos(inp);
     49     string s = inp.substr(cutPos, CHUNK_SIZE);
     50     u64 ret = 0;
     51     for (u8 c : s) {
     52         ret *= C;
     53         ret += c;
     54     }
     55     return {cutPos, ret};
     56 }
     57 
     58 template<class F>
     59 void rollingHashes(const string& str, size_t chunkSize, F&& f) {
     60     if (str.size() < chunkSize) return;
     61     u64 h = 0, pw = 1;
     62     for (size_t i = 0; i < chunkSize; i++)
     63         h = h * C + (u8)str[i], pw = pw * C;
     64     f(0, h);
     65     for (size_t i = chunkSize; i < str.size(); i++) {
     66         h = h * C + (u8)str[i] - pw * (u8)str[i-chunkSize];
     67         f(i - chunkSize + 1, h);
     68     }
     69 }
     70 
     71 bool stringMatches(const string& base, size_t pos, const string& target) {
     72     if (pos + target.size() > base.size()) return false;
     73     for (int it = 0; it < 10; it++) {
     74         size_t i = rand() % target.size();
     75         if (base[pos + i] != target[i]) return false;
     76     }
     77     for (size_t i = 0; i < target.size(); i++) {
     78         if (base[pos + i] != target[i]) return false;
     79     }
     80     return true;
     81 }
     82 
     83 string mio0_decompress(uint32_t *src) {
     84     uint32_t size = BSWAP32(src[1]);
     85     string output(size, '\0');
     86     char *dest = output.data();
     87     char *destEnd = (size + dest);
     88     uint16_t *cmpOffset = (uint16_t *)((char *)src + BSWAP32(src[2]));
     89     char *rawOffset = ((char *)src + BSWAP32(src[3]));
     90     int counter = 0;
     91     uint32_t controlBits;
     92 
     93     src += 4;
     94 
     95     while (dest != destEnd) {
     96         if (counter == 0) {
     97             controlBits = *src++;
     98             controlBits = BSWAP32(controlBits);
     99             counter = 32;
    100         }
    101 
    102         if (controlBits & 0x80000000) {
    103             *dest++ = *rawOffset++;
    104         }
    105         else {
    106             uint16_t dcmpParam = *cmpOffset++;
    107             dcmpParam = BSWAP16(dcmpParam);
    108             int dcmpCount = (dcmpParam >> 12) + 3;
    109             char* dcmpPtr = dest - (dcmpParam & 0x0FFF);
    110 
    111             while (dcmpCount) {
    112                 *dest++ = dcmpPtr[-1];
    113                 dcmpCount--;
    114                 dcmpPtr++;
    115             }
    116         }
    117 
    118         counter--;
    119         controlBits <<= 1;
    120     }
    121     return output;
    122 }
    123 
    124 string readFile(const string& p, bool allowMissing = false) {
    125     ifstream fin(p, ios::binary);
    126     if (!fin) {
    127         if (allowMissing) return "";
    128         cerr << "missing file " << p << endl;
    129         exit(1);
    130     }
    131     fin.seekg(0, fin.end);
    132     auto length = fin.tellg();
    133     fin.seekg(0, fin.beg);
    134     string data(length, '\0');
    135     fin.read(data.data(), length);
    136     assert(fin);
    137     return data;
    138 }
    139 
    140 pair<int, int> getPngSize(const string& fname) {
    141     string buffer(16, '\0');
    142     uint32_t w, h;
    143     ifstream fin(fname, ios::binary);
    144     fin.read(buffer.data(), 16);
    145     fin.read((char*)&w, 4);
    146     fin.read((char*)&h, 4);
    147     assert(fin);
    148     assert(buffer.substr(0, 4) == "\x89PNG");
    149     assert(buffer.substr(12, 4) == "IHDR");
    150     w = BSWAP32(w);
    151     h = BSWAP32(h);
    152     return {w, h};
    153 }
    154 
    155 string exec(const string& cmd) {
    156     char buffer[128];
    157     string result;
    158     FILE* pipe = popen(cmd.c_str(), "r");
    159     assert(pipe);
    160     size_t s;
    161     while ((s = fread(buffer, 1, sizeof(buffer), pipe))) {
    162         result += string(buffer, buffer + s);
    163     }
    164     assert(!ferror(pipe));
    165     pclose(pipe);
    166     return result;
    167 }
    168 
    169 string compileAsset(const string& fname) {
    170     auto ind = fname.rfind('.');
    171     if (ind == string::npos) return "";
    172     string q = fname.substr(ind + 1);
    173     if (q == "png") {
    174         string prev = fname.substr(0, ind);
    175 
    176         for (const string& lang : LANGS) {
    177             string ret = readFile("build/" + lang + "/" + prev, true);
    178             if (!ret.empty()) return ret;
    179         }
    180 
    181         ind = prev.rfind('.');
    182         if (ind == string::npos) return "";
    183         q = prev.substr(ind + 1);
    184         if (q == "rgba16" || q == "ia16" || q == "ia8" || q == "ia4" || q == "ia1") {
    185             return exec("./tools/n64graphics -i /dev/stdout -g " + fname + " -f " + q);
    186         }
    187     }
    188     if (q == "m64")
    189         return readFile(fname);
    190     if (q == "bin" && fname.find("assets") != string::npos)
    191         return readFile(fname);
    192     return "";
    193 }
    194 
    195 tuple<string, string, vector<string>> compileSoundData(const string& lang) {
    196     string upper_lang = lang;
    197     for (char& ch : upper_lang) ch = (char)(ch + 'A' - 'a');
    198     string build_dir = "build/" + lang;
    199     string dir = build_dir + "/sound";
    200     string ctl = dir + "/sound_data.ctl";
    201     string tbl = dir + "/sound_data.tbl";
    202     exec("make " + tbl + " VERSION=" + lang + " NOEXTRACT=1");
    203     string sampleFilesStr =
    204         exec("python3 tools/assemble_sound.py " +
    205             dir + "/samples/ "
    206             "sound/sound_banks/ " +
    207             dir + "/sound_data.ctl " +
    208             dir + "/sound_data.tbl " +
    209             "-DVERSION_" + upper_lang +
    210             " --print-samples");
    211     vector<string> sampleFiles;
    212     istringstream iss(sampleFilesStr);
    213     string line;
    214     while (getline(iss, line)) {
    215         line = line.substr(build_dir.size() + 1);
    216         line[line.size() - 1] = 'f';
    217         sampleFiles.push_back(line);
    218     }
    219     string ctlData = readFile(ctl);
    220     string tblData = readFile(tbl);
    221     return {ctlData, tblData, sampleFiles};
    222 }
    223 
// Generates OUTPUT_FILE ("assets.json").
//
// Steps: compile every asset found in the source directories, hash one
// CHUNK_SIZE-byte window of each, scan every "baserom.<lang>.z64" (each MIO0
// block's decompressed contents first, then the raw ROM) for those windows,
// and write the positions that were found as JSON.
//
// Returns 1 when at least one asset was not found in any ROM, 0 otherwise.
int main() {
    // Deliberate compile error: the tool is known to be broken (see the
    // warning at the top of the file) and must not be built by accident.
    intentional syntax error; // (see comment at top of file)
    // Asset name (file path, or "@sound ctl/tbl <lang>") -> compiled bytes.
    map<string, string> assets;
    // Sample file name -> list of (lang, index of the sample in that lang's list).
    map<string, vector<pair<string, int>>> soundAssets;

    cout << "compiling assets..." << endl;
    int totalAssets = 0;
    for (string base_dir : {"assets", "sound/sequences", "textures", "levels", "actors"}) {
        for (auto& ent: filesystem::recursive_directory_iterator(base_dir)) {
            string p = ent.path().string();
            string bin = compileAsset(p);
            // Empty result: not an asset this tool understands.
            if (bin.empty()) continue;
            // Anything shorter than one hash window cannot be searched for.
            if (bin.size() < CHUNK_SIZE) {
                cerr << "asset " << p << " is too small (" << bin.size() << " bytes), expected at least " << CHUNK_SIZE << " bytes" << endl;
                continue;
            }
            assets[p] = bin;
            totalAssets++;
        }
    }
    // Per-language sound banks are registered under synthetic "@sound ..." names.
    for (const string& lang : LANGS) {
        string ctl, tbl;
        vector<string> sampleFiles;
        tie(ctl, tbl, sampleFiles) = compileSoundData(lang);
        assets["@sound ctl " + lang] = ctl;
        assets["@sound tbl " + lang] = tbl;
        totalAssets += 2;
        for (size_t i = 0; i < sampleFiles.size(); i++) {
            soundAssets[sampleFiles[i]].emplace_back(lang, i);
        }
    }
    cout << "compiled " << totalAssets << " assets" << endl;

    // Window hash -> list of (window offset inside the asset, (name, data)).
    unordered_map<u64, vector<pair<size_t, pair<string, string>>>> hashes;
    for (const auto& asset : assets) {
        size_t cutPos;
        u64 hash;
        tie(cutPos, hash) = hashString(asset.second);
        hashes[hash].push_back(make_pair(cutPos, asset));
    }

    // (lang, asset name) -> where the asset was found in that language's ROM.
    map<pair<string, string>, Pos> assetPositions;
    for (const string& lang : LANGS) {
        cout << "searching " << lang << "..." << endl;
        // Per-language working copy; entries are removed once matched, so only
        // the first match of each asset is recorded for this language.
        auto remHashes = hashes;
        // Scans `str` for all assets still in remHashes. `mio0` is stored with
        // every hit (ROM offset of the MIO0 block, or 0 for the raw ROM).
        auto search = [&](string& str, string lang, size_t mio0) {
            rollingHashes(str, CHUNK_SIZE, [&](size_t hashPos, u64 hash) {
                if (!remHashes.count(hash)) return;
                vector<pair<size_t, pair<string, string>>>& conts = remHashes.at(hash);
                // A hash hit is only a candidate: confirm with a full compare
                // and drop the confirmed entries from the candidate list.
                auto it = remove_if(conts.begin(), conts.end(),
                    [&](const pair<size_t, pair<string, string>>& pa) {
                        size_t cutPos = pa.first;
                        const string& name = pa.second.first;
                        const string& data = pa.second.second;
                        // Start of the whole asset, given where its hashed window sits.
                        // NOTE(review): this wraps around when hashPos < cutPos;
                        // confirm stringMatches rejects such positions.
                        size_t assetPos = hashPos - cutPos;
                        if (stringMatches(str, assetPos, data)) {
                            assetPositions[make_pair(lang, name)] = {assetPos, mio0};
                            return true;
                        }
                        return false;
                    });
                conts.erase(it, conts.end());
                if (conts.empty()) remHashes.erase(hash);
            });
        };

        string rom = readFile("baserom." + lang + ".z64");

        // Look for "MIO0" magic at every 4-byte aligned offset and search the
        // decompressed contents of each block found.
        for (size_t i = 0; i < rom.size(); i += 4) {
            if (rom[i] == 'M' && rom[i+1] == 'I' && rom[i+2] == 'O' && rom[i+3] == '0') {
                string data = mio0_decompress((uint32_t*)&rom[i]);
                search(data, lang, i);
            }
        }

        // Finally search the uncompressed ROM image itself.
        search(rom, lang, 0);
    }

    cout << "generating " << OUTPUT_FILE << "..." << endl;
    ofstream fout(OUTPUT_FILE);
    assert(fout);
    fout <<
        "{\n"
        "\"@comment\": \"This file was generated by tools/gen_asset_list.cpp. "
        "When renaming a file, either change its name in this file directly, "
        "or regenerate this file using that script.\"";

    // True until the first asset entry has been written (adds one blank line
    // between the comment entry and the asset entries).
    bool first1 = true;
    vector<string> notFound;
    for (const auto& asset : assets) {
        const string& name = asset.first;
        const string& data = asset.second;
        // Positions of this asset, in LANGS order, for the languages where it was found.
        vector<pair<string, Pos>> positions;
        for (const string& lang : LANGS) {
            auto it = assetPositions.find(make_pair(lang, name));
            if (it != assetPositions.end()) {
                positions.push_back(make_pair(lang, it->second));
            }
        }

        if (positions.empty()) {
            notFound.push_back(name);
        }
        else {
            // Entry format: "name": [(width,height,) size, {"lang": [(mio0,) pos], ...}]
            fout << ",\n";
            if (first1) fout << "\n";
            first1 = false;
            fout << "\"" << name << "\": [";
            // PNG assets additionally record the image dimensions.
            if (name.substr(name.size() - 4) == ".png") {
                int w, h;
                tie(w, h) = getPngSize(name);
                fout << w << "," << h << ",";
            }
            fout << data.size() << ",{";
            bool first2 = true;
            for (auto& pa : positions) {
                auto p = pa.second;
                if (!first2) fout << ",";
                first2 = false;
                fout << "\"" << pa.first << "\":[";
                // The MIO0 offset is only written when it is non-zero.
                if (p.mio0)
                    fout << p.mio0 << ",";
                fout << p.pos << ']';
            }
            fout << "}]";
        }
    }
    // Sound samples are not searched for in the ROM; each is written with size
    // 0 and, per language, a ["@sound", sample index] locator.
    for (const auto& asset : soundAssets) {
        const string& name = asset.first;
        const vector<pair<string, int>>& locs = asset.second;
        fout << ",\n";
        fout << "\"" << name << "\": [0,{";
        bool first2 = true;
        for (auto& pa : locs) {
            if (!first2) fout << ",";
            first2 = false;
            fout << "\"" << pa.first << "\":[\"@sound\"," << pa.second << ']';
        }
        fout << "}]";
    }
    fout << "\n}" << endl;
    assert(fout);
    fout.close();

    // Report assets that were not found in any ROM; the names are only listed
    // when there are at most 10 of them.
    if (!notFound.empty()) {
        cout << endl;
        cout << "Missing " << notFound.size() << " assets." << endl;
        if (notFound.size() <= 10) {
            for (auto& s : notFound) {
                cout << s << endl;
            }
        }
        return 1;
    }

    cout << "done!" << endl;

    return 0;
}