gen_asset_list.cpp (12010B)
1 // WARNING: THIS SCRIPT IS CURRENTLY BROKEN. 2 // It doesn't handle skyboxes/cake images correctly. 3 4 // Usage: 5 // g++-8 -std=c++17 ./tools/gen_asset_list.cpp -lstdc++fs -O1 -Wall -o tools/gen_asset_list 6 // ./tools/gen_asset_list 7 8 #include <algorithm> 9 #include <cassert> 10 #include <cstdio> 11 #include <filesystem> 12 #include <fstream> 13 #include <iostream> 14 #include <map> 15 #include <sstream> 16 #include <string> 17 #include <unordered_map> 18 #include <vector> 19 using namespace std; 20 21 #define BSWAP32(x) ((((x) >> 24) & 0xff) | (((x) >> 8) & 0xff00) | (((x) << 8) & 0xff0000) | (((x) << 24) & 0xff000000U)) 22 #define BSWAP16(x) ((((x) >> 8) & 0xff) | (((x) << 8) & 0xff00)) 23 24 const char* OUTPUT_FILE = "assets.json"; 25 const size_t CHUNK_SIZE = 16; 26 const vector<string> LANGS = {"jp", "us", "eu", "sh", "cn"}; 27 28 typedef uint8_t u8; 29 typedef uint64_t u64; 30 31 struct Pos { 32 size_t pos; 33 size_t mio0; 34 }; 35 36 const u64 C = 12318461241ULL; 37 38 size_t findCutPos(const string& s) { 39 size_t ind = s.find_first_not_of(s[0], 1); 40 if (ind == string::npos) ind = 0; 41 else ind--; 42 if (ind + CHUNK_SIZE <= s.size()) 43 return ind; 44 return s.size() - CHUNK_SIZE; 45 } 46 47 pair<size_t, u64> hashString(const string& inp) { 48 size_t cutPos = findCutPos(inp); 49 string s = inp.substr(cutPos, CHUNK_SIZE); 50 u64 ret = 0; 51 for (u8 c : s) { 52 ret *= C; 53 ret += c; 54 } 55 return {cutPos, ret}; 56 } 57 58 template<class F> 59 void rollingHashes(const string& str, size_t chunkSize, F&& f) { 60 if (str.size() < chunkSize) return; 61 u64 h = 0, pw = 1; 62 for (size_t i = 0; i < chunkSize; i++) 63 h = h * C + (u8)str[i], pw = pw * C; 64 f(0, h); 65 for (size_t i = chunkSize; i < str.size(); i++) { 66 h = h * C + (u8)str[i] - pw * (u8)str[i-chunkSize]; 67 f(i - chunkSize + 1, h); 68 } 69 } 70 71 bool stringMatches(const string& base, size_t pos, const string& target) { 72 if (pos + target.size() > base.size()) return false; 73 for (int it = 0; it < 10; it++) { 74 size_t i = rand() % target.size(); 75 if (base[pos + i] != target[i]) return false; 76 } 77 for (size_t i = 0; i < target.size(); i++) { 78 if (base[pos + i] != target[i]) return false; 79 } 80 return true; 81 } 82 83 string mio0_decompress(uint32_t *src) { 84 uint32_t size = BSWAP32(src[1]); 85 string output(size, '\0'); 86 char *dest = output.data(); 87 char *destEnd = (size + dest); 88 uint16_t *cmpOffset = (uint16_t *)((char *)src + BSWAP32(src[2])); 89 char *rawOffset = ((char *)src + BSWAP32(src[3])); 90 int counter = 0; 91 uint32_t controlBits; 92 93 src += 4; 94 95 while (dest != destEnd) { 96 if (counter == 0) { 97 controlBits = *src++; 98 controlBits = BSWAP32(controlBits); 99 counter = 32; 100 } 101 102 if (controlBits & 0x80000000) { 103 *dest++ = *rawOffset++; 104 } 105 else { 106 uint16_t dcmpParam = *cmpOffset++; 107 dcmpParam = BSWAP16(dcmpParam); 108 int dcmpCount = (dcmpParam >> 12) + 3; 109 char* dcmpPtr = dest - (dcmpParam & 0x0FFF); 110 111 while (dcmpCount) { 112 *dest++ = dcmpPtr[-1]; 113 dcmpCount--; 114 dcmpPtr++; 115 } 116 } 117 118 counter--; 119 controlBits <<= 1; 120 } 121 return output; 122 } 123 124 string readFile(const string& p, bool allowMissing = false) { 125 ifstream fin(p, ios::binary); 126 if (!fin) { 127 if (allowMissing) return ""; 128 cerr << "missing file " << p << endl; 129 exit(1); 130 } 131 fin.seekg(0, fin.end); 132 auto length = fin.tellg(); 133 fin.seekg(0, fin.beg); 134 string data(length, '\0'); 135 fin.read(data.data(), length); 136 assert(fin); 137 return data; 138 } 139 140 pair<int, int> getPngSize(const string& fname) { 141 string buffer(16, '\0'); 142 uint32_t w, h; 143 ifstream fin(fname, ios::binary); 144 fin.read(buffer.data(), 16); 145 fin.read((char*)&w, 4); 146 fin.read((char*)&h, 4); 147 assert(fin); 148 assert(buffer.substr(0, 4) == "\x89PNG"); 149 assert(buffer.substr(12, 4) == "IHDR"); 150 w = BSWAP32(w); 151 h = BSWAP32(h); 152 return {w, h}; 153 } 154 155 string exec(const string& cmd) { 156 char buffer[128]; 157 string result; 158 FILE* pipe = popen(cmd.c_str(), "r"); 159 assert(pipe); 160 size_t s; 161 while ((s = fread(buffer, 1, sizeof(buffer), pipe))) { 162 result += string(buffer, buffer + s); 163 } 164 assert(!ferror(pipe)); 165 pclose(pipe); 166 return result; 167 } 168 169 string compileAsset(const string& fname) { 170 auto ind = fname.rfind('.'); 171 if (ind == string::npos) return ""; 172 string q = fname.substr(ind + 1); 173 if (q == "png") { 174 string prev = fname.substr(0, ind); 175 176 for (const string& lang : LANGS) { 177 string ret = readFile("build/" + lang + "/" + prev, true); 178 if (!ret.empty()) return ret; 179 } 180 181 ind = prev.rfind('.'); 182 if (ind == string::npos) return ""; 183 q = prev.substr(ind + 1); 184 if (q == "rgba16" || q == "ia16" || q == "ia8" || q == "ia4" || q == "ia1") { 185 return exec("./tools/n64graphics -i /dev/stdout -g " + fname + " -f " + q); 186 } 187 } 188 if (q == "m64") 189 return readFile(fname); 190 if (q == "bin" && fname.find("assets") != string::npos) 191 return readFile(fname); 192 return ""; 193 } 194 195 tuple<string, string, vector<string>> compileSoundData(const string& lang) { 196 string upper_lang = lang; 197 for (char& ch : upper_lang) ch = (char)(ch + 'A' - 'a'); 198 string build_dir = "build/" + lang; 199 string dir = build_dir + "/sound"; 200 string ctl = dir + "/sound_data.ctl"; 201 string tbl = dir + "/sound_data.tbl"; 202 exec("make " + tbl + " VERSION=" + lang + " NOEXTRACT=1"); 203 string sampleFilesStr = 204 exec("python3 tools/assemble_sound.py " + 205 dir + "/samples/ " 206 "sound/sound_banks/ " + 207 dir + "/sound_data.ctl " + 208 dir + "/sound_data.tbl " + 209 "-DVERSION_" + upper_lang + 210 " --print-samples"); 211 vector<string> sampleFiles; 212 istringstream iss(sampleFilesStr); 213 string line; 214 while (getline(iss, line)) { 215 line = line.substr(build_dir.size() + 1); 216 line[line.size() - 1] = 'f'; 217 sampleFiles.push_back(line); 218 } 219 string ctlData = readFile(ctl); 220 string tblData = readFile(tbl); 221 return {ctlData, tblData, sampleFiles}; 222 } 223 224 int main() { 225 intentional syntax error; // (see comment at top of file) 226 map<string, string> assets; 227 map<string, vector<pair<string, int>>> soundAssets; 228 229 cout << "compiling assets..." << endl; 230 int totalAssets = 0; 231 for (string base_dir : {"assets", "sound/sequences", "textures", "levels", "actors"}) { 232 for (auto& ent: filesystem::recursive_directory_iterator(base_dir)) { 233 string p = ent.path().string(); 234 string bin = compileAsset(p); 235 if (bin.empty()) continue; 236 if (bin.size() < CHUNK_SIZE) { 237 cerr << "asset " << p << " is too small (" << bin.size() << " bytes), expected at least " << CHUNK_SIZE << " bytes" << endl; 238 continue; 239 } 240 assets[p] = bin; 241 totalAssets++; 242 } 243 } 244 for (const string& lang : LANGS) { 245 string ctl, tbl; 246 vector<string> sampleFiles; 247 tie(ctl, tbl, sampleFiles) = compileSoundData(lang); 248 assets["@sound ctl " + lang] = ctl; 249 assets["@sound tbl " + lang] = tbl; 250 totalAssets += 2; 251 for (size_t i = 0; i < sampleFiles.size(); i++) { 252 soundAssets[sampleFiles[i]].emplace_back(lang, i); 253 } 254 } 255 cout << "compiled " << totalAssets << " assets" << endl; 256 257 unordered_map<u64, vector<pair<size_t, pair<string, string>>>> hashes; 258 for (const auto& asset : assets) { 259 size_t cutPos; 260 u64 hash; 261 tie(cutPos, hash) = hashString(asset.second); 262 hashes[hash].push_back(make_pair(cutPos, asset)); 263 } 264 265 map<pair<string, string>, Pos> assetPositions; 266 for (const string& lang : LANGS) { 267 cout << "searching " << lang << "..." << endl; 268 auto remHashes = hashes; 269 auto search = [&](string& str, string lang, size_t mio0) { 270 rollingHashes(str, CHUNK_SIZE, [&](size_t hashPos, u64 hash) { 271 if (!remHashes.count(hash)) return; 272 vector<pair<size_t, pair<string, string>>>& conts = remHashes.at(hash); 273 auto it = remove_if(conts.begin(), conts.end(), 274 [&](const pair<size_t, pair<string, string>>& pa) { 275 size_t cutPos = pa.first; 276 const string& name = pa.second.first; 277 const string& data = pa.second.second; 278 size_t assetPos = hashPos - cutPos; 279 if (stringMatches(str, assetPos, data)) { 280 assetPositions[make_pair(lang, name)] = {assetPos, mio0}; 281 return true; 282 } 283 return false; 284 }); 285 conts.erase(it, conts.end()); 286 if (conts.empty()) remHashes.erase(hash); 287 }); 288 }; 289 290 string rom = readFile("baserom." + lang + ".z64"); 291 292 for (size_t i = 0; i < rom.size(); i += 4) { 293 if (rom[i] == 'M' && rom[i+1] == 'I' && rom[i+2] == 'O' && rom[i+3] == '0') { 294 string data = mio0_decompress((uint32_t*)&rom[i]); 295 search(data, lang, i); 296 } 297 } 298 299 search(rom, lang, 0); 300 } 301 302 cout << "generating " << OUTPUT_FILE << "..." << endl; 303 ofstream fout(OUTPUT_FILE); 304 assert(fout); 305 fout << 306 "{\n" 307 "\"@comment\": \"This file was generated by tools/gen_asset_list.cpp. " 308 "When renaming a file, either change its name in this file directly, " 309 "or regenerate this file using that script.\""; 310 311 bool first1 = true; 312 vector<string> notFound; 313 for (const auto& asset : assets) { 314 const string& name = asset.first; 315 const string& data = asset.second; 316 vector<pair<string, Pos>> positions; 317 for (const string& lang : LANGS) { 318 auto it = assetPositions.find(make_pair(lang, name)); 319 if (it != assetPositions.end()) { 320 positions.push_back(make_pair(lang, it->second)); 321 } 322 } 323 324 if (positions.empty()) { 325 notFound.push_back(name); 326 } 327 else { 328 fout << ",\n"; 329 if (first1) fout << "\n"; 330 first1 = false; 331 fout << "\"" << name << "\": ["; 332 if (name.substr(name.size() - 4) == ".png") { 333 int w, h; 334 tie(w, h) = getPngSize(name); 335 fout << w << "," << h << ","; 336 } 337 fout << data.size() << ",{"; 338 bool first2 = true; 339 for (auto& pa : positions) { 340 auto p = pa.second; 341 if (!first2) fout << ","; 342 first2 = false; 343 fout << "\"" << pa.first << "\":["; 344 if (p.mio0) 345 fout << p.mio0 << ","; 346 fout << p.pos << ']'; 347 } 348 fout << "}]"; 349 } 350 } 351 for (const auto& asset : soundAssets) { 352 const string& name = asset.first; 353 const vector<pair<string, int>>& locs = asset.second; 354 fout << ",\n"; 355 fout << "\"" << name << "\": [0,{"; 356 bool first2 = true; 357 for (auto& pa : locs) { 358 if (!first2) fout << ","; 359 first2 = false; 360 fout << "\"" << pa.first << "\":[\"@sound\"," << pa.second << ']'; 361 } 362 fout << "}]"; 363 } 364 fout << "\n}" << endl; 365 assert(fout); 366 fout.close(); 367 368 if (!notFound.empty()) { 369 cout << endl; 370 cout << "Missing " << notFound.size() << " assets." << endl; 371 if (notFound.size() <= 10) { 372 for (auto& s : notFound) { 373 cout << s << endl; 374 } 375 } 376 return 1; 377 } 378 379 cout << "done!" << endl; 380 381 return 0; 382 }