Skip to content

Commit

Permalink
Process Text Files in Decompiler (#122)
Browse files Browse the repository at this point in the history
* begin support for v2

* export game text

* generate text files

* working text load

* fix windows

* add test and clean up game tests a bit

* load the right file

* add separate program to launch the data compiler

* add offline test script
  • Loading branch information
water111 authored Nov 20, 2020
1 parent ae05387 commit 953c151
Show file tree
Hide file tree
Showing 48 changed files with 1,790 additions and 196 deletions.
6 changes: 6 additions & 0 deletions check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Verify the files in the "out" directory next to this script against hash.md5.

# Directory of this script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Quote the path so directories containing spaces work, and stop if the
# directory is missing rather than running md5sum in the wrong place.
cd "${DIR}/out" || exit 1
md5sum --check hash.md5
8 changes: 8 additions & 0 deletions common/link_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,12 @@ struct ObjectHeader {
char name[60];
};

// Header found at the start of V2 link data.
// Used in GOAL and OpenGOAL. The decompiler reads it by casting raw file bytes to this struct.
struct LinkHeaderV2 {
uint32_t type_tag; // always -1 (0xffffffff when read as unsigned)
uint32_t length; // length of the link data in bytes, counted from the start of this header
uint32_t version; // always 2
};

#endif // JAK1_LINK_TYPES_H
3 changes: 2 additions & 1 deletion decompiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ add_executable(decompiler
IR/IR.cpp
IR/IR_TypeAnalysis.cpp
Function/TypeInspector.cpp
data/tpage.cpp)
data/tpage.cpp
data/game_text.cpp)

target_link_libraries(decompiler
goos
Expand Down
1 change: 1 addition & 0 deletions decompiler/ObjectFile/LinkedObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,7 @@ std::string LinkedObjectFile::get_goal_string(int seg, int word_idx, bool with_q
char cword[4];
memcpy(cword, &word.data, 4);
result += cword[byte_offset];
assert(result.back() != 0);
}
if (with_quotes) {
result += "\"";
Expand Down
72 changes: 49 additions & 23 deletions decompiler/ObjectFile/LinkedObjectFileCreation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "LinkedObjectFileCreation.h"
#include "decompiler/config.h"
#include "decompiler/util/DecompilerTypeSystem.h"
#include "common/link_types.h"

// There are three link versions:
// V2 - not really in use anymore, but V4 will reuse logic from it (and the game didn't rename the
Expand All @@ -26,13 +27,6 @@ struct LinkHeaderCommon {
uint16_t version; // what version (2, 3, 4)
};

// Header found at the start of V2 linking data.
// It is read by casting raw file bytes to this struct.
struct LinkHeaderV2 {
uint32_t type_tag; // always -1 (0xffffffff when read as unsigned)
uint32_t length; // length of the link data in bytes, counted from the start of this header
uint32_t version; // always 2
};

// Header for link data used for V4
struct LinkHeaderV4 {
uint32_t type_tag; // always -1
Expand Down Expand Up @@ -101,12 +95,15 @@ static uint32_t c_symlink2(LinkedObjectFile& f,
uint32_t next_reloc = link_ptr_offset + 1;

if (seek & 3) {
// 0b01, 0b10
seek = (relocPtr[1] << 8) | table_value;
next_reloc = link_ptr_offset + 2;
if (seek & 2) {
// 0b10
seek = (relocPtr[2] << 16) | seek;
next_reloc = link_ptr_offset + 3;
if (seek & 1) {
// 0b11
seek = (relocPtr[3] << 24) | seek;
next_reloc = link_ptr_offset + 4;
}
Expand Down Expand Up @@ -216,32 +213,58 @@ static uint32_t align16(uint32_t in) {
}

/*!
* Process link data for a "V4" object file.
* Process link data for a "V4" or "V2" object file.
* In reality a V4 seems to be just a V2 object, but with the link data after the real data.
* There's a V4 header at the very beginning, but another V2 header/link data at the end
* -----------------------------------------------
* | V4 header | data | V2 header | V2 link data |
* -----------------------------------------------
*
* V2
* -----------------------------------
* | V2 header | V2 link data | data |
* -----------------------------------
* The V4 format avoids having to copy the data to the left once the V2 link data is discarded.
* Presumably once they decided that data could never be relocated after being loaded in,
* it became worth it to throw away the link data, and avoid the memcpy of the data.
* The memcpy is surprisingly expensive, when you consider the linker ran for ~3% of a frame each
* frame and level data is ~10 MB.
*/
static void link_v4(LinkedObjectFile& f,
const std::vector<uint8_t>& data,
const std::string& name,
DecompilerTypeSystem& dts) {
// read the V4 header to find where the link data really is
static void link_v2_or_v4(LinkedObjectFile& f,
const std::vector<uint8_t>& data,
const std::string& name,
DecompilerTypeSystem& dts) {
const auto* header = (const LinkHeaderV4*)&data.at(0);
uint32_t link_data_offset = header->code_size + sizeof(LinkHeaderV4); // no basic offset
assert(header->version == 4 || header->version == 2);

// these are different depending on the version.
uint32_t code_offset, link_data_offset, code_size;

if (header->version == 4) {
// code starts immediately after the V4 header
code_offset = sizeof(LinkHeaderV4);
// link_data_offset points to a V2 header
link_data_offset = header->code_size + sizeof(LinkHeaderV4);
// code size is specified!
code_size = header->code_size;
} else {
// link data starts immediately
link_data_offset = 0;

// code starts immediately after the header
uint32_t code_offset = sizeof(LinkHeaderV4);
uint32_t code_size = header->code_size;
// code is after all the link data
code_offset = header->length;
// we have to compute the code size ourself
code_size = data.size() - code_offset;
assert(header->type_tag == 0xffffffff);
}

f.stats.total_code_bytes += code_size;
f.stats.total_v2_code_bytes += code_size;

// add all code
const uint8_t* code_start = &data.at(code_offset);
const uint8_t* code_end =
&data.at(code_offset + code_size); // safe because link data is after code.
&data.at(code_offset + code_size - 1) + 1; // get the pointer to one past the end.
assert(((code_end - code_start) % 4) == 0);
f.set_segment_count(1);
for (auto x = code_start; x < code_end; x += 4) {
Expand All @@ -250,12 +273,13 @@ static void link_v4(LinkedObjectFile& f,

// read v2 header after the code
const uint8_t* link_data = &data.at(link_data_offset);
const auto* link_header_v2 = (const LinkHeaderV2*)(link_data); // subtract off type tag
uint32_t link_ptr_offset = link_data_offset;
link_ptr_offset += sizeof(LinkHeaderV2);
auto* link_header_v2 = (const LinkHeaderV2*)(link_data);
assert(link_header_v2->type_tag == 0xffffffff);
assert(link_header_v2->version == 2);
assert(link_header_v2->length == header->length);
f.stats.total_v2_link_bytes += link_header_v2->length;
uint32_t link_ptr_offset = link_data_offset + sizeof(LinkHeaderV2);

// first "section" of link data is a list of where all the pointer are.
if (data.at(link_ptr_offset) == 0) {
Expand Down Expand Up @@ -369,7 +393,8 @@ static void link_v4(LinkedObjectFile& f,

// check length
assert(link_header_v2->length == align64(link_ptr_offset - link_data_offset + 1));
while (link_ptr_offset < data.size()) {
size_t expected_end = header->version == 4 ? data.size() : link_header_v2->length;
while (link_ptr_offset < expected_end) {
assert(data.at(link_ptr_offset) == 0);
link_ptr_offset++;
}
Expand Down Expand Up @@ -790,12 +815,13 @@ LinkedObjectFile to_linked_object_file(const std::vector<uint8_t>& data,
if (header->version == 3) {
assert(header->type_tag == 0);
link_v3(result, data, name, dts);
} else if (header->version == 4) {
} else if (header->version == 4 || header->version == 2) {
assert(header->type_tag == 0xffffffff);
link_v4(result, data, name, dts);
link_v2_or_v4(result, data, name, dts);
} else if (header->version == 5) {
link_v5(result, data, name, dts);
} else {
printf("Unsupported version %d\n", header->version);
assert(false);
}

Expand Down
70 changes: 69 additions & 1 deletion decompiler/ObjectFile/ObjectFileDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <cstring>
#include <map>
#include "decompiler/data/tpage.h"
#include "decompiler/data/game_text.h"
#include "LinkedObjectFileCreation.h"
#include "decompiler/config.h"
#include "third-party/minilzo/minilzo.h"
Expand All @@ -32,6 +33,37 @@ std::string strip_dgo_extension(const std::string& x) {
}
return x;
}

/*!
 * Get an object name from a file name.
 * Strips off anything up to and including the last slash (either '/' or '\\'), then strips the
 * file extension (the last '.' and everything after it) from what remains.
 *
 * Only the file-name component is searched for the extension, so a dot in a directory name
 * (for example "../data/0COMMON") is never mistaken for an extension separator.
 * A dot that is the first character of the file name is kept as part of the name.
 *
 * The resulting name must not be empty; this is checked with an assert.
 */
std::string obj_filename_to_name(const std::string& x) {
  // The file name begins right after the last path separator, or at 0 if there is none.
  const auto last_slash = x.find_last_of("/\\");
  const size_t name_start = (last_slash == std::string::npos) ? 0 : last_slash + 1;

  // The name runs to the end of the string unless an extension dot is found inside the
  // file-name component (strictly after its first character).
  size_t name_end = x.length();
  const auto last_dot = x.rfind('.');
  if (last_dot != std::string::npos && last_dot > name_start) {
    name_end = last_dot;
  }

  // An empty input, or a path ending in a slash, has no object name.
  assert(name_end > name_start);
  return x.substr(name_start, name_end - name_start);
}
} // namespace

std::string ObjectFileData::to_unique_name() const {
Expand All @@ -53,6 +85,7 @@ std::string ObjectFileData::to_unique_name() const {
return record.name;
}
}

ObjectFileData& ObjectFileDB::lookup_record(const ObjectFileRecord& rec) {
ObjectFileData* result = nullptr;

Expand All @@ -72,7 +105,8 @@ ObjectFileData& ObjectFileDB::lookup_record(const ObjectFileRecord& rec) {
* Build an object file DB for the given list of DGOs.
*/
ObjectFileDB::ObjectFileDB(const std::vector<std::string>& _dgos,
const std::string& obj_file_name_map_file) {
const std::string& obj_file_name_map_file,
const std::vector<std::string>& object_files) {
Timer timer;

spdlog::info("-Loading types...");
Expand All @@ -93,6 +127,12 @@ ObjectFileDB::ObjectFileDB(const std::vector<std::string>& _dgos,
get_objs_from_dgo(dgo);
}

for (auto& obj : object_files) {
auto data = file_util::read_binary_file(obj);
auto name = obj_filename_to_name(obj);
add_obj_from_dgo(name, name, data.data(), data.size(), "NO-XGO");
}

spdlog::info("ObjectFileDB Initialized:");
spdlog::info("Total DGOs: {}", int(_dgos.size()));
spdlog::info("Total data: {} bytes", stats.total_dgo_bytes);
Expand Down Expand Up @@ -629,6 +669,34 @@ void ObjectFileDB::process_tpages() {
100.f * float(success) / float(total), timer.getMs());
}

/*!
 * Find every game text object in the database, process each one with ::process_game_text,
 * and return the result of write_game_text for the collected text.
 *
 * An object counts as game text when its name in the DGO is exactly one prefix character
 * followed by "COMMON". Each language is expected to appear at most once; a repeated language
 * trips an assert.
 */
std::string ObjectFileDB::process_game_text() {
  spdlog::info("- Finding game text...");
  const std::string text_string = "COMMON";
  Timer timer;
  int file_count = 0;
  int string_count = 0;
  int char_count = 0;
  std::unordered_map<int, std::unordered_map<int, std::string>> text_by_language_by_id;

  for_each_obj([&](ObjectFileData& data) {
    // Compare in place: checking the length first means an empty or short name is skipped
    // instead of throwing std::out_of_range, and no temporary string is built per object.
    const auto& obj_name = data.name_in_dgo;
    if (obj_name.size() != text_string.size() + 1 ||
        obj_name.compare(1, std::string::npos, text_string) != 0) {
      return;
    }

    file_count++;
    auto statistics = ::process_game_text(data);
    string_count += statistics.total_text;
    char_count += statistics.total_chars;

    // two text files for the same language would silently overwrite each other.
    assert(text_by_language_by_id.find(statistics.language) == text_by_language_by_id.end());
    text_by_language_by_id[statistics.language] = std::move(statistics.text);
  });

  spdlog::info("Processed {} text files ({} strings, {} characters) in {:.2f} ms", file_count,
               string_count, char_count, timer.getMs());

  return write_game_text(text_by_language_by_id);
}

void ObjectFileDB::analyze_functions() {
spdlog::info("- Analyzing Functions...");
Timer timer;
Expand Down
5 changes: 4 additions & 1 deletion decompiler/ObjectFile/ObjectFileDB.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ struct ObjectFileData {

class ObjectFileDB {
public:
ObjectFileDB(const std::vector<std::string>& _dgos, const std::string& obj_file_name_map_file);
ObjectFileDB(const std::vector<std::string>& _dgos,
const std::string& obj_file_name_map_file,
const std::vector<std::string>& object_files);
std::string generate_dgo_listing();
std::string generate_obj_listing();
void process_link_data();
Expand All @@ -57,6 +59,7 @@ class ObjectFileDB {
void write_disassembly(const std::string& output_dir, bool disassemble_objects_without_functions);
void analyze_functions();
void process_tpages();
std::string process_game_text();

ObjectFileData& lookup_record(const ObjectFileRecord& rec);
DecompilerTypeSystem dts;
Expand Down
2 changes: 2 additions & 0 deletions decompiler/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ void set_config(const std::string& path_to_config_file) {

gConfig.game_version = cfg.at("game_version").get<int>();
gConfig.dgo_names = cfg.at("dgo_names").get<std::vector<std::string>>();
gConfig.object_file_names = cfg.at("object_file_names").get<std::vector<std::string>>();
if (cfg.contains("obj_file_name_map_file")) {
gConfig.obj_file_name_map_file = cfg.at("obj_file_name_map_file").get<std::string>();
}
Expand All @@ -27,6 +28,7 @@ void set_config(const std::string& path_to_config_file) {
gConfig.write_hex_near_instructions = cfg.at("write_hex_near_instructions").get<bool>();
gConfig.analyze_functions = cfg.at("analyze_functions").get<bool>();
gConfig.process_tpages = cfg.at("process_tpages").get<bool>();
gConfig.process_game_text = cfg.at("process_game_text").get<bool>();

std::vector<std::string> asm_functions_by_name =
cfg.at("asm_functions_by_name").get<std::vector<std::string>>();
Expand Down
2 changes: 2 additions & 0 deletions decompiler/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
struct Config {
int game_version = -1;
std::vector<std::string> dgo_names;
std::vector<std::string> object_file_names;
std::unordered_set<std::string> bad_inspect_types;
std::string obj_file_name_map_file;
bool write_disassembly = false;
Expand All @@ -20,6 +21,7 @@ struct Config {
bool write_hex_near_instructions = false;
bool analyze_functions = false;
bool process_tpages = false;
bool process_game_text = false;
std::unordered_set<std::string> asm_functions_by_name;
// ...
};
Expand Down
7 changes: 4 additions & 3 deletions decompiler/config/all-types.gc
Original file line number Diff line number Diff line change
Expand Up @@ -2570,8 +2570,9 @@

(deftype game-text (structure)
((id uint32 :offset-assert 0)
(text basic :offset-assert 4)
(text string :offset-assert 4)
)
:pack-me
:method-count-assert 9
:size-assert #x8
:flag-assert #x900000008
Expand All @@ -2580,8 +2581,8 @@
(deftype game-text-info (basic)
((length int32 :offset-assert 4)
(language-id int32 :offset-assert 8)
(group-name basic :offset-assert 12)
(data uint8 :dynamic :offset-assert 16)
(group-name string :offset-assert 12)
(data game-text :dynamic :inline :offset-assert 16)
)
:method-count-assert 10
:size-assert #x10
Expand Down
Loading

0 comments on commit 953c151

Please sign in to comment.