Skip to content

Commit

Permalink
[wasm-split] Add a multi-split mode (#6943)
Browse files Browse the repository at this point in the history
Add a mode that splits a module into arbitrarily many parts based on a
simple manifest file. This is currently implemented by splitting out one
module at a time in a loop, but this could change in the future if
splitting out all the modules at once would improve the quality of the
output.
  • Loading branch information
tlively committed Sep 16, 2024
1 parent 6c07a32 commit 9292310
Show file tree
Hide file tree
Showing 6 changed files with 369 additions and 6 deletions.
36 changes: 33 additions & 3 deletions src/tools/wasm-split/split-options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ std::ostream& operator<<(std::ostream& o, WasmSplitOptions::Mode& mode) {
case WasmSplitOptions::Mode::Split:
o << "split";
break;
case WasmSplitOptions::Mode::MultiSplit:
o << "multi-split";
break;
case WasmSplitOptions::Mode::Instrument:
o << "instrument";
break;
Expand Down Expand Up @@ -91,7 +94,14 @@ WasmSplitOptions::WasmSplitOptions()
"Split an input module into two output modules. The default mode.",
WasmSplitOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& arugment) { mode = Mode::Split; })
[&](Options* o, const std::string& argument) { mode = Mode::Split; })
.add(
"--multi-split",
"",
"Split an input module into an arbitrary number of output modules.",
WasmSplitOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& argument) { mode = Mode::MultiSplit; })
.add(
"--instrument",
"",
Expand Down Expand Up @@ -151,6 +161,25 @@ WasmSplitOptions::WasmSplitOptions()
[&](Options* o, const std::string& argument) {
splitFuncs = parseNameList(argument);
})
.add(
"--manifest",
"",
"File describing the functions to be split into each module. Each "
"section separated by a blank line begins with the base name of an "
"output module, which is followed by a list of functions to place in "
"that module, one per line.",
WasmSplitOption,
{Mode::MultiSplit},
Options::Arguments::One,
[&](Options* o, const std::string& argument) { manifestFile = argument; })
.add("--out-prefix",
"",
"Prefix prepended to module names in the manifest file to create "
"output file names.",
WasmSplitOption,
{Mode::MultiSplit},
Options::Arguments::One,
[&](Options* o, const std::string& argument) { outPrefix = argument; })
.add("--primary-output",
"-o1",
"Output file for the primary module.",
Expand Down Expand Up @@ -313,7 +342,7 @@ WasmSplitOptions::WasmSplitOptions()
"-g",
"Emit names section in wasm binary (or full debuginfo in wast)",
WasmSplitOption,
{Mode::Split, Mode::Instrument},
{Mode::Split, Mode::MultiSplit, Mode::Instrument},
Options::Arguments::Zero,
[&](Options* o, const std::string& arguments) {
passOptions.debugInfo = true;
Expand All @@ -322,7 +351,7 @@ WasmSplitOptions::WasmSplitOptions()
"-o",
"Output file.",
WasmSplitOption,
{Mode::Instrument, Mode::MergeProfiles},
{Mode::Instrument, Mode::MergeProfiles, Mode::MultiSplit},
Options::Arguments::One,
[&](Options* o, const std::string& argument) { output = argument; })
.add("--unescape",
Expand Down Expand Up @@ -407,6 +436,7 @@ bool WasmSplitOptions::validate() {
}
switch (mode) {
case Mode::Split:
case Mode::MultiSplit:
case Mode::Instrument:
if (inputFiles.size() > 1) {
fail("Cannot have more than one input file.");
Expand Down
4 changes: 4 additions & 0 deletions src/tools/wasm-split/split-options.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const std::string DEFAULT_PROFILE_EXPORT("__write_profile");
struct WasmSplitOptions : ToolOptions {
enum class Mode : unsigned {
Split,
MultiSplit,
Instrument,
MergeProfiles,
PrintProfile,
Expand Down Expand Up @@ -68,6 +69,9 @@ struct WasmSplitOptions : ToolOptions {
std::string secondaryMemoryName;
std::string exportPrefix;

std::string manifestFile;
std::string outPrefix;

// A hack to ensure the split and instrumented modules have the same table
// size when using Emscripten's SPLIT_MODULE mode with dynamic linking. TODO:
// Figure out a more elegant solution for that use case and remove this.
Expand Down
84 changes: 84 additions & 0 deletions src/tools/wasm-split/wasm-split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,87 @@ void splitModule(const WasmSplitOptions& options) {
writeModule(*secondary, options.secondaryOutput, options);
}

void multiSplitModule(const WasmSplitOptions& options) {
if (options.manifestFile.empty()) {
Fatal() << "--multi-split requires --manifest";
}
if (options.output.empty()) {
Fatal() << "--multi-split requires --output";
}

std::ifstream manifest(options.manifestFile);
if (!manifest.is_open()) {
Fatal() << "File not found: " << options.manifestFile;
}

Module wasm;
parseInput(wasm, options);

// Map module names to the functions that should be in the modules.
std::map<std::string, std::unordered_set<std::string>> moduleFuncs;
// The module for which we are currently parsing a set of functions.
std::string currModule;
// The set of functions we are currently inserting into.
std::unordered_set<std::string>* currFuncs = nullptr;
// Map functions to their modules to ensure no function is assigned to
// multiple modules.
std::unordered_map<std::string, std::string> funcModules;

std::string line;
bool newSection = true;
while (std::getline(manifest, line)) {
if (line.empty()) {
newSection = true;
continue;
}
if (newSection) {
currModule = line;
currFuncs = &moduleFuncs[line];
newSection = false;
continue;
}
assert(currFuncs);
currFuncs->insert(line);
auto [it, inserted] = funcModules.insert({line, currModule});
if (!inserted && it->second != currModule) {
Fatal() << "Function " << line << "cannot be assigned to module "
<< currModule << "; it is already assigned to module "
<< it->second << '\n';
}
if (inserted && !options.quiet && !wasm.getFunctionOrNull(line)) {
std::cerr << "warning: Function " << line << " does not exist\n";
}
}

ModuleSplitting::Config config;
config.usePlaceholders = false;
config.importNamespace = "";
config.minimizeNewExportNames = true;
for (auto& func : wasm.functions) {
config.primaryFuncs.insert(func->name);
}
for (auto& [mod, funcs] : moduleFuncs) {
if (options.verbose) {
std::cerr << "Splitting module " << mod << '\n';
}
if (!options.quiet && funcs.empty()) {
std::cerr << "warning: Module " << mod << " will be empty\n";
}
for (auto& func : funcs) {
config.primaryFuncs.erase(Name(func));
}
auto splitResults = ModuleSplitting::splitFunctions(wasm, config);
// TODO: symbolMap, placeholderMap, emitModuleNames
// TODO: Support --emit-text and use .wast in that case.
auto moduleName = options.outPrefix + mod + ".wasm";
PassRunner runner(&*splitResults.secondary);
runner.add("remove-unused-module-elements");
runner.run();
writeModule(*splitResults.secondary, moduleName, options);
}
writeModule(wasm, options.output, options);
}

void mergeProfiles(const WasmSplitOptions& options) {
// Read the initial profile. We will merge other profiles into this one.
ProfileData data = readProfile(options.inputFiles[0]);
Expand Down Expand Up @@ -503,6 +584,9 @@ int main(int argc, const char* argv[]) {
case WasmSplitOptions::Mode::Split:
splitModule(options);
break;
case WasmSplitOptions::Mode::MultiSplit:
multiSplitModule(options);
break;
case WasmSplitOptions::Mode::Instrument:
instrumentModule(options);
break;
Expand Down
23 changes: 20 additions & 3 deletions test/lit/help/wasm-split.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
;; CHECK-NEXT: --split Split an input module into two output
;; CHECK-NEXT: modules. The default mode.
;; CHECK-NEXT:
;; CHECK-NEXT: --multi-split Split an input module into an arbitrary
;; CHECK-NEXT: number of output modules.
;; CHECK-NEXT:
;; CHECK-NEXT: --instrument Instrument an input module to allow it to
;; CHECK-NEXT: generate a profile that can be used to
;; CHECK-NEXT: guide splitting.
Expand Down Expand Up @@ -43,6 +46,18 @@
;; CHECK-NEXT: can also pass a file with one function
;; CHECK-NEXT: per line by passing @filename.
;; CHECK-NEXT:
;; CHECK-NEXT: --manifest [multi-split] File describing the
;; CHECK-NEXT: functions to be split into each module.
;; CHECK-NEXT: Each section separated by a blank line
;; CHECK-NEXT: begins with the base name of an output
;; CHECK-NEXT: module, which is followed by a list of
;; CHECK-NEXT: functions to place in that module, one
;; CHECK-NEXT: per line.
;; CHECK-NEXT:
;; CHECK-NEXT: --out-prefix [multi-split] Prefix prepended to module
;; CHECK-NEXT: names in the manifest file to create
;; CHECK-NEXT: output file names.
;; CHECK-NEXT:
;; CHECK-NEXT: --primary-output,-o1 [split] Output file for the primary
;; CHECK-NEXT: module.
;; CHECK-NEXT:
Expand Down Expand Up @@ -125,10 +140,12 @@
;; CHECK-NEXT: --emit-text,-S [split, instrument] Emit text instead of
;; CHECK-NEXT: binary for the output file or files.
;; CHECK-NEXT:
;; CHECK-NEXT: --debuginfo,-g [split, instrument] Emit names section in
;; CHECK-NEXT: wasm binary (or full debuginfo in wast)
;; CHECK-NEXT: --debuginfo,-g [split, multi-split, instrument] Emit
;; CHECK-NEXT: names section in wasm binary (or full
;; CHECK-NEXT: debuginfo in wast)
;; CHECK-NEXT:
;; CHECK-NEXT: --output,-o [instrument, merge-profiles] Output file.
;; CHECK-NEXT: --output,-o [instrument, merge-profiles, multi-split]
;; CHECK-NEXT: Output file.
;; CHECK-NEXT:
;; CHECK-NEXT: --unescape,-u Un-escape function names (in
;; CHECK-NEXT: print-profile output)
Expand Down
Loading

0 comments on commit 9292310

Please sign in to comment.