Skip to content

Commit

Permalink
build: speed up compilation of mksnapshot output
Browse files Browse the repository at this point in the history
Incremental compilation of Node.js is slow. Currently on a powerful
Linux machine, it takes about 5.6 seconds to compile
`gen/node_snapshot.cc` with g++.

As in the previous PR which dealt with `node_js2c`, we add a new flag
`--use-string-literals` to `node_mksnapshot`. When this flag is set, we
emit string literals instead of array literals for the snapshot blob and
for the code cache, i.e.:

```c++
// old: static const uint8_t X[] = { ... };
static const uint8_t *X = reinterpret_cast<const uint8_t *>("...");
```

I only enabled the new flag on Linux/macOS, since those are systems that
I have available for testing. On my Linux system with gcc, it speeds up
compilation of this file by 3.7s (5.8s -> 2.1s). On my Mac system with
clang, it speeds up compilation by 1.7s (3.4s -> 1.7s).

Again, the right thing here is probably to generate separate files for
the snapshot blob and for each code cache output, but this is a nice
intermediate speedup.

The thing I'm most unsure about in this PR is how to actually thread the
argument through. I considered adding it to the general argument parser,
but that felt strange, since this flag only makes sense during the build
process. So I kind of hacked it in, which also feels weird. Suggestions
are very welcome.

Refs: nodejs#47984
Refs: nodejs#48160
  • Loading branch information
kvakil committed May 25, 2023
1 parent 817c579 commit b6567e7
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 25 deletions.
8 changes: 8 additions & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -650,8 +650,16 @@
],
'action': [
'<@(_inputs)',
'<@(node_mksnapshot_use_string_literals_flag)',
'<@(_outputs)',
],
'conditions': [
['OS=="linux" or OS=="mac"', {
'variables': {'node_mksnapshot_use_string_literals_flag': ['--use-string-literals']},
}, {
'variables': {'node_mksnapshot_use_string_literals_flag': []},
}],
],
},
],
}],
Expand Down
3 changes: 2 additions & 1 deletion src/node_snapshot_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class NODE_EXTERN_PRIVATE SnapshotBuilder {
public:
static ExitCode Generate(std::ostream& out,
const std::vector<std::string> args,
const std::vector<std::string> exec_args);
const std::vector<std::string> exec_args,
bool use_string_literals);

// Generate the snapshot into out.
static ExitCode Generate(SnapshotData* out,
Expand Down
90 changes: 70 additions & 20 deletions src/node_snapshotable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -742,23 +742,61 @@ static std::string FormatSize(size_t size) {
return buf;
}

static void WriteStaticCodeCacheData(std::ostream* ss,
const builtins::CodeCacheInfo& info) {
// Writes `length` bytes starting at `data` to `*ss` as the *body* of a C++
// narrow string literal. The opening and closing quotes of the literal are the
// caller's responsibility.
//
// Bytes in the printable ASCII range (' ' through '~') are written verbatim,
// except for three that are escaped instead:
//   '\\' - would begin an escape sequence,
//   '"'  - would terminate the literal,
//   '?'  - could form a trigraph (possibly overly conservative: we compile
//          with C++17, which doesn't support trigraphs).
// Every other byte is written as a backslash followed by exactly three octal
// digits. An octal escape takes at most three digits, so a digit that follows
// in the data can never be absorbed into the escape.
//
// After every 64th byte the current literal is closed, a newline is written
// and a new literal is opened; the compiler concatenates adjacent literals.
// This keeps lines short, since many text editors struggle with very long
// lines.
static void WriteDataAsCharString(std::ostream* ss,
                                  const uint8_t* data,
                                  size_t length) {
  constexpr size_t kBytesPerLine = 64;

  for (size_t pos = 0; pos < length; ++pos) {
    const uint8_t byte = data[pos];

    const bool outside_printable_range = byte < ' ' || byte > '~';
    const bool is_special = byte == '\\' || byte == '"' || byte == '?';

    if (outside_printable_range || is_special) {
      // Fixed-width, three-digit octal escape.
      const char high = static_cast<char>('0' + ((byte >> 6) & 3));
      const char mid = static_cast<char>('0' + ((byte >> 3) & 7));
      const char low = static_cast<char>('0' + (byte & 7));
      *ss << "\\" << high << mid << low;
    } else {
      *ss << byte;
    }

    // Split the literal once a full line's worth of bytes has been written.
    if ((pos + 1) % kBytesPerLine == 0) {
      *ss << "\"\n\"";
    }
  }
}

// Writes the code cache bytes of `info` to `*ss` as a C++ array definition:
// a `static const uint8_t <name>[] = {` header, where <name> comes from
// GetCodeCacheDefName(info.id), followed by the elements produced by
// WriteVector and a closing `};`.
// NOTE(review): two closing-brace statements appear below, differing only in
// the trailing newline; presumably this is the removed and the added line of
// a diff shown together and only one of them is live - confirm.
static void WriteStaticCodeCacheDataAsArray(
std::ostream* ss, const builtins::CodeCacheInfo& info) {
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
WriteVector(ss, info.data.data, info.data.length);
*ss << "};";
*ss << "};\n";
}

// Writes the code cache bytes of `info` to `*ss` as a pointer definition
// initialized from a string literal:
//
//   static const uint8_t *<name>= reinterpret_cast<const uint8_t *>("...");
//
// <name> comes from GetCodeCacheDefName(info.id) and the literal's contents
// are produced by WriteDataAsCharString. The reinterpret_cast is needed
// because a string literal has char elements, not uint8_t.
static void WriteStaticCodeCacheDataAsStringLiteral(
    std::ostream* ss, const builtins::CodeCacheInfo& info) {
  std::ostream& out = *ss;

  // Declaration and the opening quote of the literal.
  out << "static const uint8_t *";
  out << GetCodeCacheDefName(info.id);
  out << "= reinterpret_cast<const uint8_t *>(\"";

  // Escaped payload.
  WriteDataAsCharString(ss, info.data.data, info.data.length);

  // Closing quote, end of the cast and of the statement.
  out << "\");\n";
}

// Writes one brace-enclosed entry of the generated code cache initializer
// list to `*ss`: the builtin `id` as a quoted string, followed by a nested
// brace holding the name of the static data definition
// (GetCodeCacheDefName(id)) and the data size.
// NOTE(review): both a two-parameter and a three-parameter signature, and
// both an `arraysize(...)` and an explicit `size` line, appear below;
// presumably these are removed/added diff lines shown together. The explicit
// `size` is presumably needed because the string-literal form declares a
// pointer rather than an array - confirm.
static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) {
static void WriteCodeCacheInitializer(std::ostream* ss,
const std::string& id,
size_t size) {
std::string def_name = GetCodeCacheDefName(id);
// Entry key: the id as a string literal.
*ss << " { \"" << id << "\",\n";
// Entry value: pointer/array name, then its length.
*ss << " {" << def_name << ",\n";
*ss << " arraysize(" << def_name << "),\n";
*ss << " " << size << ",\n";
*ss << " }\n";
*ss << " },\n";
}

void FormatBlob(std::ostream& ss, const SnapshotData* data) {
void FormatBlob(std::ostream& ss,
const SnapshotData* data,
const bool use_string_literals) {
ss << R"(#include <cstddef>
#include "env.h"
#include "node_snapshot_builder.h"
Expand All @@ -767,21 +805,32 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) {
// This file is generated by tools/snapshot. Do not edit.
namespace node {
static const char v8_snapshot_blob_data[] = {
)";
WriteVector(&ss,
data->v8_snapshot_blob_data.data,
data->v8_snapshot_blob_data.raw_size);
ss << R"(};
static const int v8_snapshot_blob_size = )"
if (use_string_literals) {
ss << R"(static const char *v8_snapshot_blob_data = ")";
WriteDataAsCharString(
&ss,
reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data),
data->v8_snapshot_blob_data.raw_size);
ss << R"(";)";
} else {
ss << R"(static const char v8_snapshot_blob_data[] = {)";
WriteVector(&ss,
data->v8_snapshot_blob_data.data,
data->v8_snapshot_blob_data.raw_size);
ss << R"(};)";
}
ss << R"(static const int v8_snapshot_blob_size = )"
<< data->v8_snapshot_blob_data.raw_size << ";";

// Windows can't deal with too many large vector initializers.
// Store the data into static arrays first.
for (const auto& item : data->code_cache) {
WriteStaticCodeCacheData(&ss, item);
if (use_string_literals) {
WriteStaticCodeCacheDataAsStringLiteral(&ss, item);
} else {
// Windows can't deal with too many large vector initializers.
// Store the data into static arrays first.
WriteStaticCodeCacheDataAsArray(&ss, item);
}
}

ss << R"(const SnapshotData snapshot_data {
Expand All @@ -808,7 +857,7 @@ static const int v8_snapshot_blob_size = )"
// -- code_cache begins --
{)";
for (const auto& item : data->code_cache) {
WriteCodeCacheInitializer(&ss, item.id);
WriteCodeCacheInitializer(&ss, item.id, item.data.length);
}
ss << R"(
}
Expand Down Expand Up @@ -1022,13 +1071,14 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,

// Builds a snapshot and writes it to `out` as generated source text.
// The snapshot is first produced into a local SnapshotData via the
// SnapshotData* overload of Generate; if that reports anything other than
// ExitCode::kNoFailure, the code is returned and nothing is written to `out`.
// Otherwise the data is rendered with FormatBlob and the exit code returned.
// `use_string_literals` is forwarded unchanged to FormatBlob.
// NOTE(review): the parameter list and the FormatBlob call each appear twice
// below (with and without `use_string_literals`); presumably these are
// removed/added diff lines shown together - confirm.
ExitCode SnapshotBuilder::Generate(std::ostream& out,
const std::vector<std::string> args,
const std::vector<std::string> exec_args) {
const std::vector<std::string> exec_args,
const bool use_string_literals) {
SnapshotData data;
ExitCode exit_code = Generate(&data, args, exec_args);
// Bail out before emitting any output if snapshot creation failed.
if (exit_code != ExitCode::kNoFailure) {
return exit_code;
}
FormatBlob(out, &data);
FormatBlob(out, &data, use_string_literals);
return exit_code;
}

Expand Down
16 changes: 12 additions & 4 deletions tools/snapshot/node_mksnapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,23 @@ int main(int argc, char* argv[]) {

int BuildSnapshot(int argc, char* argv[]) {
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <path/to/output.cc>\n";
std::cerr << "Usage: " << argv[0]
<< " [--use-string-literals] <path/to/output.cc>\n";
std::cerr << " " << argv[0] << " --build-snapshot "
<< "<path/to/script.js> <path/to/output.cc>\n";
return 1;
}

std::vector<std::string> args{argv, argv + argc};
size_t size_before_remove = args.size();
args.erase(std::remove(args.begin(), args.end(), "--use-string-literals"),
args.end());
// If the size of args changed, we must have removed a
// "--use-string-literals".
const bool use_string_literals = args.size() != size_before_remove;

std::unique_ptr<node::InitializationResult> result =
node::InitializeOncePerProcess(
std::vector<std::string>(argv, argv + argc));
node::InitializeOncePerProcess(args);

CHECK(!result->early_return());
CHECK_EQ(result->exit_code(), 0);
Expand All @@ -87,7 +95,7 @@ int BuildSnapshot(int argc, char* argv[]) {
node::ExitCode exit_code = node::ExitCode::kNoFailure;
{
exit_code = node::SnapshotBuilder::Generate(
out, result->args(), result->exec_args());
out, result->args(), result->exec_args(), use_string_literals);
if (exit_code == node::ExitCode::kNoFailure) {
if (!out) {
std::cerr << "Failed to write " << out_path << "\n";
Expand Down

0 comments on commit b6567e7

Please sign in to comment.