Skip to content

Commit

Permalink
build: speed up compilation of mksnapshot output
Browse files Browse the repository at this point in the history
Incremental compilation of Node.js is slow. Currently on a powerful
Linux machine, it takes about 5.8 seconds to compile
`gen/node_snapshot.cc` with g++.

As in the previous PR which dealt with `node_js2c`, we add a new build
define `NODE_MKSNAPSHOT_USE_STRING_LITERALS` which is used by
`node_mksnapshot`. When this flag is set, we emit string literals
instead of array literals for the snapshot blob and for the code cache,
i.e.:

```c++
// old: static const uint8_t X[] = { ... };
static const uint8_t *X = "...";
```

I only enabled the new flag on Linux/macOS, since those are systems that
I have available for testing. On my Linux system with gcc, it speeds up
compilation of this file by 3.7s (5.8s -> 2.1s). On my Mac system with
clang, it speeds up compilation by 1.7s (3.4s -> 1.7s).

Again, the right thing here is probably to generate separate files for
the snapshot blob and for each code cache output, but this is a nice
intermediate speedup.

Refs: nodejs#47984
Refs: nodejs#48160
  • Loading branch information
kvakil committed May 25, 2023
1 parent 817c579 commit 3836ab5
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 13 deletions.
3 changes: 3 additions & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,9 @@
'node_target_type=="executable"', {
'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ],
}],
['OS in "linux mac"', {
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS' ],
}],
[ 'use_openssl_def==1', {
# TODO(bnoordhuis) Make all platforms export the same list of symbols.
# Teach mkssldef.py to generate linker maps that UNIX linkers understand.
Expand Down
77 changes: 64 additions & 13 deletions src/node_snapshotable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -742,18 +742,56 @@ static std::string FormatSize(size_t size) {
return buf;
}

static void WriteStaticCodeCacheData(std::ostream* ss,
const builtins::CodeCacheInfo& info) {
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
// Writes `length` bytes starting at `data` to `*ss` as the *contents* of a
// C++ narrow string literal. The opening and closing double quotes are the
// caller's responsibility; this function only emits what goes between them.
//
// Encoding rules:
//   - Bytes in the printable ASCII range (' ' through '~') are written
//     verbatim, except for backslash, double quote and question mark.
//   - Every other byte is written as a fixed-width three-digit octal escape
//     (e.g. "\000", "\377"). Always using three digits means a following
//     digit character can never be absorbed into the escape.
//   - After every 64th input byte the literal is closed and reopened on a new
//     line ("\"\n\""), relying on adjacent string literal concatenation.
static void WriteDataAsCharString(std::ostream* ss,
                                  const uint8_t* data,
                                  size_t length) {
  // Number of input bytes encoded per source line. Keeps lines short enough
  // for text editors that struggle with very long lines.
  constexpr size_t kBytesPerLine = 64;

  std::ostream& out = *ss;
  for (size_t offset = 0; offset < length; ++offset) {
    const uint8_t byte = data[offset];

    const bool is_printable = byte >= ' ' && byte <= '~';
    // Backslash would start an escape sequence, a double quote would
    // terminate the literal, and '?' could form a trigraph. The trigraph
    // guard may be overly conservative since C++17 has no trigraphs.
    const bool is_special = byte == '\\' || byte == '"' || byte == '?';

    if (!is_printable || is_special) {
      // Emit the byte as backslash + three octal digits, most significant
      // digit first (bits 7..6, then 5..3, then 2..0).
      out << "\\";
      for (int shift = 6; shift >= 0; shift -= 3) {
        out << static_cast<char>('0' + ((byte >> shift) & 7));
      }
    } else {
      out << byte;
    }

    // Break the literal once a full line's worth of bytes has been written.
    if ((offset + 1) % kBytesPerLine == 0) {
      out << "\"\n\"";
    }
  }
}

// Emits, into `*ss`, the definition of a `static const uint8_t *` variable
// for one code cache entry. The variable is named by
// GetCodeCacheDefName(info.id) and is initialized from a string literal
// (cast through reinterpret_cast) whose contents encode the
// `info.data.length` bytes found at `info.data.data`.
static void WriteStaticCodeCacheDataAsStringLiteral(
    std::ostream* ss, const builtins::CodeCacheInfo& info) {
  const std::string def_name = GetCodeCacheDefName(info.id);

  // Declaration up to and including the opening quote of the literal.
  *ss << "static const uint8_t *" << def_name
      << "= reinterpret_cast<const uint8_t *>(\"";

  // Literal contents: the raw code cache bytes, escaped as needed.
  WriteDataAsCharString(ss, info.data.data, info.data.length);

  // Closing quote, closing parenthesis of the cast, and statement end.
  *ss << "\");\n";
}
#else
static void WriteStaticCodeCacheDataAsArray(
std::ostream* ss, const builtins::CodeCacheInfo& info) {
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
WriteVector(ss, info.data.data, info.data.length);
*ss << "};";
*ss << "};\n";
}
#endif

static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) {
static void WriteCodeCacheInitializer(std::ostream* ss,
const std::string& id,
size_t size) {
std::string def_name = GetCodeCacheDefName(id);
*ss << " { \"" << id << "\",\n";
*ss << " {" << def_name << ",\n";
*ss << " arraysize(" << def_name << "),\n";
*ss << " " << size << ",\n";
*ss << " }\n";
*ss << " },\n";
}
Expand All @@ -767,21 +805,34 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) {
// This file is generated by tools/snapshot. Do not edit.
namespace node {
static const char v8_snapshot_blob_data[] = {
)";

#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
ss << R"(static const char *v8_snapshot_blob_data = ")";
WriteDataAsCharString(
&ss,
reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data),
data->v8_snapshot_blob_data.raw_size);
ss << R"(";)";
#else
ss << R"(static const char v8_snapshot_blob_data[] = {)";
WriteVector(&ss,
data->v8_snapshot_blob_data.data,
data->v8_snapshot_blob_data.raw_size);
ss << R"(};
ss << R"(};)";
#endif

static const int v8_snapshot_blob_size = )"
ss << R"(static const int v8_snapshot_blob_size = )"
<< data->v8_snapshot_blob_data.raw_size << ";";

// Windows can't deal with too many large vector initializers.
// Store the data into static arrays first.
for (const auto& item : data->code_cache) {
WriteStaticCodeCacheData(&ss, item);
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
WriteStaticCodeCacheDataAsStringLiteral(&ss, item);
#else
// Windows can't deal with too many large vector initializers.
// Store the data into static arrays first.
WriteStaticCodeCacheDataAsArray(&ss, item);
#endif
}

ss << R"(const SnapshotData snapshot_data {
Expand All @@ -808,7 +859,7 @@ static const int v8_snapshot_blob_size = )"
// -- code_cache begins --
{)";
for (const auto& item : data->code_cache) {
WriteCodeCacheInitializer(&ss, item.id);
WriteCodeCacheInitializer(&ss, item.id, item.data.length);
}
ss << R"(
}
Expand Down

0 comments on commit 3836ab5

Please sign in to comment.