From 287dada1010440f7a8044956858fab848110378e Mon Sep 17 00:00:00 2001 From: Keyhan Vakil Date: Thu, 1 Jun 2023 08:41:52 -0700 Subject: [PATCH] build: speed up compilation of mksnapshot output Incremental compilation of Node.js is slow. Currently on a powerful Linux machine, it takes about 5.8 seconds to compile `gen/node_snapshot.cc` with g++. As in the previous PR which dealt with `node_js2c`, we add a new build define `NODE_MKSNAPSHOT_USE_STRING_LITERALS` which is used by `node_mksnapshot`. When this flag is set, we emit string literals instead of array literals for the snapshot blob and for the code cache, i.e.: ```c++ // old: static const uint8_t X[] = { ... }; // new: static const uint8_t *X = "..."; ``` I only enabled the new flag on Linux/macOS, since those are systems that I have available for testing. On my Linux system with gcc, it speeds up compilation of this file by 3.7s (5.8s -> 2.1s). On my Mac system with clang, it speeds up compilation by 1.7s (3.4s -> 1.7s). Again, the right thing here is probably to generate separate files for the snapshot blob and for each code cache output, but this is a nice intermediate speedup. Refs: https://github.com/nodejs/node/issues/47984 Refs: https://github.com/nodejs/node/pull/48160 PR-URL: https://github.com/nodejs/node/pull/48162 Reviewed-By: Yagiz Nizipli Reviewed-By: Joyee Cheung --- node.gyp | 3 ++ src/node_snapshotable.cc | 77 +++++++++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/node.gyp b/node.gyp index 0db48c18dc1186..a992981b77b94e 100644 --- a/node.gyp +++ b/node.gyp @@ -880,6 +880,9 @@ 'node_target_type=="executable"', { 'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ], }], + ['OS in "linux mac"', { + 'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS' ], + }], [ 'use_openssl_def==1', { # TODO(bnoordhuis) Make all platforms export the same list of symbols. # Teach mkssldef.py to generate linker maps that UNIX linkers understand. 
diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 9d27a7c66b2aa2..55933e3bbef2f3 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -742,18 +742,56 @@ static std::string FormatSize(size_t size) { return buf; } -static void WriteStaticCodeCacheData(std::ostream* ss, - const builtins::CodeCacheInfo& info) { +#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS +static void WriteDataAsCharString(std::ostream* ss, + const uint8_t* data, + size_t length) { + for (size_t i = 0; i < length; i++) { + const uint8_t ch = data[i]; + // We can print most printable characters directly. The exceptions are '\' + // (escape characters), " (would end the string), and ? (trigraphs). The + // latter may be overly conservative: we compile with C++17 which doesn't + // support trigraphs. + if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') { + *ss << ch; + } else { + // All other characters are blindly output as octal. + const char c0 = '0' + ((ch >> 6) & 7); + const char c1 = '0' + ((ch >> 3) & 7); + const char c2 = '0' + (ch & 7); + *ss << "\\" << c0 << c1 << c2; + } + if (i % 64 == 63) { + // Go to a newline every 64 bytes since many text editors have + // problems with very long lines. 
+ *ss << "\"\n\""; + } + } +} + +static void WriteStaticCodeCacheDataAsStringLiteral( + std::ostream* ss, const builtins::CodeCacheInfo& info) { + *ss << "static const uint8_t *" << GetCodeCacheDefName(info.id) + << "= reinterpret_cast<const uint8_t*>(\""; + WriteDataAsCharString(ss, info.data.data, info.data.length); + *ss << "\");\n"; +} +#else +static void WriteStaticCodeCacheDataAsArray( + std::ostream* ss, const builtins::CodeCacheInfo& info) { *ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n"; WriteVector(ss, info.data.data, info.data.length); - *ss << "};"; + *ss << "};\n"; } +#endif -static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) { +static void WriteCodeCacheInitializer(std::ostream* ss, + const std::string& id, + size_t size) { std::string def_name = GetCodeCacheDefName(id); *ss << " { \"" << id << "\",\n"; *ss << " {" << def_name << ",\n"; - *ss << " arraysize(" << def_name << "),\n"; + *ss << " " << size << ",\n"; *ss << " }\n"; *ss << " },\n"; } @@ -767,21 +805,34 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) { // This file is generated by tools/snapshot. Do not edit. namespace node { - -static const char v8_snapshot_blob_data[] = { )"; + +#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS + ss << R"(static const char *v8_snapshot_blob_data = ")"; + WriteDataAsCharString( + &ss, + reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data), + data->v8_snapshot_blob_data.raw_size); + ss << R"(";)"; +#else + ss << R"(static const char v8_snapshot_blob_data[] = {)"; WriteVector(&ss, data->v8_snapshot_blob_data.data, data->v8_snapshot_blob_data.raw_size); - ss << R"(}; + ss << R"(};)"; +#endif -static const int v8_snapshot_blob_size = )" + ss << R"(static const int v8_snapshot_blob_size = )" << data->v8_snapshot_blob_data.raw_size << ";"; - // Windows can't deal with too many large vector initializers. - // Store the data into static arrays first. 
for (const auto& item : data->code_cache) { - WriteStaticCodeCacheData(&ss, item); +#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS + WriteStaticCodeCacheDataAsStringLiteral(&ss, item); +#else + // Windows can't deal with too many large vector initializers. + // Store the data into static arrays first. + WriteStaticCodeCacheDataAsArray(&ss, item); +#endif } ss << R"(const SnapshotData snapshot_data { @@ -808,7 +859,7 @@ static const int v8_snapshot_blob_size = )" // -- code_cache begins -- {)"; for (const auto& item : data->code_cache) { - WriteCodeCacheInitializer(&ss, item.id); + WriteCodeCacheInitializer(&ss, item.id, item.data.length); } ss << R"( }