From 6a01d39120fb990f36d2e8a59a815e29a46356a3 Mon Sep 17 00:00:00 2001 From: Michael Dawson Date: Tue, 23 Aug 2022 22:41:29 -0400 Subject: [PATCH] src: add support for externally shared js builtins Refs: /~https://github.com/nodejs/node/issues/44000 - add infra to support externally shared js builtins in support of distos that want to externalize deps that include JS/WASM instead of native code - add support for externalizing - cjs_module_lexer/lexer - cjs_module_lexer/dist/lexer - undici/undici Signed-off-by: Michael Dawson PR-URL: /~https://github.com/nodejs/node/pull/44376 Backport-PR-URL: /~https://github.com/nodejs/node/pull/45867 Reviewed-By: Gireesh Punathil --- BUILDING.md | 38 +++++++ configure.py | 31 ++++++ doc/contributing/maintaining-dependencies.md | 106 +++++++++++++++++++ node.gyp | 4 +- src/node_builtins.cc | 42 ++++++++ src/node_builtins.h | 11 ++ 6 files changed, 229 insertions(+), 3 deletions(-) create mode 100644 doc/contributing/maintaining-dependencies.md diff --git a/BUILDING.md b/BUILDING.md index 8ddd996e9fcfb0..19a77f2049d96b 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -1062,6 +1062,44 @@ To make `./myModule.js` available via `require('myModule')` and > .\vcbuild link-module './myModule.js' link-module './myModule2.js' ``` +## Building to use shared dependencies at runtime + +By default Node.js is built so that all dependencies are bundled into +the Node.js binary itself. This provides a single binary that includes +the correct versions of all dependencies on which it depends. + +Some Node.js distributions, however, prefer to manage dependencies. +A number of `configure` options are provided to support this use case. + +* For dependencies with native code, the first set of options allow + Node.js to be built so that it uses a shared library + at runtime instead of building and including the dependency + in the Node.js binary itself. These options are in the + `Shared libraries` section of the `configure` help + (run `./configure --help` to get the complete list). + They provide the ability to enable the use of a shared library, + to set the name of the shared library, and to set the paths that + contain the include and shared library files. + +* For dependencies with JavaScript code (including WASM), the second + set of options allow the Node.js binary to be built so that it loads + the JavaScript for dependencies at runtime instead of being built into + the Node.js binary itself. These options are in the `Shared builtins` + section of the `configure` help + (run `./configure --help` to get the complete list). They + provide the ability to set the path to an external JavaScript file + for the dependency to be used at runtime. + +It is the responsibility of any distribution +shipping with these options to: + +* ensure that the shared dependencies available at runtime + match what is expected by the Node.js binary. A + mismatch may result in crashes or unexpected behavior. +* fully test that Node.js operates as expected with the + external dependencies. There may be little or no test coverage + within the Node.js project CI for these non-default options. + ## Note for downstream distributors of Node.js The Node.js ecosystem is reliant on ABI compatibility within a major release. diff --git a/configure.py b/configure.py index 1d0a1741ce4e37..ce41bff4d3f6bb 100755 --- a/configure.py +++ b/configure.py @@ -57,6 +57,11 @@ with open ('tools/icu/icu_versions.json') as f: icu_versions = json.load(f) +shareable_builtins = {'cjs_module_lexer/lexer': 'deps/cjs-module-lexer/lexer.js', + 'cjs_module_lexer/dist/lexer': 'deps/cjs-module-lexer/dist/lexer.js', + 'undici/undici': 'deps/undici/undici.js' +} + # create option groups shared_optgroup = parser.add_argument_group("Shared libraries", "Flags that allows you to control whether you want to build against " @@ -70,6 +75,9 @@ "library you want to build against.") http2_optgroup = parser.add_argument_group("HTTP2", "Flags that allows you to control HTTP2 features in Node.js") +shared_builtin_optgroup = parser.add_argument_group("Shared builtins", + "Flags that allows you to control whether you want to build against " + "internal builtins or shared files.") # Options should be in alphabetical order but keep --prefix at the top, # that's arguably the one people will be looking for most. @@ -427,6 +435,16 @@ parser.add_argument_group(shared_optgroup) +for builtin in shareable_builtins: + builtin_id = 'shared_builtin_' + builtin + '_path' + shared_builtin_optgroup.add_argument('--shared-builtin-' + builtin + '-path', + action='store', + dest='node_shared_builtin_' + builtin.replace('/', '_') + '_path', + help='Path to shared file for ' + builtin + ' builtin. ' + 'Will be used instead of bundled version at runtime') + +parser.add_argument_group(shared_builtin_optgroup) + static_optgroup.add_argument('--static-zoslib-gyp', action='store', dest='static_zoslib_gyp', @@ -2006,6 +2024,19 @@ def make_bin_override(): configure_inspector(output) configure_section_file(output) +# configure shareable builtins +output['variables']['node_builtin_shareable_builtins'] = [] +for builtin in shareable_builtins: + builtin_id = 'node_shared_builtin_' + builtin.replace('/', '_') + '_path' + if getattr(options, builtin_id): + if options.with_intl == 'none': + option_name = '--shared-builtin-' + builtin + '-path' + error(option_name + ' is incompatible with --with-intl=none' ) + else: + output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)] + else: + output['variables']['node_builtin_shareable_builtins'] += [shareable_builtins[builtin]] + # Forward OSS-Fuzz settings output['variables']['ossfuzz'] = b(options.ossfuzz) diff --git a/doc/contributing/maintaining-dependencies.md b/doc/contributing/maintaining-dependencies.md new file mode 100644 index 00000000000000..2c057320b4819b --- /dev/null +++ b/doc/contributing/maintaining-dependencies.md @@ -0,0 +1,106 @@ +# Maintaining Dependencies + +Node.js depends on additional components beyond the Node.js code +itself. These dependencies provide both native and JavaScript code +and are built together with the code under the `src` and `lib` +directories to create the Node.js binaries. + +All dependencies are located within the `deps` directory. + +Any code which meets one or more of these conditions should +be managed as a dependency: + +* originates in an upstream project and is maintained + in that upstream project. +* is not built from the `preferred form of the work for + making modifications to it` (see + [GNU GPL v2, section 3.](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) + when `make node` is run. A good example is + WASM code generated from C (the preferred form). + Typically generation is only supported on a subset of platforms, needs + additional tools, and is pre-built outside of the `make node` + step and then committed as a WASM binary in the directory + for the dependency under the `deps` directory. + +By default all dependencies are bundled into the Node.js +binary, however, `configure` options should be available to +use an externalized version at runtime when: + +* the dependency provides native code and is available as + a shared library in one or more of the common Node.js + distributions. +* the dependency provides JavaScript and is not built + from the `preferred form of the work for making modifications + to it` when `make node` is run. + +Many distributions use externalized dependencies for one or +more of these reasons: + +1. They have a requirement to build everything that they ship + from the `preferred form of the work for making + modifications to it`. This means that they need to + replace any pre-built components (for example WASM + binaries) with an equivalent that they have built. +2. They manage the dependency separately as it is used by + more applications than just Node.js. Linking against + a shared library allows them to manage updates and + CVE fixes against the library instead of having to + patch all of the individual applications. +3. They have a system wide configuration for the + dependency that all applications should respect. + +## Supporting externalized dependencies with native code. + +Support for externalized dependencies with native code for which a +shared library is available can added by: + +* adding options to `configure.py`. These are added to the + shared\_optgroup and include an options to: + * enable use of a shared library + * set the name of the shared library + * set the path to the directory with the includes for the + shared library + * set the path to where to find the shared library at + runtime +* add a call to configure\_library() to `configure.py` for the + library at the end of list of existing configure\_library() calls. + If there are additional libraries that are required it is + possible to list more than one with the `pkgname` option. +* in `node.gypi` guard the build for the dependency + with `node_shared_depname` so that is is only built if + the dependency is being bundled into Node.js itself. For example: + +```text + [ 'node_shared_brotli=="false"', { + 'dependencies': [ 'deps/brotli/brotli.gyp:brotli' ], + }], +``` + +## Supporting externalizable dependencies with JavaScript codeIZA + +Support for an externalizable dependency with JavaScript code +can be added by: + +* adding an entry to the `sharable_builtins` map in + `configure.py`. The path should correspond to the file + within the deps directory that is normally bundled into + Node.js. For example `deps/cjs-module-lexer/lexer.js`. + This will add a new option for building with that dependency + externalized. After adding the entry you can see + the new option by running `./configure --help`. + +* adding a call to `AddExternalizedBuiltin` to the constructor + for BuildinLoader in `src/node_builtins.cc` for the + dependency using the `NODE_SHARED_BUILTLIN` #define generated for + the dependency. After running `./configure` with the new + option you can find the #define in `config.gypi`. You can cut and + paste one of the existing entries and then update to match the + inport name for the dependency and the #define generated. + +## Supporting non-externalized dependencies with JavaScript code + +If the dependency consists of JavaScript in the +`preferred form of the work for making modifications to it`, it +can be added as a non-externalizable dependency. In this case +simply add the path to the JavaScript file in the `deps_files` +list in the `node.gyp` file. diff --git a/node.gyp b/node.gyp index 5a91d76f458d5d..de409a31f388fc 100644 --- a/node.gyp +++ b/node.gyp @@ -50,9 +50,7 @@ 'deps/v8/tools/tickprocessor-driver.mjs', 'deps/acorn/acorn/dist/acorn.js', 'deps/acorn/acorn-walk/dist/walk.js', - 'deps/cjs-module-lexer/lexer.js', - 'deps/cjs-module-lexer/dist/lexer.js', - 'deps/undici/undici.js', + '<@(node_builtin_shareable_builtins)', ], 'node_mksnapshot_exec': '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)node_mksnapshot<(EXECUTABLE_SUFFIX)', 'mkcodecache_exec': '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)mkcodecache<(EXECUTABLE_SUFFIX)', diff --git a/src/node_builtins.cc b/src/node_builtins.cc index 9343382e0253cd..a519b6d03eb54c 100644 --- a/src/node_builtins.cc +++ b/src/node_builtins.cc @@ -1,4 +1,5 @@ #include "node_builtins.h" +#include "node_builtins_env.h" #include "debug_utils-inl.h" #include "env-inl.h" #include "node_external_reference.h" @@ -23,6 +24,24 @@ BuiltinLoader BuiltinLoader::instance_; BuiltinLoader::BuiltinLoader() : config_(GetConfig()) { LoadJavaScriptSource(); +#if defined(NODE_HAVE_I18N_SUPPORT) +#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH + AddExternalizedBuiltin( + "internal/deps/cjs-module-lexer/lexer", + STRINGIFY(NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH)); +#endif // NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH + +#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_DIST_LEXER_PATH + AddExternalizedBuiltin( + "internal/deps/cjs-module-lexer/dist/lexer", + STRINGIFY(NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_DIST_LEXER_PATH)); +#endif // NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_DIST_LEXER_PATH + +#ifdef NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH + AddExternalizedBuiltin("internal/deps/undici/undici", + STRINGIFY(NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH)); +#endif // NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH +#endif // NODE_HAVE_I18N_SUPPORT } BuiltinLoader* BuiltinLoader::GetInstance() { @@ -236,6 +255,29 @@ MaybeLocal BuiltinLoader::LoadBuiltinSource(Isolate* isolate, #endif // NODE_BUILTIN_MODULES_PATH } +#if defined(NODE_HAVE_I18N_SUPPORT) +void BuiltinLoader::AddExternalizedBuiltin(const char* id, + const char* filename) { + std::string source; + int r = ReadFileSync(&source, filename); + if (r != 0) { + fprintf( + stderr, "Cannot load externalized builtin: \"%s:%s\".\n", id, filename); + ABORT(); + return; + } + + icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8( + icu::StringPiece(source.data(), source.length())); + auto source_utf16 = std::make_unique(utf16); + BuiltinEnv::Add(id, + UnionBytes(reinterpret_cast((*source_utf16).getBuffer()), + utf16.length())); + // keep source bytes for builtin alive while BuiltinLoader exists + GetInstance()->externalized_source_bytes_.push_back(std::move(source_utf16)); +} +#endif // NODE_HAVE_I18N_SUPPORT + // Returns Local of the compiled module if return_code_cache // is false (we are only compiling the function). // Otherwise return a Local containing the cache. diff --git a/src/node_builtins.h b/src/node_builtins.h index 21bcfe032764f5..914c6ea6653ba2 100644 --- a/src/node_builtins.h +++ b/src/node_builtins.h @@ -3,10 +3,14 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS +#include #include #include #include #include +#if defined(NODE_HAVE_I18N_SUPPORT) +#include +#endif // NODE_HAVE_I18N_SUPPORT #include "node_mutex.h" #include "node_union_bytes.h" #include "v8.h" @@ -76,11 +80,18 @@ class NODE_EXTERN_PRIVATE BuiltinLoader { const char* id, Result* result); +#if defined(NODE_HAVE_I18N_SUPPORT) + static void AddExternalizedBuiltin(const char* id, const char* filename); +#endif // NODE_HAVE_I18N_SUPPORT + static BuiltinLoader instance_; BuiltinCategories builtin_categories_; BuiltinSourceMap source_; BuiltinCodeCacheMap code_cache_; UnionBytes config_; +#if defined(NODE_HAVE_I18N_SUPPORT) + std::list> externalized_source_bytes_; +#endif // NODE_HAVE_I18N_SUPPORT // Used to synchronize access to the code cache map Mutex code_cache_mutex_;