From 0d8776407777843bc7f642c19503784bbe799ecf Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Fri, 21 Jul 2023 00:57:00 +0200 Subject: [PATCH] src: support snapshot in single executable applications This patch adds snapshot support to single executable applications. To build a snapshot from the main script when preparing the blob that will be injected into the single executable application, add `"useSnapshot": true` to the configuration passed to `--experimental-sea-config`. For example: ``` { "main": "snapshot.js", "output": "sea-prep.blob", "useSnapshot": true } ``` The main script used to build the snapshot must invoke `v8.startupSnapshot.setDeserializeMainFunction()` to configure the entry point. The generated startup snapshot would be part of the preparation blob and get injected into the final executable. When the single executable application is launched, instead of running the `main` script from scratch, Node.js would instead deserialize the snapshot to get to the state initialized during build-time directly. PR-URL: https://github.com/nodejs/node/pull/46824 Refs: https://github.com/nodejs/single-executable/discussions/57 Reviewed-By: Benjamin Gruenbaum Reviewed-By: Darshan Sen --- doc/api/single-executable-applications.md | 41 ++++++- lib/internal/main/mksnapshot.js | 9 ++ lib/internal/process/pre_execution.js | 2 +- src/node.cc | 83 +++++++++---- src/node_main_instance.cc | 10 +- src/node_sea.cc | 112 ++++++++++++++++-- src/node_sea.h | 11 +- src/node_snapshotable.cc | 7 +- ...-single-executable-application-snapshot.js | 89 ++++++++++++++ 9 files changed, 322 insertions(+), 42 deletions(-) create mode 100644 test/sequential/test-single-executable-application-snapshot.js diff --git a/doc/api/single-executable-applications.md b/doc/api/single-executable-applications.md index 2db06fdd7239e1..607158643cbc3c 100644 --- a/doc/api/single-executable-applications.md +++ b/doc/api/single-executable-applications.md @@ -6,6 +6,10 @@ added: - v19.7.0 - v18.16.0 +changes: + - version: REPLACEME + pr-url: https://github.com/nodejs/node/pull/46824 + description: Added support for "useSnapshot". --> > Stability: 1 - Experimental: This feature is being designed and will change. @@ -169,7 +173,8 @@ The configuration currently reads the following top-level fields: { "main": "/path/to/bundled/script.js", "output": "/path/to/write/the/generated/blob.blob", - "disableExperimentalSEAWarning": true // Default: false + "disableExperimentalSEAWarning": true, // Default: false + "useSnapshot": false // Default: false } ``` @@ -177,6 +182,37 @@ If the paths are not absolute, Node.js will use the path relative to the current working directory. The version of the Node.js binary used to produce the blob must be the same as the one to which the blob will be injected. +### Startup snapshot support + +The `useSnapshot` field can be used to enable startup snapshot support. In this +case the `main` script would not be when the final executable is launched. +Instead, it would be run when the single executable application preparation +blob is generated on the building machine. The generated preparation blob would +then include a snapshot capturing the states initialized by the `main` script. +The final executable with the preparation blob injected would deserialize +the snapshot at run time. + +When `useSnapshot` is true, the main script must invoke the +[`v8.startupSnapshot.setDeserializeMainFunction()`][] API to configure code +that needs to be run when the final executable is launched by the users. + +The typical pattern for an application to use snapshot in a single executable +application is: + +1. At build time, on the building machine, the main script is run to + initialize the heap to a state that's ready to take user input. The script + should also configure a main function with + [`v8.startupSnapshot.setDeserializeMainFunction()`][]. This function will be + compiled and serialized into the snapshot, but not invoked at build time. +2. At run time, the main function will be run on top of the deserialized heap + on the user machine to process user input and generate output. + +The general constraints of the startup snapshot scripts also apply to the main +script when it's used to build snapshot for the single executable application, +and the main script can use the [`v8.startupSnapshot` API][] to adapt to +these constraints. See +[documentation about startup snapshot support in Node.js][]. + ## Notes ### `require(id)` in the injected module is not file based @@ -249,6 +285,9 @@ to help us document them. [`process.execPath`]: process.md#processexecpath [`require()`]: modules.md#requireid [`require.main`]: modules.md#accessing-the-main-module +[`v8.startupSnapshot.setDeserializeMainFunction()`]: v8.md#v8startupsnapshotsetdeserializemainfunctioncallback-data +[`v8.startupSnapshot` API]: v8.md#startup-snapshot-api +[documentation about startup snapshot support in Node.js]: cli.md#--build-snapshot [fuse]: https://www.electronjs.org/docs/latest/tutorial/fuses [postject]: https://github.com/nodejs/postject [signtool]: https://learn.microsoft.com/en-us/windows/win32/seccrypto/signtool diff --git a/lib/internal/main/mksnapshot.js b/lib/internal/main/mksnapshot.js index 2207d9253d7ec4..52d859d491a93f 100644 --- a/lib/internal/main/mksnapshot.js +++ b/lib/internal/main/mksnapshot.js @@ -16,6 +16,10 @@ const { anonymousMainPath, } = internalBinding('mksnapshot'); +const { isExperimentalSeaWarningNeeded } = internalBinding('sea'); + +const { emitExperimentalWarning } = require('internal/util'); + const { getOptionValue, } = require('internal/options'); @@ -126,6 +130,7 @@ function requireForUserSnapshot(id) { return require(normalizedId); } + function main() { prepareMainThreadExecution(true, false); initializeCallbacks(); @@ -167,6 +172,10 @@ function main() { const serializeMainArgs = [process, requireForUserSnapshot, minimalRunCjs]; + if (isExperimentalSeaWarningNeeded()) { + emitExperimentalWarning('Single executable application'); + } + if (getOptionValue('--inspect-brk')) { internalBinding('inspector').callAndPauseOnStart( runEmbedderEntryPoint, undefined, ...serializeMainArgs); diff --git a/lib/internal/process/pre_execution.js b/lib/internal/process/pre_execution.js index 54258c572c07f6..4286f9cb0e5038 100644 --- a/lib/internal/process/pre_execution.js +++ b/lib/internal/process/pre_execution.js @@ -174,7 +174,7 @@ function patchProcessObject(expandArgv1) { __proto__: null, enumerable: true, // Only set it to true during snapshot building. - configurable: getOptionValue('--build-snapshot'), + configurable: isBuildingSnapshot(), value: process.argv[0], }); diff --git a/src/node.cc b/src/node.cc index f483e59dd155a8..7ca3e14ee06c3a 100644 --- a/src/node.cc +++ b/src/node.cc @@ -292,6 +292,17 @@ MaybeLocal StartExecution(Environment* env, StartExecutionCallback cb) { CHECK(!env->isolate_data()->is_building_snapshot()); +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (sea::IsSingleExecutable()) { + sea::SeaResource sea = sea::FindSingleExecutableResource(); + // The SEA preparation blob building process should already enforce this, + // this check is just here to guard against the unlikely case where + // the SEA preparation blob has been manually modified by someone. + CHECK_IMPLIES(sea.use_snapshot(), + !env->snapshot_deserialize_main().IsEmpty()); + } +#endif + // TODO(joyeecheung): move these conditions into JS land and let the // deserialize main function take precedence. For workers, we need to // move the pre-execution part into a different file that can be @@ -1198,49 +1209,66 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr, return exit_code; } -ExitCode LoadSnapshotDataAndRun(const SnapshotData** snapshot_data_ptr, - const InitializationResultImpl* result) { - ExitCode exit_code = result->exit_code_enum(); +bool LoadSnapshotData(const SnapshotData** snapshot_data_ptr) { // nullptr indicates there's no snapshot data. DCHECK_NULL(*snapshot_data_ptr); + + bool is_sea = false; +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (sea::IsSingleExecutable()) { + is_sea = true; + sea::SeaResource sea = sea::FindSingleExecutableResource(); + if (sea.use_snapshot()) { + std::unique_ptr read_data = + std::make_unique(); + std::string_view snapshot = sea.main_code_or_snapshot; + if (SnapshotData::FromBlob(read_data.get(), snapshot)) { + *snapshot_data_ptr = read_data.release(); + return true; + } else { + fprintf(stderr, "Invalid snapshot data in single executable binary\n"); + return false; + } + } + } +#endif + // --snapshot-blob indicates that we are reading a customized snapshot. - if (!per_process::cli_options->snapshot_blob.empty()) { + // Ignore it when we are loading from SEA. + if (!is_sea && !per_process::cli_options->snapshot_blob.empty()) { std::string filename = per_process::cli_options->snapshot_blob; FILE* fp = fopen(filename.c_str(), "rb"); if (fp == nullptr) { fprintf(stderr, "Cannot open %s", filename.c_str()); - exit_code = ExitCode::kStartupSnapshotFailure; - return exit_code; + return false; } std::unique_ptr read_data = std::make_unique(); bool ok = SnapshotData::FromFile(read_data.get(), fp); fclose(fp); if (!ok) { - // If we fail to read the customized snapshot, - // simply exit with kStartupSnapshotFailure. - exit_code = ExitCode::kStartupSnapshotFailure; - return exit_code; + return false; } *snapshot_data_ptr = read_data.release(); - } else if (per_process::cli_options->node_snapshot) { - // If --snapshot-blob is not specified, we are reading the embedded - // snapshot, but we will skip it if --no-node-snapshot is specified. + return true; + } + + if (per_process::cli_options->node_snapshot) { + // If --snapshot-blob is not specified or if the SEA contains no snapshot, + // we are reading the embedded snapshot, but we will skip it if + // --no-node-snapshot is specified. const node::SnapshotData* read_data = SnapshotBuilder::GetEmbeddedSnapshotData(); - if (read_data != nullptr && read_data->Check()) { + if (read_data != nullptr) { + if (!read_data->Check()) { + return false; + } // If we fail to read the embedded snapshot, treat it as if Node.js // was built without one. *snapshot_data_ptr = read_data; } } - NodeMainInstance main_instance(*snapshot_data_ptr, - uv_default_loop(), - per_process::v8_platform.Platform(), - result->args(), - result->exec_args()); - exit_code = main_instance.Run(); - return exit_code; + return true; } static ExitCode StartInternal(int argc, char** argv) { @@ -1275,7 +1303,8 @@ static ExitCode StartInternal(int argc, char** argv) { std::string sea_config = per_process::cli_options->experimental_sea_config; if (!sea_config.empty()) { - return sea::BuildSingleExecutableBlob(sea_config); + return sea::BuildSingleExecutableBlob( + sea_config, result->args(), result->exec_args()); } // --build-snapshot indicates that we are in snapshot building mode. @@ -1290,7 +1319,15 @@ static ExitCode StartInternal(int argc, char** argv) { } // Without --build-snapshot, we are in snapshot loading mode. - return LoadSnapshotDataAndRun(&snapshot_data, result.get()); + if (!LoadSnapshotData(&snapshot_data)) { + return ExitCode::kStartupSnapshotFailure; + } + NodeMainInstance main_instance(snapshot_data, + uv_default_loop(), + per_process::v8_platform.Platform(), + result->args(), + result->exec_args()); + return main_instance.Run(); } int Start(int argc, char** argv) { diff --git a/src/node_main_instance.cc b/src/node_main_instance.cc index 41e5bee353a579..2ef56f80dfc8f6 100644 --- a/src/node_main_instance.cc +++ b/src/node_main_instance.cc @@ -92,12 +92,16 @@ void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) { bool runs_sea_code = false; #ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION if (sea::IsSingleExecutable()) { - runs_sea_code = true; sea::SeaResource sea = sea::FindSingleExecutableResource(); - std::string_view code = sea.code; - LoadEnvironment(env, code); + if (!sea.use_snapshot()) { + runs_sea_code = true; + std::string_view code = sea.main_code_or_snapshot; + LoadEnvironment(env, code); + } } #endif + // Either there is already a snapshot main function from SEA, or it's not + // a SEA at all. if (!runs_sea_code) { LoadEnvironment(env, StartExecutionCallback{}); } diff --git a/src/node_sea.cc b/src/node_sea.cc index 88741a5fce9d48..b9eabef8196750 100644 --- a/src/node_sea.cc +++ b/src/node_sea.cc @@ -6,7 +6,9 @@ #include "json_parser.h" #include "node_external_reference.h" #include "node_internals.h" +#include "node_snapshot_builder.h" #include "node_union_bytes.h" +#include "node_v8_platform-inl.h" // The POSTJECT_SENTINEL_FUSE macro is a string of random characters selected by // the Node.js project that is present only once in the entire binary. It is @@ -64,7 +66,7 @@ class SeaSerializer : public BlobSerializer { template <> size_t SeaSerializer::Write(const SeaResource& sea) { - sink.reserve(SeaResource::kHeaderSize + sea.code.size()); + sink.reserve(SeaResource::kHeaderSize + sea.main_code_or_snapshot.size()); Debug("Write SEA magic %x\n", kMagic); size_t written_total = WriteArithmetic(kMagic); @@ -74,10 +76,14 @@ size_t SeaSerializer::Write(const SeaResource& sea) { written_total += WriteArithmetic(flags); DCHECK_EQ(written_total, SeaResource::kHeaderSize); - Debug("Write SEA resource code %p, size=%zu\n", - sea.code.data(), - sea.code.size()); - written_total += WriteStringView(sea.code, StringLogMode::kAddressAndContent); + Debug("Write SEA resource %s %p, size=%zu\n", + sea.use_snapshot() ? "snapshot" : "code", + sea.main_code_or_snapshot.data(), + sea.main_code_or_snapshot.size()); + written_total += + WriteStringView(sea.main_code_or_snapshot, + sea.use_snapshot() ? StringLogMode::kAddressOnly + : StringLogMode::kAddressAndContent); return written_total; } @@ -103,8 +109,15 @@ SeaResource SeaDeserializer::Read() { Debug("Read SEA flags %x\n", static_cast(flags)); CHECK_EQ(read_total, SeaResource::kHeaderSize); - std::string_view code = ReadStringView(StringLogMode::kAddressAndContent); - Debug("Read SEA resource code %p, size=%zu\n", code.data(), code.size()); + bool use_snapshot = static_cast(flags & SeaFlags::kUseSnapshot); + std::string_view code = + ReadStringView(use_snapshot ? StringLogMode::kAddressOnly + : StringLogMode::kAddressAndContent); + + Debug("Read SEA resource %s %p, size=%zu\n", + use_snapshot ? "snapshot" : "code", + code.data(), + code.size()); return {flags, code}; } @@ -133,6 +146,10 @@ std::string_view FindSingleExecutableBlob() { } // anonymous namespace +bool SeaResource::use_snapshot() const { + return static_cast(flags & SeaFlags::kUseSnapshot); +} + SeaResource FindSingleExecutableResource() { static const SeaResource sea_resource = []() -> SeaResource { std::string_view blob = FindSingleExecutableBlob(); @@ -151,6 +168,13 @@ bool IsSingleExecutable() { } void IsExperimentalSeaWarningNeeded(const FunctionCallbackInfo& args) { + bool is_building_sea = + !per_process::cli_options->experimental_sea_config.empty(); + if (is_building_sea) { + args.GetReturnValue().Set(true); + return; + } + if (!IsSingleExecutable()) { args.GetReturnValue().Set(false); return; @@ -235,10 +259,58 @@ std::optional ParseSingleExecutableConfig( result.flags |= SeaFlags::kDisableExperimentalSeaWarning; } + std::optional use_snapshot = parser.GetTopLevelBoolField("useSnapshot"); + if (!use_snapshot.has_value()) { + FPrintF( + stderr, "\"useSnapshot\" field of %s is not a Boolean\n", config_path); + return std::nullopt; + } + if (use_snapshot.value()) { + result.flags |= SeaFlags::kUseSnapshot; + } + return result; } -ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { +ExitCode GenerateSnapshotForSEA(const SeaConfig& config, + const std::vector& args, + const std::vector& exec_args, + const std::string& main_script, + std::vector* snapshot_blob) { + SnapshotData snapshot; + // TODO(joyeecheung): make the arguments configurable through the JSON + // config or a programmatic API. + std::vector patched_args = {args[0], config.main_path}; + ExitCode exit_code = SnapshotBuilder::Generate( + &snapshot, patched_args, exec_args, main_script); + if (exit_code != ExitCode::kNoFailure) { + return exit_code; + } + auto& persistents = snapshot.env_info.principal_realm.persistent_values; + auto it = std::find_if( + persistents.begin(), persistents.end(), [](const PropInfo& prop) { + return prop.name == "snapshot_deserialize_main"; + }); + if (it == persistents.end()) { + FPrintF( + stderr, + "%s does not invoke " + "v8.startupSnapshot.setDeserializeMainFunction(), which is required " + "for snapshot scripts used to build single executable applications." + "\n", + config.main_path); + return ExitCode::kGenericUserError; + } + // We need the temporary variable for copy elision. + std::vector temp = snapshot.ToBlob(); + *snapshot_blob = std::move(temp); + return ExitCode::kNoFailure; +} + +ExitCode GenerateSingleExecutableBlob( + const SeaConfig& config, + const std::vector& args, + const std::vector& exec_args) { std::string main_script; // TODO(joyeecheung): unify the file utils. int r = ReadFileSync(&main_script, config.main_path.c_str()); @@ -248,7 +320,22 @@ ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { return ExitCode::kGenericUserError; } - SeaResource sea{config.flags, main_script}; + std::vector snapshot_blob; + bool builds_snapshot_from_main = + static_cast(config.flags & SeaFlags::kUseSnapshot); + if (builds_snapshot_from_main) { + ExitCode exit_code = GenerateSnapshotForSEA( + config, args, exec_args, main_script, &snapshot_blob); + if (exit_code != ExitCode::kNoFailure) { + return exit_code; + } + } + + SeaResource sea{ + config.flags, + builds_snapshot_from_main + ? std::string_view{snapshot_blob.data(), snapshot_blob.size()} + : std::string_view{main_script.data(), main_script.size()}}; SeaSerializer serializer; serializer.Write(sea); @@ -269,11 +356,14 @@ ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { } // anonymous namespace -ExitCode BuildSingleExecutableBlob(const std::string& config_path) { +ExitCode BuildSingleExecutableBlob(const std::string& config_path, + const std::vector& args, + const std::vector& exec_args) { std::optional config_opt = ParseSingleExecutableConfig(config_path); if (config_opt.has_value()) { - ExitCode code = GenerateSingleExecutableBlob(config_opt.value()); + ExitCode code = + GenerateSingleExecutableBlob(config_opt.value(), args, exec_args); return code; } diff --git a/src/node_sea.h b/src/node_sea.h index 8b0877df3eb0d7..f9eb5ca79d4543 100644 --- a/src/node_sea.h +++ b/src/node_sea.h @@ -6,8 +6,10 @@ #if !defined(DISABLE_SINGLE_EXECUTABLE_APPLICATION) #include +#include #include #include +#include #include "node_exit_code.h" namespace node { @@ -21,19 +23,24 @@ const uint32_t kMagic = 0x143da20; enum class SeaFlags : uint32_t { kDefault = 0, kDisableExperimentalSeaWarning = 1 << 0, + kUseSnapshot = 1 << 1, }; struct SeaResource { SeaFlags flags = SeaFlags::kDefault; - std::string_view code; + std::string_view main_code_or_snapshot; + bool use_snapshot() const; static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags); }; bool IsSingleExecutable(); SeaResource FindSingleExecutableResource(); std::tuple FixupArgsForSEA(int argc, char** argv); -node::ExitCode BuildSingleExecutableBlob(const std::string& config_path); +node::ExitCode BuildSingleExecutableBlob( + const std::string& config_path, + const std::vector& args, + const std::vector& exec_args); } // namespace sea } // namespace node diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 59bcea00d3e7a6..797cd16d87c603 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -585,7 +585,9 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) { // [ ... ] code_cache std::vector SnapshotData::ToBlob() const { + std::vector result; SnapshotSerializer w; + w.Debug("SnapshotData::ToBlob()\n"); size_t written_total = 0; @@ -603,7 +605,10 @@ std::vector SnapshotData::ToBlob() const { w.Debug("Write code_cache\n"); written_total += w.WriteVector(code_cache); w.Debug("SnapshotData::ToBlob() Wrote %d bytes\n", written_total); - return w.sink; + + // Return using the temporary value to enable copy elision. + std::swap(result, w.sink); + return result; } void SnapshotData::ToFile(FILE* out) const { diff --git a/test/sequential/test-single-executable-application-snapshot.js b/test/sequential/test-single-executable-application-snapshot.js new file mode 100644 index 00000000000000..d1c44b6dbab3b7 --- /dev/null +++ b/test/sequential/test-single-executable-application-snapshot.js @@ -0,0 +1,89 @@ +'use strict'; + +require('../common'); + +const { + injectAndCodeSign, + skipIfSingleExecutableIsNotSupported, +} = require('../common/sea'); + +skipIfSingleExecutableIsNotSupported(); + +// This tests the snapshot support in single executable applications. + +const tmpdir = require('../common/tmpdir'); +const { copyFileSync, writeFileSync, existsSync } = require('fs'); +const { spawnSync } = require('child_process'); +const { join } = require('path'); +const assert = require('assert'); + +const configFile = join(tmpdir.path, 'sea-config.json'); +const seaPrepBlob = join(tmpdir.path, 'sea-prep.blob'); +const outputFile = join(tmpdir.path, process.platform === 'win32' ? 'sea.exe' : 'sea'); + +{ + tmpdir.refresh(); + + writeFileSync(join(tmpdir.path, 'snapshot.js'), '', 'utf-8'); + writeFileSync(configFile, ` + { + "main": "snapshot.js", + "output": "sea-prep.blob", + "useSnapshot": true + } + `); + + const child = spawnSync( + process.execPath, + ['--experimental-sea-config', 'sea-config.json'], + { + cwd: tmpdir.path + }); + + assert.match( + child.stderr.toString(), + /snapshot\.js does not invoke v8\.startupSnapshot\.setDeserializeMainFunction\(\)/); +} + +{ + tmpdir.refresh(); + const code = ` + const { + setDeserializeMainFunction, + } = require('v8').startupSnapshot; + + setDeserializeMainFunction(() => { + console.log('Hello from snapshot'); + }); + `; + + writeFileSync(join(tmpdir.path, 'snapshot.js'), code, 'utf-8'); + writeFileSync(configFile, ` + { + "main": "snapshot.js", + "output": "sea-prep.blob", + "useSnapshot": true + } + `); + + let child = spawnSync( + process.execPath, + ['--experimental-sea-config', 'sea-config.json'], + { + cwd: tmpdir.path + }); + assert.match( + child.stderr.toString(), + /Single executable application is an experimental feature/); + + assert(existsSync(seaPrepBlob)); + + copyFileSync(process.execPath, outputFile); + injectAndCodeSign(outputFile, seaPrepBlob); + + child = spawnSync(outputFile); + assert.strictEqual(child.stdout.toString().trim(), 'Hello from snapshot'); + assert.doesNotMatch( + child.stderr.toString(), + /Single executable application is an experimental feature/); +}