From 776b3ee8c29555ad428d72617dbed83f58cbad86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Lorber?= Date: Tue, 22 Oct 2024 12:40:57 +0200 Subject: [PATCH] fix(core): fix i18n sites SSG memory leak - require.cache (#10599) --- .gitignore | 2 +- .../docusaurus/src/client/serverEntry.tsx | 2 +- .../docusaurus/src/commands/build/build.ts | 122 +++++++++++++++++ .../{build.ts => build/buildLocale.ts} | 124 ++---------------- packages/docusaurus/src/commands/deploy.ts | 2 +- packages/docusaurus/src/commands/serve.ts | 2 +- packages/docusaurus/src/common.d.ts | 10 +- packages/docusaurus/src/index.ts | 2 +- packages/docusaurus/src/ssg/ssg.ts | 22 +++- packages/docusaurus/src/ssg/ssgNodeRequire.ts | 49 +++++++ 10 files changed, 215 insertions(+), 122 deletions(-) create mode 100644 packages/docusaurus/src/commands/build/build.ts rename packages/docusaurus/src/commands/{build.ts => build/buildLocale.ts} (65%) create mode 100644 packages/docusaurus/src/ssg/ssgNodeRequire.ts diff --git a/.gitignore b/.gitignore index 520e3f50358d..51636c8c18d4 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ package-lock.json .eslintcache yarn-error.log -build +website/build coverage .docusaurus .cache-loader diff --git a/packages/docusaurus/src/client/serverEntry.tsx b/packages/docusaurus/src/client/serverEntry.tsx index b06068c45816..569be29d94d9 100644 --- a/packages/docusaurus/src/client/serverEntry.tsx +++ b/packages/docusaurus/src/client/serverEntry.tsx @@ -18,7 +18,7 @@ import { } from './BrokenLinksContext'; import type {PageCollectedData, AppRenderer} from '../common'; -const render: AppRenderer = async ({pathname}) => { +const render: AppRenderer['render'] = async ({pathname}) => { await preload(pathname); const modules = new Set(); diff --git a/packages/docusaurus/src/commands/build/build.ts b/packages/docusaurus/src/commands/build/build.ts new file mode 100644 index 000000000000..d0334e34bf80 --- /dev/null +++ b/packages/docusaurus/src/commands/build/build.ts @@ -0,0 +1,122 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import fs from 'fs-extra'; +import logger, {PerfLogger} from '@docusaurus/logger'; +import {mapAsyncSequential} from '@docusaurus/utils'; +import {loadContext, type LoadContextParams} from '../../server/site'; +import {loadI18n} from '../../server/i18n'; +import {buildLocale, type BuildLocaleParams} from './buildLocale'; + +export type BuildCLIOptions = Pick< + LoadContextParams, + 'config' | 'locale' | 'outDir' +> & { + bundleAnalyzer?: boolean; + minify?: boolean; + dev?: boolean; +}; + +export async function build( + siteDirParam: string = '.', + cliOptions: Partial = {}, +): Promise { + process.env.BABEL_ENV = 'production'; + process.env.NODE_ENV = 'production'; + process.env.DOCUSAURUS_CURRENT_LOCALE = cliOptions.locale; + if (cliOptions.dev) { + logger.info`Building in dev mode`; + process.env.BABEL_ENV = 'development'; + process.env.NODE_ENV = 'development'; + } + + const siteDir = await fs.realpath(siteDirParam); + + ['SIGINT', 'SIGTERM'].forEach((sig) => { + process.on(sig, () => process.exit()); + }); + + const locales = await PerfLogger.async('Get locales to build', () => + getLocalesToBuild({siteDir, cliOptions}), + ); + + if (locales.length > 1) { + logger.info`Website will be built for all these locales: ${locales}`; + } + + await PerfLogger.async(`Build`, () => + mapAsyncSequential(locales, async (locale) => { + await tryToBuildLocale({siteDir, locale, cliOptions}); + }), + ); + + logger.info`Use code=${'npm run serve'} command to test your build locally.`; +} + +async function getLocalesToBuild({ + siteDir, + cliOptions, +}: { + siteDir: string; + cliOptions: BuildCLIOptions; +}): Promise<[string, ...string[]]> { + if (cliOptions.locale) { + return [cliOptions.locale]; + } + + const context = await loadContext({ + siteDir, + outDir: cliOptions.outDir, + config: cliOptions.config, + locale: cliOptions.locale, + localizePath: cliOptions.locale ? false : undefined, + }); + const i18n = await loadI18n(context.siteConfig, { + locale: cliOptions.locale, + }); + if (i18n.locales.length > 1) { + logger.info`Website will be built for all these locales: ${i18n.locales}`; + } + + // We need the default locale to always be the 1st in the list. If we build it + // last, it would "erase" the localized sites built in sub-folders + return [ + i18n.defaultLocale, + ...i18n.locales.filter((locale) => locale !== i18n.defaultLocale), + ]; +} + +async function tryToBuildLocale(params: BuildLocaleParams) { + try { + await PerfLogger.async(`${logger.name(params.locale)}`, async () => { + // Note: I tried to run buildLocale in worker_threads (still sequentially) + // It didn't work and I got SIGSEGV / SIGBUS errors + // See https://x.com/sebastienlorber/status/1848413716372480338 + await runBuildLocaleTask(params); + }); + } catch (err) { + throw new Error( + logger.interpolate`Unable to build website for locale name=${params.locale}.`, + { + cause: err, + }, + ); + } +} + +async function runBuildLocaleTask(params: BuildLocaleParams) { + // Note: I tried to run buildLocale task in worker_threads (sequentially) + // It didn't work and I got SIGSEGV / SIGBUS errors + // Goal was to isolate memory of each localized site build + // See also https://x.com/sebastienlorber/status/1848413716372480338 + // + // Running in child_process worked but is more complex and requires + // specifying the memory of the child process + weird logging issues to fix + // + // Note in the future we could try to enable concurrent localized site builds + await buildLocale(params); +} diff --git a/packages/docusaurus/src/commands/build.ts b/packages/docusaurus/src/commands/build/buildLocale.ts similarity index 65% rename from packages/docusaurus/src/commands/build.ts rename to packages/docusaurus/src/commands/build/buildLocale.ts index 7dbcd0ca83bf..1d8c30f219e9 100644 --- a/packages/docusaurus/src/commands/build.ts +++ b/packages/docusaurus/src/commands/build/buildLocale.ts @@ -10,130 +10,34 @@ import path from 'path'; import _ from 'lodash'; import {compile} from '@docusaurus/bundler'; import logger, {PerfLogger} from '@docusaurus/logger'; -import {mapAsyncSequential} from '@docusaurus/utils'; -import {loadSite, loadContext, type LoadContextParams} from '../server/site'; -import {handleBrokenLinks} from '../server/brokenLinks'; -import {createBuildClientConfig} from '../webpack/client'; -import createServerConfig from '../webpack/server'; +import {loadSite} from '../../server/site'; +import {handleBrokenLinks} from '../../server/brokenLinks'; +import {createBuildClientConfig} from '../../webpack/client'; +import createServerConfig from '../../webpack/server'; import { createConfigureWebpackUtils, executePluginsConfigureWebpack, -} from '../webpack/configure'; -import {loadI18n} from '../server/i18n'; -import {executeSSG} from '../ssg/ssgExecutor'; +} from '../../webpack/configure'; +import {executeSSG} from '../../ssg/ssgExecutor'; import type { ConfigureWebpackUtils, LoadedPlugin, Props, } from '@docusaurus/types'; -import type {SiteCollectedData} from '../common'; +import type {SiteCollectedData} from '../../common'; +import {BuildCLIOptions} from './build'; -export type BuildCLIOptions = Pick< - LoadContextParams, - 'config' | 'locale' | 'outDir' -> & { - bundleAnalyzer?: boolean; - minify?: boolean; - dev?: boolean; -}; - -export async function build( - siteDirParam: string = '.', - cliOptions: Partial = {}, -): Promise { - process.env.BABEL_ENV = 'production'; - process.env.NODE_ENV = 'production'; - process.env.DOCUSAURUS_CURRENT_LOCALE = cliOptions.locale; - if (cliOptions.dev) { - logger.info`Building in dev mode`; - process.env.BABEL_ENV = 'development'; - process.env.NODE_ENV = 'development'; - } - - const siteDir = await fs.realpath(siteDirParam); - - ['SIGINT', 'SIGTERM'].forEach((sig) => { - process.on(sig, () => process.exit()); - }); - - async function tryToBuildLocale({locale}: {locale: string}) { - try { - await PerfLogger.async(`${logger.name(locale)}`, () => - buildLocale({ - siteDir, - locale, - cliOptions, - }), - ); - } catch (err) { - throw new Error( - logger.interpolate`Unable to build website for locale name=${locale}.`, - { - cause: err, - }, - ); - } - } - - const locales = await PerfLogger.async('Get locales to build', () => - getLocalesToBuild({siteDir, cliOptions}), - ); - - if (locales.length > 1) { - logger.info`Website will be built for all these locales: ${locales}`; - } - - await PerfLogger.async(`Build`, () => - mapAsyncSequential(locales, async (locale) => { - await tryToBuildLocale({locale}); - }), - ); - - logger.info`Use code=${'npm run serve'} command to test your build locally.`; -} - -async function getLocalesToBuild({ - siteDir, - cliOptions, -}: { +export type BuildLocaleParams = { siteDir: string; - cliOptions: BuildCLIOptions; -}): Promise<[string, ...string[]]> { - if (cliOptions.locale) { - return [cliOptions.locale]; - } - - const context = await loadContext({ - siteDir, - outDir: cliOptions.outDir, - config: cliOptions.config, - locale: cliOptions.locale, - localizePath: cliOptions.locale ? false : undefined, - }); - const i18n = await loadI18n(context.siteConfig, { - locale: cliOptions.locale, - }); - if (i18n.locales.length > 1) { - logger.info`Website will be built for all these locales: ${i18n.locales}`; - } - - // We need the default locale to always be the 1st in the list. If we build it - // last, it would "erase" the localized sites built in sub-folders - return [ - i18n.defaultLocale, - ...i18n.locales.filter((locale) => locale !== i18n.defaultLocale), - ]; -} + locale: string; + cliOptions: Partial; +}; -async function buildLocale({ +export async function buildLocale({ siteDir, locale, cliOptions, -}: { - siteDir: string; - locale: string; - cliOptions: Partial; -}): Promise { +}: BuildLocaleParams): Promise { // Temporary workaround to unlock the ability to translate the site config // We'll remove it if a better official API can be designed // See https://github.com/facebook/docusaurus/issues/4542 diff --git a/packages/docusaurus/src/commands/deploy.ts b/packages/docusaurus/src/commands/deploy.ts index f2b1471642ab..6db5bae9978c 100644 --- a/packages/docusaurus/src/commands/deploy.ts +++ b/packages/docusaurus/src/commands/deploy.ts @@ -12,7 +12,7 @@ import logger from '@docusaurus/logger'; import shell from 'shelljs'; import {hasSSHProtocol, buildSshUrl, buildHttpsUrl} from '@docusaurus/utils'; import {loadContext, type LoadContextParams} from '../server/site'; -import {build} from './build'; +import {build} from './build/build'; export type DeployCLIOptions = Pick< LoadContextParams, diff --git a/packages/docusaurus/src/commands/serve.ts b/packages/docusaurus/src/commands/serve.ts index 002b04564ec6..85b02dd2af0f 100644 --- a/packages/docusaurus/src/commands/serve.ts +++ b/packages/docusaurus/src/commands/serve.ts @@ -14,7 +14,7 @@ import serveHandler from 'serve-handler'; import openBrowser from 'react-dev-utils/openBrowser'; import {applyTrailingSlash} from '@docusaurus/utils-common'; import {loadSiteConfig} from '../server/config'; -import {build} from './build'; +import {build} from './build/build'; import {getHostPort, type HostPortOptions} from '../server/getHostPort'; import type {LoadContextParams} from '../server/site'; diff --git a/packages/docusaurus/src/common.d.ts b/packages/docusaurus/src/common.d.ts index c8271207da27..b6db9b2e55ae 100644 --- a/packages/docusaurus/src/common.d.ts +++ b/packages/docusaurus/src/common.d.ts @@ -15,9 +15,13 @@ export type AppRenderResult = { collectedData: PageCollectedData; }; -export type AppRenderer = (params: { - pathname: string; -}) => Promise; +export type AppRenderer = { + render: (params: {pathname: string}) => Promise; + + // It's important to shut down the app renderer + // Otherwise Node.js require cache leaks memory + shutdown: () => Promise; +}; export type PageCollectedData = { // TODO Docusaurus v4 refactor: helmet state is non-serializable diff --git a/packages/docusaurus/src/index.ts b/packages/docusaurus/src/index.ts index 82ba70af2e93..0009867c0d97 100644 --- a/packages/docusaurus/src/index.ts +++ b/packages/docusaurus/src/index.ts @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -export {build} from './commands/build'; +export {build} from './commands/build/build'; export {clear} from './commands/clear'; export {deploy} from './commands/deploy'; export {externalCommand} from './commands/external'; diff --git a/packages/docusaurus/src/ssg/ssg.ts b/packages/docusaurus/src/ssg/ssg.ts index d7de5b4f181e..baaaeaf6442b 100644 --- a/packages/docusaurus/src/ssg/ssg.ts +++ b/packages/docusaurus/src/ssg/ssg.ts @@ -6,9 +6,10 @@ */ import fs from 'fs-extra'; -import {createRequire} from 'module'; import path from 'path'; import _ from 'lodash'; +// TODO eval is archived / unmaintained: https://github.com/pierrec/node-eval +// We should internalize/modernize it import evaluate from 'eval'; import pMap from 'p-map'; import logger, {PerfLogger} from '@docusaurus/logger'; @@ -19,6 +20,7 @@ import { type SSGTemplateCompiled, } from './ssgTemplate'; import {SSGConcurrency, writeStaticFile} from './ssgUtils'; +import {createSSGRequire} from './ssgNodeRequire'; import type {SSGParams} from './ssgParams'; import type {AppRenderer, AppRenderResult, SiteCollectedData} from '../common'; import type {HtmlMinifier} from '@docusaurus/bundler'; @@ -58,6 +60,8 @@ export async function loadAppRenderer({ const filename = path.basename(serverBundlePath); + const ssgRequire = createSSGRequire(serverBundlePath); + const globals = { // When using "new URL('file.js', import.meta.url)", Webpack will emit // __filename, and this plugin will throw. not sure the __filename value @@ -67,7 +71,7 @@ export async function loadAppRenderer({ // This uses module.createRequire() instead of very old "require-like" lib // See also: https://github.com/pierrec/node-eval/issues/33 - require: createRequire(serverBundlePath), + require: ssgRequire.require, }; const serverEntry = await PerfLogger.async( @@ -86,7 +90,15 @@ export async function loadAppRenderer({ `Server bundle export from "${filename}" must be a function that renders the Docusaurus React app.`, ); } - return serverEntry.default; + + async function shutdown() { + ssgRequire.cleanup(); + } + + return { + render: serverEntry.default, + shutdown, + }; } export function printSSGWarnings( @@ -191,6 +203,8 @@ export async function generateStaticFiles({ {concurrency: SSGConcurrency}, ); + await renderer.shutdown(); + printSSGWarnings(results); const [allSSGErrors, allSSGSuccesses] = _.partition( @@ -235,7 +249,7 @@ async function generateStaticFile({ }): Promise { try { // This only renders the app HTML - const result = await renderer({ + const result = await renderer.render({ pathname, }); // This renders the full page HTML, including head tags... diff --git a/packages/docusaurus/src/ssg/ssgNodeRequire.ts b/packages/docusaurus/src/ssg/ssgNodeRequire.ts new file mode 100644 index 000000000000..682473936c89 --- /dev/null +++ b/packages/docusaurus/src/ssg/ssgNodeRequire.ts @@ -0,0 +1,49 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import {createRequire} from 'module'; + +export type SSGNodeRequire = { + require: NodeJS.Require; + cleanup: () => void; +}; + +// The eval/vm.Script used for running the server bundle need a require() impl +// This impl has to be relative to the server bundler path +// This enables the server bundle to resolve relative paths such as: +// - require('./assets/js/some-chunk.123456.js') +// +// Unfortunately, Node.js vm.Script doesn't isolate memory / require.cache +// This means that if we build multiple Docusaurus localized sites in a row +// The Node.js require cache will keep growing and retain in memory the JS +// assets of the former SSG builds +// We have to clean up the node require cache manually to avoid leaking memory! +// See also https://x.com/sebastienlorber/status/1848399310116831702 +export function createSSGRequire(serverBundlePath: string): SSGNodeRequire { + const realRequire = createRequire(serverBundlePath); + + const allRequiredIds: string[] = []; + + const ssgRequireFunction: NodeJS.Require = (id) => { + const module = realRequire(id); + allRequiredIds.push(id); + return module; + }; + + const cleanup = () => { + allRequiredIds.forEach((id) => { + delete realRequire.cache[realRequire.resolve(id)]; + }); + }; + + ssgRequireFunction.resolve = realRequire.resolve; + ssgRequireFunction.cache = realRequire.cache; + ssgRequireFunction.extensions = realRequire.extensions; + ssgRequireFunction.main = realRequire.main; + + return {require: ssgRequireFunction, cleanup}; +}