From aebf66ce39dad19808a598a9269a547cf9e36aec Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 08:39:44 -0800 Subject: [PATCH 01/14] add `parseSpecifier` helper --- src/node-file-trace.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index df6dbb24..16f1b45b 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -12,6 +12,30 @@ const fsReadFile = fs.promises.readFile; const fsReadlink = fs.promises.readlink; const fsStat = fs.promises.stat; +type ParseSpecifierResult = { + path: string; + queryString: string | null +} + +// Splits an ESM specifier into path and querystring (including the leading `?`). (If the specifier is CJS, +// it is passed through untouched.) +export function parseSpecifier(specifier: string, cjsResolve: boolean = true): ParseSpecifierResult { + let path = specifier; + let queryString = null; + + if (!cjsResolve) { + // Regex which splits a specifier into path and querystring, inspired by that in `enhanced-resolve` + // https://github.com/webpack/enhanced-resolve/blob/157ed9bcc381857d979e56d2f20a5a17c6362bff/lib/util/identifier.js#L8 + const match = /^(#?(?:\0.|[^?#\0])*)(\?(?:\0.|[^\0])*)?$/.exec(specifier); + if (match) { + path = match[1] + queryString = match[2] + } + } + + return {path, queryString}; +} + function inPath (path: string, parent: string) { const pathWithSep = join(parent, sep); return path.startsWith(pathWithSep) && path !== pathWithSep; From fb936206d4e0ff4786ff0cdcc0f966518bfc2de2 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 08:43:03 -0800 Subject: [PATCH 02/14] strip querystring from ESM imports in `resolveDependency` --- src/resolve-dependency.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/resolve-dependency.ts b/src/resolve-dependency.ts index b50684f4..88ca47b3 100644 --- a/src/resolve-dependency.ts +++ b/src/resolve-dependency.ts @@ -1,11 +1,16 @@ import { isAbsolute, resolve, sep } from 'path'; -import { Job } from './node-file-trace'; +import { Job, parseSpecifier } from './node-file-trace'; // node resolver // custom implementation to emit only needed package.json files for resolver // (package.json files are emitted as they are hit) export default async function resolveDependency (specifier: string, parent: string, job: Job, cjsResolve = true): Promise { let resolved: string | string[]; + + // ESM imports are allowed to have querystrings, but the native Node behavior is to ignore them when doing + // file resolution, so emulate that here by stripping any querystring off before continuing + specifier = parseSpecifier(specifier, cjsResolve).path + if (isAbsolute(specifier) || specifier === '.' || specifier === '..' || specifier.startsWith('./') || specifier.startsWith('../')) { const trailingSlash = specifier.endsWith('/'); resolved = await resolvePath(resolve(parent, '..', specifier) + (trailingSlash ? '/' : ''), parent, job); From e0bfe0d83e66105f3bd7fc917352ecfddd6cce23 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 08:52:50 -0800 Subject: [PATCH 03/14] pass `cjsResolve` to `emitDependency` implementation method --- src/node-file-trace.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 16f1b45b..6d53ac75 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -268,12 +268,12 @@ export class Job { for (const item of resolved) { // ignore builtins if (item.startsWith('node:')) return; - await this.emitDependency(item, path); + await this.analyzeAndEmitDependency(item, path, cjsResolve); } } else { // ignore builtins if (resolved.startsWith('node:')) return; - await this.emitDependency(resolved, path); + await this.analyzeAndEmitDependency(resolved, path, cjsResolve); } } @@ -367,6 +367,10 @@ export class Job { } async emitDependency (path: string, parent?: string) { + return this.analyzeAndEmitDependency(path, parent) + } + + private async analyzeAndEmitDependency(path: string, parent?: string, cjsResolve?: boolean) { if (this.processed.has(path)) { if (parent) { await this.emitFile(path, 'dependency', parent) @@ -417,7 +421,7 @@ export class Job { const ext = extname(asset); if (ext === '.js' || ext === '.mjs' || ext === '.node' || ext === '' || this.ts && (ext === '.ts' || ext === '.tsx') && asset.startsWith(this.base) && asset.slice(this.base.length).indexOf(sep + 'node_modules' + sep) === -1) - await this.emitDependency(asset, path); + await this.analyzeAndEmitDependency(asset, path, !isESM); else await this.emitFile(asset, 'asset', path); }), From 77c83bd53b0de21bda63415567ab932bccc6a8f4 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 08:56:12 -0800 Subject: [PATCH 04/14] strip querystring from ESM imports in `analyzeAndEmitDependency` --- src/node-file-trace.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 6d53ac75..fb2e80ee 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -370,7 +370,11 @@ export class Job { return this.analyzeAndEmitDependency(path, parent) } - private async analyzeAndEmitDependency(path: string, parent?: string, cjsResolve?: boolean) { + private async analyzeAndEmitDependency(rawPath: string, parent?: string, cjsResolve?: boolean) { + + // Strip the querystring, if any. (Only affects ESM dependencies.) + const { path } = parseSpecifier(rawPath, cjsResolve) + if (this.processed.has(path)) { if (parent) { await this.emitFile(path, 'dependency', parent) From a8c294d2f9d467d4ccded6d7114244a5d1e6ce29 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 08:57:27 -0800 Subject: [PATCH 05/14] cache seen status and `analyze` results under full path --- src/node-file-trace.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index fb2e80ee..6f8cccd7 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -375,13 +375,15 @@ export class Job { // Strip the querystring, if any. (Only affects ESM dependencies.) const { path } = parseSpecifier(rawPath, cjsResolve) - if (this.processed.has(path)) { + // Since different querystrings may lead to different results, include the full path + // when noting whether or not we've already seen this path + if (this.processed.has(rawPath)) { if (parent) { await this.emitFile(path, 'dependency', parent) } return }; - this.processed.add(path); + this.processed.add(rawPath); const emitted = await this.emitFile(path, 'dependency', parent); if (!emitted) return; @@ -400,7 +402,8 @@ export class Job { let analyzeResult: AnalyzeResult; - const cachedAnalysis = this.analysisCache.get(path); + // Since different querystrings may lead to analyses, include the full path when caching + const cachedAnalysis = this.analysisCache.get(rawPath); if (cachedAnalysis) { analyzeResult = cachedAnalysis; } @@ -411,7 +414,7 @@ export class Job { // directly as this will not be included in the cachedAnalysis and won't // be emit for successive runs that leverage the cache analyzeResult = await analyze(path, source.toString(), this); - this.analysisCache.set(path, analyzeResult); + this.analysisCache.set(rawPath, analyzeResult); } const { deps, imports, assets, isESM } = analyzeResult; From 142658ce8cce1afdb0198d9147de34c5fbb26bc5 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 08:58:57 -0800 Subject: [PATCH 06/14] try both `rawPath` and `path` when calling `readFile` --- src/node-file-trace.ts | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 6f8cccd7..7d712575 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -408,8 +408,23 @@ export class Job { analyzeResult = cachedAnalysis; } else { - const source = await this.readFile(path); - if (source === null) throw new Error('File ' + path + ' does not exist.'); + // By default, `this.readFile` is the regular `fs.readFile`, but a different implementation + // can be specified via a user option in the main `nodeFileTrace` entrypoint. Depending on + // that implementation, having a querystring on the end of the path may either a) be necessary + // in order to specify the right module from which to read, or b) lead to an `ENOENT: no such + // file or directory` error because it thinks the querystring is part of the filename. We + // therefore try it with the querystring first, but have the non-querystringed version as a + // fallback. (If there's no `?` in the given path, `rawPath` will equal `path`, so order is a + // moot point.) + const source = await this.readFile(rawPath) || await this.readFile(path) ; + + if (source === null) { + const errorMessage = path === rawPath + ? 'File ' + path + ' does not exist.' + : 'Neither ' + path + ' nor ' + rawPath + ' exists.' + throw new Error(errorMessage); + } + // analyze should not have any side-effects e.g. calling `job.emitFile` // directly as this will not be included in the cachedAnalysis and won't // be emit for successive runs that leverage the cache From 8c3ec63d4acb84ec3c382bdd53db89209c2d3872 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 09:05:07 -0800 Subject: [PATCH 07/14] strip and store querystring in `maybeEmitDep` --- src/node-file-trace.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 7d712575..fb593b85 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -239,6 +239,8 @@ export class Job { } private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => { + // Only affects ESM dependencies + const { path: strippedDep, queryString = '' } = parseSpecifier(dep, cjsResolve) let resolved: string | string[] = ''; let error: Error | undefined; try { @@ -447,8 +449,8 @@ export class Job { else await this.emitFile(asset, 'asset', path); }), - ...[...deps].map(async dep => this.maybeEmitDep(dep, path, !isESM)), - ...[...imports].map(async dep => this.maybeEmitDep(dep, path, false)), + ...[...deps].map(async dep => this.maybeEmitDep(dep, rawPath, !isESM)), + ...[...imports].map(async dep => this.maybeEmitDep(dep, rawPath, false)), ]); } } \ No newline at end of file From 54431dbc83601b666a17659ffc73bd0bf9697ec8 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 09:06:13 -0800 Subject: [PATCH 08/14] use non-querystringed path when changing file extension in `maybeEmitDep` --- src/node-file-trace.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index fb593b85..378c9d69 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -248,12 +248,12 @@ export class Job { } catch (e1: any) { error = e1; try { - if (this.ts && dep.endsWith('.js') && e1 instanceof NotFoundError) { + if (this.ts && strippedDep.endsWith('.js') && e1 instanceof NotFoundError) { // TS with ESM relative import paths need full extensions // (we have to write import "./foo.js" instead of import "./foo") // See https://www.typescriptlang.org/docs/handbook/esm-node.html - const depTS = dep.slice(0, -3) + '.ts'; - resolved = await this.resolve(depTS, path, this, cjsResolve); + const strippedDepTS = strippedDep.slice(0, -3) + '.ts'; + resolved = await this.resolve(strippedDepTS + queryString, path, this, cjsResolve); error = undefined; } } catch (e2: any) { From cc660e2ebe4349d1c00320df15d177885798914f Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 09:09:03 -0800 Subject: [PATCH 09/14] force `resolved` to be an array in `maybeEmitDep` --- src/node-file-trace.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 378c9d69..0869485f 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -266,16 +266,12 @@ export class Job { return; } - if (Array.isArray(resolved)) { - for (const item of resolved) { - // ignore builtins - if (item.startsWith('node:')) return; - await this.analyzeAndEmitDependency(item, path, cjsResolve); - } - } else { + // For simplicity, force `resolved` to be an array + resolved = Array.isArray(resolved) ? resolved : [resolved]; + for (const item of resolved) { // ignore builtins - if (resolved.startsWith('node:')) return; - await this.analyzeAndEmitDependency(resolved, path, cjsResolve); + if (item.startsWith('node:')) return; + await this.analyzeAndEmitDependency(item, path, cjsResolve); } } From bd5feeee1f62b9d6f1cd605c633ff888a048929b Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Nov 2022 09:21:34 -0800 Subject: [PATCH 10/14] restore querystring when making recursive `analyzeAndEmitDependency` call in `maybeEmitDep` --- src/node-file-trace.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 0869485f..41e0e31c 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -268,9 +268,16 @@ export class Job { // For simplicity, force `resolved` to be an array resolved = Array.isArray(resolved) ? resolved : [resolved]; - for (const item of resolved) { - // ignore builtins + for (let item of resolved) { + // ignore builtins for the purposes of both tracing and querystring handling (neither Node + // nor Webpack can handle querystrings on `node:xxx` imports). if (item.startsWith('node:')) return; + + // If querystring was stripped during resolution, restore it + if (queryString && !item.endsWith(queryString)) { + item += queryString; + } + await this.analyzeAndEmitDependency(item, path, cjsResolve); } } From a7c482290db444d67950ac4f14184027d684ed6a Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Wed, 16 Nov 2022 06:56:55 -0800 Subject: [PATCH 11/14] add tests --- .gitignore | 1 + test/unit.test.js | 55 +++++++++++++++---- test/unit/cjs-querystring/input.js | 7 +++ .../noPunctuation/whowhatidk.js | 12 ++++ test/unit/cjs-querystring/output.js | 5 ++ .../animalFacts/aardvark.mjs | 4 ++ .../esm-querystring-mjs/animalFacts/bear.mjs | 4 ++ .../animalFacts/cheetah.mjs | 1 + test/unit/esm-querystring-mjs/input.js | 6 ++ test/unit/esm-querystring-mjs/output.js | 7 +++ test/unit/esm-querystring-mjs/package.json | 4 ++ .../esm-querystring/animalFacts/aardvark.js | 4 ++ test/unit/esm-querystring/animalFacts/bear.js | 4 ++ .../esm-querystring/animalFacts/cheetah.js | 1 + test/unit/esm-querystring/input.js | 5 ++ test/unit/esm-querystring/output.js | 7 +++ test/unit/esm-querystring/package.json | 4 ++ test/unit/querystring-self-import/base.js | 5 ++ test/unit/querystring-self-import/dep.js | 1 + test/unit/querystring-self-import/input.js | 10 ++++ test/unit/querystring-self-import/output.js | 6 ++ 21 files changed, 143 insertions(+), 10 deletions(-) create mode 100644 test/unit/cjs-querystring/input.js create mode 100644 test/unit/cjs-querystring/noPunctuation/whowhatidk.js create mode 100644 test/unit/cjs-querystring/output.js create mode 100644 test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs create mode 100644 test/unit/esm-querystring-mjs/animalFacts/bear.mjs create mode 100644 test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs create mode 100644 test/unit/esm-querystring-mjs/input.js create mode 100644 test/unit/esm-querystring-mjs/output.js create mode 100644 test/unit/esm-querystring-mjs/package.json create mode 100644 test/unit/esm-querystring/animalFacts/aardvark.js create mode 100644 test/unit/esm-querystring/animalFacts/bear.js create mode 100644 test/unit/esm-querystring/animalFacts/cheetah.js create mode 100644 test/unit/esm-querystring/input.js create mode 100644 test/unit/esm-querystring/output.js create mode 100644 test/unit/esm-querystring/package.json create mode 100644 test/unit/querystring-self-import/base.js create mode 100644 test/unit/querystring-self-import/dep.js create mode 100644 test/unit/querystring-self-import/input.js create mode 100644 test/unit/querystring-self-import/output.js diff --git a/.gitignore b/.gitignore index ea00b8f0..657bcd13 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ out coverage test/**/dist test/**/actual.js +test/unit/cjs-querystring/who?what?idk!.js diff --git a/test/unit.test.js b/test/unit.test.js index ce269b38..e774439c 100644 --- a/test/unit.test.js +++ b/test/unit.test.js @@ -1,23 +1,39 @@ const fs = require('fs'); -const { join, relative } = require('path'); +const { join, relative, sep } = require('path'); const { nodeFileTrace } = require('../out/node-file-trace'); global._unit = true; -const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink']; +const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink', 'cjs-querystring']; const unitTestDirs = fs.readdirSync(join(__dirname, 'unit')); const unitTests = [ - ...unitTestDirs.map(testName => ({testName, isRoot: false})), ...unitTestDirs.map(testName => ({testName, isRoot: true})), + ...unitTestDirs.map(testName => ({testName, isRoot: false})), ]; for (const { testName, isRoot } of unitTests) { const testSuffix = `${testName} from ${isRoot ? 'root' : 'cwd'}`; - if (process.platform === 'win32' && (isRoot || skipOnWindows.includes(testName))) { - console.log(`Skipping unit test on Windows: ${testSuffix}`); - continue; - }; const unitPath = join(__dirname, 'unit', testName); + + if (process.platform === 'win32') { + if (isRoot || skipOnWindows.includes(testName)) { + console.log(`Skipping unit test on Windows: ${testSuffix}`); + continue; + } + } else { + if (testName === 'cjs-querystring') { + // Create (a git-ignored copy of) the file we need, since committing it + // breaks CI on Windows. See https://github.com/vercel/nft/pull/322. + const currentFilepath = join(unitPath, 'noPunctuation', 'whowhatidk.js'); + const newFilepath = currentFilepath.replace( + 'noPunctuation' + sep + 'whowhatidk.js', + 'who?what?idk!.js' + ); + if (!fs.existsSync(newFilepath)) { + fs.copyFileSync(currentFilepath, newFilepath); + } + } + } it(`should correctly trace ${testSuffix}`, async () => { @@ -41,6 +57,25 @@ for (const { testName, isRoot } of unitTests) { return null } } + + // mock an in-memory module store (such as webpack's) where the same filename with + // two different querystrings can correspond to two different modules, one importing + // the other + if (testName === 'querystring-self-import') { + if (id.endsWith('input.js') || id.endsWith('base.js') || id.endsWith('dep.js')) { + return fs.readFileSync(id).toString() + } + + if (id.endsWith('base.js?__withQuery')) { + return ` + import * as origBase from './base'; + export const dogs = origBase.dogs.concat('Cory', 'Bodhi'); + export const cats = origBase.cats.concat('Teaberry', 'Sassafras', 'Persephone'); + export const rats = origBase.rats; + `; + } + } + return this.constructor.prototype.readFile.apply(this, arguments); }); @@ -67,8 +102,8 @@ for (const { testName, isRoot } of unitTests) { if (testName === 'multi-input') { inputFileNames.push('input-2.js', 'input-3.js', 'input-4.js'); } - - const { fileList, reasons } = await nodeFileTrace( + + const { fileList, reasons, warnings } = await nodeFileTrace( inputFileNames.map(file => join(unitPath, file)), { base: isRoot ? '/' : `${__dirname}/../`, @@ -193,7 +228,7 @@ for (const { testName, isRoot } of unitTests) { expect(sortedFileList).toEqual(expected); } catch (e) { - console.warn(reasons); + console.warn({reasons, warnings}); fs.writeFileSync(join(unitPath, 'actual.js'), JSON.stringify(sortedFileList, null, 2)); throw e; } diff --git a/test/unit/cjs-querystring/input.js b/test/unit/cjs-querystring/input.js new file mode 100644 index 00000000..5a1baf7e --- /dev/null +++ b/test/unit/cjs-querystring/input.js @@ -0,0 +1,7 @@ +// Test that CJS files treat question marks in filenames as any other character, +// matching Node behavior + +// https://www.youtube.com/watch?v=2ve20PVNZ18 + +const baseball = require('./who?what?idk!'); +console.log(baseball.players); diff --git a/test/unit/cjs-querystring/noPunctuation/whowhatidk.js b/test/unit/cjs-querystring/noPunctuation/whowhatidk.js new file mode 100644 index 00000000..4c4c4b72 --- /dev/null +++ b/test/unit/cjs-querystring/noPunctuation/whowhatidk.js @@ -0,0 +1,12 @@ +module.exports = { + players: { + first: 'Who', + second: 'What', + third: "I Don't Know", + left: 'Why', + center: 'Because', + pitcher: 'Tomorrow', + catcher: 'Today', + shortstop: "I Don't Give a Damn!", + }, +}; diff --git a/test/unit/cjs-querystring/output.js b/test/unit/cjs-querystring/output.js new file mode 100644 index 00000000..296a204b --- /dev/null +++ b/test/unit/cjs-querystring/output.js @@ -0,0 +1,5 @@ +[ + "package.json", + "test/unit/cjs-querystring/input.js", + "test/unit/cjs-querystring/who?what?idk!.js" +] diff --git a/test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs b/test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs new file mode 100644 index 00000000..a2efff8a --- /dev/null +++ b/test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs @@ -0,0 +1,4 @@ +import { numSpecies } from "./bear.mjs?beaver?bison"; +console.log(`There are ${numSpecies} species of bears.`); + +export const food = "termites"; diff --git a/test/unit/esm-querystring-mjs/animalFacts/bear.mjs b/test/unit/esm-querystring-mjs/animalFacts/bear.mjs new file mode 100644 index 00000000..a8ee2dd0 --- /dev/null +++ b/test/unit/esm-querystring-mjs/animalFacts/bear.mjs @@ -0,0 +1,4 @@ +import * as cheetah from "./cheetah.mjs?cow=chipmunk"; +console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`); + +export const numSpecies = 8; diff --git a/test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs b/test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs new file mode 100644 index 00000000..836b4ddf --- /dev/null +++ b/test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs @@ -0,0 +1 @@ +export const topSpeed = 65; diff --git a/test/unit/esm-querystring-mjs/input.js b/test/unit/esm-querystring-mjs/input.js new file mode 100644 index 00000000..a07ace77 --- /dev/null +++ b/test/unit/esm-querystring-mjs/input.js @@ -0,0 +1,6 @@ +// Test that querystrings of various forms get stripped from esm imports when those +// imports contain the `.mjs` file extension + +import * as aardvark from "./animalFacts/aardvark.mjs?anteater"; + +console.log(`Aardvarks eat ${aardvark.food}.`); diff --git a/test/unit/esm-querystring-mjs/output.js b/test/unit/esm-querystring-mjs/output.js new file mode 100644 index 00000000..95137ed7 --- /dev/null +++ b/test/unit/esm-querystring-mjs/output.js @@ -0,0 +1,7 @@ +[ + "test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs", + "test/unit/esm-querystring-mjs/animalFacts/bear.mjs", + "test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs", + "test/unit/esm-querystring-mjs/input.js", + "test/unit/esm-querystring-mjs/package.json" +] \ No newline at end of file diff --git a/test/unit/esm-querystring-mjs/package.json b/test/unit/esm-querystring-mjs/package.json new file mode 100644 index 00000000..e986b24b --- /dev/null +++ b/test/unit/esm-querystring-mjs/package.json @@ -0,0 +1,4 @@ +{ + "private": true, + "type": "module" +} diff --git a/test/unit/esm-querystring/animalFacts/aardvark.js b/test/unit/esm-querystring/animalFacts/aardvark.js new file mode 100644 index 00000000..4c497265 --- /dev/null +++ b/test/unit/esm-querystring/animalFacts/aardvark.js @@ -0,0 +1,4 @@ +import { numSpecies } from './bear?beaver?bison'; +console.log(`There are ${numSpecies} species of bears.`); + +export const food = 'termites'; diff --git a/test/unit/esm-querystring/animalFacts/bear.js b/test/unit/esm-querystring/animalFacts/bear.js new file mode 100644 index 00000000..4578358b --- /dev/null +++ b/test/unit/esm-querystring/animalFacts/bear.js @@ -0,0 +1,4 @@ +import * as cheetah from './cheetah?cow=chipmunk'; +console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`); + +export const numSpecies = 8; diff --git a/test/unit/esm-querystring/animalFacts/cheetah.js b/test/unit/esm-querystring/animalFacts/cheetah.js new file mode 100644 index 00000000..836b4ddf --- /dev/null +++ b/test/unit/esm-querystring/animalFacts/cheetah.js @@ -0,0 +1 @@ +export const topSpeed = 65; diff --git a/test/unit/esm-querystring/input.js b/test/unit/esm-querystring/input.js new file mode 100644 index 00000000..b0d51697 --- /dev/null +++ b/test/unit/esm-querystring/input.js @@ -0,0 +1,5 @@ +// Test that querystrings of various forms get stripped from esm imports + +import * as aardvark from './animalFacts/aardvark?anteater'; + +console.log(`Aardvarks eat ${aardvark.food}.`); diff --git a/test/unit/esm-querystring/output.js b/test/unit/esm-querystring/output.js new file mode 100644 index 00000000..d03f7cb8 --- /dev/null +++ b/test/unit/esm-querystring/output.js @@ -0,0 +1,7 @@ +[ + "test/unit/esm-querystring/animalFacts/aardvark.js", + "test/unit/esm-querystring/animalFacts/bear.js", + "test/unit/esm-querystring/animalFacts/cheetah.js", + "test/unit/esm-querystring/input.js", + "test/unit/esm-querystring/package.json" +] \ No newline at end of file diff --git a/test/unit/esm-querystring/package.json b/test/unit/esm-querystring/package.json new file mode 100644 index 00000000..e986b24b --- /dev/null +++ b/test/unit/esm-querystring/package.json @@ -0,0 +1,4 @@ +{ + "private": true, + "type": "module" +} diff --git a/test/unit/querystring-self-import/base.js b/test/unit/querystring-self-import/base.js new file mode 100644 index 00000000..8f48a38a --- /dev/null +++ b/test/unit/querystring-self-import/base.js @@ -0,0 +1,5 @@ +import * as dep from './dep'; + +export const dogs = ['Charlie', 'Maisey']; +export const cats = ['Piper']; +export const rats = dep.rats; diff --git a/test/unit/querystring-self-import/dep.js b/test/unit/querystring-self-import/dep.js new file mode 100644 index 00000000..aaff193a --- /dev/null +++ b/test/unit/querystring-self-import/dep.js @@ -0,0 +1 @@ +export const rats = ['Debra']; diff --git a/test/unit/querystring-self-import/input.js b/test/unit/querystring-self-import/input.js new file mode 100644 index 00000000..25f53ce9 --- /dev/null +++ b/test/unit/querystring-self-import/input.js @@ -0,0 +1,10 @@ +// Test that if a file and the same file with a querystring correspond to different +// modules in memory, one can successfully import the other. The import chain +// goes `input` (this file) -> `base?__withQuery` -> `base` -> `dep`, which means +// that if `dep` shows up in `output`, we know that both `base?__withQuery` and +// `base` have been loaded successfully. + +import * as baseWithQuery from './base?__withQuery'; +console.log('Dogs:', baseWithQuery.dogs); +console.log('Cats:', baseWithQuery.cats); +console.log('Rats:', baseWithQuery.rats); diff --git a/test/unit/querystring-self-import/output.js b/test/unit/querystring-self-import/output.js new file mode 100644 index 00000000..d6dc6b2c --- /dev/null +++ b/test/unit/querystring-self-import/output.js @@ -0,0 +1,6 @@ +[ + "package.json", + "test/unit/querystring-self-import/base.js", + "test/unit/querystring-self-import/dep.js", + "test/unit/querystring-self-import/input.js" +] From b76cfeba8f28cc380e62cbac4e1647da5c0a49f7 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 21 Dec 2022 10:12:07 +0100 Subject: [PATCH 12/14] use url.parse instead of custom RegExp --- src/node-file-trace.ts | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 41e0e31c..1ebf1b4e 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -1,11 +1,11 @@ import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, Stats, NodeFileTraceReasonType } from './types'; -import { basename, dirname, extname, relative, resolve, sep } from 'path'; +import { basename, dirname, extname, relative, resolve, sep, join } from 'path'; +import * as url from 'url'; import fs from 'graceful-fs'; import analyze, { AnalyzeResult } from './analyze'; import resolveDependency, { NotFoundError } from './resolve-dependency'; import { isMatch } from 'micromatch'; import { sharedLibEmit } from './utils/sharedlib-emit'; -import { join } from 'path'; import { Sema } from 'async-sema'; const fsReadFile = fs.promises.readFile; @@ -24,12 +24,13 @@ export function parseSpecifier(specifier: string, cjsResolve: boolean = true): P let queryString = null; if (!cjsResolve) { - // Regex which splits a specifier into path and querystring, inspired by that in `enhanced-resolve` - // https://github.com/webpack/enhanced-resolve/blob/157ed9bcc381857d979e56d2f20a5a17c6362bff/lib/util/identifier.js#L8 - const match = /^(#?(?:\0.|[^?#\0])*)(\?(?:\0.|[^\0])*)?$/.exec(specifier); - if (match) { - path = match[1] - queryString = match[2] + const specifierUrl = url.parse(specifier); + queryString = specifierUrl.search; + + if (specifierUrl.search) { + path = specifier.replace(specifierUrl.search, ''); + } else { + path = specifier; } } From 9eeed58bdb7b724444129d482bd39eddc8844467 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 3 Jan 2023 13:01:55 +0100 Subject: [PATCH 13/14] rename helper function and return values --- src/node-file-trace.ts | 16 ++++++++-------- src/resolve-dependency.ts | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 1ebf1b4e..4ded78e8 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -13,14 +13,14 @@ const fsReadlink = fs.promises.readlink; const fsStat = fs.promises.stat; type ParseSpecifierResult = { - path: string; + remainingSpecifier: string; queryString: string | null } // Splits an ESM specifier into path and querystring (including the leading `?`). (If the specifier is CJS, // it is passed through untouched.) -export function parseSpecifier(specifier: string, cjsResolve: boolean = true): ParseSpecifierResult { - let path = specifier; +export function splitQueryStringFromSpecifier(specifier: string, cjsResolve: boolean = true): ParseSpecifierResult { + let remainingSpecifier = specifier; let queryString = null; if (!cjsResolve) { @@ -28,13 +28,13 @@ export function parseSpecifier(specifier: string, cjsResolve: boolean = true): P queryString = specifierUrl.search; if (specifierUrl.search) { - path = specifier.replace(specifierUrl.search, ''); + remainingSpecifier = specifier.replace(specifierUrl.search, ''); } else { - path = specifier; + remainingSpecifier = specifier; } } - return {path, queryString}; + return {queryString, remainingSpecifier}; } function inPath (path: string, parent: string) { @@ -241,7 +241,7 @@ export class Job { private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => { // Only affects ESM dependencies - const { path: strippedDep, queryString = '' } = parseSpecifier(dep, cjsResolve) + const { remainingSpecifier: strippedDep, queryString = '' } = splitQueryStringFromSpecifier(dep, cjsResolve) let resolved: string | string[] = ''; let error: Error | undefined; try { @@ -379,7 +379,7 @@ export class Job { private async analyzeAndEmitDependency(rawPath: string, parent?: string, cjsResolve?: boolean) { // Strip the querystring, if any. (Only affects ESM dependencies.) - const { path } = parseSpecifier(rawPath, cjsResolve) + const { remainingSpecifier: path } = splitQueryStringFromSpecifier(rawPath, cjsResolve) // Since different querystrings may lead to different results, include the full path // when noting whether or not we've already seen this path diff --git a/src/resolve-dependency.ts b/src/resolve-dependency.ts index 88ca47b3..01fcff8e 100644 --- a/src/resolve-dependency.ts +++ b/src/resolve-dependency.ts @@ -1,5 +1,5 @@ import { isAbsolute, resolve, sep } from 'path'; -import { Job, parseSpecifier } from './node-file-trace'; +import { Job, splitQueryStringFromSpecifier } from './node-file-trace'; // node resolver // custom implementation to emit only needed package.json files for resolver @@ -9,7 +9,7 @@ export default async function resolveDependency (specifier: string, parent: stri // ESM imports are allowed to have querystrings, but the native Node behavior is to ignore them when doing // file resolution, so emulate that here by stripping any querystring off before continuing - specifier = parseSpecifier(specifier, cjsResolve).path + specifier = splitQueryStringFromSpecifier(specifier, cjsResolve).remainingSpecifier if (isAbsolute(specifier) || specifier === '.' || specifier === '..' || specifier.startsWith('./') || specifier.startsWith('../')) { const trailingSlash = specifier.endsWith('/'); From 44f3d3f4b92b05c5e37113e9bfed057e8f318b4e Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 3 Jan 2023 13:14:58 +0100 Subject: [PATCH 14/14] use `new URL` instead of `url.parse` --- src/node-file-trace.ts | 3 +-- tsconfig.json | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index 4ded78e8..6cbbba2c 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -1,6 +1,5 @@ import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, Stats, NodeFileTraceReasonType } from './types'; import { basename, dirname, extname, relative, resolve, sep, join } from 'path'; -import * as url from 'url'; import fs from 'graceful-fs'; import analyze, { AnalyzeResult } from './analyze'; import resolveDependency, { NotFoundError } from './resolve-dependency'; @@ -24,7 +23,7 @@ export function splitQueryStringFromSpecifier(specifier: string, cjsResolve: boo let queryString = null; if (!cjsResolve) { - const specifierUrl = url.parse(specifier); + const specifierUrl = new URL(specifier, "placeholder-protocol://"); queryString = specifierUrl.search; if (specifierUrl.search) { diff --git a/tsconfig.json b/tsconfig.json index 27218b7f..ab4973ca 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,7 +4,10 @@ "esModuleInterop": true, "forceConsistentCasingInFileNames": true, "incremental": true, - "lib": ["esnext"], + "lib": [ + "esnext", + "DOM" // needed for "new URL()" + ], "module": "commonjs", "moduleResolution": "node", "noEmitOnError": true, @@ -21,4 +24,4 @@ }, "include": ["src/**/*"], "exclude": ["node_modules", "test/**/*"] -} \ No newline at end of file +}