diff --git a/lib/internal/fs/glob.js b/lib/internal/fs/glob.js
new file mode 100644
index 00000000000000..323ef2a25d434c
--- /dev/null
+++ b/lib/internal/fs/glob.js
@@ -0,0 +1,421 @@
+'use strict';
+const { lstatSync, readdirSync } = require('fs');
+const { join, resolve } = require('path');
+
+const {
+  kEmptyObject,
+} = require('internal/util');
+const {
+  validateFunction,
+  validateObject,
+} = require('internal/validators');
+
+const {
+  ArrayFrom,
+  ArrayPrototypeAt,
+  ArrayPrototypeMap,
+  ArrayPrototypeFlatMap,
+  ArrayPrototypePop,
+  ArrayPrototypePush,
+  ArrayPrototypeSome,
+  SafeMap,
+  SafeSet,
+  StringPrototypeEndsWith,
+} = primordials;
+
+let minimatch;
+// Requires minimatch on first call and memoizes the module.
+function lazyMinimatch() {
+  minimatch ??= require('internal/deps/minimatch/index');
+  return minimatch;
+}
+
+const isWindows = process.platform === 'win32';
+const isOSX = process.platform === 'darwin';
+
+// Memoizes lstat/readdir results and records which pattern suffixes
+// (see Pattern#cacheKey) have already been registered for each path.
+class Cache {
+  #cache = new SafeMap();
+  #statsCache = new SafeMap();
+  #readdirCache = new SafeMap();
+
+  // Memoized lstatSync(); the result is null when lstatSync() throws.
+  statSync(path) {
+    const cached = this.#statsCache.get(path);
+    // Compare against undefined rather than truthiness so that a cached
+    // null (failed lstat) is reused instead of hitting the disk again.
+    if (cached !== undefined) {
+      return cached;
+    }
+    let val;
+    try {
+      val = lstatSync(path);
+    } catch {
+      val = null;
+    }
+    this.#statsCache.set(path, val);
+    return val;
+  }
+  // Stores `val` as the stat entry for `path`, replacing any previous one.
+  addToStatCache(path, val) {
+    this.#statsCache.set(path, val);
+  }
+  // Memoized readdirSync() returning dirents; [] when readdirSync() throws.
+  readdirSync(path) {
+    const cached = this.#readdirCache.get(path);
+    if (cached !== undefined) {
+      return cached;
+    }
+    let val;
+    try {
+      val = readdirSync(path, { __proto__: null, withFileTypes: true });
+    } catch {
+      val = [];
+    }
+    this.#readdirCache.set(path, val);
+    return val;
+  }
+  // Registers the cache key of every active index of `pattern` under `path`.
+  // Returns true when at least one of those keys was already registered.
+  add(path, pattern) {
+    let cache = this.#cache.get(path);
+    if (!cache) {
+      cache = new SafeSet();
+      this.#cache.set(path, cache);
+    }
+    const originalSize = cache.size;
+    pattern.indexes.forEach((index) => cache.add(pattern.cacheKey(index)));
+    return cache.size !== originalSize + pattern.indexes.size;
+  }
+  // Truthy when the cache key of `pattern` at `index` is registered for `path`.
+  seen(path, pattern, index) {
+    return this.#cache.get(path)?.has(pattern.cacheKey(index));
+  }
+}
+
+// A parsed glob pattern (an array of parts) plus the set of part indexes
+// that are candidates for matching at the current path. `symlinks` holds
+// the indexes of `**` parts that were propagated through a symbolic link entry.
+class Pattern {
+  #pattern;
+  #globStrings;
+  indexes;
+  symlinks;
+  last;
+
+  constructor(pattern, globStrings, indexes, symlinks) {
+    this.#pattern = pattern;
+    this.#globStrings = globStrings;
+    this.indexes = indexes;
+    this.symlinks = symlinks;
+    this.last = pattern.length - 1;
+  }
+
+  // True when the last part is active, or when the pattern ends with `**`
+  // followed by an empty part (trailing slash), the `**` is active and the
+  // current path is a directory.
+  isLast(isDirectory) {
+    return this.indexes.has(this.last) ||
+      (this.at(-1) === '' && isDirectory &&
+        this.indexes.has(this.last - 1) && this.at(-2) === lazyMinimatch().GLOBSTAR);
+  }
+  // True when the first part of the pattern is active.
+  isFirst() {
+    return this.indexes.has(0);
+  }
+  // True when at least one active index is not recorded in `symlinks`.
+  get hasSeenSymlinks() {
+    return ArrayPrototypeSome(ArrayFrom(this.indexes), (i) => !this.symlinks.has(i));
+  }
+  // Returns the part at `index`; negative indexes count from the end.
+  at(index) {
+    return ArrayPrototypeAt(this.#pattern, index);
+  }
+  // Creates a Pattern over the same parts with a different set of active indexes.
+  child(indexes, symlinks = new SafeSet()) {
+    return new Pattern(this.#pattern, this.#globStrings, indexes, symlinks);
+  }
+  // Tests the name `path` against the part at `index`: `**` matches anything,
+  // strings are compared for equality, and anything with test() is asked.
+  test(index, path) {
+    // `index` is zero-based, so the first out-of-range value is `length`.
+    if (index >= this.#pattern.length) {
+      return false;
+    }
+    const pattern = this.#pattern[index];
+    if (pattern === lazyMinimatch().GLOBSTAR) {
+      return true;
+    }
+    if (typeof pattern === 'string') {
+      return pattern === path;
+    }
+    if (typeof pattern?.test === 'function') {
+      return pattern.test(path);
+    }
+    return false;
+  }
+
+  // Joins the glob strings from `index` to the end with '/'. The result
+  // identifies the remaining portion of the pattern in Cache.
+  cacheKey(index) {
+    let key = '';
+    for (let i = index; i < this.#globStrings.length; i++) {
+      key += this.#globStrings[i];
+      if (i !== this.#globStrings.length - 1) {
+        key += '/';
+      }
+    }
+    return key;
+  }
+}
+
+// Synchronous glob walker. Patterns are compiled with minimatch and matched
+// part by part while directories are traversed through a work queue.
+class Glob {
+  #root;
+  #exclude;
+  #cache = new Cache();
+  #results = [];
+  #queue = [];
+  #subpatterns = new SafeMap();
+  // `patterns` is an array of glob strings. `options.cwd` is the directory
+  // paths are resolved against (default '.'); `options.exclude`, if set, must
+  // be a function and is called with entry names while expanding `**`.
+  constructor(patterns, options = kEmptyObject) {
+    validateObject(options, 'options');
+    const { exclude, cwd } = options;
+    if (exclude != null) {
+      validateFunction(exclude, 'options.exclude');
+    }
+    this.#root = cwd ?? '.';
+    this.#exclude = exclude;
+    this.matchers = ArrayPrototypeMap(patterns, (pattern) => new (lazyMinimatch().Minimatch)(pattern, {
+      __proto__: null,
+      nocase: isWindows || isOSX,
+      windowsPathsNoEscape: true,
+      nonegate: true,
+      nocomment: true,
+      optimizationLevel: 2,
+      platform: process.platform,
+      nocaseMagicOnly: true,
+    }));
+  }
+
+  // Processes the queue starting from '.' until it is empty and returns the
+  // accumulated array of matching paths.
+  globSync() {
+    ArrayPrototypePush(this.#queue, {
+      __proto__: null,
+      path: '.',
+      patterns: ArrayPrototypeFlatMap(this.matchers, (matcher) => ArrayPrototypeMap(matcher.set,
+        (pattern, i) => new Pattern(
+          pattern,
+          matcher.globParts[i],
+          new SafeSet([0]),
+          new SafeSet(),
+        ))),
+    });
+
+    while (this.#queue.length > 0) {
+      const item = ArrayPrototypePop(this.#queue);
+      for (let i = 0; i < item.patterns.length; i++) {
+        this.#addSubpatterns(item.path, item.patterns[i]);
+      }
+      this.#subpatterns
+        .forEach((patterns, path) => ArrayPrototypePush(this.#queue, { __proto__: null, path, patterns }));
+      this.#subpatterns.clear();
+    }
+    return this.#results;
+  }
+  // Schedules `pattern` to be processed against `path` in a later queue item.
+  #addSubpattern(path, pattern) {
+    if (!this.#subpatterns.has(path)) {
+      this.#subpatterns.set(path, [pattern]);
+    } else {
+      ArrayPrototypePush(this.#subpatterns.get(path), pattern);
+    }
+  }
+  // Matches `pattern` at `path`: pushes results for completed patterns and
+  // schedules the children of `path` that can still match, each with the
+  // pattern indexes that remain candidates for it.
+  #addSubpatterns(path, pattern) {
+    const seen = this.#cache.add(path, pattern);
+    if (seen) {
+      return;
+    }
+    const fullpath = resolve(this.#root, path);
+    const stat = this.#cache.statSync(fullpath);
+    const last = pattern.last;
+    const isDirectory = stat?.isDirectory() || (stat?.isSymbolicLink() && pattern.hasSeenSymlinks);
+    const isLast = pattern.isLast(isDirectory);
+    const isFirst = pattern.isFirst();
+
+    if (isFirst && isWindows && typeof pattern.at(0) === 'string' && StringPrototypeEndsWith(pattern.at(0), ':')) {
+      // Absolute path, go to root
+      this.#addSubpattern(`${pattern.at(0)}\\`, pattern.child(new SafeSet([1])));
+      return;
+    }
+    if (isFirst && pattern.at(0) === '') {
+      // Absolute path, go to root
+      this.#addSubpattern('/', pattern.child(new SafeSet([1])));
+      return;
+    }
+    if (isFirst && pattern.at(0) === '..') {
+      // Start with .., go to parent
+      this.#addSubpattern('../', pattern.child(new SafeSet([1])));
+      return;
+    }
+    if (isFirst && pattern.at(0) === '.') {
+      // Start with ., proceed
+      this.#addSubpattern('.', pattern.child(new SafeSet([1])));
+      return;
+    }
+
+    if (isLast && typeof pattern.at(-1) === 'string') {
+      // Add result if it exists
+      const p = pattern.at(-1);
+      const stat = this.#cache.statSync(join(fullpath, p));
+      if (stat && (p || isDirectory)) {
+        ArrayPrototypePush(this.#results, join(path, p));
+      }
+      if (pattern.indexes.size === 1 && pattern.indexes.has(last)) {
+        return;
+      }
+    } else if (isLast && pattern.at(-1) === lazyMinimatch().GLOBSTAR &&
+      (path !== '.' || pattern.at(0) === '.' || (last === 0 && stat))) {
+      // If pattern ends with **, add to results
+      // if path is ".", add it only if pattern starts with "." or pattern is exactly "**"
+      ArrayPrototypePush(this.#results, path);
+    }
+
+    if (!isDirectory) {
+      return;
+    }
+
+    let children;
+    const firstPattern = pattern.indexes.size === 1 && pattern.at(pattern.indexes.values().next().value);
+    if (typeof firstPattern === 'string') {
+      const stat = this.#cache.statSync(join(fullpath, firstPattern));
+      if (stat) {
+        stat.name = firstPattern;
+        children = [stat];
+      } else {
+        children = [];
+      }
+    } else {
+      children = this.#cache.readdirSync(fullpath);
+    }
+
+    for (let i = 0; i < children.length; i++) {
+      const entry = children[i];
+      const entryPath = join(path, entry.name);
+      this.#cache.addToStatCache(join(fullpath, entry.name), entry);
+
+      const subPatterns = new SafeSet();
+      const nSymlinks = new SafeSet();
+      for (const index of pattern.indexes) {
+        // For each child, check potential patterns
+        if (this.#cache.seen(entryPath, pattern, index) || this.#cache.seen(entryPath, pattern, index + 1)) {
+          return;
+        }
+        const current = pattern.at(index);
+        const nextIndex = index + 1;
+        const next = pattern.at(nextIndex);
+        const fromSymlink = pattern.symlinks.has(index);
+
+        if (current === lazyMinimatch().GLOBSTAR) {
+          if (entry.name[0] === '.' || (this.#exclude && this.#exclude(entry.name))) {
+            continue;
+          }
+          if (!fromSymlink && entry.isDirectory()) {
+            // If directory, add ** to its potential patterns
+            subPatterns.add(index);
+          } else if (!fromSymlink && index === last) {
+            // If ** is last, add to results
+            ArrayPrototypePush(this.#results, entryPath);
+          }
+
+          // Any pattern after ** is also a potential pattern
+          // so we can already test it here
+          const nextMatches = pattern.test(nextIndex, entry.name);
+          if (nextMatches && nextIndex === last && !isLast) {
+            // If next pattern is the last one, add to results
+            ArrayPrototypePush(this.#results, entryPath);
+          } else if (nextMatches && entry.isDirectory()) {
+            // Pattern matched, meaning two patterns forward
+            // are also potential patterns
+            // e.g **/b/c when entry is a/b - add c to potential patterns
+            subPatterns.add(index + 2);
+          }
+          if ((nextMatches || pattern.at(0) === '.') &&
+              (entry.isDirectory() || entry.isSymbolicLink()) && !fromSymlink) {
+            // If pattern after ** matches, or pattern starts with "."
+            // and entry is a directory or symlink, add to potential patterns
+            subPatterns.add(nextIndex);
+          }
+
+          if (entry.isSymbolicLink()) {
+            nSymlinks.add(index);
+          }
+
+          if (next === '..' && entry.isDirectory()) {
+            // In case pattern is "**/..",
+            // both parent and current directory should be added to the queue
+            // if this is the last pattern, add to results instead
+            const parent = join(path, '..');
+            if (nextIndex < last) {
+              if (!this.#subpatterns.has(path) && !this.#cache.seen(path, pattern, nextIndex + 1)) {
+                this.#subpatterns.set(path, [pattern.child(new SafeSet([nextIndex + 1]))]);
+              }
+              if (!this.#subpatterns.has(parent) && !this.#cache.seen(parent, pattern, nextIndex + 1)) {
+                this.#subpatterns.set(parent, [pattern.child(new SafeSet([nextIndex + 1]))]);
+              }
+            } else {
+              if (!this.#cache.seen(path, pattern, nextIndex)) {
+                this.#cache.add(path, pattern.child(new SafeSet([nextIndex])));
+                ArrayPrototypePush(this.#results, path);
+              }
+              if (!this.#cache.seen(path, pattern, nextIndex) || !this.#cache.seen(parent, pattern, nextIndex)) {
+                this.#cache.add(parent, pattern.child(new SafeSet([nextIndex])));
+                ArrayPrototypePush(this.#results, parent);
+              }
+            }
+          }
+        }
+        if (typeof current === 'string') {
+          if (pattern.test(index, entry.name) && index !== last) {
+            // If current pattern matches entry name
+            // the next pattern is a potential pattern
+            subPatterns.add(nextIndex);
+          } else if (current === '.' && pattern.test(nextIndex, entry.name)) {
+            // If current pattern is ".", proceed to test next pattern
+            if (nextIndex === last) {
+              ArrayPrototypePush(this.#results, entryPath);
+            } else {
+              subPatterns.add(nextIndex + 1);
+            }
+          }
+        }
+        if (typeof current === 'object' && pattern.test(index, entry.name)) {
+          // If current pattern is a regex that matches entry name (e.g *.js)
+          // add next pattern to potential patterns, or to results if it's the last pattern
+          if (index === last) {
+            ArrayPrototypePush(this.#results, entryPath);
+          } else if (entry.isDirectory()) {
+            subPatterns.add(nextIndex);
+          }
+        }
+      }
+      if (subPatterns.size > 0) {
+        // If there are potential patterns, add to queue
+        this.#addSubpattern(entryPath, pattern.child(subPatterns, nSymlinks));
+      }
+    }
+  }
+}
+
+module.exports = {
+  __proto__: null,
+  Glob,
+};
diff --git a/test/parallel/test-fs-glob.mjs b/test/parallel/test-fs-glob.mjs
new file mode 100644
index 00000000000000..b1420fec272923
--- /dev/null
+++ b/test/parallel/test-fs-glob.mjs
@@ -0,0 +1,309 @@
+// Flags: --expose-internals
+import * as common from '../common/index.mjs';
+import tmpdir from '../common/tmpdir.js';
+import { resolve, dirname, sep } from 'node:path';
+import { mkdir, writeFile, symlink } from 'node:fs/promises';
+import { test } from 'node:test';
+import assert from 'node:assert';
+import glob from 'internal/fs/glob';
+
+tmpdir.refresh();
+
+const fixtureDir = resolve(tmpdir.path, 'fixtures');
+const absDir = resolve(tmpdir.path, 'abs');
+
+async function setup() {
+  await mkdir(fixtureDir, { recursive: true });
+  await mkdir(absDir, { recursive: true });
+  const files = [
+    'a/.abcdef/x/y/z/a',
+    'a/abcdef/g/h',
+    'a/abcfed/g/h',
+    'a/b/c/d',
+    'a/bc/e/f',
+    'a/c/d/c/b',
+    'a/cb/e/f',
+    'a/x/.y/b',
+    'a/z/.y/b',
+  ].map((f) => resolve(fixtureDir, f));
+
+  const symlinkTo = resolve(fixtureDir, 'a/symlink/a/b/c');
+  const symlinkFrom = '../..';
+
+  for (const file of files) {
+    const f = resolve(fixtureDir, file);
+    const d = dirname(f);
+    await mkdir(d, { recursive: true });
+    await writeFile(f, 'i like tests');
+  }
+
+  if (!common.isWindows) {
+    const d = dirname(symlinkTo);
+    await mkdir(d, { recursive: true });
+    await symlink(symlinkFrom, symlinkTo, 'dir');
+  }
+
+  await Promise.all(['foo', 'bar', 'baz', 'asdf', 'quux', 'qwer', 'rewq'].map(async function(w) {
+    await mkdir(resolve(absDir, w), { recursive: true });
+  }));
+}
+
+await setup();
+
+const patterns = {
+  'a/c/d/*/b': ['a/c/d/c/b'],
+  'a//c//d//*//b': ['a/c/d/c/b'],
+  'a/*/d/*/b': ['a/c/d/c/b'],
+  'a/*/+(c|g)/./d': ['a/b/c/d'],
+  'a/**/[cg]/../[cg]': [
+    'a/abcdef/g',
+    'a/abcfed/g',
+    'a/b/c',
+    'a/c',
+    'a/c/d/c',
+    common.isWindows ? null : 'a/symlink/a/b/c',
+  ],
+  'a/{b,c,d,e,f}/**/g': [],
+  'a/b/**': ['a/b', 'a/b/c', 'a/b/c/d'],
+  './**/g': ['a/abcdef/g', 'a/abcfed/g'],
+  'a/abc{fed,def}/g/h': ['a/abcdef/g/h', 'a/abcfed/g/h'],
+  'a/abc{fed/g,def}/**/': ['a/abcdef', 'a/abcdef/g', 'a/abcfed/g'],
+  'a/abc{fed/g,def}/**///**/': ['a/abcdef', 'a/abcdef/g', 'a/abcfed/g'],
+  '**/a': common.isWindows ? ['a'] : ['a', 'a/symlink/a'],
+  '**/a/**': [
+    'a',
+    'a/abcdef',
+    'a/abcdef/g',
+    'a/abcdef/g/h',
+    'a/abcfed',
+    'a/abcfed/g',
+    'a/abcfed/g/h',
+    'a/b',
+    'a/b/c',
+    'a/b/c/d',
+    'a/bc',
+    'a/bc/e',
+    'a/bc/e/f',
+    'a/c',
+    'a/c/d',
+    'a/c/d/c',
+    'a/c/d/c/b',
+    'a/cb',
+    'a/cb/e',
+    'a/cb/e/f',
+    ...(common.isWindows ? [] : [
+      'a/symlink',
+      'a/symlink/a',
+      'a/symlink/a/b',
+      'a/symlink/a/b/c',
+    ]),
+    'a/x',
+    'a/z',
+  ],
+  './**/a': common.isWindows ? ['a'] : ['a', 'a/symlink/a', 'a/symlink/a/b/c/a'],
+  './**/a/**/': [
+    'a',
+    'a/abcdef',
+    'a/abcdef/g',
+    'a/abcfed',
+    'a/abcfed/g',
+    'a/b',
+    'a/b/c',
+    'a/bc',
+    'a/bc/e',
+    'a/c',
+    'a/c/d',
+    'a/c/d/c',
+    'a/cb',
+    'a/cb/e',
+    ...(common.isWindows ? [] : [
+      'a/symlink',
+      'a/symlink/a',
+      'a/symlink/a/b',
+      'a/symlink/a/b/c',
+      'a/symlink/a/b/c/a',
+      'a/symlink/a/b/c/a/b',
+      'a/symlink/a/b/c/a/b/c',
+    ]),
+    'a/x',
+    'a/z',
+  ],
+  './**/a/**': [
+    'a',
+    'a/abcdef',
+    'a/abcdef/g',
+    'a/abcdef/g/h',
+    'a/abcfed',
+    'a/abcfed/g',
+    'a/abcfed/g/h',
+    'a/b',
+    'a/b/c',
+    'a/b/c/d',
+    'a/bc',
+    'a/bc/e',
+    'a/bc/e/f',
+    'a/c',
+    'a/c/d',
+    'a/c/d/c',
+    'a/c/d/c/b',
+    'a/cb',
+    'a/cb/e',
+    'a/cb/e/f',
+    ...(common.isWindows ? [] : [
+      'a/symlink',
+      'a/symlink/a',
+      'a/symlink/a/b',
+      'a/symlink/a/b/c',
+      'a/symlink/a/b/c/a',
+      'a/symlink/a/b/c/a/b',
+      'a/symlink/a/b/c/a/b/c',
+    ]),
+    'a/x',
+    'a/z',
+  ],
+  './**/a/**/a/**/': common.isWindows ? [] : [
+    'a/symlink/a',
+    'a/symlink/a/b',
+    'a/symlink/a/b/c',
+    'a/symlink/a/b/c/a',
+    'a/symlink/a/b/c/a/b',
+    'a/symlink/a/b/c/a/b/c',
+    'a/symlink/a/b/c/a/b/c/a',
+    'a/symlink/a/b/c/a/b/c/a/b',
+    'a/symlink/a/b/c/a/b/c/a/b/c',
+  ],
+  '+(a|b|c)/a{/,bc*}/**': [
+    'a/abcdef',
+    'a/abcdef/g',
+    'a/abcdef/g/h',
+    'a/abcfed',
+    'a/abcfed/g',
+    'a/abcfed/g/h',
+  ],
+  '*/*/*/f': ['a/bc/e/f', 'a/cb/e/f'],
+  './**/f': ['a/bc/e/f', 'a/cb/e/f'],
+  'a/symlink/a/b/c/a/b/c/a/b/c//a/b/c////a/b/c/**/b/c/**': common.isWindows ? [] : [
+    'a/symlink/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c',
+    'a/symlink/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a',
+    'a/symlink/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a/b',
+    'a/symlink/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c/a/b/c',
+  ],
+  [`{./*/*,${absDir}/*}`]: [
+    `${absDir}/asdf`,
+    `${absDir}/bar`,
+    `${absDir}/baz`,
+    `${absDir}/foo`,
+    `${absDir}/quux`,
+    `${absDir}/qwer`,
+    `${absDir}/rewq`,
+    'a/abcdef',
+    'a/abcfed',
+    'a/b',
+    'a/bc',
+    'a/c',
+    'a/cb',
+    common.isWindows ? null : 'a/symlink',
+    'a/x',
+    'a/z',
+  ],
+  [`{${absDir}/*,*}`]: [
+    `${absDir}/asdf`,
+    `${absDir}/bar`,
+    `${absDir}/baz`,
+    `${absDir}/foo`,
+    `${absDir}/quux`,
+    `${absDir}/qwer`,
+    `${absDir}/rewq`,
+    'a',
+  ],
+  'a/!(symlink)/**': [
+    'a/abcdef',
+    'a/abcdef/g',
+    'a/abcdef/g/h',
+    'a/abcfed',
+    'a/abcfed/g',
+    'a/abcfed/g/h',
+    'a/b',
+    'a/b/c',
+    'a/b/c/d',
+    'a/bc',
+    'a/bc/e',
+    'a/bc/e/f',
+    'a/c',
+    'a/c/d',
+    'a/c/d/c',
+    'a/c/d/c/b',
+    'a/cb',
+    'a/cb/e',
+    'a/cb/e/f',
+    'a/x',
+    'a/z',
+  ],
+  'a/symlink/a/**/*': common.isWindows ? [] : [
+    'a/symlink/a/b',
+    'a/symlink/a/b/c',
+    'a/symlink/a/b/c/a',
+  ],
+  'a/!(symlink)/**/..': [
+    'a',
+    'a/abcdef',
+    'a/abcfed',
+    'a/b',
+    'a/bc',
+    'a/c',
+    'a/c/d',
+    'a/cb',
+  ],
+  'a/!(symlink)/**/../': [
+    'a',
+    'a/abcdef',
+    'a/abcfed',
+    'a/b',
+    'a/bc',
+    'a/c',
+    'a/c/d',
+    'a/cb',
+  ],
+  'a/!(symlink)/**/../*': [
+    'a/abcdef',
+    'a/abcdef/g',
+    'a/abcfed',
+    'a/abcfed/g',
+    'a/b',
+    'a/b/c',
+    'a/bc',
+    'a/bc/e',
+    'a/c',
+    'a/c/d',
+    'a/c/d/c',
+    'a/cb',
+    'a/cb/e',
+    common.isWindows ? null : 'a/symlink',
+    'a/x',
+    'a/z',
+  ],
+  'a/!(symlink)/**/../*/*': [
+    'a/abcdef/g',
+    'a/abcdef/g/h',
+    'a/abcfed/g',
+    'a/abcfed/g/h',
+    'a/b/c',
+    'a/b/c/d',
+    'a/bc/e',
+    'a/bc/e/f',
+    'a/c/d',
+    'a/c/d/c',
+    'a/c/d/c/b',
+    'a/cb/e',
+    'a/cb/e/f',
+    common.isWindows ? null : 'a/symlink/a',
+  ],
+};
+
+for (const [pattern, expected] of Object.entries(patterns)) {
+  test(pattern, () => {
+    const actual = new glob.Glob([pattern], { cwd: fixtureDir }).globSync().sort();
+    const normalized = expected.filter(Boolean).map((item) => item.replaceAll('/', sep)).sort();
+    assert.deepStrictEqual(actual, normalized);
+  });
+}