From 207625e32e5e9eab6773d4718d1ba8974e5b623e Mon Sep 17 00:00:00 2001
From: Moshe Atlow
Date: Sun, 9 Apr 2023 09:18:48 +0300
Subject: [PATCH] path: add `path.glob`

---
 lib/path.js                      |  16 ++++
 node.gyp                         |   2 +
 src/node_binding.cc              |   1 +
 src/node_external_reference.h    |   1 +
 src/node_path.cc                 | 137 +++++++++++++++++++++++++++++++
 src/node_path.h                  |  17 ++++
 test/parallel/test-path-glob.mjs |  97 ++++++++++++++++++++++
 7 files changed, 271 insertions(+)
 create mode 100644 src/node_path.cc
 create mode 100644 src/node_path.h
 create mode 100644 test/parallel/test-path-glob.mjs

diff --git a/lib/path.js b/lib/path.js
index 625b6261042940..1878d0d37f9cca 100644
--- a/lib/path.js
+++ b/lib/path.js
@@ -31,6 +31,8 @@ const {
   StringPrototypeToLowerCase,
 } = primordials;
 
+const { glob: _glob } = internalBinding('path');
+
 const {
   CHAR_UPPERCASE_A,
   CHAR_LOWERCASE_A,
@@ -153,6 +155,18 @@ function _format(sep, pathObject) {
   return dir === pathObject.root ? `${dir}${base}` : `${dir}${sep}${base}`;
 }
 
+/**
+ * Tests whether `name` matches the glob `pattern`.
+ * @param {string} pattern
+ * @param {string} name
+ * @returns {boolean}
+ */
+function glob(pattern, name) {
+  validateString(pattern, 'pattern');
+  validateString(name, 'name');
+  return _glob(pattern, name);
+}
+
 const win32 = {
   /**
    * path.resolve([from ...], to)
@@ -1064,6 +1078,7 @@ const win32 = {
 
     return ret;
   },
+  glob,
 
   sep: '\\',
   delimiter: ';',
@@ -1530,6 +1545,7 @@ const posix = {
 
     return ret;
   },
+  glob,
 
   sep: '/',
   delimiter: ':',
diff --git a/node.gyp b/node.gyp
index 601c458421628a..c671faf27800f0 100644
--- a/node.gyp
+++ b/node.gyp
@@ -106,6 +106,7 @@
       'src/node_metadata.cc',
       'src/node_options.cc',
       'src/node_os.cc',
+      'src/node_path.cc',
       'src/node_perf.cc',
       'src/node_platform.cc',
       'src/node_postmortem_metadata.cc',
@@ -225,6 +226,7 @@
       'src/node_object_wrap.h',
       'src/node_options.h',
       'src/node_options-inl.h',
+      'src/node_path.h',
       'src/node_perf.h',
       'src/node_perf_common.h',
       'src/node_platform.h',
diff --git a/src/node_binding.cc b/src/node_binding.cc
index 90855aada5dab9..93be229d779173 100644
--- a/src/node_binding.cc
+++ b/src/node_binding.cc
@@ -52,6 +52,7 @@
   V(mksnapshot)                                                                \
   V(options)                                                                   \
   V(os)                                                                        \
+  V(path)                                                                      \
   V(performance)                                                               \
   V(permission)                                                                \
   V(pipe_wrap)                                                                 \
diff --git a/src/node_external_reference.h b/src/node_external_reference.h
index b2a90ba5194316..23bc650577d12a 100644
--- a/src/node_external_reference.h
+++ b/src/node_external_reference.h
@@ -90,6 +90,7 @@ class ExternalReferenceRegistry {
   V(module_wrap)                                                               \
   V(options)                                                                   \
   V(os)                                                                        \
+  V(path)                                                                      \
   V(performance)                                                               \
   V(permission)                                                                \
   V(process_methods)                                                           \
diff --git a/src/node_path.cc b/src/node_path.cc
new file mode 100644
index 00000000000000..1f4acb214e6cd6
--- /dev/null
+++ b/src/node_path.cc
@@ -0,0 +1,137 @@
+#include "node_path.h"
+#include "env-inl.h"
+#include "node_errors.h"
+#include "node_external_reference.h"
+
+namespace node {
+
+namespace path {
+using v8::Context;
+using v8::FunctionCallbackInfo;
+using v8::Local;
+using v8::Object;
+using v8::Value;
+
+// Glob matcher extracted from
+// https://github.com/torvalds/linux/blob/cdc9718d5e590d6905361800b938b93f2b66818e/lib/glob.c
+//
+// Returns true when the whole nul-terminated string `str` matches the
+// nul-terminated pattern `pat`. Supported pattern tokens:
+//   ?      any single character
+//   *      any run of characters, including none and including '/'
+//   [...]  character class with a-b ranges; a leading '!' inverts it; an
+//          unterminated class is matched as a literal '['
+//   \x     the literal character x
+// Bytes are compared as unsigned char values.
+bool glob(char const* pat, char const* str) {
+  /*
+   * Backtrack to previous * on mismatch and retry starting one
+   * character later in the string. Because * matches all characters
+   * (no exception for /), it can be easily proved that there's
+   * never a need to backtrack multiple levels.
+   */
+  char const* back_pat = nullptr;
+  char const* back_str = nullptr;
+
+  /*
+   * Loop over each token (character or class) in pat, matching
+   * it against the remaining unmatched tail of str. Return false
+   * on mismatch, or true after matching the trailing nul bytes.
+   */
+  for (;;) {
+    unsigned char c = *str++;
+    unsigned char d = *pat++;
+
+    switch (d) {
+      case '?': /* Wildcard: anything but nul */
+        if (c == '\0') return false;
+        break;
+      case '*':           /* Any-length wildcard */
+        if (*pat == '\0') /* Optimize trailing * case */
+          return true;
+        back_pat = pat;
+        back_str = --str; /* Allow zero-length match */
+        break;
+      case '[': { /* Character class */
+        bool match = false, inverted = (*pat == '!');
+        char const* cls = pat + inverted;
+        unsigned char a = *cls++;
+
+        /*
+         * Iterate over each span in the character class.
+         * A span is either a single character a, or a
+         * range a-b. The first span may begin with ']'.
+         */
+        do {
+          unsigned char b = a;
+
+          if (a == '\0') /* Malformed */
+            goto literal;
+
+          if (cls[0] == '-' && cls[1] != ']') {
+            b = cls[1];
+
+            if (b == '\0') goto literal;
+
+            cls += 2;
+            /* Any special action if a > b? */
+          }
+          match |= (a <= c && c <= b);
+        } while ((a = *cls++) != ']');
+
+        /*
+         * The end of str can never satisfy a class. Without this check an
+         * inverted class such as [!a] would accept the terminating nul and
+         * the loop would keep reading past the end of str.
+         */
+        if (c == '\0' || match == inverted) goto backtrack;
+        pat = cls;
+      } break;
+      case '\\':
+        d = *pat++;
+        [[fallthrough]];
+      default: /* Literal character */
+      literal:
+        if (c == d) {
+          if (d == '\0') return true;
+          break;
+        }
+      backtrack:
+        if (c == '\0' || !back_pat) return false; /* No point continuing */
+        /* Try again from last *, one character later in str. */
+        pat = back_pat;
+        str = ++back_str;
+        break;
+    }
+  }
+}
+
+// JS binding for glob(pattern, name): both arguments must already be strings
+// (validated by lib/path.js); sets the return value to the match result.
+void glob(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+  CHECK_GE(args.Length(), 2);
+  CHECK(args[0]->IsString());
+  CHECK(args[1]->IsString());
+
+  std::string pattern = Utf8Value(env->isolate(), args[0]).ToString();
+  std::string str = Utf8Value(env->isolate(), args[1]).ToString();
+  args.GetReturnValue().Set(glob(pattern.c_str(), str.c_str()));
+}
+
+void Initialize(Local<Object> target,
+                Local<Value> unused,
+                Local<Context> context,
+                void* priv) {
+  SetMethod(context, target, "glob", glob);
+}
+
+void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
+  registry->Register(glob);
+}
+}  // namespace path
+
+}  // namespace node
+
+NODE_BINDING_CONTEXT_AWARE_INTERNAL(path, node::path::Initialize)
+NODE_BINDING_EXTERNAL_REFERENCE(path, node::path::RegisterExternalReferences)
diff --git a/src/node_path.h b/src/node_path.h
new file mode 100644
index 00000000000000..7a700ac743cc5e
--- /dev/null
+++ b/src/node_path.h
@@ -0,0 +1,17 @@
+#ifndef SRC_NODE_PATH_H_
+#define SRC_NODE_PATH_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include "base_object.h"
+#include "node_snapshotable.h"
+#include "v8.h"
+
+namespace node {
+
+namespace path {}  // namespace path
+}  // namespace node
+
+#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif  // SRC_NODE_PATH_H_
diff --git a/test/parallel/test-path-glob.mjs b/test/parallel/test-path-glob.mjs
new file mode 100644
index 00000000000000..952dcad1f71a21
--- /dev/null
+++ b/test/parallel/test-path-glob.mjs
@@ -0,0 +1,97 @@
+import '../common/index.mjs';
+import { describe, it } from 'node:test';
+import * as assert from 'node:assert';
+import * as path from 'node:path';
+
+
+// https://github.com/torvalds/linux/blob/cdc9718d5e590d6905361800b938b93f2b66818e/lib/globtest.c
+const patterns = [
+  { expected: true, pattern: 'a', name: 'a' },
+  { expected: false, pattern: 'a', name: 'b' },
+  { expected: false, pattern: 'a', name: 'aa' },
+  { expected: false, pattern: 'a', name: '' },
+  { expected: true, pattern: '', name: '' },
+  { expected: false, pattern: '', name: 'a' },
+  /* Simple character class tests */
+  { expected: true, pattern: '[a]', name: 'a' },
+  { expected: false, pattern: '[a]', name: 'b' },
+  { expected: false, pattern: '[!a]', name: 'a' },
+  { expected: true, pattern: '[!a]', name: 'b' },
+  { expected: false, pattern: '[!a]', name: '' },
+  { expected: true, pattern: '[ab]', name: 'a' },
+  { expected: true, pattern: '[ab]', name: 'b' },
+  { expected: false, pattern: '[ab]', name: 'c' },
+  { expected: true, pattern: '[!ab]', name: 'c' },
+  { expected: true, pattern: '[a-c]', name: 'b' },
+  { expected: false, pattern: '[a-c]', name: 'd' },
+  /* Corner cases in character class parsing */
+  { expected: true, pattern: '[a-c-e-g]', name: '-' },
+  { expected: false, pattern: '[a-c-e-g]', name: 'd' },
+  { expected: true, pattern: '[a-c-e-g]', name: 'f' },
+  { expected: true, pattern: '[]a-ceg-ik[]', name: 'a' },
+  { expected: true, pattern: '[]a-ceg-ik[]', name: ']' },
+  { expected: true, pattern: '[]a-ceg-ik[]', name: '[' },
+  { expected: true, pattern: '[]a-ceg-ik[]', name: 'h' },
+  { expected: false, pattern: '[]a-ceg-ik[]', name: 'f' },
+  { expected: false, pattern: '[!]a-ceg-ik[]', name: 'h' },
+  { expected: false, pattern: '[!]a-ceg-ik[]', name: ']' },
+  { expected: true, pattern: '[!]a-ceg-ik[]', name: 'f' },
+  /* Simple wild cards */
+  { expected: true, pattern: '?', name: 'a' },
+  { expected: false, pattern: '?', name: 'aa' },
+  { expected: false, pattern: '??', name: 'a' },
+  { expected: true, pattern: '?x?', name: 'axb' },
+  { expected: false, pattern: '?x?', name: 'abx' },
+  { expected: false, pattern: '?x?', name: 'xab' },
+  /* Asterisk wild cards (backtracking) */
+  { expected: false, pattern: '*??', name: 'a' },
+  { expected: true, pattern: '*??', name: 'ab' },
+  { expected: true, pattern: '*??', name: 'abc' },
+  { expected: true, pattern: '*??', name: 'abcd' },
+  { expected: false, pattern: '??*', name: 'a' },
+  { expected: true, pattern: '??*', name: 'ab' },
+  { expected: true, pattern: '??*', name: 'abc' },
+  { expected: true, pattern: '??*', name: 'abcd' },
+  { expected: false, pattern: '?*?', name: 'a' },
+  { expected: true, pattern: '?*?', name: 'ab' },
+  { expected: true, pattern: '?*?', name: 'abc' },
+  { expected: true, pattern: '?*?', name: 'abcd' },
+  { expected: true, pattern: '*b', name: 'b' },
+  { expected: true, pattern: '*b', name: 'ab' },
+  { expected: false, pattern: '*b', name: 'ba' },
+  { expected: true, pattern: '*b', name: 'bb' },
+  { expected: true, pattern: '*b', name: 'abb' },
+  { expected: true, pattern: '*b', name: 'bab' },
+  { expected: true, pattern: '*bc', name: 'abbc' },
+  { expected: true, pattern: '*bc', name: 'bc' },
+  { expected: true, pattern: '*bc', name: 'bbc' },
+  { expected: true, pattern: '*bc', name: 'bcbc' },
+  /* Multiple asterisks (complex backtracking) */
+  { expected: true, pattern: '*ac*', name: 'abacadaeafag' },
+  { expected: true, pattern: '*ac*ae*ag*', name: 'abacadaeafag' },
+  { expected: true, pattern: '*a*b*[bc]*[ef]*g*', name: 'abacadaeafag' },
+  { expected: false, pattern: '*a*b*[ef]*[cd]*g*', name: 'abacadaeafag' },
+  { expected: true, pattern: '*abcd*', name: 'abcabcabcabcdefg' },
+  { expected: true, pattern: '*ab*cd*', name: 'abcabcabcabcdefg' },
+  { expected: true, pattern: '*abcd*abcdef*', name: 'abcabcdabcdeabcdefg' },
+  { expected: false, pattern: '*abcd*', name: 'abcabcabcabcefg' },
+  { expected: false, pattern: '*ab*cd*', name: 'abcabcabcabcefg' },
+];
+
+const invalid = [null, undefined, 1, Number.MAX_SAFE_INTEGER, true, false, Symbol(), {}, [], () => {}];
+
+describe('path.glob', () => {
+  for (const { expected, pattern, name } of patterns) {
+    it(`pattern "${pattern}" should ${expected ? '' : 'not '}match "${name}"`, () => {
+      assert.strictEqual(path.glob(pattern, name), expected);
+    });
+  }
+
+  for (const x of invalid) {
+    const name = typeof x === 'symbol' ? 'Symbol()' : x;
+    it(`${name} should throw as a parameter`, () => {
+      assert.throws(() => path.glob(x, ''), { code: 'ERR_INVALID_ARG_TYPE' });
+      assert.throws(() => path.glob('', x), { code: 'ERR_INVALID_ARG_TYPE' });
+    });
+  }
+});