-
Notifications
You must be signed in to change notification settings - Fork 29.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
path: add path.glob
#47490
path: add path.glob
#47490
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ | |
V(mksnapshot) \ | ||
V(options) \ | ||
V(os) \ | ||
V(path) \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a strong opinion, but maybe we should name the binding and file There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cjihrig I'm planning on moving a couple of functions to C++ (actually done the same exact change), so this change is beneficial for me too. |
||
V(performance) \ | ||
V(permission) \ | ||
V(pipe_wrap) \ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
#include "env-inl.h" | ||
#include "node_errors.h" | ||
#include "node_external_reference.h" | ||
#include "util-inl.h" | ||
#include "v8-fast-api-calls.h" | ||
|
||
namespace node { | ||
|
||
namespace path { | ||
using v8::Context; | ||
using v8::FunctionCallbackInfo; | ||
using v8::Local; | ||
using v8::Object; | ||
using v8::Value; | ||
|
||
// extracted from | ||
MoLow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// https://github.com/torvalds/linux/blob/cdc9718d5e590d6905361800b938b93f2b66818e/lib/glob.c | ||
bool glob(char const* pat, char const* str) { | ||
MoLow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/* | ||
* Backtrack to previous * on mismatch and retry starting one | ||
* character later in the string. Because * matches all characters | ||
* (no exception for /), it can be easily proved that there's | ||
* never a need to backtrack multiple levels. | ||
*/ | ||
char const* back_pat = nullptr; | ||
char const* back_str = nullptr; | ||
|
||
/* | ||
* Loop over each token (character or class) in pat, matching | ||
* it against the remaining unmatched tail of str. Return false | ||
* on mismatch, or true after matching the trailing nul bytes. | ||
*/ | ||
for (;;) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: There is no need to iterate each character. input.find_first_of("?*[") until the input is finished might produce better results if performance is the priority. If not, we can keep it as it is, and later improve it. Your call. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand these comments - is there really motivation in diverging from the implementation lifted from If we have improvement suggestions wouldn't it be better to upstream them to linux glob? (genuinely asking, not sure it's. just not really intuitive) |
||
unsigned char c = *str++; | ||
unsigned char d = *pat++; | ||
|
||
switch (d) { | ||
case '?': /* Wildcard: anything but nul */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: This switch case can be removed and be simplified for better performance. |
||
if (c == '\0') return false; | ||
break; | ||
case '*': /* Any-length wildcard */ | ||
if (*pat == '\0') /* Optimize trailing * case */ | ||
return true; | ||
back_pat = pat; | ||
back_str = --str; /* Allow zero-length match */ | ||
break; | ||
case '[': { /* Character class */ | ||
bool match = false, inverted = (*pat == '!'); | ||
char const* cls = pat + inverted; | ||
unsigned char a = *cls++; | ||
|
||
/* | ||
* Iterate over each span in the character class. | ||
* A span is either a single character a, or a | ||
* range a-b. The first span may begin with ']'. | ||
*/ | ||
do { | ||
unsigned char b = a; | ||
|
||
if (a == '\0') /* Malformed */ | ||
goto literal; | ||
|
||
if (cls[0] == '-' && cls[1] != ']') { | ||
b = cls[1]; | ||
|
||
if (b == '\0') goto literal; | ||
|
||
cls += 2; | ||
/* Any special action if a > b? */ | ||
} | ||
match |= (a <= c && c <= b); | ||
} while ((a = *cls++) != ']'); | ||
|
||
if (match == inverted) goto backtrack; | ||
pat = cls; | ||
} break; | ||
case '\\': | ||
d = *pat++; | ||
[[fallthrough]]; | ||
default: /* Literal character */ | ||
literal: | ||
if (c == d) { | ||
if (d == '\0') return true; | ||
break; | ||
} | ||
backtrack: | ||
if (c == '\0' || !back_pat) return false; /* No point continuing */ | ||
/* Try again from last *, one character later in str. */ | ||
pat = back_pat; | ||
str = ++back_str; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
void SlowGlob(const FunctionCallbackInfo<Value>& args) { | ||
Environment* env = Environment::GetCurrent(args); | ||
CHECK_GE(args.Length(), 2); | ||
CHECK(args[0]->IsString()); | ||
CHECK(args[1]->IsString()); | ||
|
||
std::string pattern = Utf8Value(env->isolate(), args[0]).ToString(); | ||
anonrig marked this conversation as resolved.
Show resolved
Hide resolved
|
||
std::string str = Utf8Value(env->isolate(), args[1]).ToString(); | ||
args.GetReturnValue().Set(glob(pattern.c_str(), str.c_str())); | ||
} | ||
bool FastGlob(Local<Value> receiver, | ||
const v8::FastOneByteString& pattern, | ||
const v8::FastOneByteString& str) { | ||
return glob(pattern.data, str.data); | ||
} | ||
|
||
v8::CFunction fast_glob_(v8::CFunction::Make(FastGlob)); | ||
|
||
void Initialize(Local<Object> target, | ||
Local<Value> unused, | ||
Local<Context> context, | ||
void* priv) { | ||
SetFastMethod(context, target, "glob", SlowGlob, &fast_glob_); | ||
} | ||
|
||
void RegisterExternalReferences(ExternalReferenceRegistry* registry) { | ||
registry->Register(SlowGlob); | ||
registry->Register(FastGlob); | ||
registry->Register(fast_glob_.GetTypeInfo()); | ||
} | ||
} // namespace path | ||
|
||
} // namespace node | ||
|
||
NODE_BINDING_CONTEXT_AWARE_INTERNAL(path, node::path::Initialize) | ||
NODE_BINDING_EXTERNAL_REFERENCE(path, node::path::RegisterExternalReferences) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import '../common/index.mjs'; | ||
import { describe, it } from 'node:test'; | ||
import * as assert from 'node:assert'; | ||
import * as path from 'node:path'; | ||
|
||
|
||
// https://github.com/torvalds/linux/blob/cdc9718d5e590d6905361800b938b93f2b66818e/lib/globtest.c | ||
const patterns = [ | ||
{ expected: true, pattern: 'a', name: 'a' }, | ||
{ expected: false, pattern: 'a', name: 'b' }, | ||
{ expected: false, pattern: 'a', name: 'aa' }, | ||
{ expected: false, pattern: 'a', name: '' }, | ||
{ expected: true, pattern: '', name: '' }, | ||
{ expected: false, pattern: '', name: 'a' }, | ||
/* Simple character class tests */ | ||
{ expected: true, pattern: '[a]', name: 'a' }, | ||
{ expected: false, pattern: '[a]', name: 'b' }, | ||
{ expected: false, pattern: '[!a]', name: 'a' }, | ||
{ expected: true, pattern: '[!a]', name: 'b' }, | ||
{ expected: true, pattern: '[ab]', name: 'a' }, | ||
{ expected: true, pattern: '[ab]', name: 'b' }, | ||
{ expected: false, pattern: '[ab]', name: 'c' }, | ||
{ expected: true, pattern: '[!ab]', name: 'c' }, | ||
{ expected: true, pattern: '[a-c]', name: 'b' }, | ||
{ expected: false, pattern: '[a-c]', name: 'd' }, | ||
/* Corner cases in character class parsing */ | ||
{ expected: true, pattern: '[a-c-e-g]', name: '-' }, | ||
{ expected: false, pattern: '[a-c-e-g]', name: 'd' }, | ||
{ expected: true, pattern: '[a-c-e-g]', name: 'f' }, | ||
{ expected: true, pattern: '[]a-ceg-ik[]', name: 'a' }, | ||
{ expected: true, pattern: '[]a-ceg-ik[]', name: ']' }, | ||
{ expected: true, pattern: '[]a-ceg-ik[]', name: '[' }, | ||
{ expected: true, pattern: '[]a-ceg-ik[]', name: 'h' }, | ||
{ expected: false, pattern: '[]a-ceg-ik[]', name: 'f' }, | ||
{ expected: false, pattern: '[!]a-ceg-ik[]', name: 'h' }, | ||
{ expected: false, pattern: '[!]a-ceg-ik[]', name: ']' }, | ||
{ expected: true, pattern: '[!]a-ceg-ik[]', name: 'f' }, | ||
/* Simple wild cards */ | ||
{ expected: true, pattern: '?', name: 'a' }, | ||
{ expected: false, pattern: '?', name: 'aa' }, | ||
{ expected: false, pattern: '??', name: 'a' }, | ||
{ expected: true, pattern: '?x?', name: 'axb' }, | ||
{ expected: false, pattern: '?x?', name: 'abx' }, | ||
{ expected: false, pattern: '?x?', name: 'xab' }, | ||
/* Asterisk wild cards (backtracking) */ | ||
{ expected: false, pattern: '*??', name: 'a' }, | ||
{ expected: true, pattern: '*??', name: 'ab' }, | ||
{ expected: true, pattern: '*??', name: 'abc' }, | ||
{ expected: true, pattern: '*??', name: 'abcd' }, | ||
{ expected: false, pattern: '??*', name: 'a' }, | ||
{ expected: true, pattern: '??*', name: 'ab' }, | ||
{ expected: true, pattern: '??*', name: 'abc' }, | ||
{ expected: true, pattern: '??*', name: 'abcd' }, | ||
{ expected: false, pattern: '?*?', name: 'a' }, | ||
{ expected: true, pattern: '?*?', name: 'ab' }, | ||
{ expected: true, pattern: '?*?', name: 'abc' }, | ||
{ expected: true, pattern: '?*?', name: 'abcd' }, | ||
{ expected: true, pattern: '*b', name: 'b' }, | ||
{ expected: true, pattern: '*b', name: 'ab' }, | ||
{ expected: false, pattern: '*b', name: 'ba' }, | ||
{ expected: true, pattern: '*b', name: 'bb' }, | ||
{ expected: true, pattern: '*b', name: 'abb' }, | ||
{ expected: true, pattern: '*b', name: 'bab' }, | ||
{ expected: true, pattern: '*bc', name: 'abbc' }, | ||
{ expected: true, pattern: '*bc', name: 'bc' }, | ||
{ expected: true, pattern: '*bc', name: 'bbc' }, | ||
{ expected: true, pattern: '*bc', name: 'bcbc' }, | ||
/* Multiple asterisks (complex backtracking) */ | ||
{ expected: true, pattern: '*ac*', name: 'abacadaeafag' }, | ||
{ expected: true, pattern: '*ac*ae*ag*', name: 'abacadaeafag' }, | ||
{ expected: true, pattern: '*a*b*[bc]*[ef]*g*', name: 'abacadaeafag' }, | ||
{ expected: false, pattern: '*a*b*[ef]*[cd]*g*', name: 'abacadaeafag' }, | ||
{ expected: true, pattern: '*abcd*', name: 'abcabcabcabcdefg' }, | ||
{ expected: true, pattern: '*ab*cd*', name: 'abcabcabcabcdefg' }, | ||
{ expected: true, pattern: '*abcd*abcdef*', name: 'abcabcdabcdeabcdefg' }, | ||
{ expected: false, pattern: '*abcd*', name: 'abcabcabcabcefg' }, | ||
{ expected: false, pattern: '*ab*cd*', name: 'abcabcabcabcefg' }, | ||
]; | ||
|
||
const invalid = [null, undefined, 1, Number.MAX_SAFE_INTEGER, true, false, Symbol(), {}, [], () => {}]; | ||
|
||
describe('path.glob', () => { | ||
for (const { expected, pattern, name } of patterns) { | ||
it(`pattern "${pattern}" should ${expected ? '' : 'not '}match "${name}"`, () => { | ||
assert.strictEqual(path.glob(pattern, name), expected); | ||
}); | ||
} | ||
|
||
for (const x of invalid) { | ||
MoLow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const name = typeof x === 'symbol' ? 'Symnol()' : x; | ||
MoLow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
it(`${name} should throw as a parameter`, () => { | ||
assert.throws(() => path.glob(x, ''), { code: 'ERR_INVALID_ARG_TYPE' }); | ||
assert.throws(() => path.glob('', x), { code: 'ERR_INVALID_ARG_TYPE' }); | ||
}); | ||
} | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we following this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these are instructions for maintainers of the Linux kernel:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/LICENSES/preferred/MIT?h=v6.3-rc6
I am not really sure how we should embed this since it only really appears here
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/glob.c?h=v6.3-rc6#n10
see also the comment on top of
#define MODULE_LICENSE
:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did what I best understood from what I found in the Linux kernel and in other license examples so if anyone can help out with what really should be used as the license - I'd appreciate