Skip to content

Commit

Permalink
Respect Windows Path directory separators in File.match?
Browse files Browse the repository at this point in the history
  • Loading branch information
HertzDevil committed Oct 27, 2023
1 parent ad7d42c commit d3a3dcf
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 73 deletions.
155 changes: 92 additions & 63 deletions spec/std/file_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ private def it_raises_on_null_byte(operation, file = __FILE__, line = __LINE__,
end
end

private def assert_file_matches(pattern, path : String, *, file = __FILE__, line = __LINE__)
File.match?(pattern, path).should be_true, file: file, line: line
File.match?(pattern, Path.posix(path)).should be_true, file: file, line: line
File.match?(pattern, Path.posix(path).to_windows).should be_true, file: file, line: line
end

private def refute_file_matches(pattern, path : String, *, file = __FILE__, line = __LINE__)
File.match?(pattern, path).should be_false, file: file, line: line
File.match?(pattern, Path.posix(path)).should be_false, file: file, line: line
File.match?(pattern, Path.posix(path).to_windows).should be_false, file: file, line: line
end

private def normalize_permissions(permissions, *, directory)
{% if flag?(:win32) %}
normalized_permissions = 0o444
Expand Down Expand Up @@ -1449,73 +1461,89 @@ describe "File" do

describe ".match?" do
it "matches basics" do
File.match?("abc", Path["abc"]).should be_true
File.match?("abc", "abc").should be_true
File.match?("*", "abc").should be_true
File.match?("*c", "abc").should be_true
File.match?("a*", "a").should be_true
File.match?("a*", "abc").should be_true
File.match?("a*/b", "abc/b").should be_true
File.match?("*x", "xxx").should be_true
assert_file_matches "abc", "abc"
assert_file_matches "*", "abc"
assert_file_matches "*c", "abc"
assert_file_matches "a*", "a"
assert_file_matches "a*", "abc"
assert_file_matches "a*/b", "abc/b"
assert_file_matches "*x", "xxx"
end

it "matches multiple expansions" do
File.match?("a*b*c*d*e*/f", "axbxcxdxe/f").should be_true
File.match?("a*b*c*d*e*/f", "axbxcxdxexxx/f").should be_true
File.match?("a*b?c*x", "abxbbxdbxebxczzx").should be_true
File.match?("a*b?c*x", "abxbbxdbxebxczzy").should be_false
assert_file_matches "a*b*c*d*e*/f", "axbxcxdxe/f"
assert_file_matches "a*b*c*d*e*/f", "axbxcxdxexxx/f"
assert_file_matches "a*b?c*x", "abxbbxdbxebxczzx"
refute_file_matches "a*b?c*x", "abxbbxdbxebxczzy"
end

it "matches unicode characters" do
File.match?("a?b", "a☺b").should be_true
File.match?("a???b", "a☺b").should be_false
end
it "* don't match /" do
File.match?("a*", "ab/c").should be_false
File.match?("a*/b", "a/c/b").should be_false
File.match?("a*b*c*d*e*/f", "axbxcxdxe/xxx/f").should be_false
File.match?("a*b*c*d*e*/f", "axbxcxdxexxx/fff").should be_false
end
it "** matches /" do
File.match?("a**", "ab/c").should be_true
File.match?("a**/b", "a/c/b").should be_true
File.match?("a*b*c*d*e**/f", "axbxcxdxe/xxx/f").should be_true
File.match?("a*b*c*d*e**/f", "axbxcxdxexxx/f").should be_true
File.match?("a*b*c*d*e**/f", "axbxcxdxexxx/fff").should be_false
assert_file_matches "a?b", "a☺b"
refute_file_matches "a???b", "a☺b"
end

it "* don't match path separator" do
refute_file_matches "a*", "ab/c"
refute_file_matches "a*/b", "a/c/b"
refute_file_matches "a*b*c*d*e*/f", "axbxcxdxe/xxx/f"
refute_file_matches "a*b*c*d*e*/f", "axbxcxdxexxx/fff"
end

it "** matches path separator" do
assert_file_matches "a**", "ab/c"
assert_file_matches "a**/b", "a/c/b"
assert_file_matches "a*b*c*d*e**/f", "axbxcxdxe/xxx/f"
assert_file_matches "a*b*c*d*e**/f", "axbxcxdxexxx/f"
refute_file_matches "a*b*c*d*e**/f", "axbxcxdxexxx/fff"
end

it "classes" do
File.match?("ab[c]", "abc").should be_true
File.match?("ab[b-d]", "abc").should be_true
File.match?("ab[d-b]", "abc").should be_false
File.match?("ab[e-g]", "abc").should be_false
File.match?("ab[e-gc]", "abc").should be_true
File.match?("ab[^c]", "abc").should be_false
File.match?("ab[^b-d]", "abc").should be_false
File.match?("ab[^e-g]", "abc").should be_true
File.match?("a[^a]b", "a☺b").should be_true
File.match?("a[^a][^a][^a]b", "a☺b").should be_false
File.match?("[a-ζ]*", "α").should be_true
File.match?("*[a-ζ]", "A").should be_false
assert_file_matches "ab[c]", "abc"
assert_file_matches "ab[b-d]", "abc"
refute_file_matches "ab[d-b]", "abc"
refute_file_matches "ab[e-g]", "abc"
assert_file_matches "ab[e-gc]", "abc"
refute_file_matches "ab[^c]", "abc"
refute_file_matches "ab[^b-d]", "abc"
assert_file_matches "ab[^e-g]", "abc"
assert_file_matches "a[^a]b", "a☺b"
refute_file_matches "a[^a][^a][^a]b", "a☺b"
assert_file_matches "[a-ζ]*", "α"
refute_file_matches "*[a-ζ]", "A"
end

it "escape" do
# NOTE: `*` is forbidden in Windows paths
File.match?("a\\*b", "a*b").should be_true
File.match?("a\\*b", "ab").should be_false
refute_file_matches "a\\*b", "ab"
File.match?("a\\**b", "a*bb").should be_true
File.match?("a\\**b", "abb").should be_false
refute_file_matches "a\\**b", "abb"
File.match?("a*\\*b", "ab*b").should be_true
File.match?("a*\\*b", "abb").should be_false
refute_file_matches "a*\\*b", "abb"

assert_file_matches "a\\[b\\]", "a[b]"
refute_file_matches "a\\[b\\]", "ab"
assert_file_matches "a\\[bb\\]", "a[bb]"
refute_file_matches "a\\[bb\\]", "abb"
assert_file_matches "a[b]\\[b\\]", "ab[b]"
refute_file_matches "a[b]\\[b\\]", "abb"
end

it "special chars" do
File.match?("a?b", "a/b").should be_false
File.match?("a*b", "a/b").should be_false
refute_file_matches "a?b", "a/b"
refute_file_matches "a*b", "a/b"
end

it "classes escapes" do
File.match?("[\\]a]", "]").should be_true
File.match?("[\\-]", "-").should be_true
File.match?("[x\\-]", "x").should be_true
File.match?("[x\\-]", "-").should be_true
File.match?("[x\\-]", "z").should be_false
File.match?("[\\-x]", "x").should be_true
File.match?("[\\-x]", "-").should be_true
File.match?("[\\-x]", "a").should be_false
assert_file_matches "[\\]a]", "]"
assert_file_matches "[\\-]", "-"
assert_file_matches "[x\\-]", "x"
assert_file_matches "[x\\-]", "-"
refute_file_matches "[x\\-]", "z"
assert_file_matches "[\\-x]", "x"
assert_file_matches "[\\-x]", "-"
refute_file_matches "[\\-x]", "a"

expect_raises(File::BadPatternError, "empty character set") do
File.match?("[]a]", "]")
end
Expand Down Expand Up @@ -1547,18 +1575,19 @@ describe "File" do
File.match?("a[", "a")
end
end

it "alternates" do
File.match?("{abc,def}", "abc").should be_true
File.match?("ab{c,}", "abc").should be_true
File.match?("ab{c,}", "ab").should be_true
File.match?("ab{d,e}", "abc").should be_false
File.match?("ab{*,/cde}", "abcde").should be_true
File.match?("ab{*,/cde}", "ab/cde").should be_true
File.match?("ab{?,/}de", "abcde").should be_true
File.match?("ab{?,/}de", "ab/de").should be_true
File.match?("ab{{c,d}ef,}", "ab").should be_true
File.match?("ab{{c,d}ef,}", "abcef").should be_true
File.match?("ab{{c,d}ef,}", "abdef").should be_true
assert_file_matches "{abc,def}", "abc"
assert_file_matches "ab{c,}", "abc"
assert_file_matches "ab{c,}", "ab"
refute_file_matches "ab{d,e}", "abc"
assert_file_matches "ab{*,/cde}", "abcde"
assert_file_matches "ab{*,/cde}", "ab/cde"
assert_file_matches "ab{?,/}de", "abcde"
assert_file_matches "ab{?,/}de", "ab/de"
assert_file_matches "ab{{c,d}ef,}", "ab"
assert_file_matches "ab{{c,d}ef,}", "abcef"
assert_file_matches "ab{{c,d}ef,}", "abdef"
end
end

Expand Down
25 changes: 15 additions & 10 deletions src/file.cr
Original file line number Diff line number Diff line change
Expand Up @@ -458,40 +458,45 @@ class File < IO::FileDescriptor
#
# The pattern syntax is similar to shell filename globbing. It may contain the following metacharacters:
#
# * `*` matches an unlimited number of arbitrary characters excluding `/`.
# * `*` matches an unlimited number of arbitrary characters, excluding any directory separators.
# * `"*"` matches all regular files.
# * `"c*"` matches all files beginning with `c`.
# * `"*c"` matches all files ending with `c`.
# * `"*c*"` matches all files that have `c` in them (including at the beginning or end).
# * `**` matches directories recursively if followed by `/`.
# If this path segment contains any other characters, it is the same as the usual `*`.
# * `?` matches any one character excluding `/`.
# * `?` matches one character, excluding any directory separators.
# * character sets:
# * `[abc]` matches any one of these character.
# * `[abc]` matches any one of these characters.
# * `[^abc]` matches any one character other than these.
# * `[a-z]` matches any one character in the range.
# * `{a,b}` matches subpattern `a` or `b`.
# * `\\` escapes the next character.
#
# NOTE: Only `/` is recognized as path separator in both *pattern* and *path*.
# If *path* is a `Path`, all directory separators supported by *path* are
# recognized, according to the path's kind. If *path* is a `String`, only `/`
# is considered a directory separator.
#
# NOTE: Only `/` in *pattern* matches directory separators in *path*.
def self.match?(pattern : String, path : Path | String) : Bool
expanded_patterns = [] of String
File.expand_brace_pattern(pattern, expanded_patterns)

expanded_patterns.each do |expanded_pattern|
return true if match_single_pattern(expanded_pattern, path.to_s)
return true if match_single_pattern(expanded_pattern, path)
end
false
end

private def self.match_single_pattern(pattern : String, path : String)
private def self.match_single_pattern(pattern : String, path : Path | String)
# linear-time algorithm adapted from https://research.swtch.com/glob
preader = Char::Reader.new(pattern)
sreader = Char::Reader.new(path)
sreader = Char::Reader.new(path.to_s)
next_ppos = 0
next_spos = 0
strlen = path.bytesize
strlen = path.to_s.bytesize
escaped = false
separators = Path.separators(path.is_a?(Path) ? path.@kind : Path::Kind::POSIX)

while true
pnext = preader.has_next?
Expand All @@ -509,14 +514,14 @@ class File < IO::FileDescriptor
preader.next_char
next
when {'?', false}
if snext && char != '/'
if snext && !char.in?(separators)
preader.next_char
sreader.next_char
next
end
when {'*', false}
double_star = preader.peek_next_char == '*'
if char == '/' && !double_star
if char.in?(separators) && !double_star
preader.next_char
next_spos = 0
next
Expand Down

0 comments on commit d3a3dcf

Please sign in to comment.