From d3a3dcfb5560cca610876d9c92acbc7aeb1fe148 Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Fri, 27 Oct 2023 19:02:20 +0800 Subject: [PATCH] Respect Windows `Path` directory separators in `File.match?` --- spec/std/file_spec.cr | 155 +++++++++++++++++++++++++----------------- src/file.cr | 25 ++++--- 2 files changed, 107 insertions(+), 73 deletions(-) diff --git a/spec/std/file_spec.cr b/spec/std/file_spec.cr index 4dbe8e51b53a..53d28bdcee4e 100644 --- a/spec/std/file_spec.cr +++ b/spec/std/file_spec.cr @@ -8,6 +8,18 @@ private def it_raises_on_null_byte(operation, file = __FILE__, line = __LINE__, end end +private def assert_file_matches(pattern, path : String, *, file = __FILE__, line = __LINE__) + File.match?(pattern, path).should be_true, file: file, line: line + File.match?(pattern, Path.posix(path)).should be_true, file: file, line: line + File.match?(pattern, Path.posix(path).to_windows).should be_true, file: file, line: line +end + +private def refute_file_matches(pattern, path : String, *, file = __FILE__, line = __LINE__) + File.match?(pattern, path).should be_false, file: file, line: line + File.match?(pattern, Path.posix(path)).should be_false, file: file, line: line + File.match?(pattern, Path.posix(path).to_windows).should be_false, file: file, line: line +end + private def normalize_permissions(permissions, *, directory) {% if flag?(:win32) %} normalized_permissions = 0o444 @@ -1449,73 +1461,89 @@ describe "File" do describe ".match?" do it "matches basics" do - File.match?("abc", Path["abc"]).should be_true - File.match?("abc", "abc").should be_true - File.match?("*", "abc").should be_true - File.match?("*c", "abc").should be_true - File.match?("a*", "a").should be_true - File.match?("a*", "abc").should be_true - File.match?("a*/b", "abc/b").should be_true - File.match?("*x", "xxx").should be_true + assert_file_matches "abc", "abc" + assert_file_matches "*", "abc" + assert_file_matches "*c", "abc" + assert_file_matches "a*", "a" + assert_file_matches "a*", "abc" + assert_file_matches "a*/b", "abc/b" + assert_file_matches "*x", "xxx" end + it "matches multiple expansions" do - File.match?("a*b*c*d*e*/f", "axbxcxdxe/f").should be_true - File.match?("a*b*c*d*e*/f", "axbxcxdxexxx/f").should be_true - File.match?("a*b?c*x", "abxbbxdbxebxczzx").should be_true - File.match?("a*b?c*x", "abxbbxdbxebxczzy").should be_false + assert_file_matches "a*b*c*d*e*/f", "axbxcxdxe/f" + assert_file_matches "a*b*c*d*e*/f", "axbxcxdxexxx/f" + assert_file_matches "a*b?c*x", "abxbbxdbxebxczzx" + refute_file_matches "a*b?c*x", "abxbbxdbxebxczzy" end + it "matches unicode characters" do - File.match?("a?b", "a☺b").should be_true - File.match?("a???b", "a☺b").should be_false - end - it "* don't match /" do - File.match?("a*", "ab/c").should be_false - File.match?("a*/b", "a/c/b").should be_false - File.match?("a*b*c*d*e*/f", "axbxcxdxe/xxx/f").should be_false - File.match?("a*b*c*d*e*/f", "axbxcxdxexxx/fff").should be_false - end - it "** matches /" do - File.match?("a**", "ab/c").should be_true - File.match?("a**/b", "a/c/b").should be_true - File.match?("a*b*c*d*e**/f", "axbxcxdxe/xxx/f").should be_true - File.match?("a*b*c*d*e**/f", "axbxcxdxexxx/f").should be_true - File.match?("a*b*c*d*e**/f", "axbxcxdxexxx/fff").should be_false + assert_file_matches "a?b", "a☺b" + refute_file_matches "a???b", "a☺b" + end + + it "* don't match path separator" do + refute_file_matches "a*", "ab/c" + refute_file_matches "a*/b", "a/c/b" + refute_file_matches "a*b*c*d*e*/f", "axbxcxdxe/xxx/f" + refute_file_matches "a*b*c*d*e*/f", "axbxcxdxexxx/fff" end + + it "** matches path separator" do + assert_file_matches "a**", "ab/c" + assert_file_matches "a**/b", "a/c/b" + assert_file_matches "a*b*c*d*e**/f", "axbxcxdxe/xxx/f" + assert_file_matches "a*b*c*d*e**/f", "axbxcxdxexxx/f" + refute_file_matches "a*b*c*d*e**/f", "axbxcxdxexxx/fff" + end + it "classes" do - File.match?("ab[c]", "abc").should be_true - File.match?("ab[b-d]", "abc").should be_true - File.match?("ab[d-b]", "abc").should be_false - File.match?("ab[e-g]", "abc").should be_false - File.match?("ab[e-gc]", "abc").should be_true - File.match?("ab[^c]", "abc").should be_false - File.match?("ab[^b-d]", "abc").should be_false - File.match?("ab[^e-g]", "abc").should be_true - File.match?("a[^a]b", "a☺b").should be_true - File.match?("a[^a][^a][^a]b", "a☺b").should be_false - File.match?("[a-ζ]*", "α").should be_true - File.match?("*[a-ζ]", "A").should be_false + assert_file_matches "ab[c]", "abc" + assert_file_matches "ab[b-d]", "abc" + refute_file_matches "ab[d-b]", "abc" + refute_file_matches "ab[e-g]", "abc" + assert_file_matches "ab[e-gc]", "abc" + refute_file_matches "ab[^c]", "abc" + refute_file_matches "ab[^b-d]", "abc" + assert_file_matches "ab[^e-g]", "abc" + assert_file_matches "a[^a]b", "a☺b" + refute_file_matches "a[^a][^a][^a]b", "a☺b" + assert_file_matches "[a-ζ]*", "α" + refute_file_matches "*[a-ζ]", "A" end + it "escape" do + # NOTE: `*` is forbidden in Windows paths File.match?("a\\*b", "a*b").should be_true - File.match?("a\\*b", "ab").should be_false + refute_file_matches "a\\*b", "ab" File.match?("a\\**b", "a*bb").should be_true - File.match?("a\\**b", "abb").should be_false + refute_file_matches "a\\**b", "abb" File.match?("a*\\*b", "ab*b").should be_true - File.match?("a*\\*b", "abb").should be_false + refute_file_matches "a*\\*b", "abb" + + assert_file_matches "a\\[b\\]", "a[b]" + refute_file_matches "a\\[b\\]", "ab" + assert_file_matches "a\\[bb\\]", "a[bb]" + refute_file_matches "a\\[bb\\]", "abb" + assert_file_matches "a[b]\\[b\\]", "ab[b]" + refute_file_matches "a[b]\\[b\\]", "abb" end + it "special chars" do - File.match?("a?b", "a/b").should be_false - File.match?("a*b", "a/b").should be_false + refute_file_matches "a?b", "a/b" + refute_file_matches "a*b", "a/b" end + it "classes escapes" do - File.match?("[\\]a]", "]").should be_true - File.match?("[\\-]", "-").should be_true - File.match?("[x\\-]", "x").should be_true - File.match?("[x\\-]", "-").should be_true - File.match?("[x\\-]", "z").should be_false - File.match?("[\\-x]", "x").should be_true - File.match?("[\\-x]", "-").should be_true - File.match?("[\\-x]", "a").should be_false + assert_file_matches "[\\]a]", "]" + assert_file_matches "[\\-]", "-" + assert_file_matches "[x\\-]", "x" + assert_file_matches "[x\\-]", "-" + refute_file_matches "[x\\-]", "z" + assert_file_matches "[\\-x]", "x" + assert_file_matches "[\\-x]", "-" + refute_file_matches "[\\-x]", "a" + expect_raises(File::BadPatternError, "empty character set") do File.match?("[]a]", "]") end @@ -1547,18 +1575,19 @@ describe "File" do File.match?("a[", "a") end end + it "alternates" do - File.match?("{abc,def}", "abc").should be_true - File.match?("ab{c,}", "abc").should be_true - File.match?("ab{c,}", "ab").should be_true - File.match?("ab{d,e}", "abc").should be_false - File.match?("ab{*,/cde}", "abcde").should be_true - File.match?("ab{*,/cde}", "ab/cde").should be_true - File.match?("ab{?,/}de", "abcde").should be_true - File.match?("ab{?,/}de", "ab/de").should be_true - File.match?("ab{{c,d}ef,}", "ab").should be_true - File.match?("ab{{c,d}ef,}", "abcef").should be_true - File.match?("ab{{c,d}ef,}", "abdef").should be_true + assert_file_matches "{abc,def}", "abc" + assert_file_matches "ab{c,}", "abc" + assert_file_matches "ab{c,}", "ab" + refute_file_matches "ab{d,e}", "abc" + assert_file_matches "ab{*,/cde}", "abcde" + assert_file_matches "ab{*,/cde}", "ab/cde" + assert_file_matches "ab{?,/}de", "abcde" + assert_file_matches "ab{?,/}de", "ab/de" + assert_file_matches "ab{{c,d}ef,}", "ab" + assert_file_matches "ab{{c,d}ef,}", "abcef" + assert_file_matches "ab{{c,d}ef,}", "abdef" end end diff --git a/src/file.cr b/src/file.cr index 31b8e5420937..0d1bbffa5090 100644 --- a/src/file.cr +++ b/src/file.cr @@ -458,40 +458,45 @@ class File < IO::FileDescriptor # # The pattern syntax is similar to shell filename globbing. It may contain the following metacharacters: # - # * `*` matches an unlimited number of arbitrary characters excluding `/`. + # * `*` matches an unlimited number of arbitrary characters, excluding any directory separators. # * `"*"` matches all regular files. # * `"c*"` matches all files beginning with `c`. # * `"*c"` matches all files ending with `c`. # * `"*c*"` matches all files that have `c` in them (including at the beginning or end). # * `**` matches directories recursively if followed by `/`. # If this path segment contains any other characters, it is the same as the usual `*`. - # * `?` matches any one character excluding `/`. + # * `?` matches one character, excluding any directory separators. # * character sets: - # * `[abc]` matches any one of these character. + # * `[abc]` matches any one of these characters. # * `[^abc]` matches any one character other than these. # * `[a-z]` matches any one character in the range. # * `{a,b}` matches subpattern `a` or `b`. # * `\\` escapes the next character. # - # NOTE: Only `/` is recognized as path separator in both *pattern* and *path*. + # If *path* is a `Path`, all directory separators supported by *path* are + # recognized, according to the path's kind. If *path* is a `String`, only `/` + # is considered a directory separator. + # + # NOTE: Only `/` in *pattern* matches directory separators in *path*. def self.match?(pattern : String, path : Path | String) : Bool expanded_patterns = [] of String File.expand_brace_pattern(pattern, expanded_patterns) expanded_patterns.each do |expanded_pattern| - return true if match_single_pattern(expanded_pattern, path.to_s) + return true if match_single_pattern(expanded_pattern, path) end false end - private def self.match_single_pattern(pattern : String, path : String) + private def self.match_single_pattern(pattern : String, path : Path | String) # linear-time algorithm adapted from https://research.swtch.com/glob preader = Char::Reader.new(pattern) - sreader = Char::Reader.new(path) + sreader = Char::Reader.new(path.to_s) next_ppos = 0 next_spos = 0 - strlen = path.bytesize + strlen = path.to_s.bytesize escaped = false + separators = Path.separators(path.is_a?(Path) ? path.@kind : Path::Kind::POSIX) while true pnext = preader.has_next? @@ -509,14 +514,14 @@ class File < IO::FileDescriptor preader.next_char next when {'?', false} - if snext && char != '/' + if snext && !char.in?(separators) preader.next_char sreader.next_char next end when {'*', false} double_star = preader.peek_next_char == '*' - if char == '/' && !double_star + if char.in?(separators) && !double_star preader.next_char next_spos = 0 next