From c3d13b64ee3fca309640ff908e81891f550aa46b Mon Sep 17 00:00:00 2001
From: Yura Sokolov
Date: Sun, 17 Sep 2023 03:06:16 +0300
Subject: [PATCH] Add ability to use String and Char as pattern in StringScanner

---
 spec/std/string_scanner_spec.cr |  98 +++++++++++++++++++--
 src/string_scanner.cr           | 145 ++++++++++++++++++++++++++++++--
 2 files changed, 225 insertions(+), 18 deletions(-)

diff --git a/spec/std/string_scanner_spec.cr b/spec/std/string_scanner_spec.cr
index ae241360a5ec..5513e44b7902 100644
--- a/spec/std/string_scanner_spec.cr
+++ b/spec/std/string_scanner_spec.cr
@@ -4,8 +4,9 @@ require "string_scanner"
 describe StringScanner, "#scan" do
   it "returns the string matched and advances the offset" do
     s = StringScanner.new("this is a string")
-    s.scan(/\w+\s/).should eq("this ")
-    s.scan(/\w+\s/).should eq("is ")
+    s.scan(/\w+/).should eq("this")
+    s.scan(' ').should eq(" ")
+    s.scan("is ").should eq("is ")
     s.scan(/\w+\s/).should eq("a ")
     s.scan(/\w+/).should eq("string")
   end
@@ -14,6 +15,8 @@ describe StringScanner, "#scan" do
     s = StringScanner.new("test string")
     s.scan(/\w+/).should_not be_nil # => "test"
     s.scan(/\w+/).should be_nil
+    s.scan('s').should be_nil
+    s.scan("string").should be_nil
     s.scan(/\s\w+/).should_not be_nil # => " string"
     s.scan(/.*/).should_not be_nil    # => ""
   end
@@ -22,15 +25,20 @@ end
 describe StringScanner, "#scan_until" do
   it "returns the string matched and advances the offset" do
     s = StringScanner.new("test string")
-    s.scan_until(/tr/).should eq("test str")
+    s.scan_until(/t /).should eq("test ")
+    s.offset.should eq(5)
+    s.scan_until("tr").should eq("str")
     s.offset.should eq(8)
-    s.scan_until(/g/).should eq("ing")
+    s.scan_until('n').should eq("in")
+    s.offset.should eq(10)
   end
 
   it "returns nil if it can't match from the offset" do
     s = StringScanner.new("test string")
     s.offset = 8
     s.scan_until(/tr/).should be_nil
+    s.scan_until('r').should be_nil
+    s.scan_until("tr").should be_nil
   end
 end
 
@@ -45,7 +53,10 @@ describe StringScanner, "#skip" do
     s.skip(/\d+/).should eq(nil)
     s.offset.should eq(5)
 
-    s.skip(/\w+\s/).should eq(3)
+    s.skip('i').should eq(1)
+    s.offset.should eq(6)
+
+    s.skip("s ").should eq(2)
     s.offset.should eq(8)
 
     s.skip(/\w+\s/).should eq(2)
@@ -64,12 +75,17 @@ describe StringScanner, "#skip_until" do
     s.offset.should eq(0)
     s[0]?.should be_nil
 
-    s.skip_until(/a\s/).should eq(10)
-    s.offset.should eq(10)
+    s.skip_until(/\sis\s/).should eq(8)
+    s.offset.should eq(8)
     s[0]?.should_not be_nil
 
-    s.skip_until(/ng/).should eq(6)
+    s.skip_until("st").should eq(4)
+    s.offset.should eq(12)
+    s[0]?.should_not be_nil
+
+    s.skip_until("ng").should eq(4)
     s.offset.should eq(16)
+    s[0]?.should_not be_nil
   end
 end
 
@@ -91,11 +107,17 @@ describe StringScanner, "#check" do
     s.offset.should eq(5)
     s.check(/\w+\s/).should eq("is ")
     s.offset.should eq(5)
+    s.check('i').should eq("i")
+    s.offset.should eq(5)
+    s.check("is ").should eq("is ")
+    s.offset.should eq(5)
   end
 
   it "returns nil if it can't match from the offset" do
     s = StringScanner.new("test string")
     s.check(/\d+/).should be_nil
+    s.check('0').should be_nil
+    s.check("01").should be_nil
   end
 end
 
@@ -104,14 +126,24 @@ describe StringScanner, "#check_until" do
     s = StringScanner.new("test string")
     s.check_until(/tr/).should eq("test str")
     s.offset.should eq(0)
+    s.check_until('r').should eq("test str")
+    s.offset.should eq(0)
+    s.check_until("tr").should eq("test str")
+    s.offset.should eq(0)
     s.check_until(/g/).should eq("test string")
     s.offset.should eq(0)
+    s.check_until('g').should eq("test string")
+    s.offset.should eq(0)
+    s.check_until("ng").should eq("test string")
+    s.offset.should eq(0)
   end
 
   it "returns nil if it can't match from the offset" do
     s = StringScanner.new("test string")
     s.offset = 8
     s.check_until(/tr/).should be_nil
+    s.check_until('r').should be_nil
+    s.check_until("tr").should be_nil
   end
 end
 
@@ -140,23 +172,47 @@ describe StringScanner, "#[]" do
     s["wday"].should eq("Fri")
     s["month"].should eq("Dec")
     s["day"].should eq("12")
+
+    s.scan(' ').should eq(" ")
+    s[0].should eq(" ")
+    s.scan("1975").should eq("1975")
+    s[0].should eq("1975")
   end
 
   it "raises when there is no last match" do
     s = StringScanner.new("Fri Dec 12 1975 14:39")
+
     s.scan(/this is not there/)
+    expect_raises(Exception, "Nil assertion failed") { s[0] }
 
+    s.scan('t')
+    expect_raises(Exception, "Nil assertion failed") { s[0] }
+
+    s.scan("this is not there")
     expect_raises(Exception, "Nil assertion failed") { s[0] }
   end
 
   it "raises when there is no subgroup" do
     s = StringScanner.new("Fri Dec 12 1975 14:39")
     regex = /(?<wday>\w+) (?<month>\w+) (?<day>\d+)/
+
     s.scan(regex)
 
     s[0].should_not be_nil
     expect_raises(IndexError) { s[5] }
     expect_raises(KeyError, "Capture group 'something' does not exist") { s["something"] }
+
+    s.scan(' ')
+
+    s[0].should_not be_nil
+    expect_raises(IndexError) { s[1] }
+    expect_raises(KeyError, "Capture group 'something' does not exist") { s["something"] }
+
+    s.scan("1975")
+
+    s[0].should_not be_nil
+    expect_raises(IndexError) { s[1] }
+    expect_raises(KeyError, "Capture group 'something' does not exist") { s["something"] }
   end
 end
 
@@ -173,6 +229,11 @@ describe StringScanner, "#[]?" do
     s["wday"]?.should eq("Fri")
     s["month"]?.should eq("Dec")
     s["day"]?.should eq("12")
+
+    s.scan(' ').should eq(" ")
+    s[0]?.should eq(" ")
+    s.scan("1975").should eq("1975")
+    s[0]?.should eq("1975")
   end
 
   it "returns nil when there is no last match" do
@@ -180,15 +241,34 @@ describe StringScanner, "#[]?" do
     s.scan(/this is not there/)
 
     s[0]?.should be_nil
+
+    s.scan('t')
+    s[0]?.should be_nil
+
+    s.scan("this is not there")
+    s[0]?.should be_nil
   end
 
   it "raises when there is no subgroup" do
     s = StringScanner.new("Fri Dec 12 1975 14:39")
+
     s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+)/)
 
-    s[0].should_not be_nil
+    s[0]?.should_not be_nil
     s[5]?.should be_nil
     s["something"]?.should be_nil
+
+    s.scan(' ')
+
+    s[0]?.should_not be_nil
+    s[1]?.should be_nil
+    s["something"]?.should be_nil
+
+    s.scan("1975")
+
+    s[0]?.should_not be_nil
+    s[1]?.should be_nil
+    s["something"]?.should be_nil
   end
 end
 
diff --git a/src/string_scanner.cr b/src/string_scanner.cr
index a18a45f35cfc..c8977f034722 100644
--- a/src/string_scanner.cr
+++ b/src/string_scanner.cr
@@ -61,7 +61,7 @@
 # * `#inspect`
 # * `#string`
 class StringScanner
-  @last_match : Regex::MatchData?
+  @last_match : Regex::MatchData | StringMatchData | Nil
 
   def initialize(@str : String)
     @byte_offset = 0
@@ -86,15 +86,27 @@ class StringScanner
   # require "string_scanner"
   #
   # s = StringScanner.new("test string")
-  # s.scan(/\w+/)   # => "test"
-  # s.scan(/\w+/)   # => nil
-  # s.scan(/\s\w+/) # => " string"
-  # s.scan(/.*/)    # => ""
+  # s.scan(/\w+/)  # => "test"
+  # s.scan(/\w+/)  # => nil
+  # s.scan(/\s\w/) # => " s"
+  # s.scan('t')    # => "t"
+  # s.scan("ring") # => "ring"
+  # s.scan(/.*/)   # => ""
   # ```
   def scan(pattern : Regex, *, options : Regex::MatchOptions = Regex::MatchOptions::None) : String?
     match(pattern, advance: true, options: options | Regex::MatchOptions::ANCHORED)
   end
 
+  # :ditto:
+  def scan(pattern : String) : String?
+    match(pattern, advance: true, anchored: true)
+  end
+
+  # :ditto:
+  def scan(pattern : Char) : String?
+    match(pattern, advance: true, anchored: true)
+  end
+
   # Scans the string _until_ the *pattern* is matched. Returns the substring up
   # to and including the end of the match, the last match is saved, and
   # advances the scan offset. Returns `nil` if no match.
@@ -103,15 +115,26 @@ class StringScanner
   # require "string_scanner"
   #
   # s = StringScanner.new("test string")
-  # s.scan_until(/tr/) # => "test str"
-  # s.scan_until(/tr/) # => nil
-  # s.scan_until(/g/)  # => "ing"
+  # s.scan_until(/ s/) # => "test s"
+  # s.scan_until(/ s/) # => nil
+  # s.scan_until('r')  # => "tr"
+  # s.scan_until("ng") # => "ing"
   # ```
   def scan_until(pattern : Regex, *, options : Regex::MatchOptions = Regex::MatchOptions::None) : String?
     match(pattern, advance: true, options: options)
   end
 
-  private def match(pattern, advance = true, options = Regex::MatchOptions::ANCHORED)
+  # :ditto:
+  def scan_until(pattern : String) : String?
+    match(pattern, advance: true, anchored: false)
+  end
+
+  # :ditto:
+  def scan_until(pattern : Char) : String?
+    match(pattern, advance: true, anchored: false)
+  end
+
+  private def match(pattern : Regex, advance = true, options = Regex::MatchOptions::ANCHORED)
     match = pattern.match_at_byte_index(@str, @byte_offset, options)
     @last_match = match
     if match
@@ -125,6 +148,44 @@ class StringScanner
     end
   end
 
+  # Matches a literal `String` or `Char` *pattern* at the scan offset.
+  #
+  # With *anchored* the pattern must begin exactly at the offset; without
+  # it the first occurrence at or after the offset is looked up. Returns
+  # the text from the offset through the end of the match, or `nil` when
+  # there is no match. The offset moves only if *advance* is true.
+  #
+  # The last match always holds the pattern text alone, never the text
+  # skipped before it. This mirrors the `Regex` overload, which stores the
+  # `Regex::MatchData` of the pattern match itself.
+  private def match(pattern : String | Char, advance = true, anchored = true)
+    @last_match = nil
+    if pattern.bytesize > @str.bytesize - @byte_offset
+      # the pattern is longer than what is left to scan
+      nil
+    elsif anchored
+      i = 0
+      # check string starts with string or char
+      unsafe_ptr = @str.to_unsafe + @byte_offset
+      pattern.each_byte do |byte|
+        return nil unless unsafe_ptr[i] == byte
+        i += 1
+      end
+      # ok, it starts
+      result = pattern.to_s
+      @last_match = StringMatchData.new(result)
+      @byte_offset += pattern.bytesize if advance
+      result
+    elsif (found = @str.byte_index(pattern, @byte_offset))
+      finish = found + pattern.bytesize
+      result = @str.byte_slice(@byte_offset, finish - @byte_offset)
+      @byte_offset = finish if advance
+      # save only the pattern, not the prefix that was scanned over
+      @last_match = StringMatchData.new(pattern.to_s)
+      result
+    end
+  end
+
   # Attempts to skip over the given *pattern* beginning with the scan offset.
   # In other words, the pattern is not anchored to the current scan offset.
   #
@@ -139,6 +200,18 @@ class StringScanner
     match.size if match
   end
 
+  # :ditto:
+  def skip(pattern : String) : Int32?
+    match = scan(pattern)
+    match.size if match
+  end
+
+  # :ditto:
+  def skip(pattern : Char) : Int32?
+    match = scan(pattern)
+    match.size if match
+  end
+
   # Attempts to skip _until_ the given *pattern* is found after the scan
   # offset. In other words, the pattern is not anchored to the current scan
   # offset.
@@ -155,6 +228,18 @@ class StringScanner
     match.size if match
   end
 
+  # :ditto:
+  def skip_until(pattern : String) : Int32?
+    match = scan_until(pattern)
+    match.size if match
+  end
+
+  # :ditto:
+  def skip_until(pattern : Char) : Int32?
+    match = scan_until(pattern)
+    match.size if match
+  end
+
   # Returns the value that `#scan` would return, without advancing the scan
   # offset. The last match is still saved, however.
   #
@@ -170,6 +255,16 @@ class StringScanner
     match(pattern, advance: false, options: options | Regex::MatchOptions::ANCHORED)
   end
 
+  # :ditto:
+  def check(pattern : String) : String?
+    match(pattern, advance: false, anchored: true)
+  end
+
+  # :ditto:
+  def check(pattern : Char) : String?
+    match(pattern, advance: false, anchored: true)
+  end
+
   # Returns the value that `#scan_until` would return, without advancing the
   # scan offset. The last match is still saved, however.
   #
@@ -184,6 +279,16 @@ class StringScanner
     match(pattern, advance: false, options: options)
   end
 
+  # :ditto:
+  def check_until(pattern : String) : String?
+    match(pattern, advance: false, anchored: false)
+  end
+
+  # :ditto:
+  def check_until(pattern : Char) : String?
+    match(pattern, advance: false, anchored: false)
+  end
+
   # Returns the *n*-th subgroup in the most recent match.
   #
   # Raises an exception if there was no last match or if there is no subgroup.
@@ -293,4 +386,53 @@ class StringScanner
     start = Math.min(Math.max(offset - 2, 0), Math.max(0, @str.size - 5))
     io << " \"" << @str[start, 5] << "\" >"
   end
+
+  # :nodoc:
+  #
+  # Stand-in for `Regex::MatchData` that is saved as the last match after a
+  # successful `String` or `Char` scan. It has exactly one group, index 0,
+  # which holds the matched text, and no named groups. A value type, like
+  # `Regex::MatchData`, so saving a match needs no extra heap allocation.
+  struct StringMatchData
+    def initialize(@str : String)
+    end
+
+    # Returns the matched text when *n* is `0` or `-1`, otherwise `nil`.
+    def []?(n : Int) : String?
+      return unless n == 0 || n == -1
+      @str
+    end
+
+    # Returns the matched text when *n* is `0` or `-1`, otherwise raises
+    # `IndexError`.
+    def [](n : Int) : String
+      self[n]? || raise IndexError.new("Invalid capture group index: #{n}")
+    end
+
+    # Always returns `nil`: there are no named groups.
+    def []?(group_name : String) : String?
+      nil
+    end
+
+    # Always raises `KeyError`: there are no named groups.
+    def [](group_name : String) : String
+      raise KeyError.new("Capture group '#{group_name}' does not exist")
+    end
+
+    # Returns the groups selected by *range* from the single-element group
+    # list: `[matched text]` or an empty array. Returns `nil` when the
+    # `Indexable` range helpers reject *range*.
+    def []?(range : Range) : Array(String)?
+      start, count = Indexable.range_to_index_and_count(range, 1) || return nil
+      start, count = Indexable.normalize_start_and_count(start, count, 1) { return nil }
+      return [] of String if count == 0
+      [@str]
+    end
+
+    # Same as `#[]?` with a range, but raises `IndexError` instead of
+    # returning `nil`.
+    def [](range : Range) : Array(String)
+      self[range]? || raise IndexError.new
+    end
+  end
 end