diff --git a/spec/std/match_data_spec.cr b/spec/std/match_data_spec.cr index 372039fa9400..dd59c2dc1b83 100644 --- a/spec/std/match_data_spec.cr +++ b/spec/std/match_data_spec.cr @@ -48,6 +48,21 @@ describe "Regex::MatchData" do $~["g2"].should eq("ba") end + it "captures duplicated named group" do + re = /(?:(?foo)|(?bar))*/ + ("foo" =~ re).should eq(0) + $~["g1"].should eq("foo") + + ("bar" =~ re).should eq(0) + $~["g1"].should eq("bar") + + ("foobar" =~ re).should eq(0) + $~["g1"].should eq("bar") + + ("barfoo" =~ re).should eq(0) + $~["g1"].should eq("foo") + end + it "can use negative index" do "foo" =~ /(f)(oo)/ $~[-1].should eq("oo") @@ -98,6 +113,21 @@ describe "Regex::MatchData" do $~["g2"]?.should eq("ba") end + it "captures duplicated named group" do + re = /(?:(?foo)|(?bar))*/ + ("foo" =~ re).should eq(0) + $~["g1"]?.should eq("foo") + + ("bar" =~ re).should eq(0) + $~["g1"]?.should eq("bar") + + ("foobar" =~ re).should eq(0) + $~["g1"]?.should eq("bar") + + ("barfoo" =~ re).should eq(0) + $~["g1"]?.should eq("foo") + end + it "can use negative index" do "foo" =~ /(b)?(f)(oo)/ $~[-1]?.should eq("oo") @@ -167,6 +197,10 @@ describe "Regex::MatchData" do "Crystal".match(/(?Cr)(?s)?/).not_nil!.named_captures.should eq({"name1" => "Cr", "name2" => nil}) "Crystal".match(/(Cr)(?s)?(t)?(?al)?/).not_nil!.named_captures.should eq({"name1" => nil, "name2" => nil}) end + + it "gets a hash of named captures with duplicated name" do + "Crystal".match(/(?Cr)y(?s)/).not_nil!.named_captures.should eq({"name" => "s"}) + end end describe "#to_a" do @@ -211,6 +245,15 @@ describe "Regex::MatchData" do "name2" => "al", }) end + + it "converts into a hash with duplicated names" do + "Crystal".match(/(Cr)(?s)?(yst)?(?al)?/).not_nil!.to_h.should eq({ + 0 => "Crystal", + 1 => "Cr", + "name" => "al", + 3 => "yst", + }) + end end it "can check equality" do diff --git a/src/regex.cr b/src/regex.cr index c87d1ba24e59..046c1c1346b6 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ 
-210,6 +210,8 @@ class Regex UTF_8 = 0x00000800 # :nodoc: NO_UTF8_CHECK = 0x00002000 + # :nodoc: + DUPNAMES = 0x00080000 end # Return a `Regex::Options` representing the optional flags applied to this `Regex`. @@ -240,7 +242,7 @@ class Regex source = source.gsub('\u{0}', "\\0") @source = source - @re = LibPCRE.compile(@source, (options | Options::UTF_8 | Options::NO_UTF8_CHECK), out errptr, out erroffset, nil) + @re = LibPCRE.compile(@source, (options | Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil) raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null? @extra = LibPCRE.study(@re, 0, out studyerrptr) raise ArgumentError.new("#{String.new(studyerrptr)}") if @extra.null? && studyerrptr @@ -255,7 +257,7 @@ class Regex # Regex.error?("(foo|bar") # => "missing ) at 8" # ``` def self.error?(source) - re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK), out errptr, out erroffset, nil) + re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil) if re nil else diff --git a/src/regex/lib_pcre.cr b/src/regex/lib_pcre.cr index 028341a6bae4..156e82dd73f8 100644 --- a/src/regex/lib_pcre.cr +++ b/src/regex/lib_pcre.cr @@ -10,6 +10,7 @@ lib LibPCRE ovector : Int*, ovecsize : Int) : Int32 fun full_info = pcre_fullinfo(code : Pcre, extra : PcreExtra, what : Int, where : Int32*) : Int fun get_stringnumber = pcre_get_stringnumber(code : Pcre, string_name : UInt8*) : Int + fun get_stringtable_entries = pcre_get_stringtable_entries(code : Pcre, name : UInt8*, first : UInt8**, last : UInt8**) : Int INFO_CAPTURECOUNT = 2 INFO_NAMEENTRYSIZE = 7 diff --git a/src/regex/match_data.cr b/src/regex/match_data.cr index 847fea85f9a8..ed23457417bc 100644 --- a/src/regex/match_data.cr +++ b/src/regex/match_data.cr @@ -157,10 +157,24 @@ class Regex # "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"]? # => "ys" # "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"]? 
# => nil # ``` + # + # When several capture groups share the same name, it returns the one + # that matched last. + # + # ``` + # "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"]? # => "al" + # ``` def []?(group_name : String) - ret = LibPCRE.get_stringnumber(@code, group_name) - return if ret < 0 - self[ret]? + max_start = -1 + match = nil + named_capture_number(group_name) do |n| + start = @ovector[n * 2] + if start > max_start + max_start = start + match = self[n]? + end + end + match end # Returns the match of the capture group named by *group_name*, or @@ -170,19 +184,40 @@ class Regex # "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"] # => "ys" # "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"] # raises KeyError # ``` + # + # When several capture groups share the same name, it returns the one + # that matched last. + # + # ``` + # "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"] # => "al" + # ``` def [](group_name : String) match = self[group_name]? unless match - ret = LibPCRE.get_stringnumber(@code, group_name) - if ret < 0 - raise KeyError.new("Capture group '#{group_name}' does not exist") - else + named_capture_number(group_name) do raise KeyError.new("Capture group '#{group_name}' was not matched") end + raise KeyError.new("Capture group '#{group_name}' does not exist") end match end + private def named_capture_number(group_name) + first = Pointer(UInt8).null + last = Pointer(UInt8).null + name_entry_size = LibPCRE.get_stringtable_entries(@code, group_name, pointerof(first), pointerof(last)) + return if name_entry_size < 0 + + while first <= last + capture_number = (first[0].to_u16 << 8) | first[1].to_u16 + yield capture_number + + first += name_entry_size + end + + nil + end + # Returns the part of the original string before the match. If the match # starts at the start of the string, returns the empty string. # @@ -243,8 +278,8 @@ class Regex caps = {} of String => String? (1...size).each do |i| - if name = name_table[i]? - caps[name] = self[i]? 
+ if (name = name_table[i]?) && !caps.has_key?(name) + caps[name] = self[name]? end end @@ -282,7 +317,11 @@ class Regex hash = {} of (String | Int32) => String? (0...size).each do |i| - hash[name_table.fetch(i, i)] = self[i]? + if name = name_table[i]? + hash[name] = self[name]? unless hash.has_key?(name) + else + hash[i] = self[i]? + end end hash