Skip to content

Commit

Permalink
Regexp: support duplicated named captures
Browse files Browse the repository at this point in the history
  • Loading branch information
makenowjust committed Sep 30, 2017
1 parent 7e3b642 commit 84725bd
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 12 deletions.
43 changes: 43 additions & 0 deletions spec/std/match_data_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,21 @@ describe "Regex::MatchData" do
$~["g2"].should eq("ba")
end

it "captures duplicated named group" do
# Both alternatives are named g1 and the enclosing group may repeat, so each
# input checks which of the same-named captures `$~["g1"]` reports.
re = /(?:(?<g1>foo)|(?<g1>bar))*/
("foo" =~ re).should eq(0)
$~["g1"].should eq("foo")

("bar" =~ re).should eq(0)
$~["g1"].should eq("bar")

# When both alternatives took part, the capture further right is expected.
("foobar" =~ re).should eq(0)
$~["g1"].should eq("bar")

("barfoo" =~ re).should eq(0)
$~["g1"].should eq("foo")
end

it "can use negative index" do
"foo" =~ /(f)(oo)/
$~[-1].should eq("oo")
Expand Down Expand Up @@ -98,6 +113,21 @@ describe "Regex::MatchData" do
$~["g2"]?.should eq("ba")
end

it "captures duplicated named group" do
# Same scenarios as the raising accessor, exercised through the nilable
# `$~["g1"]?` lookup.
re = /(?:(?<g1>foo)|(?<g1>bar))*/
("foo" =~ re).should eq(0)
$~["g1"]?.should eq("foo")

("bar" =~ re).should eq(0)
$~["g1"]?.should eq("bar")

# When both alternatives took part, the capture further right is expected.
("foobar" =~ re).should eq(0)
$~["g1"]?.should eq("bar")

("barfoo" =~ re).should eq(0)
$~["g1"]?.should eq("foo")
end

it "can use negative index" do
"foo" =~ /(b)?(f)(oo)/
$~[-1]?.should eq("oo")
Expand Down Expand Up @@ -167,6 +197,10 @@ describe "Regex::MatchData" do
"Crystal".match(/(?<name1>Cr)(?<name2>s)?/).not_nil!.named_captures.should eq({"name1" => "Cr", "name2" => nil})
"Crystal".match(/(Cr)(?<name1>s)?(t)?(?<name2>al)?/).not_nil!.named_captures.should eq({"name1" => nil, "name2" => nil})
end

it "gets a hash of named captures with duplicated name" do
# Both groups called "name" match here; the expected hash holds a single
# entry for that name, carrying the later capture ("s").
"Crystal".match(/(?<name>Cr)y(?<name>s)/).not_nil!.named_captures.should eq({"name" => "s"})
end
end

describe "#to_a" do
Expand Down Expand Up @@ -211,6 +245,15 @@ describe "Regex::MatchData" do
"name2" => "al",
})
end

it "converts into a hash with duplicated names" do
# Groups 2 and 4 share the name "name": the expected hash has one "name"
# entry ("al") and no integer keys for those two groups, while the unnamed
# groups keep their numeric keys.
"Crystal".match(/(Cr)(?<name>s)?(yst)?(?<name>al)?/).not_nil!.to_h.should eq({
0 => "Crystal",
1 => "Cr",
"name" => "al",
3 => "yst",
})
end
end

it "can check equality" do
Expand Down
6 changes: 4 additions & 2 deletions src/regex.cr
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ class Regex
UTF_8 = 0x00000800
# :nodoc:
NO_UTF8_CHECK = 0x00002000
# :nodoc:
DUPNAMES = 0x00080000
end

# Return a `Regex::Options` representing the optional flags applied to this `Regex`.
Expand Down Expand Up @@ -240,7 +242,7 @@ class Regex
source = source.gsub('\u{0}', "\\0")
@source = source

@re = LibPCRE.compile(@source, (options | Options::UTF_8 | Options::NO_UTF8_CHECK), out errptr, out erroffset, nil)
@re = LibPCRE.compile(@source, (options | Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil)
raise ArgumentError.new("#{String.new(errptr)} at #{erroffset}") if @re.null?
@extra = LibPCRE.study(@re, 0, out studyerrptr)
raise ArgumentError.new("#{String.new(studyerrptr)}") if @extra.null? && studyerrptr
Expand All @@ -255,7 +257,7 @@ class Regex
# Regex.error?("(foo|bar") # => "missing ) at 8"
# ```
def self.error?(source)
re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK), out errptr, out erroffset, nil)
re = LibPCRE.compile(source, (Options::UTF_8 | Options::NO_UTF8_CHECK | Options::DUPNAMES), out errptr, out erroffset, nil)
if re
nil
else
Expand Down
1 change: 1 addition & 0 deletions src/regex/lib_pcre.cr
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ lib LibPCRE
ovector : Int*, ovecsize : Int) : Int32
fun full_info = pcre_fullinfo(code : Pcre, extra : PcreExtra, what : Int, where : Int32*) : Int
fun get_stringnumber = pcre_get_stringnumber(code : Pcre, string_name : UInt8*) : Int
fun get_stringtable_entries = pcre_get_stringtable_entries(code : Pcre, name : UInt8*, first : UInt8**, last : UInt8**) : Int

INFO_CAPTURECOUNT = 2
INFO_NAMEENTRYSIZE = 7
Expand Down
59 changes: 49 additions & 10 deletions src/regex/match_data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,24 @@ class Regex
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"]? # => "ys"
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"]? # => nil
# ```
#
# When there are capture groups having the same name, it returns the last
# matched capture group.
#
# ```
# "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"]? # => "al"
# ```
def []?(group_name : String)
  # Several groups may share *group_name*. Walk every group registered under
  # that name and keep the one whose match starts furthest to the right.
  # `max_start` begins at -1, so a group whose recorded start offset is not
  # greater than -1 (PCRE reports -1 for groups that did not participate)
  # is never selected, and `match` stays nil when nothing matched or the
  # name is unknown.
  max_start = -1
  match = nil
  named_capture_number(group_name) do |n|
    # The start offset of group n is stored at index n * 2 of the ovector.
    start = @ovector[n * 2]
    if start > max_start
      max_start = start
      match = self[n]?
    end
  end
  match
end

# Returns the match of the capture group named by *group_name*, or
Expand All @@ -170,19 +184,40 @@ class Regex
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ok"] # => "ys"
# "Crystal".match(/r(?<ok>ys)/).not_nil!["ng"] # raises KeyError
# ```
#
# When there are capture groups having the same name, it returns the last
# matched capture group.
#
# ```
# "Crystal".match(/(?<ok>Cr).*(?<ok>al)/).not_nil!["ok"] # => "al"
# ```
def [](group_name : String)
  match = self[group_name]?
  unless match
    # The block runs only if at least one group is registered under this
    # name, i.e. the group exists but produced no match.
    named_capture_number(group_name) do
      raise KeyError.new("Capture group '#{group_name}' was not matched")
    end
    # No group with this name was yielded, so the name is unknown.
    raise KeyError.new("Capture group '#{group_name}' does not exist")
  end
  match
end

private def named_capture_number(group_name)
  # Ask PCRE for the span of name-table entries registered under
  # *group_name*; a negative result means there is nothing to yield.
  entry = Pointer(UInt8).null
  final_entry = Pointer(UInt8).null
  entry_size = LibPCRE.get_stringtable_entries(@code, group_name, pointerof(entry), pointerof(final_entry))
  return if entry_size < 0

  # Yields the capture number of each entry from the first to the last one
  # (inclusive), stepping by the entry size returned above.
  until entry > final_entry
    # The group number is read from the entry's first two bytes, most
    # significant byte first.
    high_byte = entry[0].to_u16
    low_byte = entry[1].to_u16
    group_number = (high_byte << 8) | low_byte
    yield group_number

    entry += entry_size
  end

  nil
end

# Returns the part of the original string before the match. If the match
# starts at the start of the string, returns the empty string.
#
Expand Down Expand Up @@ -243,8 +278,8 @@ class Regex

caps = {} of String => String?
(1...size).each do |i|
if name = name_table[i]?
caps[name] = self[i]?
if (name = name_table[i]?) && !caps.has_key?(name)
caps[name] = self[name]?
end
end

Expand Down Expand Up @@ -282,7 +317,11 @@ class Regex

hash = {} of (String | Int32) => String?
(0...size).each do |i|
hash[name_table.fetch(i, i)] = self[i]?
if name = name_table[i]?
hash[name] = self[name]? unless hash.has_key?(name)
else
hash[i] = self[i]?
end
end

hash
Expand Down

0 comments on commit 84725bd

Please sign in to comment.