JuliaLang · ivarne · Jul 2, 2015 · Jun 3, 2015 · Jun 3, 2015 · Jun 24, 2015
diff --git a/base/pcre.jl b/base/pcre.jl
@@ -140,4 +140,23 @@ function substring_number_from_name(re, name)
         (Ptr{Void}, Cstring), re, name)
 end
 
+function capture_names(re)
+    # Maps each named group's capture index to its name, read from the name table.
+    entry_count = info(re, INFO_NAMECOUNT, UInt32)
+    entry_size = info(re, INFO_NAMEENTRYSIZE, UInt32)
+    table = info(re, INFO_NAMETABLE, Ptr{UInt8})
+    index_to_name = Dict{Int, ASCIIString}()
+    for entry = 1:entry_count
+        # 1-based position of this entry's first byte within the table.
+        pos = (entry-1)*entry_size + 1
+        # Bytes 1-2 of an entry hold the group index, most significant byte first.
+        msb = UInt16(unsafe_load(table, pos))
+        lsb = UInt16(unsafe_load(table, pos+1))
+        # The null-terminated group name directly follows those two bytes.
+        group_name = bytestring(table+pos+1)
+        index_to_name[(msb << 8) | lsb] = group_name
+    end
+    index_to_name
+end
+
 end # module
diff --git a/base/regex.jl b/base/regex.jl
@@ -15,6 +15,8 @@ type Regex
     extra::Ptr{Void}
     ovec::Vector{Csize_t}
     match_data::Ptr{Void}
+    capture_name_to_idx::Dict{Symbol, Int}
+    idx_to_capture_name::Dict{Int, Symbol}
 
 
     function Regex(pattern::AbstractString, compile_options::Integer,
@@ -29,7 +31,8 @@ type Regex
             throw(ArgumentError("invalid regex match options: $match_options"))
         end
         re = compile(new(pattern, compile_options, match_options, C_NULL,
-                         C_NULL, Csize_t[], C_NULL))
+                         C_NULL, Csize_t[], C_NULL,
+                         Dict{Symbol, Int}(), Dict{Int, Symbol}()))
         finalizer(re, re->begin
                               re.regex == C_NULL || PCRE.free_re(re.regex)
                               re.match_data == C_NULL || PCRE.free_match_data(re.match_data)
@@ -57,6 +60,10 @@ function compile(regex::Regex)
         PCRE.jit_compile(regex.regex)
         regex.match_data = PCRE.create_match_data(regex.regex)
         regex.ovec = PCRE.get_ovec(regex.match_data)
+        for (idx, name) in PCRE.capture_names(regex.regex)
+            regex.capture_name_to_idx[Symbol(name)] = idx
+            regex.idx_to_capture_name[idx] = Symbol(name)
+        end
     end
     regex
 end
@@ -92,6 +99,7 @@ immutable RegexMatch
     captures::Vector{Union(Void,SubString{UTF8String})}
     offset::Int
     offsets::Vector{Int}
+    regex::Regex
 end
 
 function show(io::IO, m::RegexMatch)
@@ -100,7 +108,10 @@ function show(io::IO, m::RegexMatch)
     if !isempty(m.captures)
         print(io, ", ")
         for i = 1:length(m.captures)
-            print(io, i, "=")
+            # If the capture group is named, show the name.
+            # Otherwise show its index.
+            capture_name = get(m.regex.idx_to_capture_name, i, i)
+            print(io, capture_name, "=")
             show(io, m.captures[i])
             if i < length(m.captures)
                 print(io, ", ")
@@ -110,6 +121,13 @@ function show(io::IO, m::RegexMatch)
     print(io, ")")
 end
 
+# Capture group extraction: by position (any Integer), or by name (Symbol or string)
+getindex(m::RegexMatch, idx::Integer) = m.captures[idx]
+function getindex(m::RegexMatch, name::Symbol)
+    m[m.regex.capture_name_to_idx[name]]
+end
+getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)]
+
 function ismatch(r::Regex, s::AbstractString, offset::Integer=0)
     compile(r)
     return PCRE.exec(r.regex, bytestring(s), offset, r.match_options,
@@ -136,7 +154,7 @@ function match(re::Regex, str::UTF8String, idx::Integer, add_opts::UInt32=UInt32
     cap = Union(Void,SubString{UTF8String})[
             ovec[2i+1] == PCRE.UNSET ? nothing : SubString(str, ovec[2i+1]+1, ovec[2i+2]) for i=1:n ]
     off = Int[ ovec[2i+1]+1 for i=1:n ]
-    RegexMatch(mat, cap, ovec[1]+1, off)
+    RegexMatch(mat, cap, ovec[1]+1, off, re)
 end
 
 match(re::Regex, str::Union(ByteString,SubString), idx::Integer, add_opts::UInt32=UInt32(0)) =

diff --git a/test/regex.jl b/test/regex.jl
@@ -37,3 +37,7 @@ show(buf, r"")
 # regex match / search string must be a ByteString
 @test_throws ArgumentError match(r"test", utf32("this is a test"))
 @test_throws ArgumentError search(utf32("this is a test"), r"test")
+
+# Named subpatterns: captures can be indexed by Symbol name, position, or String name
+m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
+@test (m[:a], m[2], m["b"]) == ("x", "y", "z")