diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
index 0a57ee9034a9..2bbc63f7e18e 100644
--- a/spec/std/string_spec.cr
+++ b/spec/std/string_spec.cr
@@ -1367,6 +1367,36 @@ describe "String" do
       "foo foo".byte_index("oo", 2).should eq(5)
       "こんにちは世界".byte_index("ちは").should eq(9)
     end
+
+    it "gets byte index of regex" do
+      str = "0123x"
+      pattern = /x/
+
+      str.byte_index(pattern).should eq(4)
+      str.byte_index(pattern, offset: 4).should eq(4)
+      str.byte_index(pattern, offset: 5).should be_nil
+      str.byte_index(pattern, offset: -1).should eq(4)
+      str.byte_index(pattern, offset: -6).should be_nil
+      str.byte_index(/y/).should be_nil
+
+      str = "012abc678"
+      pattern = /[abc]/
+
+      str.byte_index(pattern).should eq(3)
+      str.byte_index(pattern, offset: 2).should eq(3)
+      str.byte_index(pattern, offset: 5).should eq(5)
+      str.byte_index(pattern, offset: -4).should eq(5)
+      str.byte_index(pattern, offset: -1).should be_nil
+      str.byte_index(/y/).should be_nil
+
+      # Multibyte input: the result is a byte offset, not a character index.
+      str = "こんにちは世界"
+
+      str.byte_index(/ちは/).should eq(9)
+      str.byte_index(/世/, offset: 9).should eq(15)
+      str.byte_index(/世/, offset: -6).should eq(15)
+      str.byte_index(/こ/, offset: 3).should be_nil
+    end
   end
 
   describe "includes?" do
diff --git a/src/string.cr b/src/string.cr
index 4b52d08c7426..d47e87638976 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -3886,6 +3886,31 @@ class String
     nil
   end
 
+  # Returns the byte index of the regex *pattern* in the string, or `nil` if the pattern does not match.
+  # If *offset* is present, it defines the byte position to start the search.
+  #
+  # Negative *offset* can be used to start the search from the end of the string.
+  # Returns `nil` if a negative *offset* reaches before the start of the string.
+  #
+  # *options* are passed on to the regex match.
+  #
+  # ```
+  # "hello world".byte_index(/o/)             # => 4
+  # "hello world".byte_index(/o/, offset: 4)  # => 4
+  # "hello world".byte_index(/o/, offset: 5)  # => 7
+  # "hello world".byte_index(/o/, offset: -1) # => nil
+  # "hello world".byte_index(/y/)             # => nil
+  # "こんにちは世界".byte_index(/ちは/)              # => 9
+  # ```
+  def byte_index(pattern : Regex, offset = 0, options : Regex::MatchOptions = Regex::MatchOptions::None) : Int32?
+    offset += bytesize if offset < 0
+    return if offset < 0
+
+    if match = pattern.match_at_byte_index(self, offset, options: options)
+      match.byte_begin
+    end
+  end
+
   # Returns the byte index of a char index, or `nil` if out of bounds.
# # It is valid to pass `#size` to *index*, and in this case the answer