Skip to content

Commit

Permalink
Optimize String#rchop?()
Browse files Browse the repository at this point in the history
  • Loading branch information
HertzDevil committed Nov 8, 2024
1 parent d9cb484 commit 161946f
Showing 1 changed file with 58 additions and 5 deletions.
63 changes: 58 additions & 5 deletions src/string.cr
Original file line number Diff line number Diff line change
Expand Up @@ -1798,11 +1798,7 @@ class String
# Returns a new `String` with the last character removed, or `nil` when this
# string is empty.
#
# The width of the final character is obtained from `char_bytesize_before`,
# which looks only at the bytes at the end of the buffer. When those bytes do
# not form a valid sequence it reports a width of 1, so a single byte is
# dropped in that case.
def rchop? : String?
  return if empty?

  # Number of bytes taken by the character that ends at the end of the buffer.
  last_char_bytesize = char_bytesize_before(bytesize)

  # A positive `@length` is forwarded decremented by one; otherwise 0 is passed.
  # NOTE(review): 0 presumably tells `unsafe_byte_slice_string` that the
  # character count is not known yet — confirm against that helper.
  remaining_length = @length > 0 ? @length - 1 : 0

  unsafe_byte_slice_string(0, bytesize - last_char_bytesize, remaining_length)
end

# Returns a new `String` with *suffix* removed from the end of the string if possible, else returns `nil`.
Expand Down Expand Up @@ -5482,6 +5478,63 @@ class String
1 # Invalid
end

# Instance-level convenience: applies `String.char_bytesize_before` to this
# string's own buffer (`to_unsafe`) at *byte_index* and returns its result.
protected def char_bytesize_before(byte_index)
  buffer = to_unsafe
  String.char_bytesize_before(buffer, byte_index)
end

# Returns how many bytes the character ending immediately before *byte_index*
# occupies in the buffer beginning at *start*.
#
# The bytes are examined backwards from `byte_index - 1` and matched against
# the 1- to 4-byte UTF-8 forms. The result is 0 when *byte_index* is not
# positive, and 1 whenever the trailing bytes do not form a valid sequence, so
# that exactly one byte is stepped over in that case.
#
# See also: `Char::Reader#decode_char_before`
protected def self.char_bytesize_before(start : Pointer(UInt8), byte_index : Int)
  return 0 if byte_index <= 0

  # back1..back4 are the bytes 1..4 positions before *byte_index*. Each one is
  # read only after the preceding guard has checked that *byte_index* is large
  # enough for that position not to fall before *start*.
  back1 = start[byte_index - 1]
  return 1 if back1 <= 0x7f # single-byte (ASCII) character

  # A final byte above 0xbf is not a continuation byte, and a continuation
  # byte needs at least one more byte in front of it.
  return 1 if back1 > 0xbf || byte_index < 2 # Invalid

  back2 = start[byte_index - 2]
  return 2 if 0xc2 <= back2 <= 0xdf # lead byte of a two-byte sequence

  # Anything other than another continuation byte here cannot start or extend
  # a longer sequence.
  return 1 if (back2 & 0xc0) != 0x80 || byte_index < 3 # Invalid

  back3 = start[byte_index - 3]
  if back3 & 0xf0 == 0xe0
    # Lead byte of a three-byte sequence. 0xe0 followed by a byte up to 0x9f
    # is an overlong encoding; 0xed followed by a byte from 0xa0 upwards
    # encodes a UTF-16 surrogate.
    overlong = back3 == 0xe0 && back2 <= 0x9f
    surrogate = back3 == 0xed && back2 >= 0xa0
    return 1 if overlong || surrogate # Invalid

    return 3
  end

  return 1 if (back3 & 0xc0) != 0x80 || byte_index < 4 # Invalid

  # Four-byte form: the accepted lead bytes depend on the second byte, which
  # excludes overlong encodings (0xf0 with 0x80..0x8f) and code points above
  # U+10FFFF (0xf4 with 0x90..0xbf).
  back4 = start[byte_index - 4]
  lead_in_range =
    if back3 <= 0x8f
      0xf1 <= back4 <= 0xf4
    else
      0xf0 <= back4 <= 0xf3
    end

  lead_in_range ? 4 : 1
end

# :nodoc:
def size_known? : Bool
@bytesize == 0 || @length > 0
Expand Down

0 comments on commit 161946f

Please sign in to comment.