Skip to content

Commit

Permalink
fix, optimize, add Regex option, expand tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vtjnash committed Jul 19, 2021
1 parent 1ea4953 commit af8e39a
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 92 deletions.
14 changes: 14 additions & 0 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,20 @@ function endswith(s::SubString, r::Regex)
return PCRE.exec_r(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED)
end

# Remove a leading match of the regex `prefix` from `s`.
# Always returns a `SubString` of `s`; when the pattern does not match at the
# very start of `s`, the result spans the whole string.
function chopprefix(s::AbstractString, prefix::Regex)
    # PCRE.ANCHORED is passed as an extra match option so that only a match
    # beginning exactly at `firstindex(s)` is accepted.
    found = match(prefix, s, firstindex(s), PCRE.ANCHORED)
    if found === nothing
        return SubString(s)
    end
    # The match starts at the first code unit, so the remainder begins right
    # after the matched text's last code unit.
    remainder_start = ncodeunits(found.match) + 1
    return SubString(s, remainder_start)
end

# Remove a trailing match of the regex `suffix` from `s`.
# Always returns a `SubString` of `s`; when the pattern does not match at the
# end of `s` (or matches only the empty string), the result spans the whole string.
function chopsuffix(s::AbstractString, suffix::Regex)
    # PCRE.ENDANCHORED is passed as an extra match option so that only a match
    # finishing exactly at the end of `s` is accepted.
    found = match(suffix, s, firstindex(s), PCRE.ENDANCHORED)
    if found === nothing || isempty(found.match)
        return SubString(s)
    end
    # `found.offset` is the index where the matched text starts; keep everything before it.
    last_kept = prevind(s, found.offset)
    return SubString(s, firstindex(s), last_kept)
end


"""
match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
Expand Down
75 changes: 58 additions & 17 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@ true
```
"""
function startswith(a::AbstractString, b::AbstractString)
    # Compare the two strings character by character from the front.
    # `iterate` yields `nothing` once a string is exhausted, otherwise a
    # `(character, next_state)` tuple, so no intermediate iterator wrappers
    # are needed.
    i, j = iterate(a), iterate(b)
    while true
        j === nothing && return true # ran out of prefix: success!
        i === nothing && return false # ran out of source: failure
        i[1] == j[1] || return false # mismatch: failure
        i, j = iterate(a, i[2]), iterate(b, j[2])
    end
end
function startswith(str::AbstractString, chars::Chars)
    # An empty string has no first character, so it never starts with any of `chars`.
    isempty(str) && return false
    # Membership test of the first character against `chars`.
    return (first(str)::AbstractChar) in chars
end

Expand All @@ -39,9 +44,14 @@ true
```
"""
function endswith(a::AbstractString, b::AbstractString)
    # Compare the two strings character by character from the back by
    # iterating over reversed views of each. `iterate` yields `nothing` once
    # a view is exhausted, otherwise a `(character, next_state)` tuple.
    a, b = Iterators.Reverse(a), Iterators.Reverse(b)
    i, j = iterate(a), iterate(b)
    while true
        j === nothing && return true # ran out of suffix: success!
        i === nothing && return false # ran out of source: failure
        i[1] == j[1] || return false # mismatch: failure
        i, j = iterate(a, i[2]), iterate(b, j[2])
    end
end
function endswith(str::AbstractString, chars::Chars)
    # An empty string has no last character, so it never ends with any of `chars`.
    isempty(str) && return false
    # Membership test of the last character against `chars`.
    return last(str) in chars
end

Expand All @@ -51,7 +61,7 @@ function startswith(a::Union{String, SubString{String}},
if ncodeunits(a) < cub
false
elseif _memcmp(a, b, sizeof(b)) == 0
nextind(a, cub) == cub + 1
nextind(a, cub) == cub + 1 # check that end of `b` doesn't match a partial character in `a`
else
false
end
Expand All @@ -64,7 +74,7 @@ function endswith(a::Union{String, SubString{String}},
if astart < 1
false
elseif GC.@preserve(a, _memcmp(pointer(a, astart), b, sizeof(b))) == 0
thisind(a, astart) == astart
thisind(a, astart) == astart # check that end of `b` doesn't match a partial character in `a`
else
false
end
Expand Down Expand Up @@ -196,14 +206,14 @@ end
# chop(s::AbstractString) = SubString(s, firstindex(s), prevind(s, lastindex(s)))

"""
chopprefix(s::AbstractString, prefix::AbstractString) -> SubString
chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex}) -> SubString
Remove the prefix `prefix` from `s`. If `s` does not start with `prefix`, a string equal to `s` is returned.
See also [`chopsuffix`](@ref).
!!! compat "Julia 1.7"
This function is available as of Julia 1.7.
!!! compat "Julia 1.8"
This function is available as of Julia 1.8.
# Examples
```jldoctest
Expand All @@ -215,22 +225,36 @@ julia> chopprefix("Hamburger", "hotdog")
```
"""
function chopprefix(s::AbstractString, prefix::AbstractString)
    # `rest` tracks the iteration state of `s` after the last character that
    # matched; it is used as the start index of the returned remainder.
    rest = firstindex(s)
    src, pre = iterate(s), iterate(prefix)
    while true
        if pre === nothing
            # Prefix fully consumed: it matched.
            # If `s` is consumed as well the two are equal, so nothing remains.
            src === nothing && return SubString(s, 1, 0)
            return @inbounds SubString(s, rest)
        end
        # Source ended before the prefix did: not a prefix, return `s` unchanged.
        src === nothing && return SubString(s)
        # Characters differ: not a prefix, return `s` unchanged.
        src[1] == pre[1] || return SubString(s)
        rest = src[2]
        src, pre = iterate(s, rest), iterate(prefix, pre[2])
    end
end

function chopprefix(s::Union{String, SubString{String}},
                    prefix::Union{String, SubString{String}})
    # Fast path for String-backed arguments: `startswith` decides whether the
    # prefix matches; if it does, the remainder begins at the code unit right
    # after the prefix's last code unit.
    if startswith(s, prefix)
        return SubString(s, 1 + ncodeunits(prefix))
    else
        # No match: return a view of the whole string.
        return SubString(s)
    end
end

"""
chopsuffix(s::AbstractString, suffix::AbstractString) -> SubString
chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex}) -> SubString
Remove the suffix `suffix` from `s`. If `s` does not end with `suffix`, a string equal to `s` is returned.
See also [`chopprefix`](@ref).
!!! compat "Julia 1.7"
This function is available as of Julia 1.7.
!!! compat "Julia 1.8"
This function is available as of Julia 1.8.
# Examples
```jldoctest
Expand All @@ -242,13 +266,30 @@ julia> chopsuffix("Hamburger", "hotdog")
```
"""
function chopsuffix(s::AbstractString, suffix::AbstractString)
    # Walk both strings from the back using reversed views.
    a, b = Iterators.Reverse(s), Iterators.Reverse(suffix)
    # `k` tracks the iteration state of the reversed `s` after the last
    # character that matched; it is used as the end index of the result.
    k = lastindex(s)
    i, j = iterate(a), iterate(b)
    while true
        j === nothing && i === nothing && return SubString(s, 1, 0) # s == suffix: empty result
        j === nothing && return @inbounds SubString(s, firstindex(s), k) # ran out of suffix: success!
        i === nothing && return SubString(s) # ran out of source: failure
        i[1] == j[1] || return SubString(s) # mismatch: failure
        k = i[2]
        i, j = iterate(a, k), iterate(b, j[2])
    end
end

function chopsuffix(s::Union{String, SubString{String}},
                    suffix::Union{String, SubString{String}})
    # Fast path for String-backed arguments. An empty suffix is handled by the
    # `else` branch so that the whole string is returned unchanged.
    if !isempty(suffix) && endswith(s, suffix)
        # `astart` is the code-unit index in `s` where the suffix begins;
        # the result ends at the character just before it.
        astart = ncodeunits(s) - ncodeunits(suffix) + 1
        return @inbounds SubString(s, firstindex(s), prevind(s, astart))
    else
        # No match (or nothing to remove): return a view of the whole string.
        return SubString(s)
    end
end


"""
chomp(s::AbstractString) -> SubString
Expand Down
162 changes: 87 additions & 75 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Test helper: embed `s` between the fixed padding "abc" and "de", then return
# a `SubString` view covering exactly the embedded copy of `s`.
function SubStr(s)
    padded = "abc$(s)de"
    shift = 3  # number of code units occupied by the leading "abc"
    return SubString(padded, firstindex(s) + shift, lastindex(s) + shift)
end

@testset "padding (lpad and rpad)" begin
@test lpad("foo", 2) == "foo"
@test rpad("foo", 2) == "foo"
Expand Down Expand Up @@ -486,81 +488,91 @@ end
end

@testset "chomp/chop" begin
    # Every check runs against each source-string type `S`: a plain String,
    # a SubString view (built by the SubStr helper), and Test.GenericString.
    for S in (String, SubStr, Test.GenericString)
        @test chomp(S("foo\n")) == "foo"
        @test chomp(S("fo∀\n")) == "fo∀"
        @test chomp(S("foo\r\n")) == "foo"
        @test chomp(S("fo∀\r\n")) == "fo∀"
        @test chomp(S("fo∀")) == "fo∀"
        @test chop(S("")) == ""
        @test chop(S("fooε")) == "foo"
        @test chop(S("foεo")) == "foε"
        @test chop(S("∃∃∃∃")) == "∃∃∃"
        @test chop(S("∀ϵ∃Δ"), head=0, tail=0) == "∀ϵ∃Δ"
        @test chop(S("∀ϵ∃Δ"), head=0, tail=1) == "∀ϵ∃"
        @test chop(S("∀ϵ∃Δ"), head=0, tail=2) == "∀ϵ"
        @test chop(S("∀ϵ∃Δ"), head=0, tail=3) == "∀"
        @test chop(S("∀ϵ∃Δ"), head=0, tail=4) == ""
        @test chop(S("∀ϵ∃Δ"), head=0, tail=5) == ""
        @test chop(S("∀ϵ∃Δ"), head=1, tail=0) == "ϵ∃Δ"
        @test chop(S("∀ϵ∃Δ"), head=2, tail=0) == "∃Δ"
        @test chop(S("∀ϵ∃Δ"), head=3, tail=0) == "Δ"
        @test chop(S("∀ϵ∃Δ"), head=4, tail=0) == ""
        @test chop(S("∀ϵ∃Δ"), head=5, tail=0) == ""
        @test chop(S("∀ϵ∃Δ"), head=1, tail=1) == "ϵ∃"
        @test chop(S("∀ϵ∃Δ"), head=2, tail=2) == ""
        @test chop(S("∀ϵ∃Δ"), head=3, tail=3) == ""
        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=3)
        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=3, tail=-3)
        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=-3)

        # The prefix/suffix argument is built by `T`, so the same literal
        # patterns are exercised as strings of each kind and as a Regex.
        for T in (String, SubStr, Test.GenericString, Regex)
            S === Test.GenericString && T === Regex && continue # not supported
            @test chopprefix(S("fo∀\n"), T("bog")) == "fo∀\n"
            @test chopprefix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
            @test chopprefix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
            @test chopprefix(S("fo∀\n"), T("f")) == "o∀\n"
            @test chopprefix(S("fo∀\n"), T("fo")) == "∀\n"
            @test chopprefix(S("fo∀\n"), T("fo∀")) == "\n"
            @test chopprefix(S("fo∀\n"), T("fo∀\n")) == ""
            @test chopprefix(S("\nfo∀"), T("bog")) == "\nfo∀"
            @test chopprefix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
            @test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
            @test chopprefix(S("\nfo∀"), T("\n")) == "fo∀"
            @test chopprefix(S("\nfo∀"), T("\nf")) == "o∀"
            @test chopprefix(S("\nfo∀"), T("\nfo")) == "∀"
            @test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
            @test chopprefix(S(""), T("")) == ""
            @test chopprefix(S(""), T("asdf")) == ""
            @test chopprefix(S(""), T("∃∃∃")) == ""
            @test chopprefix(S("εfoo"), T("ε")) == "foo"
            @test chopprefix(S("ofoε"), T("o")) == "foε"
            @test chopprefix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
            @test chopprefix(S("∃∃∃∃"), T("")) == "∃∃∃∃"

            @test chopsuffix(S("fo∀\n"), T("bog")) == "fo∀\n"
            @test chopsuffix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
            @test chopsuffix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
            @test chopsuffix(S("fo∀\n"), T("\n")) == "fo∀"
            @test chopsuffix(S("fo∀\n"), T("∀\n")) == "fo"
            @test chopsuffix(S("fo∀\n"), T("o∀\n")) == "f"
            @test chopsuffix(S("fo∀\n"), T("fo∀\n")) == ""
            @test chopsuffix(S("\nfo∀"), T("bog")) == "\nfo∀"
            @test chopsuffix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
            @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
            @test chopsuffix(S("\nfo∀"), T("∀")) == "\nfo"
            @test chopsuffix(S("\nfo∀"), T("o∀")) == "\nf"
            @test chopsuffix(S("\nfo∀"), T("fo∀")) == "\n"
            @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
            @test chopsuffix(S(""), T("")) == ""
            @test chopsuffix(S(""), T("asdf")) == ""
            @test chopsuffix(S(""), T("∃∃∃")) == ""
            @test chopsuffix(S("fooε"), T("ε")) == "foo"
            @test chopsuffix(S("εofo"), T("o")) == "εof"
            @test chopsuffix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
            @test chopsuffix(S("∃∃∃∃"), T("")) == "∃∃∃∃"
        end
        @test isa(chomp(S("foo")), SubString)
        @test isa(chop(S("foo")), SubString)

        # Regex patterns with a quantifier; skipped for GenericString, which
        # the Regex methods do not support (see the `continue` above).
        if S !== Test.GenericString
            @test chopprefix(S("∃∃∃b∃"), r"∃+") == "b∃"
            @test chopsuffix(S("∃b∃∃∃"), r"∃+") == "∃b"
        end

        @test isa(chopprefix(S("foo"), "fo"), SubString)
        @test isa(chopsuffix(S("foo"), "oo"), SubString)
    end
end

@testset "bytes2hex and hex2bytes" begin
Expand Down

0 comments on commit af8e39a

Please sign in to comment.