Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add chopprefix, chopsuffix #40995

Merged
merged 3 commits into from
Nov 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,8 @@ export
bytes2hex,
chomp,
chop,
chopprefix,
chopsuffix,
codepoint,
codeunit,
codeunits,
Expand Down
14 changes: 14 additions & 0 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,20 @@ function endswith(s::SubString, r::Regex)
return PCRE.exec_r(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED)
end

# Regex flavour of `chopprefix`: strip whatever `prefix` matches at the very start of `s`.
# Returns a `SubString` of `s`; when the pattern does not match there, the whole of `s`.
function chopprefix(s::AbstractString, prefix::Regex)
    # PCRE.ANCHORED pins the match to the starting offset, i.e. the head of `s`.
    found = match(prefix, s, firstindex(s), PCRE.ANCHORED)
    if found === nothing
        return SubString(s)
    end
    # The match begins at the first code unit, so its length in code units is
    # exactly how much has to be skipped.
    skipped = ncodeunits(found.match)
    return SubString(s, skipped + 1)
end

# Regex flavour of `chopsuffix`: strip whatever `suffix` matches at the very end of `s`.
# Returns a `SubString` of `s`; when there is no match, or only an empty one, the whole of `s`.
function chopsuffix(s::AbstractString, suffix::Regex)
    # PCRE.ENDANCHORED only accepts matches that finish at the end of `s`.
    found = match(suffix, s, firstindex(s), PCRE.ENDANCHORED)
    if found === nothing || isempty(found.match)
        return SubString(s)
    end
    # `found.offset` is the index where the match starts; keep everything before it.
    stop = prevind(s, found.offset)
    return SubString(s, firstindex(s), stop)
end


"""
match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])

Expand Down
109 changes: 102 additions & 7 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@ true
```
"""
function startswith(a::AbstractString, b::AbstractString)
    # Walk both strings in lockstep, one character at a time. `i` and `j` are either
    # `nothing` (that string is exhausted) or a `(character, next_state)` tuple.
    #
    # The earlier `Iterators.Stateful` / `all(splat(==), zip(a, b))` formulation must not
    # run ahead of this loop: it drains the iterators, after which the loop would see
    # both sides as exhausted and report success even when the strings differ.
    i, j = iterate(a), iterate(b)
    while true
        j === nothing && return true # ran out of prefix: success!
        i === nothing && return false # ran out of source: failure
        i[1] == j[1] || return false # mismatch: failure
        i, j = iterate(a, i[2]), iterate(b, j[2])
    end
end
# Character-set flavour: true when `str` is non-empty and its first character is
# contained in `chars`.
function startswith(str::AbstractString, chars::Chars)
    isempty(str) && return false
    return first(str)::AbstractChar in chars
end

Expand All @@ -39,9 +44,14 @@ true
```
"""
function endswith(a::AbstractString, b::AbstractString)
    # Walk both strings back-to-front in lockstep. `i` and `j` are either `nothing`
    # (that string is exhausted) or a `(character, next_state)` tuple.
    #
    # The reversed views wrap the original strings directly. Wrapping them in
    # `Iterators.Stateful` first (the earlier formulation) must not precede this,
    # because `Iterators.Reverse` of a stateful iterator cannot be iterated.
    ra, rb = Iterators.Reverse(a), Iterators.Reverse(b)
    i, j = iterate(ra), iterate(rb)
    while true
        j === nothing && return true # ran out of suffix: success!
        i === nothing && return false # ran out of source: failure
        i[1] == j[1] || return false # mismatch: failure
        i, j = iterate(ra, i[2]), iterate(rb, j[2])
    end
end
# Character-set flavour: true when `str` is non-empty and its last character is
# contained in `chars`.
function endswith(str::AbstractString, chars::Chars)
    isempty(str) && return false
    return last(str) in chars
end

Expand All @@ -51,7 +61,7 @@ function startswith(a::Union{String, SubString{String}},
if ncodeunits(a) < cub
false
elseif _memcmp(a, b, sizeof(b)) == 0
nextind(a, cub) == cub + 1
nextind(a, cub) == cub + 1 # check that end of `b` doesn't match a partial character in `a`
else
false
end
Expand All @@ -64,7 +74,7 @@ function endswith(a::Union{String, SubString{String}},
if astart < 1
false
elseif GC.@preserve(a, _memcmp(pointer(a, astart), b, sizeof(b))) == 0
thisind(a, astart) == astart
thisind(a, astart) == astart # check that end of `b` doesn't match a partial character in `a`
else
false
end
Expand Down Expand Up @@ -195,6 +205,91 @@ end
# TODO: optimization for the default case based on
# chop(s::AbstractString) = SubString(s, firstindex(s), prevind(s, lastindex(s)))

"""
chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex}) -> SubString

Remove the prefix `prefix` from `s`. If `s` does not start with `prefix`, a string equal to `s` is returned.

See also [`chopsuffix`](@ref).

!!! compat "Julia 1.8"
This function is available as of Julia 1.8.

# Examples
```jldoctest
julia> chopprefix("Hamburger", "Ham")
"burger"

julia> chopprefix("Hamburger", "hotdog")
"Hamburger"
```
"""
function chopprefix(s::AbstractString, prefix::AbstractString)
    # `cut` tracks the index in `s` of the first character not yet matched.
    cut = firstindex(s)
    # `src` / `pre` are `nothing` once exhausted, otherwise `(character, next_state)`.
    src, pre = iterate(s), iterate(prefix)
    while true
        if pre === nothing
            # Whole prefix matched. If `s` is used up too, the two strings were equal
            # and the remainder is empty; otherwise keep everything from `cut` onward.
            if src === nothing
                return SubString(s, 1, 0)
            else
                return @inbounds SubString(s, cut)
            end
        elseif src === nothing || src[1] != pre[1]
            # `s` is shorter than `prefix`, or the characters differ: nothing to strip.
            return SubString(s)
        end
        cut = src[2]
        src, pre = iterate(s, cut), iterate(prefix, pre[2])
    end
end

# Specialisation for `String` / `SubString{String}`: decide with `startswith`, then
# skip the prefix by its length in code units.
function chopprefix(s::Union{String, SubString{String}},
                    prefix::Union{String, SubString{String}})
    startswith(s, prefix) || return SubString(s)
    offset = ncodeunits(prefix) + 1
    return SubString(s, offset)
end

"""
chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex}) -> SubString

Remove the suffix `suffix` from `s`. If `s` does not end with `suffix`, a string equal to `s` is returned.

See also [`chopprefix`](@ref).

!!! compat "Julia 1.8"
This function is available as of Julia 1.8.

# Examples
```jldoctest
julia> chopsuffix("Hamburger", "er")
"Hamburg"

julia> chopsuffix("Hamburger", "hotdog")
"Hamburger"
```
"""
function chopsuffix(s::AbstractString, suffix::AbstractString)
    # Compare the two strings back-to-front.
    rs, rsuffix = Iterators.Reverse(s), Iterators.Reverse(suffix)
    # `stop` tracks the index in `s` of the last character not yet matched.
    stop = lastindex(s)
    # `src` / `suf` are `nothing` once exhausted, otherwise `(character, next_state)`.
    src, suf = iterate(rs), iterate(rsuffix)
    while true
        if suf === nothing
            # Whole suffix matched. If `s` is used up too, the two strings were equal
            # and the remainder is empty; otherwise keep everything up to `stop`.
            if src === nothing
                return SubString(s, 1, 0)
            else
                return @inbounds SubString(s, firstindex(s), stop)
            end
        elseif src === nothing || src[1] != suf[1]
            # `s` is shorter than `suffix`, or the characters differ: nothing to strip.
            return SubString(s)
        end
        stop = src[2]
        src, suf = iterate(rs, stop), iterate(rsuffix, suf[2])
    end
end

# Specialisation for `String` / `SubString{String}`: decide with `endswith`, then cut
# just before the code unit where the suffix begins.
function chopsuffix(s::Union{String, SubString{String}},
                    suffix::Union{String, SubString{String}})
    # An empty suffix, or one that is not present, leaves `s` untouched.
    (isempty(suffix) || !endswith(s, suffix)) && return SubString(s)
    suffixstart = ncodeunits(s) - ncodeunits(suffix) + 1
    return @inbounds SubString(s, firstindex(s), prevind(s, suffixstart))
end


"""
chomp(s::AbstractString) -> SubString

Expand Down
2 changes: 2 additions & 0 deletions doc/src/base/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ Base.uppercasefirst
Base.lowercasefirst
Base.join
Base.chop
Base.chopprefix
Base.chopsuffix
Base.chomp
Base.thisind
Base.nextind
Expand Down
116 changes: 87 additions & 29 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Test helper: embed `s` between "abc" and "de" and return the `SubString` view that
# covers exactly the embedded copy of `s`.
function SubStr(s)
    padded = "abc$(s)de"
    # The three-code-unit "abc" lead-in shifts every index of `s` by 3.
    lo = firstindex(s) + 3
    hi = lastindex(s) + 3
    return SubString(padded, lo, hi)
end

@testset "padding (lpad and rpad)" begin
@test lpad("foo", 2) == "foo"
@test rpad("foo", 2) == "foo"
Expand Down Expand Up @@ -486,35 +488,91 @@ end
end

# Checks for chomp, chop, chopprefix and chopsuffix.
@testset "chomp/chop" begin
# chomp/chop called directly on String literals.
# NOTE(review): these literal checks repeat what the `S === String` pass of the loop
# below already asserts.
@test chomp("foo\n") == "foo"
@test chomp("fo∀\n") == "fo∀"
@test chomp("foo\r\n") == "foo"
@test chomp("fo∀\r\n") == "fo∀"
@test chomp("fo∀") == "fo∀"
@test chop("") == ""
@test chop("fooε") == "foo"
@test chop("foεo") == "foε"
@test chop("∃∃∃∃") == "∃∃∃"
@test chop("∀ϵ∃Δ", head=0, tail=0) == "∀ϵ∃Δ"
@test chop("∀ϵ∃Δ", head=0, tail=1) == "∀ϵ∃"
@test chop("∀ϵ∃Δ", head=0, tail=2) == "∀ϵ"
@test chop("∀ϵ∃Δ", head=0, tail=3) == "∀"
@test chop("∀ϵ∃Δ", head=0, tail=4) == ""
@test chop("∀ϵ∃Δ", head=0, tail=5) == ""
@test chop("∀ϵ∃Δ", head=1, tail=0) == "ϵ∃Δ"
@test chop("∀ϵ∃Δ", head=2, tail=0) == "∃Δ"
@test chop("∀ϵ∃Δ", head=3, tail=0) == "Δ"
@test chop("∀ϵ∃Δ", head=4, tail=0) == ""
@test chop("∀ϵ∃Δ", head=5, tail=0) == ""
@test chop("∀ϵ∃Δ", head=1, tail=1) == "ϵ∃"
@test chop("∀ϵ∃Δ", head=2, tail=2) == ""
@test chop("∀ϵ∃Δ", head=3, tail=3) == ""
@test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=3)
@test_throws ArgumentError chop("∀ϵ∃Δ", head=3, tail=-3)
@test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=-3)

@test isa(chomp("foo"), SubString)
@test isa(chop("foo"), SubString)
# Same chomp/chop checks, with the subject built through each constructor `S`.
for S in (String, SubStr, Test.GenericString)
@test chomp(S("foo\n")) == "foo"
@test chomp(S("fo∀\n")) == "fo∀"
@test chomp(S("foo\r\n")) == "foo"
@test chomp(S("fo∀\r\n")) == "fo∀"
@test chomp(S("fo∀")) == "fo∀"
@test chop(S("")) == ""
@test chop(S("fooε")) == "foo"
@test chop(S("foεo")) == "foε"
@test chop(S("∃∃∃∃")) == "∃∃∃"
@test chop(S("∀ϵ∃Δ"), head=0, tail=0) == "∀ϵ∃Δ"
@test chop(S("∀ϵ∃Δ"), head=0, tail=1) == "∀ϵ∃"
@test chop(S("∀ϵ∃Δ"), head=0, tail=2) == "∀ϵ"
@test chop(S("∀ϵ∃Δ"), head=0, tail=3) == "∀"
@test chop(S("∀ϵ∃Δ"), head=0, tail=4) == ""
@test chop(S("∀ϵ∃Δ"), head=0, tail=5) == ""
@test chop(S("∀ϵ∃Δ"), head=1, tail=0) == "ϵ∃Δ"
@test chop(S("∀ϵ∃Δ"), head=2, tail=0) == "∃Δ"
@test chop(S("∀ϵ∃Δ"), head=3, tail=0) == "Δ"
@test chop(S("∀ϵ∃Δ"), head=4, tail=0) == ""
@test chop(S("∀ϵ∃Δ"), head=5, tail=0) == ""
@test chop(S("∀ϵ∃Δ"), head=1, tail=1) == "ϵ∃"
@test chop(S("∀ϵ∃Δ"), head=2, tail=2) == ""
@test chop(S("∀ϵ∃Δ"), head=3, tail=3) == ""
@test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=3)
@test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=3, tail=-3)
@test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=-3)

# `T` builds the prefix/suffix argument, so every subject type `S` is paired with
# every pattern type `T` (string flavours and Regex).
for T in (String, SubStr, Test.GenericString, Regex)
S === Test.GenericString && T === Regex && continue # not supported
# chopprefix: non-matching prefixes first, then progressively longer matching ones.
@test chopprefix(S("fo∀\n"), T("bog")) == "fo∀\n"
@test chopprefix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
@test chopprefix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
@test chopprefix(S("fo∀\n"), T("f")) == "o∀\n"
@test chopprefix(S("fo∀\n"), T("fo")) == "∀\n"
@test chopprefix(S("fo∀\n"), T("fo∀")) == "\n"
@test chopprefix(S("fo∀\n"), T("fo∀\n")) == ""
@test chopprefix(S("\nfo∀"), T("bog")) == "\nfo∀"
@test chopprefix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
@test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
@test chopprefix(S("\nfo∀"), T("\n")) == "fo∀"
@test chopprefix(S("\nfo∀"), T("\nf")) == "o∀"
@test chopprefix(S("\nfo∀"), T("\nfo")) == "∀"
@test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
@test chopprefix(S(""), T("")) == ""
@test chopprefix(S(""), T("asdf")) == ""
@test chopprefix(S(""), T("∃∃∃")) == ""
@test chopprefix(S("εfoo"), T("ε")) == "foo"
@test chopprefix(S("ofoε"), T("o")) == "foε"
@test chopprefix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
@test chopprefix(S("∃∃∃∃"), T("")) == "∃∃∃∃"

# chopsuffix: the mirror image of the chopprefix cases above.
@test chopsuffix(S("fo∀\n"), T("bog")) == "fo∀\n"
@test chopsuffix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
@test chopsuffix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
@test chopsuffix(S("fo∀\n"), T("\n")) == "fo∀"
@test chopsuffix(S("fo∀\n"), T("∀\n")) == "fo"
@test chopsuffix(S("fo∀\n"), T("o∀\n")) == "f"
@test chopsuffix(S("fo∀\n"), T("fo∀\n")) == ""
@test chopsuffix(S("\nfo∀"), T("bog")) == "\nfo∀"
@test chopsuffix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
@test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
@test chopsuffix(S("\nfo∀"), T("∀")) == "\nfo"
@test chopsuffix(S("\nfo∀"), T("o∀")) == "\nf"
@test chopsuffix(S("\nfo∀"), T("fo∀")) == "\n"
@test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
@test chopsuffix(S(""), T("")) == ""
@test chopsuffix(S(""), T("asdf")) == ""
@test chopsuffix(S(""), T("∃∃∃")) == ""
@test chopsuffix(S("fooε"), T("ε")) == "foo"
@test chopsuffix(S("εofo"), T("o")) == "εof"
@test chopsuffix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
@test chopsuffix(S("∃∃∃∃"), T("")) == "∃∃∃∃"
end
@test isa(chomp(S("foo")), SubString)
@test isa(chop(S("foo")), SubString)

# Regex patterns with a `+` quantifier; skipped for GenericString, as in the loop above.
if S !== Test.GenericString
@test chopprefix(S("∃∃∃b∃"), r"∃+") == "b∃"
@test chopsuffix(S("∃b∃∃∃"), r"∃+") == "∃b"
end

# Both functions are expected to return a SubString.
@test isa(chopprefix(S("foo"), "fo"), SubString)
@test isa(chopsuffix(S("foo"), "oo"), SubString)
end
end

@testset "bytes2hex and hex2bytes" begin
Expand Down