From af8e39a74512dc09888be8709c73dd64f181ec39 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 19 Jul 2021 17:07:01 -0400 Subject: [PATCH] fix, optimize, add Regex option, expand tests --- base/regex.jl | 14 ++++ base/strings/util.jl | 75 +++++++++++++++----- test/strings/util.jl | 162 +++++++++++++++++++++++-------------------- 3 files changed, 159 insertions(+), 92 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 15744fe14ce47..01f30b9c44ca1 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -335,6 +335,20 @@ function endswith(s::SubString, r::Regex) return PCRE.exec_r(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED) end +function chopprefix(s::AbstractString, prefix::Regex) + m = match(prefix, s, firstindex(s), PCRE.ANCHORED) + m === nothing && return SubString(s) + return SubString(s, ncodeunits(m.match) + 1) +end + +function chopsuffix(s::AbstractString, suffix::Regex) + m = match(suffix, s, firstindex(s), PCRE.ENDANCHORED) + m === nothing && return SubString(s) + isempty(m.match) && return SubString(s) + return SubString(s, firstindex(s), prevind(s, m.offset)) +end + + """ match(r::Regex, s::AbstractString[, idx::Integer[, addopts]]) diff --git a/base/strings/util.jl b/base/strings/util.jl index 9b490c4f94a42..5ed88984c7932 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -19,8 +19,13 @@ true ``` """ function startswith(a::AbstractString, b::AbstractString) - a, b = Iterators.Stateful(a), Iterators.Stateful(b) - all(splat(==), zip(a, b)) && isempty(b) + i, j = iterate(a), iterate(b) + while true + j === nothing && return true # ran out of prefix: success! 
+ i === nothing && return false # ran out of source: failure + i[1] == j[1] || return false # mismatch: failure + i, j = iterate(a, i[2]), iterate(b, j[2]) + end end startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str)::AbstractChar in chars @@ -39,9 +44,14 @@ true ``` """ function endswith(a::AbstractString, b::AbstractString) - a = Iterators.Stateful(Iterators.reverse(a)) - b = Iterators.Stateful(Iterators.reverse(b)) - all(splat(==), zip(a, b)) && isempty(b) + a, b = Iterators.Reverse(a), Iterators.Reverse(b) + i, j = iterate(a), iterate(b) + while true + j === nothing && return true # ran out of suffix: success! + i === nothing && return false # ran out of source: failure + i[1] == j[1] || return false # mismatch: failure + i, j = iterate(a, i[2]), iterate(b, j[2]) + end end endswith(str::AbstractString, chars::Chars) = !isempty(str) && last(str) in chars @@ -51,7 +61,7 @@ function startswith(a::Union{String, SubString{String}}, if ncodeunits(a) < cub false elseif _memcmp(a, b, sizeof(b)) == 0 - nextind(a, cub) == cub + 1 + nextind(a, cub) == cub + 1 # check that end of `b` doesn't match a partial character in `a` else false end @@ -64,7 +74,7 @@ function endswith(a::Union{String, SubString{String}}, if astart < 1 false elseif GC.@preserve(a, _memcmp(pointer(a, astart), b, sizeof(b))) == 0 - thisind(a, astart) == astart + thisind(a, astart) == astart # check that start of `b` doesn't match a partial character in `a` else false end @@ -196,14 +206,14 @@ end # chop(s::AbstractString) = SubString(s, firstindex(s), prevind(s, lastindex(s))) """ - chopprefix(s::AbstractString, prefix::AbstractString) -> SubString + chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex}) -> SubString Remove the prefix `prefix` from `s`. If `s` does not start with `prefix`, a string equal to `s` is returned. See also [`chopsuffix`](@ref). -!!! compat "Julia 1.7" - This function is available as of Julia 1.7. +!!! 
compat "Julia 1.8" + This function is available as of Julia 1.8. # Examples ```jldoctest @@ -215,22 +225,36 @@ julia> chopprefix("Hamburger", "hotdog") ``` """ function chopprefix(s::AbstractString, prefix::AbstractString) + k = firstindex(s) + i, j = iterate(s), iterate(prefix) + while true + j === nothing && i === nothing && return SubString(s, 1, 0) # s == prefix: empty result + j === nothing && return @inbounds SubString(s, k) # ran out of prefix: success! + i === nothing && return SubString(s) # ran out of source: failure + i[1] == j[1] || return SubString(s) # mismatch: failure + k = i[2] + i, j = iterate(s, k), iterate(prefix, j[2]) + end +end + +function chopprefix(s::Union{String, SubString{String}}, + prefix::Union{String, SubString{String}}) if startswith(s, prefix) - SubString(s, nextind(s, lastindex(prefix), 1), lastindex(s)) + SubString(s, 1 + ncodeunits(prefix)) else SubString(s) end end """ - chopsuffix(s::AbstractString, suffix::AbstractString) -> SubString + chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex}) -> SubString Remove the suffix `suffix` from `s`. If `s` does not end with `suffix`, a string equal to `s` is returned. See also [`chopprefix`](@ref). -!!! compat "Julia 1.7" - This function is available as of Julia 1.7. +!!! compat "Julia 1.8" + This function is available as of Julia 1.8. # Examples ```jldoctest @@ -242,13 +266,30 @@ julia> chopsuffix("Hamburger", "hotdog") ``` """ function chopsuffix(s::AbstractString, suffix::AbstractString) - if isempty(s) || !endswith(s, suffix) - SubString(s) + a, b = Iterators.Reverse(s), Iterators.Reverse(suffix) + k = lastindex(s) + i, j = iterate(a), iterate(b) + while true + j === nothing && i === nothing && return SubString(s, 1, 0) # s == suffix: empty result + j === nothing && return @inbounds SubString(s, firstindex(s), k) # ran out of suffix: success! 
+ i === nothing && return SubString(s) # ran out of source: failure + i[1] == j[1] || return SubString(s) # mismatch: failure + k = i[2] + i, j = iterate(a, k), iterate(b, j[2]) + end +end + +function chopsuffix(s::Union{String, SubString{String}}, + suffix::Union{String, SubString{String}}) + if !isempty(suffix) && endswith(s, suffix) + astart = ncodeunits(s) - ncodeunits(suffix) + 1 + @inbounds SubString(s, firstindex(s), prevind(s, astart)) else - SubString(s, firstindex(s), prevind(s, lastindex(s), length(suffix))) + SubString(s) end end + """ chomp(s::AbstractString) -> SubString diff --git a/test/strings/util.jl b/test/strings/util.jl index 3333a1e887c8c..b313a0fa1af4a 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -1,5 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3) + @testset "padding (lpad and rpad)" begin @test lpad("foo", 2) == "foo" @test rpad("foo", 2) == "foo" @@ -486,81 +488,91 @@ end end @testset "chomp/chop" begin - @test chomp("foo\n") == "foo" - @test chomp("fo∀\n") == "fo∀" - @test chomp("foo\r\n") == "foo" - @test chomp("fo∀\r\n") == "fo∀" - @test chomp("fo∀") == "fo∀" - @test chop("") == "" - @test chop("fooε") == "foo" - @test chop("foεo") == "foε" - @test chop("∃∃∃∃") == "∃∃∃" - @test chop("∀ϵ∃Δ", head=0, tail=0) == "∀ϵ∃Δ" - @test chop("∀ϵ∃Δ", head=0, tail=1) == "∀ϵ∃" - @test chop("∀ϵ∃Δ", head=0, tail=2) == "∀ϵ" - @test chop("∀ϵ∃Δ", head=0, tail=3) == "∀" - @test chop("∀ϵ∃Δ", head=0, tail=4) == "" - @test chop("∀ϵ∃Δ", head=0, tail=5) == "" - @test chop("∀ϵ∃Δ", head=1, tail=0) == "ϵ∃Δ" - @test chop("∀ϵ∃Δ", head=2, tail=0) == "∃Δ" - @test chop("∀ϵ∃Δ", head=3, tail=0) == "Δ" - @test chop("∀ϵ∃Δ", head=4, tail=0) == "" - @test chop("∀ϵ∃Δ", head=5, tail=0) == "" - @test chop("∀ϵ∃Δ", head=1, tail=1) == "ϵ∃" - @test chop("∀ϵ∃Δ", head=2, tail=2) == "" - @test chop("∀ϵ∃Δ", head=3, tail=3) == "" - @test_throws ArgumentError 
chop("∀ϵ∃Δ", head=-3, tail=3) - @test_throws ArgumentError chop("∀ϵ∃Δ", head=3, tail=-3) - @test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=-3) - - @test chopprefix("fo∀\n", "bog") == "fo∀\n" - @test chopprefix("fo∀\n", "\n∀foΔ") == "fo∀\n" - @test chopprefix("fo∀\n", "∀foΔ") == "fo∀\n" - @test chopprefix("fo∀\n", "f") == "o∀\n" - @test chopprefix("fo∀\n", "fo") == "∀\n" - @test chopprefix("fo∀\n", "fo∀") == "\n" - @test chopprefix("fo∀\n", "fo∀\n") == "" - @test chopprefix("\nfo∀", "bog") == "\nfo∀" - @test chopprefix("\nfo∀", "\n∀foΔ") == "\nfo∀" - @test chopprefix("\nfo∀", "\nfo∀") == "" - @test chopprefix("\nfo∀", "\n") == "fo∀" - @test chopprefix("\nfo∀", "\nf") == "o∀" - @test chopprefix("\nfo∀", "\nfo") == "∀" - @test chopprefix("\nfo∀", "\nfo∀") == "" - @test chopprefix("", "") == "" - @test chopprefix("", "asdf") == "" - @test chopprefix("", "∃∃∃") == "" - @test chopprefix("εfoo", "ε") == "foo" - @test chopprefix("ofoε", "o") == "foε" - @test chopprefix("∃∃∃∃", "∃") == "∃∃∃" - @test chopprefix("∃∃∃∃", "") == "∃∃∃∃" - - @test chopsuffix("fo∀\n", "bog") == "fo∀\n" - @test chopsuffix("fo∀\n", "\n∀foΔ") == "fo∀\n" - @test chopsuffix("fo∀\n", "∀foΔ") == "fo∀\n" - @test chopsuffix("fo∀\n", "\n") == "fo∀" - @test chopsuffix("fo∀\n", "∀\n") == "fo" - @test chopsuffix("fo∀\n", "o∀\n") == "f" - @test chopsuffix("fo∀\n", "fo∀\n") == "" - @test chopsuffix("\nfo∀", "bog") == "\nfo∀" - @test chopsuffix("\nfo∀", "\n∀foΔ") == "\nfo∀" - @test chopsuffix("\nfo∀", "\nfo∀") == "" - @test chopsuffix("\nfo∀", "∀") == "\nfo" - @test chopsuffix("\nfo∀", "o∀") == "\nf" - @test chopsuffix("\nfo∀", "fo∀") == "\n" - @test chopsuffix("\nfo∀", "\nfo∀") == "" - @test chopsuffix("", "") == "" - @test chopsuffix("", "asdf") == "" - @test chopsuffix("", "∃∃∃") == "" - @test chopsuffix("fooε", "ε") == "foo" - @test chopsuffix("εofo", "o") == "εof" - @test chopsuffix("∃∃∃∃", "∃") == "∃∃∃" - @test chopsuffix("∃∃∃∃", "") == "∃∃∃∃" - - @test isa(chomp("foo"), SubString) - @test 
isa(chop("foo"), SubString) - @test isa(chopprefix("foo", "fo"), SubString) - @test isa(chopsuffix("foo", "oo"), SubString) + for S in (String, SubStr, Test.GenericString) + @test chomp(S("foo\n")) == "foo" + @test chomp(S("fo∀\n")) == "fo∀" + @test chomp(S("foo\r\n")) == "foo" + @test chomp(S("fo∀\r\n")) == "fo∀" + @test chomp(S("fo∀")) == "fo∀" + @test chop(S("")) == "" + @test chop(S("fooε")) == "foo" + @test chop(S("foεo")) == "foε" + @test chop(S("∃∃∃∃")) == "∃∃∃" + @test chop(S("∀ϵ∃Δ"), head=0, tail=0) == "∀ϵ∃Δ" + @test chop(S("∀ϵ∃Δ"), head=0, tail=1) == "∀ϵ∃" + @test chop(S("∀ϵ∃Δ"), head=0, tail=2) == "∀ϵ" + @test chop(S("∀ϵ∃Δ"), head=0, tail=3) == "∀" + @test chop(S("∀ϵ∃Δ"), head=0, tail=4) == "" + @test chop(S("∀ϵ∃Δ"), head=0, tail=5) == "" + @test chop(S("∀ϵ∃Δ"), head=1, tail=0) == "ϵ∃Δ" + @test chop(S("∀ϵ∃Δ"), head=2, tail=0) == "∃Δ" + @test chop(S("∀ϵ∃Δ"), head=3, tail=0) == "Δ" + @test chop(S("∀ϵ∃Δ"), head=4, tail=0) == "" + @test chop(S("∀ϵ∃Δ"), head=5, tail=0) == "" + @test chop(S("∀ϵ∃Δ"), head=1, tail=1) == "ϵ∃" + @test chop(S("∀ϵ∃Δ"), head=2, tail=2) == "" + @test chop(S("∀ϵ∃Δ"), head=3, tail=3) == "" + @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=3) + @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=3, tail=-3) + @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=-3) + + for T in (String, SubStr, Test.GenericString, Regex) + S === Test.GenericString && T === Regex && continue # not supported + @test chopprefix(S("fo∀\n"), T("bog")) == "fo∀\n" + @test chopprefix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n" + @test chopprefix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n" + @test chopprefix(S("fo∀\n"), T("f")) == "o∀\n" + @test chopprefix(S("fo∀\n"), T("fo")) == "∀\n" + @test chopprefix(S("fo∀\n"), T("fo∀")) == "\n" + @test chopprefix(S("fo∀\n"), T("fo∀\n")) == "" + @test chopprefix(S("\nfo∀"), T("bog")) == "\nfo∀" + @test chopprefix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀" + @test chopprefix(S("\nfo∀"), T("\nfo∀")) == "" + @test chopprefix(S("\nfo∀"), 
T("\n")) == "fo∀" + @test chopprefix(S("\nfo∀"), T("\nf")) == "o∀" + @test chopprefix(S("\nfo∀"), T("\nfo")) == "∀" + @test chopprefix(S("\nfo∀"), T("\nfo∀")) == "" + @test chopprefix(S(""), T("")) == "" + @test chopprefix(S(""), T("asdf")) == "" + @test chopprefix(S(""), T("∃∃∃")) == "" + @test chopprefix(S("εfoo"), T("ε")) == "foo" + @test chopprefix(S("ofoε"), T("o")) == "foε" + @test chopprefix(S("∃∃∃∃"), T("∃")) == "∃∃∃" + @test chopprefix(S("∃∃∃∃"), T("")) == "∃∃∃∃" + + @test chopsuffix(S("fo∀\n"), T("bog")) == "fo∀\n" + @test chopsuffix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n" + @test chopsuffix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n" + @test chopsuffix(S("fo∀\n"), T("\n")) == "fo∀" + @test chopsuffix(S("fo∀\n"), T("∀\n")) == "fo" + @test chopsuffix(S("fo∀\n"), T("o∀\n")) == "f" + @test chopsuffix(S("fo∀\n"), T("fo∀\n")) == "" + @test chopsuffix(S("\nfo∀"), T("bog")) == "\nfo∀" + @test chopsuffix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀" + @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == "" + @test chopsuffix(S("\nfo∀"), T("∀")) == "\nfo" + @test chopsuffix(S("\nfo∀"), T("o∀")) == "\nf" + @test chopsuffix(S("\nfo∀"), T("fo∀")) == "\n" + @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == "" + @test chopsuffix(S(""), T("")) == "" + @test chopsuffix(S(""), T("asdf")) == "" + @test chopsuffix(S(""), T("∃∃∃")) == "" + @test chopsuffix(S("fooε"), T("ε")) == "foo" + @test chopsuffix(S("εofo"), T("o")) == "εof" + @test chopsuffix(S("∃∃∃∃"), T("∃")) == "∃∃∃" + @test chopsuffix(S("∃∃∃∃"), T("")) == "∃∃∃∃" + end + @test isa(chomp(S("foo")), SubString) + @test isa(chop(S("foo")), SubString) + + if S !== Test.GenericString + @test chopprefix(S("∃∃∃b∃"), r"∃+") == "b∃" + @test chopsuffix(S("∃b∃∃∃"), r"∃+") == "∃b" + end + + @test isa(chopprefix(S("foo"), "fo"), SubString) + @test isa(chopsuffix(S("foo"), "oo"), SubString) + end end @testset "bytes2hex and hex2bytes" begin