Skip to content

Commit

Permalink
Implement StringPairs for efficient pairs(::AbstractString)
Browse files Browse the repository at this point in the history
The default `pairs` will iterate keys and values separately. For strings, this
represents double work, since both these iterations will need to determine valid
string indices.
The introduced StringPairs type will, whenever possible, only compute valid
indices once.
Currently, this is only optimised for `String` and `SubString{String}`; other
`AbstractString` subtypes use a generic fallback, and reversed iteration is not optimised.
  • Loading branch information
jakobnissen committed Oct 11, 2023
1 parent 1abafe8 commit 42170bf
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
35 changes: 35 additions & 0 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1023,3 +1023,38 @@ function Base.rest(s::AbstractString, st...)
end
return String(take!(io))
end

"""
    StringPairs{T}(x::AbstractString)

Internal iterator over the `index => character` pairs of the string `x`.

This is the type returned by `pairs(::AbstractString)`. Each element pairs a
valid string index (an `Int`) with the character stored at that index, so the
element type is `Pair{Int, eltype(T)}`.
"""
struct StringPairs{T <: AbstractString}
    x::T
end

StringPairs(x) = StringPairs{typeof(x)}(x)

# Iterator traits are forwarded to the wrapped string.
IteratorSize(::Type{StringPairs{T}}) where T = IteratorSize(T)
# Without an explicit `eltype` the generic fallback reports `Any`, which would
# make e.g. `collect(pairs(s))` produce a `Vector{Any}` instead of a vector of pairs.
eltype(::Type{StringPairs{T}}) where T = Pair{Int, eltype(T)}
length(x::StringPairs) = length(x.x)
pairs(x::AbstractString) = StringPairs(x)

# Fallback for arbitrary `AbstractString`s: the iteration state is the current
# string index; the character lookup and the index advance are done separately.
function iterate(x::StringPairs, i=firstindex(x.x))
    str = x.x
    if i > ncodeunits(str)
        # Past the last code unit: iteration is finished
        return nothing
    end
    return (i => str[i], nextind(str, i))
end

# For `String` and `SubString{String}`, the state of plain string iteration is
# the index itself, so one `iterate` call on the string gives both the character
# at `state` and the index to continue from.
function iterate(
    x::StringPairs{<:Union{String, SubString{String}}},
    state::Int=firstindex(x.x)
)
    next = iterate(x.x, state)
    next === nothing && return nothing
    char, following = next
    return (state => char, following)
end

# Reverse iteration. `Reverse{<:AbstractString}` is currently inefficient, so
# this straightforward index-walking implementation is not easily optimised.
function iterate(x::Iterators.Reverse{<:StringPairs}, i=lastindex(x.itr.x))
    str = x.itr.x
    # Stepping below the first index means every pair has been produced
    i < firstindex(str) && return nothing
    return (i => str[i], prevind(str, i))
end
11 changes: 11 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -702,3 +702,14 @@ end
@test endswith(A, split(B, ' ')[end])
@test endswith(A, 'g')
end

@testset "pairs" begin
    # Forward and reverse pair iteration must agree with zipping keys and characters,
    # for empty, ASCII and multi-byte strings across several string types.
    for s in ["", "a", "abcde", "γ", "∋γa"], T in (String, SubString, GenericString)
        sT = T(s)
        expected = [k => v for (k, v) in zip(keys(sT), sT)]
        @test collect(pairs(sT)) == expected
        @test collect(Iterators.reverse(pairs(sT))) == reverse(expected)
    end
end

0 comments on commit 42170bf

Please sign in to comment.