Skip to content

Commit

Permalink
Implement an ordered hash dictionary
Browse files Browse the repository at this point in the history
The new implementation preserves insertion order. It may use slightly
more memory overall but is much faster to iterate because the
indices/values are stored densely (performance comparable to `Vector`,
i.e. limited by memory bandwidth). Faster insertion/deletion/resizing
due to less need to call `hash` (hashes are recorded).
  • Loading branch information
andyferris committed Feb 24, 2020
1 parent f92b3d4 commit 0708ce4
Show file tree
Hide file tree
Showing 6 changed files with 771 additions and 398 deletions.
8 changes: 5 additions & 3 deletions src/Dictionaries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ using Indexing

using Base: @propagate_inbounds, Callable

export getindices, setindices!, mapview
export getindices, setindices!

export AbstractDictionary, AbstractIndices, IndexError, Indices, HashIndices, HashDictionary, Dictionary, MappedDictionary, DictionaryView, FilteredDictionary, FilteredIndices, BroadcastedDictionary

Expand Down Expand Up @@ -35,13 +35,15 @@ include("HashIndices.jl")
include("HashDictionary.jl")
include("MappedDictionary.jl")

include("OldHashIndices.jl")
include("OldHashDictionary.jl")

end # module

# # TODO
#
# * Improved printing - don't calculate length (beyond some cutoff) if it is `SizeUnknown` and limit=true.
# * Improved printing - don't calculate length (beyond some cutoff) if it is `SizeUnknown` and limit=true, fix indentation problems for wider values
# * `hash` and `isless`
# * A manually-ordered dictionary would be quite useful, like [OrderedCollections.jl](https://github.com/JuliaCollections/OrderedCollections.jl).
# * A surface interface for updates like https://github.com/JuliaLang/julia/pull/31367
# * Soon we will have the concept of "ordered" indices/sets (sort-based dictionaries and
# B-trees). We can probably formalize an interface around a trait here. Certain operations
Expand Down
206 changes: 70 additions & 136 deletions src/HashDictionary.jl
Original file line number Diff line number Diff line change
@@ -1,102 +1,60 @@
mutable struct HashDictionary{I,T} <: AbstractDictionary{I, T}
struct HashDictionary{I, T} <: AbstractDictionary{I, T}
indices::HashIndices{I}
values::Vector{T}

HashDictionary{I, T}(indices::HashIndices{I}, values::Vector{T}, ::Nothing) where {I, T} = new(indices, values)
function HashDictionary{I, T}(inds::HashIndices{I}, values::Vector{T}) where {I, T}
return new(inds, values)
end
end

"""
HashDictionary{I, T}()
HashDictionary(; sizehint = 8) = HashDictionary{Any, Any}(; sizehint = sizehint)
HashDictionary{I}(; sizehint = 8) where {I} = HashDictionary{I, Any}(; sizehint = sizehint)

Construct an empty `HashDictionary` with index type `I` and element type `T`. This type of
dictionary uses hashes for fast lookup and insertion, and is both mutable and insertable.
(See `issettable` and `isinsertable`).
"""
function HashDictionary{I, T}(; sizehint::Int = 16) where {I, T}
indices = HashIndices{I}(; sizehint=sizehint)
HashDictionary{I, T}(indices, Vector{T}(undef, length(indices.slots)), nothing)
function HashDictionary{I, T}(; sizehint = 8) where {I, T}
HashDictionary{I, T}(HashIndices{I}(; sizehint = sizehint), Vector{T}())
end
HashDictionary{I}() where {I} = HashDictionary{I, Any}()
HashDictionary() = HashDictionary{Any}()

"""
HashDictionary{I, T}(indices, undef::UndefInitializer)
Construct a `HashDictionary` with index type `I` and element type `T`. The container is
initialized with `keys` that match the values of `indices`, but the values are uninitialized.
"""
function HashDictionary{I, T}(indices, ::UndefInitializer) where {I, T}
return HashDictionary{I, T}(HashIndices{I}(indices), undef)
# Generic entry point: convert `inds` with `HashIndices(inds)` and forward to the
# `HashIndices`-based constructor together with `values`.
HashDictionary(inds, values) = HashDictionary(HashIndices(inds), values)

function HashDictionary{I, T}(h::HashIndices{I}, ::UndefInitializer) where {I, T}
return HashDictionary{I, T}(h, Vector{T}(undef, length(h.slots)), nothing)
# The index type `I` is taken from the supplied `HashIndices{I}`; the element type is
# left to the `HashDictionary{I}` constructor.
HashDictionary(inds::HashIndices{I}, values) where {I} = HashDictionary{I}(inds, values)

function HashDictionary{I, T}(indices::HashIndices{I}, values) where {I, T}
vals = Vector{T}(undef, length(indices.slots))
d = HashDictionary{I, T}(indices, vals, nothing)

@inbounds for (i, v) in zip(tokens(indices), values)
vals[i] = v
end

return d
# Index type `I` given explicitly: convert `inds` with `HashIndices{I}(inds)` and forward.
function HashDictionary{I}(inds, values) where {I}
    hashed = HashIndices{I}(inds)
    return HashDictionary{I}(hashed, values)
end

"""
HashDictionary(indices, values)
HashDictionary{I}(indices, values)
HashDictionary{I, T}(indices, values)
Construct a `HashDictionary` with indices from `indices` and values from `values`, matched
in iteration order.
"""
# Build a `HashDictionary{I, T}` by inserting the elements of `indices` and `values`
# pairwise (via `zip`), in iteration order.
function HashDictionary{I, T}(indices, values) where {I, T}
    # Only pass a size hint when `indices` reports a length or shape.
    has_length = Base.IteratorSize(indices) isa Union{Base.HasLength, Base.HasShape}
    d = if has_length
        HashDictionary{I, T}(; sizehint = length(indices)*2)
    else
        HashDictionary{I, T}()
    end

    for (idx, val) in zip(indices, values)
        insert!(d, idx, val)
    end

    return d
end
function HashDictionary{I}(indices, values) where {I}
# Infer the element type from `values` and forward to the fully-typed constructor.
function HashDictionary{I}(inds::HashIndices{I}, values) where {I}
    # An iterator with `Base.EltypeUnknown` is collected first so that `eltype` is
    # taken from the resulting array.
    # TODO: implement automatic widening from iterators of Base.EltypeUnknown
    vals = Base.IteratorEltype(values) === Base.EltypeUnknown() ? collect(values) : values

    return HashDictionary{I, eltype(vals)}(inds, vals)
end

return HashDictionary{I, eltype(values)}(indices, values)
# Both type parameters given explicitly: convert `inds` with `HashIndices{I}(inds)` and
# forward to the `HashIndices`-based constructor.
function HashDictionary{I, T}(inds, values) where {I, T}
    hashed = HashIndices{I}(inds)
    return HashDictionary{I, T}(hashed, values)
end

function HashDictionary(indices, values)
if Base.IteratorEltype(indices) === Base.EltypeUnknown()
# TODO: implement automatic widening from iterators of Base.EltypeUnknown
indices = collect(indices)
# Copy `values` into a fresh `Vector{T}` and pair it with `inds`.
# NOTE(review): no check is made here that the number of values matches `inds`.
function HashDictionary{I, T}(inds::HashIndices{I}, values) where {I, T}
    if Base.IteratorSize(values) isa Union{Base.HasLength, Base.HasShape}
        # Length is reported up front: allocate once and fill by position.
        buf = Vector{T}(undef, length(values))
        @inbounds for (k, x) in enumerate(values)
            buf[k] = x
        end
    else
        # Length not reported: grow the vector one element at a time.
        buf = Vector{T}()
        for x in values
            push!(buf, x)
        end
    end
    return HashDictionary{I, T}(inds, buf)
end

return HashDictionary{eltype(indices)}(indices, values)
end

"""
HashDictionary(dict::AbstractDictionary)
HashDictionary{I}(dict::AbstractDictionary)
HashDictionary{I, T}(dict::AbstractDictionary)
Construct a copy of `dict` with the same keys and values.
(For copying an `AbstractDict` or other iterable of `Pair`s, see `dictionary`).
"""
HashDictionary(dict::AbstractDictionary) = HashDictionary(keys(dict), dict)
HashDictionary{I}(dict::AbstractDictionary) where {I} = HashDictionary{I}(keys(dict), dict)
HashDictionary{I, T}(dict::AbstractDictionary) where {I, T} = HashDictionary{I, T}(keys(dict), dict)

"""
dictionary(iter)
Expand All @@ -116,7 +74,7 @@ dictionary(p1::Pair, p2::Pair...) = dictionary((p1, p2...))
function _dictionary(::Type{Pair{I, T}}, iter) where {I, T}
iter_size = Base.IteratorSize(iter)
if iter_size isa Union{Base.HasLength, Base.HasShape}
d = HashDictionary{I, T}(; sizehint = length(iter)*2)
d = HashDictionary{I, T}(; sizehint = length(iter)*3 >>> 0x01)
else
d = HashDictionary{I, T}()
end
Expand All @@ -128,85 +86,61 @@ function _dictionary(::Type{Pair{I, T}}, iter) where {I, T}
return d
end

## Implementation
# indices

Base.keys(d::HashDictionary) = d.indices
isinsertable(d::HashDictionary) = true
issettable(d::HashDictionary) = true
Base.keys(dict::HashDictionary) = dict.indices

# Token lookup is delegated to the dictionary's indices (`keys(d)`).
@propagate_inbounds gettoken(d::HashDictionary{I}, i::I) where {I} = gettoken(keys(d), i)
# tokens

# Read the value stored at position `token` of `d.values`, without bounds checking.
@inline gettokenvalue(d::HashDictionary, token) = @inbounds d.values[token]
tokenized(dict::HashDictionary) = dict.values

# Report whether position `token` of `d.values` holds an assigned element.
istokenassigned(d::HashDictionary, token) = isassigned(d.values, token)
# values

@inline function settokenvalue!(d::HashDictionary{I, T}, token, value::T) where {I, T}
@inbounds d.values[token] = value
return d
# The token is a `(slot, index)` pair; only `index` is used to query `dict.values`.
istokenassigned(dict::HashDictionary, (_slot, index)) = isassigned(dict.values, index)

function gettoken!(d::HashDictionary{T}, key::T) where {T}
indices = keys(d)
(token, values) = _gettoken!(indices, d.values, key)
if token < 0
(token, values) = _insert!(indices, values, key, -token)
d.values = values
return (false, token)
else
d.values = values
return (true, token)
end
# The token is a `(slot, index)` pair; the value is read from `dict.values` at `index`.
@propagate_inbounds gettokenvalue(dict::HashDictionary, (_slot, index)) = dict.values[index]

function Base.copy(d::HashDictionary{I, T}) where {I, T}
return HashDictionary{I, T}(d.indices, copy(d.values), nothing)
issettable(::HashDictionary) = true

# Store `value` at position `index` of `dict.values` (the `slot` part of the token is
# unused) and return the dictionary.
@propagate_inbounds function settokenvalue!(dict::HashDictionary{<:Any, T}, (_slot, index), value::T) where {T}
    setindex!(dict.values, value, index)
    return dict
end

tokenized(d::HashDictionary) = d.values
# insertion

# Remove every entry from `d`: empty the indices, then empty the value vector and
# resize it to the length of the indices' `slots`. Returns `d`.
function Base.empty!(d::HashDictionary)
    empty!(d.indices)
    resize!(empty!(d.values), length(keys(d).slots))
    return d
end
isinsertable(::HashDictionary) = true

function deletetoken!(d::HashDictionary{I, T}, token) where {I, T}
deletetoken!(keys(d), token)
isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), d.values, token-1)
return d
# Look up `i` in the indices with `gettoken!`, and return `(hadtoken, (slot, index))`.
# When the token was not already present, `dict.values` is grown by one element.
function gettoken!(dict::HashDictionary{I}, i::I) where {I}
    (found, (slot, index)) = gettoken!(keys(dict), i)
    found || resize!(dict.values, length(dict.values) + 1)
    return (found, (slot, index))
end

function Base.sizehint!(d::HashDictionary, sz::Int)
d.values = _sizehint!(d.indices, d.values, sz)
return d
# Remove the entry identified by the token `(slot, index)` from `dict` and return `dict`.
# The token is first deleted from `dict.indices`. Then, unless `T` is a bits type, the
# runtime function `jl_arrayunset` is called on `dict.values` with `index-1`
# (presumably a zero-based offset that clears the stored reference — confirm against
# the Julia runtime).
function deletetoken!(dict::HashDictionary{I, T}, (slot, index)) where {I, T}
deletetoken!(dict.indices, (slot, index))
isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), dict.values, index-1)
return dict
end

# Rehash `d` to `newsz` slots by delegating to `_rehash!` on the indices and values.
# Returns `d`.
#
# The default for `newsz` previously read `d.inds`, a field that `HashDictionary` does
# not have (its fields are `indices` and `values`), so calling `rehash!(d)` without an
# explicit size would throw. The default now uses the current number of slots of the
# indices, matching how the value vector is sized elsewhere in this file
# (`length(indices.slots)`).
function Base.rehash!(d::HashDictionary, newsz::Int = length(d.indices.slots))
    _rehash!(d.indices, d.values, newsz)
    return d
end

Base.filter!(pred, d::HashDictionary) = Base.unsafe_filter!(pred, d)
function Base.empty!(dict::HashDictionary{I, T}) where {I, T}
empty!(dict.values)
empty!(dict.indices)

# `HashDictionary` is the default mutable AbstractDictionary
# If given some other index type, create a new `HashIndices` copy of the indices
function Base.similar(indices::AbstractIndices{I}, ::Type{T}) where {I, T}
return similar(HashIndices{I}(indices), T)
return dict
end

# For `HashIndices` we don't copy the indices, we allow the `keys` to remain identical (`===`)
# Reuse `indices` as-is (no copy) and pair it with uninitialized values of type `T`.
Base.similar(indices::HashIndices{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}(indices, undef)
# Factories

Base.empty(::AbstractIndices, ::Type{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}()

# `HashDictionary` is the default insertable AbstractDictionary
function Base.empty(::AbstractDictionary, ::Type{I}, ::Type{T}) where {I, T}
return HashDictionary{I, T}()
function Base.similar(indices::AbstractIndices{I}, ::Type{T}) where {I, T}
return HashDictionary(indices, Vector{T}(undef, length(indices)))
end
Loading

0 comments on commit 0708ce4

Please sign in to comment.