#= Reconstructed from a whitespace-mangled git patch (commit a5d049410d42b70c371a146
   29aace4dd9fa37967, Andy Ferris, 2020-02-24, "Implement an ordered hash dictionary").
   Commit message: the new implementation preserves insertion order; it may use
   slightly more memory overall but is much faster to iterate because the
   indices/values are stored densely (performance comparable to `Vector`). This span
   is the new file src/DenseHashDictionary.jl. =#

export DenseHashIndices, DenseHashDictionary

# `true` when `hash` is injective for the key type, so equal hashes alone prove equal
# keys and the `isequal` check can be skipped on lookup.
perfect_hash(::Any) = false
perfect_hash(::Union{Bool, UInt8, UInt16, UInt32, UInt, Int8, Int16, Int32, Int, Char}) = true

"""
    DenseHashIndices{I}

An insertion-ordered set of indices of type `I`. The hash table (`slots`) stores
positions into the densely packed `hashes`/`values` vectors, so iteration runs over
contiguous memory (comparable to `Vector`).
"""
struct DenseHashIndices{I} <: AbstractIndices{I}
    # The hash table: 0 means empty, n > 0 is a position in `hashes`/`values`.
    slots::Vector{Int}

    # Densely ordered hashes and values, kept in insertion order.
    hashes::Vector{UInt}
    values::Vector{I}
end

function DenseHashIndices{I}(; sizehint = 8) where {I}
    sizehint > 0 || throw(ArgumentError("sizehint must be positive, got $sizehint"))
    # FIX: the probe loops mask with `length(slots) - 1`, so the slot table length must
    # be a power of two; the original used `sizehint` directly, which breaks for
    # non-power-of-two hints.
    sz = Base._tablesz(sizehint)
    return DenseHashIndices{I}(fill(0, sz), Vector{UInt}(), Vector{I}())
end

# Rebuild `slots` from `hashes` for a table of `newsize` slots.
# `newsize` must be a power of two and large enough that the table is not full.
function rehash!(indices::DenseHashIndices{I}, newsize::Integer) where {I}
    slots = resize!(indices.slots, newsize)
    fill!(slots, 0)
    bit_mask = newsize - one(typeof(newsize)) # newsize is a power of two

    for (index, full_hash) in enumerate(indices.hashes)
        trial_slot = reinterpret(Int, full_hash) & bit_mask
        @inbounds while true
            trial_slot = trial_slot + 1
            if slots[trial_slot] == 0
                slots[trial_slot] = index
                break
            else
                trial_slot = trial_slot & bit_mask
            end
            # This is potentially an infinite loop; the caller must not overfill
            # the table.
        end
    end

    return indices
end

Base.length(indices::DenseHashIndices) = length(indices.values)

# Token interface. A token is a `(slot, index)` pair; `slot` is a position in the
# hash table and `index` a position in the dense storage.
istokenizable(::DenseHashIndices) = true

tokentype(::DenseHashIndices) = Int

# During iteration the slot part of the token is 0 — deletion rebuilds the table
# (see `deletetoken!`), so the slot is not required there.
@propagate_inbounds function iteratetoken(indices::DenseHashIndices)
    if isempty(indices.values)
        return nothing
    end
    return ((0, 1), 1)
end

@propagate_inbounds function iteratetoken(indices::DenseHashIndices, index::Int)
    if index == length(indices.values)
        return nothing
    end
    index = index + 1
    return ((0, index), index)
end

function gettoken(indices::DenseHashIndices{I}, i::I) where {I}
    full_hash = hash(i)
    n_slots = length(indices.slots)
    bit_mask = n_slots - 1 # n_slots is always a power of two

    trial_slot = reinterpret(Int, full_hash) & bit_mask
    @inbounds while true
        trial_slot = trial_slot + 1
        trial_index = indices.slots[trial_slot]
        if trial_index == 0
            return (false, (0, 0))
        end

        if full_hash == indices.hashes[trial_index] && (perfect_hash(i) || isequal(i, indices.values[trial_index]))
            return (true, (trial_slot, trial_index))
        end

        trial_slot = trial_slot & bit_mask
        # This is potentially an infinite loop; insertion keeps the table under
        # 2/3rds full so an empty slot is always reachable.
    end
end

@propagate_inbounds function gettokenvalue(indices::DenseHashIndices, (_slot, index))
    return indices.values[index]
end

# Insertion interface
isinsertable(::DenseHashIndices) = true

function gettoken!(indices::DenseHashIndices{I}, i::I) where {I}
    full_hash = hash(i)
    n_slots = length(indices.slots)
    bit_mask = n_slots - 1 # n_slots is always a power of two
    n_values = length(indices.values)

    trial_slot = reinterpret(Int, full_hash) & bit_mask
    @inbounds while true
        trial_slot = trial_slot + 1
        trial_index = indices.slots[trial_slot]
        if trial_index == 0
            indices.slots[trial_slot] = n_values + 1
            break
        end

        trial_hash = indices.hashes[trial_index]

        if trial_hash == full_hash && (perfect_hash(i) || isequal(i, indices.values[trial_index]))
            return (true, (trial_slot, trial_index))
        end

        trial_slot = trial_slot & bit_mask
        # This is potentially an infinite loop; the resize below keeps the table
        # under 2/3rds full so an empty slot is always reachable.
    end

    push!(indices.hashes, full_hash)
    push!(indices.values, i)

    # Expand the hash map when it reaches 2/3rds full
    if 3 * (n_values + 1) > 2 * n_slots
        # Grow faster for small hash maps than for large ones
        newsize = n_slots > 16000 ? 2 * n_slots : 4 * n_slots
        rehash!(indices, newsize)

        # FIX: rehashing moves the slot, so re-locate it — the original returned the
        # stale pre-rehash slot, producing a token that was invalid for deletion.
        bit_mask = newsize - 1
        trial_slot = reinterpret(Int, full_hash) & bit_mask
        @inbounds while true
            trial_slot = trial_slot + 1
            if indices.slots[trial_slot] == n_values + 1
                break
            end
            trial_slot = trial_slot & bit_mask
        end
    end

    return (false, (trial_slot, n_values + 1))
end

@propagate_inbounds function deletetoken!(indices::DenseHashIndices, (slot, index))
    splice!(indices.hashes, index)
    splice!(indices.values, index)

    # FIX: `splice!` shifts every later dense entry down by one, so *every* slot
    # holding an index greater than `index` is now stale. The original only marked a
    # single slot as deleted (leaving the table corrupt, and the `-1` tombstone made
    # `gettoken` read `hashes[-1]`). Rebuild the table instead, shrinking it when it
    # is less than 1/4 full.
    n_slots = length(indices.slots)
    n_values = length(indices.values)
    if 4 * n_values < n_slots && n_slots > 8
        rehash!(indices, n_slots >> 0x01) # halve each time
    else
        rehash!(indices, n_slots)
    end

    return indices
end

# Factories

Base.empty(::DenseHashIndices, ::Type{I}) where {I} = DenseHashIndices{I}()

########

"""
    DenseHashDictionary{I, T}

An insertion-ordered hash dictionary: keys live in a `DenseHashIndices{I}` and the
values are stored densely, in parallel with the keys.
"""
struct DenseHashDictionary{I, T} <: AbstractDictionary{I, T}
    indices::DenseHashIndices{I}
    values::Vector{T}
end

function DenseHashDictionary{I, T}(; sizehint = 8) where {I, T}
    return DenseHashDictionary{I, T}(DenseHashIndices{I}(; sizehint = sizehint), Vector{T}())
end

# indices

Base.keys(dict::DenseHashDictionary) = dict.indices

# tokens

tokenized(dict::DenseHashDictionary) = dict.values

# values

function istokenassigned(dict::DenseHashDictionary, (_slot, index))
    return isassigned(dict.values, index)
end

@propagate_inbounds function gettokenvalue(dict::DenseHashDictionary, (_slot, index))
    return dict.values[index]
end

issettable(::DenseHashDictionary) = true

@propagate_inbounds function settokenvalue!(dict::DenseHashDictionary{<:Any, T}, (_slot, index), value::T) where {T}
    dict.values[index] = value
    return dict
end

# insertion

function gettoken!(dict::DenseHashDictionary{I}, i::I) where {I}
    (hadtoken, (slot, index)) = gettoken!(keys(dict), i)
    if !hadtoken
        # Reserve a (possibly undefined) value slot; the caller fills it via
        # `settokenvalue!`.
        resize!(dict.values, length(dict.values) + 1)
    end
    return (hadtoken, (slot, index))
end

function deletetoken!(dict::DenseHashDictionary, (slot, index))
    deletetoken!(dict.indices, (slot, index))
    splice!(dict.values, index)
    return dict
end

# Factories

Base.empty(::DenseHashIndices, ::Type{I}, ::Type{T}) where {I, T} = DenseHashDictionary{I, T}()

function Base.similar(indices::DenseHashIndices{I}, ::Type{T}) where {I, T}
    return DenseHashDictionary(indices, Vector{T}(undef, length(indices)))
end

#= Remainder of this span (the patch continues on the next source line): the unified
   diff of src/Dictionaries.jl — it removes `mapview` from the exports, adds
   `include("OldHashIndices.jl")` and `include("OldHashDictionary.jl")` after the
   existing includes, and begins rewriting the trailing TODO comments. Preserved here
   as a note because the flattened patch cuts that hunk mid-comment. =#
#= Continuation of the src/Dictionaries.jl hunk from the previous span: the updated
   trailing TODO notes (improved printing — don't compute length beyond a cutoff when
   `SizeUnknown` and limit=true, fix indentation for wide values; `hash`/`isless`;
   have `delete!` return the next element and `deletetoken!` the next token, plus
   first/last/next/prev index and token queries; an update surface like
   JuliaLang/julia#31367; a trait for "ordered" indices). Below, the post-patch
   src/HashDictionary.jl is reconstructed from the flattened diff; code between hunks
   is not visible in the patch and is marked as omitted. =#

"""
    HashDictionary{I, T}

A dictionary mapping indices of type `I` to values of type `T`, backed by a
`HashIndices{I}` for lookup with values stored densely and in parallel. It is both
settable and insertable (see `issettable`, `isinsertable`).
"""
struct HashDictionary{I, T} <: AbstractDictionary{I, T}
    indices::HashIndices{I}
    values::Vector{T}

    function HashDictionary{I, T}(inds::HashIndices{I}, values::Vector{T}) where {I, T}
        return new(inds, values)
    end
end

HashDictionary(; sizehint = 8) = HashDictionary{Any, Any}(; sizehint = sizehint)
HashDictionary{I}(; sizehint = 8) where {I} = HashDictionary{I, Any}(; sizehint = sizehint)

function HashDictionary{I, T}(; sizehint = 8) where {I, T}
    return HashDictionary{I, T}(HashIndices{I}(; sizehint = sizehint), Vector{T}())
end

# Convert `inds` to `HashIndices`, then dispatch to the typed constructors below.
function HashDictionary(inds, values)
    return HashDictionary(HashIndices(inds), values)
end

function HashDictionary(inds::HashIndices{I}, values) where {I}
    return HashDictionary{I}(inds, values)
end

function HashDictionary{I}(inds, values) where {I}
    return HashDictionary{I}(HashIndices{I}(inds), values)
end

function HashDictionary{I}(inds::HashIndices{I}, values) where {I}
    if Base.IteratorEltype(values) === Base.EltypeUnknown()
        # TODO: implement automatic widening from iterators of Base.EltypeUnknown
        values = collect(values)
    end

    return HashDictionary{I, eltype(values)}(inds, values)
end

function HashDictionary{I, T}(inds, values) where {I, T}
    return HashDictionary{I, T}(HashIndices{I}(inds), values)
end

function HashDictionary{I, T}(inds::HashIndices{I}, values) where {I, T}
    iter_size = Base.IteratorSize(values)
    if iter_size isa Union{Base.HasLength, Base.HasShape}
        # Known length: fill a pre-sized vector.
        vs = Vector{T}(undef, length(values))
        @inbounds for (i, v) in enumerate(values)
            vs[i] = v
        end
        return HashDictionary{I, T}(inds, vs)
    else
        vs = Vector{T}()
        for v in values
            push!(vs, v)
        end
        return HashDictionary{I, T}(inds, vs)
    end
end

# … (unchanged `dictionary(iter)` docstring and methods between hunks are omitted in
# the patch) …

dictionary(p1::Pair, p2::Pair...) = dictionary((p1, p2...))

# Build the key and value vectors in one pass, then hand them to the bulk
# `HashDictionary` constructor (avoids per-element `insert!`).
function _dictionary(::Type{Pair{I, T}}, iter) where {I, T}
    iter_size = Base.IteratorSize(iter)
    if iter_size isa Union{Base.HasLength, Base.HasShape}
        n = length(iter)
        inds = Vector{I}(undef, n)
        vals = Vector{T}(undef, n)
        j = 1
        @inbounds for (i, v) in iter
            inds[j] = i
            vals[j] = v
            j += 1
        end
        return HashDictionary{I, T}(inds, vals)
    else
        inds = Vector{I}()
        vals = Vector{T}()
        @inbounds for (i, v) in iter
            push!(inds, i)
            push!(vals, v)
        end
        return HashDictionary{I, T}(inds, vals)
    end
end

# indices

Base.keys(dict::HashDictionary) = dict.indices

# tokens

tokenized(dict::HashDictionary) = dict.values

# values

function istokenassigned(dict::HashDictionary, (_slot, index))
    return isassigned(dict.values, index)
end

@propagate_inbounds function gettokenvalue(dict::HashDictionary, (_slot, index))
    return dict.values[index]
end

issettable(::HashDictionary) = true

@propagate_inbounds function settokenvalue!(dict::HashDictionary{<:Any, T}, (_slot, index), value::T) where {T}
    dict.values[index] = value
    return dict
end

# insertion

isinsertable(::HashDictionary) = true

function gettoken!(dict::HashDictionary{I}, i::I) where {I}
    # Pass the value vector through so the indices can keep it in sync (resize on
    # insertion, compact on rehash).
    (hadtoken, (slot, index)) = gettoken!(keys(dict), i, (dict.values,))
    return (hadtoken, (slot, index))
end

function deletetoken!(dict::HashDictionary{I, T}, (slot, index)) where {I, T}
    # Release the reference before the indices (possibly) rehash/compact.
    isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), dict.values, index-1)
    deletetoken!(dict.indices, (slot, index), (dict.values,))
    return dict
end

function Base.empty!(dict::HashDictionary{I, T}) where {I, T}
    empty!(dict.values)
    empty!(dict.indices)

    return dict
end

function Base.filter!(pred, dict::HashDictionary)
    indices = keys(dict)
    _filter!(i -> pred(@inbounds dict.values[i]), keys(indices.values), indices.values, indices.hashes, (dict.values,))
    indices.deleted = 0
    newsize = Base._tablesz(3*length(indices.values) >> 0x01)
    rehash!(indices, newsize, (dict.values,))
    return dict
end

function Base.filter!(pred, dict::PairDictionary{<:Any, <:Any, <:HashDictionary})
    d = dict.d
    indices = keys(d)
    _filter!(i -> pred(@inbounds indices.values[i] => d.values[i]), keys(indices.values), indices.values, indices.hashes, (d.values,))
    indices.deleted = 0
    newsize = Base._tablesz(3*length(indices.values) >> 0x01)
    rehash!(indices, newsize, (d.values,))
    return dict
end

# Factories

# `HashDictionary` is the default insertable dictionary for any indices.
Base.empty(::AbstractIndices, ::Type{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}()

function Base.similar(indices::AbstractIndices{I}, ::Type{T}) where {I, T}
    return HashDictionary(indices, Vector{T}(undef, length(indices)))
end
#= Post-patch src/HashIndices.jl, reconstructed from the flattened diff. Hashes are
   stored alongside the values; the high bit of a stored hash is a deletion marker,
   and `slots` holds 0 (empty), a positive dense index, or a negative index
   (tombstone for a deleted entry). Code between hunks is marked as omitted. =#

const hash_mask = typemax(UInt) >>> 0x01
const deletion_mask = hash_mask + 0x01 # high bit: entry deleted (value possibly undefined)

mutable struct HashIndices{I} <: AbstractIndices{I}
    # The hash table: 0 = empty, n > 0 = dense index, -n = tombstone for deleted n.
    slots::Vector{Int}

    # Hashes and values
    hashes::Vector{UInt} # Deletion marker stored in high bit
    values::Vector{I}

    # Number of deleted (but not yet compacted) entries in `hashes`/`values`.
    deleted::Int
end

HashIndices(; sizehint = 8) = HashIndices{Any}(; sizehint = sizehint)

function HashIndices{I}(; sizehint = 8) where {I}
    # FIX: the original computed `newsize` but then allocated `fill(0, sizehint)`;
    # the slot table length must be the power-of-two `newsize` for the probe mask.
    newsize = Base._tablesz((3 * sizehint) >> 0x01)
    return HashIndices{I}(fill(0, newsize), Vector{UInt}(), Vector{I}(), 0)
end

"""
    HashIndices(iter)

Construct a `HashIndices` from an arbitrary iterable of indices.
"""
function HashIndices(iter)
    if Base.IteratorEltype(iter) === Base.EltypeUnknown()
        # TODO: implement automatic widening from iterators of Base.EltypeUnknown
        iter = collect(iter)
    end

    return HashIndices{eltype(iter)}(iter)
end

function HashIndices{I}(iter) where {I}
    iter_size = Base.IteratorSize(iter)
    if iter_size isa Union{Base.HasLength, Base.HasShape}
        values = Vector{I}(undef, length(iter))
        @inbounds for (i, value) in enumerate(iter)
            values[i] = value
        end
        return HashIndices{I}(values)
    else
        h = HashIndices{I}()
        for i in iter
            insert!(h, i)
        end
        return h
    end
end

# NOTE(review): assumes `values` contains no duplicates — duplicate inputs would get
# multiple slots; confirm against callers.
function HashIndices{I}(values::Vector{I}) where {I}
    hashes = map(v -> hash(v) & hash_mask, values)
    slots = Vector{Int}()
    out = HashIndices{I}(slots, hashes, values, 0)
    newsize = Base._tablesz(3*length(values) >> 0x01)
    rehash!(out, newsize)
    return out
end

function Base.copy(indices::HashIndices{I}) where {I}
    if indices.deleted == 0
        return HashIndices{I}(copy(indices.slots), copy(indices.hashes), copy(indices.values), 0)
    else
        # Copy the dense storage and rebuild (and compact) the slot table.
        out = HashIndices{I}(Vector{Int}(), copy(indices.hashes), copy(indices.values), indices.deleted)
        newsize = Base._tablesz(3*length(indices) >> 0x01)
        rehash!(out, newsize)
        return out # FIX: explicit return (previously relied on rehash!'s return value)
    end
end

# private (note that newsize must be a power of two)
# Rebuild the slot table; when there are deletions, also compact `hashes`/`values`
# (and any parallel `values` vectors, e.g. a dictionary's value storage).
# `include_last_values = false` skips copying the final (just-reserved, possibly
# undefined) element of the parallel vectors.
function rehash!(indices::HashIndices{I}, newsize::Int, values = (), include_last_values::Bool = true) where {I}
    slots = resize!(indices.slots, newsize)
    fill!(slots, 0)
    bit_mask = newsize - 1 # newsize is a power of two

    if indices.deleted == 0
        for (index, full_hash) in enumerate(indices.hashes)
            trial_slot = reinterpret(Int, full_hash) & bit_mask
            @inbounds while true
                trial_slot = trial_slot + 1
                if slots[trial_slot] == 0
                    slots[trial_slot] = index
                    break
                else
                    trial_slot = trial_slot & bit_mask
                end
                # This is potentially an infinite loop and care must be taken by the
                # caller not to overfill the container
            end
        end
    else
        # Reassigning to to_index/from_index gives the closure capture boxing issue,
        # so mutate a Ref instead
        to_index = Ref(1)
        from_index = Ref(1)
        n_values = length(indices.values)
        @inbounds while from_index[] <= n_values
            full_hash = indices.hashes[from_index[]]
            if full_hash & deletion_mask === zero(UInt) # live entry
                trial_slot = reinterpret(Int, full_hash) & bit_mask
                @inbounds while true
                    trial_slot = trial_slot + 1
                    if slots[trial_slot] == 0
                        slots[trial_slot] = to_index[]
                        indices.hashes[to_index[]] = indices.hashes[from_index[]]
                        indices.values[to_index[]] = indices.values[from_index[]]
                        if include_last_values || from_index[] < n_values
                            map(values) do (vals)
                                @inbounds vals[to_index[]] = vals[from_index[]]
                            end
                        end
                        to_index[] += 1
                        break
                    else
                        trial_slot = trial_slot & bit_mask
                    end
                end
            end

            from_index[] += 1
        end

        new_size = length(indices.values) - indices.deleted
        resize!(indices.values, new_size)
        resize!(indices.hashes, new_size)
        map(values) do (vals)
            resize!(vals, new_size)
        end
        indices.deleted = 0
    end

    return indices
end

Base.length(indices::HashIndices) = length(indices.values) - indices.deleted

# Token interface
istokenizable(::HashIndices) = true

tokentype(::HashIndices) = Int

# During iteration the token (slot part 0) cannot be used for deletion — we do not
# track the slots.
@propagate_inbounds function iteratetoken(indices::HashIndices)
    if indices.deleted == 0
        return length(indices) > 0 ? ((0, 1), 1) : nothing
    end
    index = 1
    @inbounds while index <= length(indices.hashes)
        if indices.hashes[index] & deletion_mask === zero(UInt)
            return ((0, index), index)
        end
        index += 1
    end
    return nothing
end

@propagate_inbounds function iteratetoken(indices::HashIndices, index::Int)
    index += 1
    if indices.deleted == 0 # apparently this is enough to make it iterate as fast as `Vector`
        return index <= length(indices.values) ? ((0, index), index) : nothing
    end
    @inbounds while index <= length(indices.hashes)
        if indices.hashes[index] & deletion_mask === zero(UInt)
            return ((0, index), index)
        end
        index += 1
    end
    return nothing
end

function gettoken(indices::HashIndices{I}, i::I) where {I}
    full_hash = hash(i) & hash_mask
    n_slots = length(indices.slots)
    bit_mask = n_slots - 1 # n_slots is always a power of two

    trial_slot = reinterpret(Int, full_hash) & bit_mask
    @inbounds while true
        trial_slot = trial_slot + 1
        trial_index = indices.slots[trial_slot]
        if trial_index == 0
            return (false, (0, 0))
        end

        # FIX: skip tombstones (negative indices) — the original indexed
        # `hashes[trial_index]` with a negative index. Note: the deletion bit in the
        # stored hash also ensures a deleted (potentially undefined) value can never
        # compare equal.
        if trial_index > 0 && full_hash === indices.hashes[trial_index] && isequal(i, indices.values[trial_index])
            return (true, (trial_slot, trial_index))
        end

        trial_slot = trial_slot & bit_mask
        # This is potentially an infinite loop; insertion keeps the table under
        # 2/3rds full so an empty slot is always reachable.
    end
end

@propagate_inbounds function gettokenvalue(indices::HashIndices, (_slot, index))
    return indices.values[index]
end

# Insertion interface
isinsertable(::HashIndices) = true

function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I}
    full_hash = hash(i) & hash_mask
    n_slots = length(indices.slots)
    bit_mask = n_slots - 1 # n_slots is always a power of two
    n_values = length(indices.values)
    new_index = n_values + 1

    trial_slot = reinterpret(Int, full_hash) & bit_mask
    deleted_slot = 0
    @inbounds while true
        trial_slot = trial_slot + 1
        trial_index = indices.slots[trial_slot]
        if trial_index == 0
            break
        elseif trial_index < 0
            # Tombstone: remember the first one for reuse, but keep probing in case
            # `i` exists later in the chain.
            # FIX: the original fell through and read `hashes[trial_index]` with a
            # negative index.
            if deleted_slot == 0
                deleted_slot = trial_slot
            end
        else
            trial_hash = indices.hashes[trial_index]
            if trial_hash === full_hash && isequal(i, indices.values[trial_index])
                return (true, (trial_slot, trial_index))
            end
        end

        trial_slot = trial_slot & bit_mask
        # This is potentially an infinite loop; the resize below keeps the table
        # under 2/3rds full.
    end

    # Prefer reusing the earliest tombstone on the probe chain.
    # FIX: the original wrote to `trial_slot` in both branches (despite the comment)
    # and decremented `deleted`, even though the dead entry remains in
    # `hashes`/`values` until the next rehash — which corrupted `length`.
    used_slot = deleted_slot == 0 ? trial_slot : deleted_slot
    indices.slots[used_slot] = new_index

    push!(indices.hashes, full_hash)
    push!(indices.values, i)
    map(values) do (vals)
        resize!(vals, length(vals) + 1)
    end

    # Expand the hash map when it reaches 2/3rds full
    if 3 * new_index > 2 * n_slots
        # Grow faster for small hash maps than for large ones
        newsize = n_slots > 16000 ? 2 * n_slots : 4 * n_slots
        rehash!(indices, newsize, values, false)

        # FIX: the rehash may also have compacted deleted entries, moving the new
        # element; take its new index *before* searching for its slot (the original
        # searched for the stale index, which could loop forever).
        new_index = length(indices.values)
        bit_mask = newsize - 1
        used_slot = reinterpret(Int, full_hash) & bit_mask
        @inbounds while true
            used_slot = used_slot + 1
            if indices.slots[used_slot] == new_index
                break
            end
            used_slot = used_slot & bit_mask
        end
    end

    return (false, (used_slot, new_index))
end

@propagate_inbounds function deletetoken!(indices::HashIndices{I}, (slot, index), values = ()) where {I}
    @boundscheck if slot == 0
        error("Cannot use iteration token for deletion")
    end
    indices.slots[slot] = -index       # tombstone
    indices.hashes[index] = deletion_mask
    isbitstype(I) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), indices.values, index-1)
    indices.deleted += 1

    # Recreate the hash map when 1/3rd of the values are deletions
    n_slots = length(indices.slots)
    n_values = length(indices.values) - indices.deleted
    if 3 * indices.deleted > n_values
        # Halve if necessary
        halve = 4 * n_values < n_slots && n_slots > 8
        rehash!(indices, halve ? n_slots >> 0x01 : n_slots, values)
    end
    return indices
end

function Base.empty!(indices::HashIndices{I}) where {I}
    indices.hashes = Vector{UInt}()
    indices.values = Vector{I}()
    indices.slots = fill(0, 8)
    indices.deleted = 0

    return indices
end

# Accelerated filtering

function Base.filter!(pred, indices::HashIndices)
    _filter!(i -> pred(@inbounds indices.values[i]), keys(indices.values), indices.values, indices.hashes, ())
    indices.deleted = 0
    newsize = Base._tablesz(3*length(indices.values) >> 0x01)
    rehash!(indices, newsize)
    return indices
end

# In-place compaction of live entries passing `pred`; also compacts any parallel
# `values` vectors. (`Ref`s avoid the closure-capture boxing issue.)
@inline function _filter!(pred, range, indices, hashes, values = ())
    n = length(indices)
    i = Ref(0)
    j = Ref(0)
    @inbounds while i[] < n
        i[] += 1
        if hashes[i[]] & deletion_mask === zero(UInt) && pred(i[])
            j[] += 1
            indices[j[]] = indices[i[]]
            hashes[j[]] = hashes[i[]]
            map(vec -> @inbounds(vec[j[]] = vec[i[]]), values)
        end
    end
    newsize = j[]
    resize!(indices, newsize)
    resize!(hashes, newsize)
    map(vec -> resize!(vec, newsize), values)
end

# Factories

# `HashIndices` is the default insertable indices.
Base.empty(::AbstractIndices, ::Type{I}) where {I} = HashIndices{I}()

#= Truncated tail of the flattened patch, preserved verbatim (the definition runs
   past the visible source, so it is not reconstructed here):

diff --git a/src/OldHashDictionary.jl b/src/OldHashDictionary.jl new file mode 100644
index 0000000..0f293bb --- /dev/null +++ b/src/OldHashDictionary.jl @@ -0,0 +1,173 @@
+mutable struct OldHashDictionary{I,T} <: AbstractDictionary{I, T} +
indices::OldHashIndices{I} + values::Vector{T} + + OldHashDictionary{I, T}(indices::
OldHashIndices{I}, values::Vector{T}, ::Nothing) where {I, T}
=#
= new(indices, values) +end + +""" + OldHashDictionary{I, T}() + +Construct an empty `OldHashDictionary` with index type `I` and element type `T`. This type of +dictionary uses hashes for fast lookup and insertion, and is both mutable and insertable. +(See `issettable` and `isinsertable`). +""" +function OldHashDictionary{I, T}(; sizehint::Int = 16) where {I, T} + indices = OldHashIndices{I}(; sizehint=sizehint) + OldHashDictionary{I, T}(indices, Vector{T}(undef, length(indices.slots)), nothing) +end +OldHashDictionary{I}() where {I} = OldHashDictionary{I, Any}() +OldHashDictionary() = OldHashDictionary{Any}() + +""" + OldHashDictionary{I, T}(indices, undef::UndefInitializer) + +Construct a `OldHashDictionary` with index type `I` and element type `T`. The container is +initialized with `keys` that match the values of `indices`, but the values are unintialized. +""" +function OldHashDictionary{I, T}(indices, ::UndefInitializer) where {I, T} + return OldHashDictionary{I, T}(OldHashIndices{I}(indices), undef) +end + +function OldHashDictionary{I, T}(h::OldHashIndices{I}, ::UndefInitializer) where {I, T} + return OldHashDictionary{I, T}(h, Vector{T}(undef, length(h.slots)), nothing) +end + +function OldHashDictionary{I, T}(indices::OldHashIndices{I}, values) where {I, T} + vals = Vector{T}(undef, length(indices.slots)) + d = OldHashDictionary{I, T}(indices, vals, nothing) + + @inbounds for (i, v) in zip(tokens(indices), values) + vals[i] = v + end + + return d +end + +""" + OldHashDictionary(indices, values) + OldHashDictionary{I}(indices, values) + OldHashDictionary{I, T}(indices, values) + +Construct a `OldHashDictionary` with indices from `indices` and values from `values`, matched +in iteration order. 
+""" +function OldHashDictionary{I, T}(indices, values) where {I, T} + iter_size = Base.IteratorSize(indices) + if iter_size isa Union{Base.HasLength, Base.HasShape} + d = OldHashDictionary{I, T}(; sizehint = length(indices)*2) + else + d = OldHashDictionary{I, T}() + end + + for (i, v) in zip(indices, values) + insert!(d, i, v) + end + + return d +end +function OldHashDictionary{I}(indices, values) where {I} + if Base.IteratorEltype(values) === Base.EltypeUnknown() + # TODO: implement automatic widening from iterators of Base.EltypeUnkown + values = collect(values) + end + + return OldHashDictionary{I, eltype(values)}(indices, values) +end + +function OldHashDictionary(indices, values) + if Base.IteratorEltype(indices) === Base.EltypeUnknown() + # TODO: implement automatic widening from iterators of Base.EltypeUnkown + indices = collect(indices) + end + + return OldHashDictionary{eltype(indices)}(indices, values) +end + +""" + OldHashDictionary(dict::AbstractDictionary) + OldHashDictionary{I}(dict::AbstractDictionary) + OldHashDictionary{I, T}(dict::AbstractDictionary) + +Construct a copy of `dict` with the same keys and values. +(For copying an `AbstractDict` or other iterable of `Pair`s, see `dictionary`). 
+""" +OldHashDictionary(dict::AbstractDictionary) = OldHashDictionary(keys(dict), dict) +OldHashDictionary{I}(dict::AbstractDictionary) where {I} = OldHashDictionary{I}(keys(dict), dict) +OldHashDictionary{I, T}(dict::AbstractDictionary) where {I, T} = OldHashDictionary{I, T}(keys(dict), dict) + +## Implementation + +Base.keys(d::OldHashDictionary) = d.indices +isinsertable(d::OldHashDictionary) = true +issettable(d::OldHashDictionary) = true + +@propagate_inbounds function gettoken(d::OldHashDictionary{I}, i::I) where {I} + return gettoken(keys(d), i) +end + +@inline function gettokenvalue(d::OldHashDictionary, token) + return @inbounds d.values[token] +end + +function istokenassigned(d::OldHashDictionary, token) + return isassigned(d.values, token) +end + +@inline function settokenvalue!(d::OldHashDictionary{I, T}, token, value::T) where {I, T} + @inbounds d.values[token] = value + return d +end + +function gettoken!(d::OldHashDictionary{T}, key::T) where {T} + indices = keys(d) + (token, values) = _gettoken!(indices, d.values, key) + if token < 0 + (token, values) = _insert!(indices, values, key, -token) + d.values = values + return (false, token) + else + d.values = values + return (true, token) + end +end + +function Base.copy(d::OldHashDictionary{I, T}) where {I, T} + return OldHashDictionary{I, T}(d.indices, copy(d.values), nothing) +end + +tokenized(d::OldHashDictionary) = d.values + +function Base.empty!(d::OldHashDictionary) + empty!(d.indices) + empty!(d.values) + resize!(d.values, length(keys(d).slots)) + return d +end + +function deletetoken!(d::OldHashDictionary{I, T}, token) where {I, T} + deletetoken!(keys(d), token) + isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), d.values, token-1) + return d +end + +function Base.sizehint!(d::OldHashDictionary, sz::Int) + d.values = _sizehint!(d.indices, d.values, sz) + return d +end + +function Base.rehash!(d::OldHashDictionary, newsz::Int = length(d.inds)) + _rehash!(d.indices, d.values, newsz) + 
return d +end + +Base.filter!(pred, d::OldHashDictionary) = Base.unsafe_filter!(pred, d) + +# For `OldHashIndices` we don't copy the indices, we allow the `keys` to remain identical (`===`) +function Base.similar(indices::OldHashIndices{I}, ::Type{T}) where {I, T} + return OldHashDictionary{I, T}(indices, undef) +end + +function Base.empty(indices::OldHashIndices, ::Type{I}, ::Type{T}) where {I, T} + return OldHashDictionary{I, T}() +end \ No newline at end of file diff --git a/src/OldHashIndices.jl b/src/OldHashIndices.jl new file mode 100644 index 0000000..858fbf3 --- /dev/null +++ b/src/OldHashIndices.jl @@ -0,0 +1,332 @@ +# These can be changed, to trade off better performance for space +const global maxallowedprobe = 16 +const global maxprobeshift = 6 + +mutable struct OldHashIndices{T} <: AbstractIndices{T} + slots::Array{UInt8,1} + inds::Array{T,1} + ndel::Int + count::Int + idxfloor::Int # an index <= the indices of all used slots + maxprobe::Int +end + +OldHashIndices() = OldHashIndices{Any}() + +""" + OldHashIndices{I}() + +Construct an empty `OldHashIndices` with indices of type `I`. This container uses hashes for +fast lookup, and is insertable. (See `isinsertable`). +""" +function OldHashIndices{T}(; sizehint::Int = 16) where {T} + sz = Base._tablesz(sizehint) + OldHashIndices{T}(zeros(UInt8, sz), Vector{T}(undef, sz), 0, 0, 1, 0) +end + + +## Constructors + +""" + OldHashIndices(iter) + OldHashIndices{I}(iter) + +Construct a `OldHashIndices` with indices from iterable container `iter`. 
+""" +function OldHashIndices(iter) + if Base.IteratorEltype(iter) === Base.EltypeUnknown() + # TODO: implement automatic widening from iterators of Base.EltypeUnkown + iter = collect(iter) + end + + return OldHashIndices{eltype(iter)}(iter) +end + +function OldHashIndices{T}(iter) where {T} + iter_size = Base.IteratorSize(iter) + if iter_size isa Union{Base.HasLength, Base.HasShape} + h = OldHashIndices{T}(; sizehint = length(iter)*2) + else + h = OldHashIndices{T}() + end + + for i in iter + insert!(h, i) # should this be `set!` or `insert!`? + end + + return h +end + +function Base.copy(h::OldHashIndices{T}) where {T} + return OldHashIndices{T}(copy(h.slots), copy(h.inds), h.ndel, h.count, h.idxfloor, h.maxprobe) +end + +## Length +Base.length(h::OldHashIndices) = h.count + + +## Token interface + +istokenizable(::OldHashIndices) = true +tokentype(::OldHashIndices) = Int + +@propagate_inbounds isslotempty(h::OldHashIndices, i::Int) = h.slots[i] == 0x0 +@propagate_inbounds isslotfilled(h::OldHashIndices, i::Int) = h.slots[i] == 0x1 +@propagate_inbounds isslotdeleted(h::OldHashIndices, i::Int) = h.slots[i] == 0x2 # deletion marker/tombstone + +istokenassigned(h::OldHashIndices, i::Int) = isslotfilled(h, i) + +# iteratetoken + +function skip_deleted(h::OldHashIndices, i) + L = length(h.slots) + @inbounds while i <= L && !isslotfilled(h, i) + i += 1 + end + return i +end + +@propagate_inbounds function iteratetoken(h::OldHashIndices{T}) where {T} + idx = skip_deleted(h, h.idxfloor) + h.idxfloor = idx # An optimization to skip unnecessary elements when iterating multiple times + + if idx > length(h.inds) + return nothing + else + return (idx, idx + 1) + end +end + +@propagate_inbounds function iteratetoken(h::OldHashIndices{T}, idx::Int) where {T} + idx = skip_deleted(h, idx) + + if idx > length(h.inds) + return nothing + else + return (idx, idx + 1) + end +end + +# gettoken +function hashtoken(key, sz::Int) + # Given key what is the hash slot? 
sz is a power of two + (((hash(key)%Int) & (sz-1)) + 1)::Int +end + +function gettoken(h::OldHashIndices{T}, key::T) where {T} + sz = length(h.inds) + iter = 0 + maxprobe = h.maxprobe + token = hashtoken(key, sz) + keys = h.inds + + @inbounds while true + if isslotempty(h, token) + break + end + if !isslotdeleted(h, token) && (key === keys[token] || isequal(key, keys[token])) + return (true, token) + end + + token = (token & (sz-1)) + 1 + iter += 1 + iter > maxprobe && break + end + return (false, 0) +end + +# gettokenvalue +@propagate_inbounds function gettokenvalue(h::OldHashIndices, token::Int) + return h.inds[token] +end + + +# insertable interface +isinsertable(::OldHashIndices) = true + +function Base.empty!(h::OldHashIndices{T}) where {T} + fill!(h.slots, 0x0) # It should be OK to reduce this back to some smaller size. + sz = length(h.slots) + empty!(h.inds) + resize!(h.inds, sz) + h.ndel = 0 + h.count = 0 + h.idxfloor = 1 + return h +end + +function Base.rehash!(h::OldHashIndices, newsz::Int = length(h.inds)) + _rehash!(h, nothing, newsz) + return h +end + +function _rehash!(h::OldHashIndices{T}, oldv::Union{Nothing, Vector}, newsz::Int) where {T} + olds = h.slots + oldk = h.inds + sz = length(olds) + newsz = Base._tablesz(newsz) + h.idxfloor = 1 + if h.count == 0 + resize!(h.slots, newsz) + fill!(h.slots, 0) + resize!(h.inds, newsz) + error() + oldv === nothing || resize!(oldv, newsz) + h.ndel = 0 + return oldv + end + + slots = zeros(UInt8, newsz) + keys = Vector{T}(undef, newsz) + vals = oldv === nothing ? nothing : Vector{eltype(oldv)}(undef, newsz) + count = 0 + maxprobe = h.maxprobe + + for i ∈ 1:sz + @inbounds if olds[i] == 0x1 + k = oldk[i] + v = vals === nothing ? 
nothing : oldv[i] + index0 = index = hashtoken(k, newsz) + while slots[index] != 0 + index = (index & (newsz-1)) + 1 + end + probe = (index - index0) & (newsz-1) + probe > maxprobe && (maxprobe = probe) + slots[index] = 0x1 + keys[index] = k + vals === nothing || (vals[index] = v) + count += 1 + end + end + + h.slots = slots + h.inds = keys + h.count = count + h.ndel = 0 + h.maxprobe = maxprobe + + return vals +end + +Base.sizehint!(h::OldHashIndices, newsz::Int) = _sizehint!(h, nothing, newsz) + +function _sizehint!(h::OldHashIndices{T}, values::Union{Nothing, Vector}, newsz::Int) where {T} + oldsz = length(h.slots) + if newsz <= oldsz + # TODO: shrink + # be careful: rehash!() assumes everything fits. it was only designed + # for growing. + return hash + end + # grow at least 25% + newsz = min(max(newsz, (oldsz*5)>>2), + Base.max_values(T)) + return _rehash!(h, values, newsz) +end + + + +function gettoken!(h::OldHashIndices{T}, key::T) where {T} + (token, _) = _gettoken!(h, nothing, key) # This will make sure a slot is available at `token` (or `-token` if it is new) + + if token < 0 + @inbounds (token, _) = _insert!(h, nothing, key, -token) # This will fill the slot with `key` + return (false, token) + else + return (true, token) + end +end + +# get the index where a key is stored, or -pos if not present +# and the key would be inserted at pos +# This version is for use by insert!, set! and get! +function _gettoken!(h::OldHashIndices{T}, values::Union{Nothing, Vector}, key::T) where {T} + sz = length(h.inds) + iter = 0 + maxprobe = h.maxprobe + token = hashtoken(key, sz) + avail = 0 + keys = h.inds + + # Search of the key is present or if there is a deleted slot `key` could fill. 
+ @inbounds while true + if isslotempty(h, token) + if avail < 0 + return (avail, values) + end + return (-token, values) + end + + if isslotdeleted(h, token) + if avail == 0 + # found an available deleted slot, but we need to keep scanning + # in case `key` already exists in a later collided slot. + avail = -token + end + elseif key === keys[token] || isequal(key, keys[token]) + return (token, values) + end + + token = (token & (sz-1)) + 1 + iter += 1 + iter > maxprobe && break + end + + avail < 0 && return (avail, values) + + # The key definitely isn't present, but a slot may become available if we increase + # `maxprobe` (up to some reasonable global limits). + maxallowed = max(maxallowedprobe, sz>>maxprobeshift) + + @inbounds while iter < maxallowed + if !isslotfilled(h,token) + h.maxprobe = iter + return (-token, values) + end + token = (token & (sz-1)) + 1 + iter += 1 + end + + # If we get here, then all the probable slots are filled, and the only recourse is to + # increase the size of the hash map and try again + values = _rehash!(h, values, h.count > 64000 ? sz*2 : sz*4) + return _gettoken!(h, values, key) +end + +@propagate_inbounds function _insert!(h::OldHashIndices{T}, values::Union{Nothing, Vector}, key::T, token::Int) where {T} + h.slots[token] = 0x1 + h.inds[token] = key + h.count += 1 + if token < h.idxfloor + h.idxfloor = token + end + + # TODO revisit this... + #= + sz = length(h.inds) + # Rehash now if necessary + if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2 + # > 3/4 deleted or > 2/3 full + values = _rehash!(h, values, h.count > 64000 ? 
h.count*2 : h.count*4) + (_, token) = gettoken(h, key) + end + =# + + return (token, values) +end + + +function deletetoken!(h::OldHashIndices{T}, token::Int) where {T} + h.slots[token] = 0x2 + isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.inds, token-1) + + h.ndel += 1 + h.count -= 1 + return h +end + +# Since deleting elements doesn't mess with iteration, we can use `unsafe_filter!`` +Base.filter!(pred, h::OldHashIndices) = Base.unsafe_filter!(pred, h) + +# The default insertable indices +Base.empty(d::OldHashIndices, ::Type{T}) where {T} = OldHashIndices{T}() diff --git a/src/insertion.jl b/src/insertion.jl index e407fd8..ee45cce 100644 --- a/src/insertion.jl +++ b/src/insertion.jl @@ -399,25 +399,27 @@ end ## Filtering -# `filter!` is basically a programmatic version of `intersect!`. -function Base.filter!(pred, indices::AbstractIndices) - for i in indices - if !pred(i) - delete!(indices, i) - end - end - return indices -end - -# Dictionary version is similar -function Base.filter!(pred, dict::AbstractDictionary) - for (i, v) in pairs(dict) - if !pred(v) - delete!(dict, i) - end - end - return dict -end +# These generic implementations are gimped. + +# # `filter!` is basically a programmatic version of `intersect!`. +# function Base.filter!(pred, indices::AbstractIndices) +# for i in copy(indices) +# if !pred(i) +# delete!(indices, i) +# end +# end +# return indices +# end + +# # Dictionary version is similar +# function Base.filter!(pred, dict::AbstractDictionary) +# for (i, v) in pairs(copy(dict)) +# if !pred(v) +# delete!(dict, i) +# end +# end +# return dict +# end # This implementation is faster when deleting indices does not invalidate tokens/iteration, # and is opt-in only. Works for both dictionaries and indices @@ -458,4 +460,6 @@ of type `T` (even when the first argument is are indices). The default container Return an empty, insertable `AbstractDictionary` with indices of type `keytype(dict)` and elements of type `eltype(inds)`. 
""" -Base.empty(d::AbstractDictionary) = empty(d, keytype(d), eltype(d)) +Base.empty(d::AbstractDictionary) = empty(keys(d), keytype(d), eltype(d)) + +Base.empty(d::AbstractDictionary, ::Type{I}) where {I} = empty(keys(d), I) \ No newline at end of file From 20d634476528d3edae9a62662bdb28fc5b7c497a Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Tue, 10 Mar 2020 22:17:07 +1000 Subject: [PATCH 02/20] Added Benchmark to CI via github workflows --- .github/workflows/TagBot.yml | 11 ++ .github/workflows/benchmark.yml | 19 ++++ benchmark/Project.toml | 4 + benchmark/bench_indices.jl | 190 ++++++++++++++++++++++++++++++++ benchmark/benchmarks.jl | 12 ++ 5 files changed, 236 insertions(+) create mode 100644 .github/workflows/TagBot.yml create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmark/Project.toml create mode 100644 benchmark/bench_indices.jl create mode 100644 benchmark/benchmarks.jl diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..d77d3a0 --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,11 @@ +name: TagBot +on: + schedule: + - cron: 0 * * * * +jobs: + TagBot: + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..78e3711 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,19 @@ +name: Run benchmarks + +on: + pull_request: + +jobs: + Benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: 1.3 + - name: Install dependencies + run: julia -e 'using Pkg; pkg"add PkgBenchmark BenchmarkCI@0.1"' + - name: Run benchmarks + run: julia -e 'using BenchmarkCI; BenchmarkCI.judge()' + - name: Print judgement + run: julia -e 'using BenchmarkCI; BenchmarkCI.displayjudgement()' diff --git a/benchmark/Project.toml 
b/benchmark/Project.toml new file mode 100644 index 0000000..0963be5 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,4 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl new file mode 100644 index 0000000..2005a04 --- /dev/null +++ b/benchmark/bench_indices.jl @@ -0,0 +1,190 @@ +module BenchHashIndices + +using BenchmarkTools +using Dictionaries + +const suite = BenchmarkGroup() + +#sizes = [(8 .^ (0:8))...] +sizes = [10, 10_000] + +function build_set_by_insertion(n) + out = Set{Int}() + for i in 1:n + push!(out, i) + end + return out +end + +function build_hashindices_by_insertion(n) + out = HashIndices{Int}() + for i in 1:n + insert!(out, i) + end + return out +end + +function empty_by_deletion(set::Set, n) + for i in 1:n + delete!(set, i) + end + return set +end + +function empty_by_deletion(indices::HashIndices, n) + for i in 1:n + delete!(indices, i) + end + return indices +end + +function foreachsum(set) + count = Ref(0) + foreach(x -> count[] += 1, set) + return count[] +end + + +for n in sizes + r = 1:n + y = n ÷ 2 + pred1(x) = x != y + pred2(x) = x == y + vec = collect(r) + set = Set(r) + indices = Indices(collect(r)) + hash_indices = HashIndices(r) + + s = suite["constructor ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable Vector($r) + s["Set"] = @benchmarkable Set($r) + s["Indices"] = @benchmarkable Indices($r) + s["HashIndices"] = @benchmarkable HashIndices($r) + + s = suite["build by insertion ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable build_set_by_insertion($n) + s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) + + #s = suite["empty by deletion ($n)"] = BenchmarkGroup() + #s["Set"] = @benchmarkable empty_by_deletion($(Set(r)), $n) + #s["HashIndices"] = @benchmarkable empty_by_deletion($(HashIndices(r)), $n) + + s = 
suite["in ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable in($y, $vec) + s["Set"] = @benchmarkable in($y, $set) + s["Indices"] = @benchmarkable in($y, $indices) + s["HashIndices"] = @benchmarkable in($y, $hash_indices) + + s = suite["count ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable count(iseven, $vec) + s["Set"] = @benchmarkable count(iseven, $set) + s["Indices"] = @benchmarkable count(iseven, $indices) + s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) + + s = suite["sum ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable sum($vec) + s["Set"] = @benchmarkable sum($set) + s["Indices"] = @benchmarkable sum($indices) + s["HashIndices"] = @benchmarkable sum($hash_indices) + + s = suite["foreach ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable foreachsum($vec) + s["Set"] = @benchmarkable foreachsum($set) + s["Indices"] = @benchmarkable foreachsum($indices) + s["HashIndices"] = @benchmarkable foreachsum($hash_indices) + + s = suite["filter-map-reduce via generator ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x))) + s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) + s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x))) + s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) + + s = suite["filter (most) ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable filter($pred1, $vec) + s["Set"] = @benchmarkable filter($pred1, $set) + s["Indices"] = @benchmarkable filter($pred1, $indices) + s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) + + s = suite["filter (half) ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable filter(iseven, $vec) + s["Set"] = @benchmarkable filter(iseven, $set) + s["Indices"] = @benchmarkable filter(iseven, $indices) + s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) + + s = suite["filter (few) ($n)"] = BenchmarkGroup() + s["Vector"] = @benchmarkable filter($pred2, $vec) + 
s["Set"] = @benchmarkable filter($pred2, $set) + s["Indices"] = @benchmarkable filter($pred2, $indices) + s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) + + # s = suite["filter! (most) ($n)"] = BenchmarkGroup() + # s["Vector"] = @benchmarkable filter($pred1, $(collect(r))) + # s["Set"] = @benchmarkable filter($pred1, $(Set(r))) + # #s["Indices"] = @benchmarkable filter($pred1, $(Indices(collect(r)))) + # s["HashIndices"] = @benchmarkable filter($pred1, $(HashIndices(r))) + + # s = suite["filter! (half) ($n)"] = BenchmarkGroup() + # s["Vector"] = @benchmarkable filter(iseven, $(collect(r))) + # s["Set"] = @benchmarkable filter(iseven, $(Set(r))) + # #s["Indices"] = @benchmarkable filter(iseven, $(Indices(collect(r)))) + # s["HashIndices"] = @benchmarkable filter(iseven, $(HashIndices(r))) + + # s = suite["filter! (few) ($n)"] = BenchmarkGroup() + # s["Vector"] = @benchmarkable filter!($pred2, $(collect(r))) + # s["Set"] = @benchmarkable filter!($pred2, $(Set(r))) + # #s["Indices"] = @benchmarkable filter!($pred2, $(Indices(collect(r)))) + # s["HashIndices"] = @benchmarkable filter!($pred2, $(HashIndices(r))) + + even_set = Set(2:2:n) + odd_set = Set(1:2:n) + even_hash_indices = HashIndices(2:2:n) + odd_hash_indices = HashIndices(1:2:n) + + s = suite["union ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable union($even_set, $odd_set) + s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) + + s = suite["intersect (empty) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable intersect($even_set, $odd_set) + s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) + + s = suite["intersect (half) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable intersect($even_set, $set) + s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) + + s = suite["intersect (whole) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable intersect($set, $set) + s["HashIndices"] = @benchmarkable 
intersect($hash_indices, $hash_indices) + + s = suite["setdiff (whole) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable setdiff($even_set, $odd_set) + s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) + + s = suite["setdiff (half) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable setdiff($even_set, $set) + s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) + + s = suite["setdiff (empty) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable setdiff($set, $set) + s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) + + s = suite["symdiff (whole) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable symdiff($even_set, $odd_set) + s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) + + s = suite["symdiff (left half) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable symdiff($set, $odd_set) + s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) + + s = suite["symdiff (right half) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable symdiff($even_set, $set) + s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) + + s = suite["symdiff (empty) ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable symdiff($set, $set) + s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) +end + +end # module + +BenchHashIndices.suite diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 0000000..e3b408d --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,12 @@ +# Each file of the form "bench_$(name).jl" in this directory is `include`d and +# its last statement is assumed to be a `BenchmarkGroup`. This group is added +# to the top-level group `SUITE` with the `$name` extracted from the file name. 
+ +using BenchmarkTools +const SUITE = BenchmarkGroup() +for file in sort(readdir(@__DIR__)) + if startswith(file, "bench_") && endswith(file, ".jl") + SUITE[chop(file, head = length("bench_"), tail = length(".jl"))] = + include(file) + end +end From 7330f19bc289d0cd2956abbf6f1a4dbf292cae32 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Sun, 31 May 2020 15:38:07 +1000 Subject: [PATCH 03/20] Use github actions for tests etc --- .github/workflows/CompatHelper.yml | 26 ++++++++++++++ .github/workflows/test.yml | 51 ++++++++++++++++++++++++++++ README.md | 2 +- benchmark/bench_indices.jl | 54 ++++++++++++++++-------------- 4 files changed, 106 insertions(+), 27 deletions(-) create mode 100644 .github/workflows/CompatHelper.yml create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000..12aae0b --- /dev/null +++ b/.github/workflows/CompatHelper.yml @@ -0,0 +1,26 @@ + +name: CompatHelper + +on: + schedule: + - cron: '00 00 * * *' + +jobs: + CompatHelper: + runs-on: ${{ matrix.os }} + strategy: + matrix: + julia-version: [1] + julia-arch: [x64] + os: [ubuntu-latest] + steps: + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.julia-version }} + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..04ae0cb --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,51 @@ +name: Test +on: + push: + branches: + - master + tags: '*' + pull_request: +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.version == 'nightly' }} + 
strategy: + matrix: + version: + - '1.0' + - '1.1' + - '1.2' + - '1.3' + - '1.4' + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x86 + - x64 + exclude: + # Remove some configurations from the build matrix to reduce CI time. + # See https://github.com/marketplace/actions/setup-julia-environment + # MacOS not available on x86 + - {os: 'macOS-latest', arch: 'x86'} + # Don't test on all versions + - {os: 'macOS-latest', version: '1.1'} + - {os: 'macOS-latest', version: '1.2'} + - {os: 'macOS-latest', version: '1.3'} + - {os: 'windows-latest', version: '1.1'} + - {os: 'windows-latest', version: '1.2'} + - {os: 'windows-latest', version: '1.3'} + steps: + - uses: actions/checkout@v1 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + - uses: julia-actions/julia-uploadcodecov@latest + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/README.md b/README.md index 8f29a8a..12c0f36 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ *An alternative interface for dictionaries in Julia, for improved productivity and performance* -[![Build Status](https://travis-ci.org/andyferris/Dictionaries.jl.svg?branch=master)](https://travis-ci.org/andyferris/Dictionaries.jl) +![Test Status](https://github.com/andyferris/Dictionaries.jl/workflows/Test/badge.svg) [![Codecov](https://codecov.io/gh/andyferris/Dictionaries.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/andyferris/Dictionaries.jl) This package is still quite young - new features are being added and some (low-level) interfaces may be tweaked in the future, but things should be stable enough for general usage. Contributions welcome - please submit an issue or PR! 
diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl index 2005a04..7168536 100644 --- a/benchmark/bench_indices.jl +++ b/benchmark/bench_indices.jl @@ -6,7 +6,7 @@ using Dictionaries const suite = BenchmarkGroup() #sizes = [(8 .^ (0:8))...] -sizes = [10, 10_000] +sizes = [10, 10_000, 10_000_000] function build_set_by_insertion(n) out = Set{Int}() @@ -55,81 +55,83 @@ for n in sizes indices = Indices(collect(r)) hash_indices = HashIndices(r) - s = suite["constructor ($n)"] = BenchmarkGroup() + suite_n = suite["$n"] = BenchmarkGroup() + + s = suite_n["constructor ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable Vector($r) s["Set"] = @benchmarkable Set($r) s["Indices"] = @benchmarkable Indices($r) s["HashIndices"] = @benchmarkable HashIndices($r) - s = suite["build by insertion ($n)"] = BenchmarkGroup() + s = suite_n["build by insertion ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable build_set_by_insertion($n) s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) - #s = suite["empty by deletion ($n)"] = BenchmarkGroup() + #s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() #s["Set"] = @benchmarkable empty_by_deletion($(Set(r)), $n) #s["HashIndices"] = @benchmarkable empty_by_deletion($(HashIndices(r)), $n) - s = suite["in ($n)"] = BenchmarkGroup() + s = suite_n["in ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable in($y, $vec) s["Set"] = @benchmarkable in($y, $set) s["Indices"] = @benchmarkable in($y, $indices) s["HashIndices"] = @benchmarkable in($y, $hash_indices) - s = suite["count ($n)"] = BenchmarkGroup() + s = suite_n["count ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable count(iseven, $vec) s["Set"] = @benchmarkable count(iseven, $set) s["Indices"] = @benchmarkable count(iseven, $indices) s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) - s = suite["sum ($n)"] = BenchmarkGroup() + s = suite_n["sum ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable sum($vec) s["Set"] = @benchmarkable 
sum($set) s["Indices"] = @benchmarkable sum($indices) s["HashIndices"] = @benchmarkable sum($hash_indices) - s = suite["foreach ($n)"] = BenchmarkGroup() + s = suite_n["foreach ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable foreachsum($vec) s["Set"] = @benchmarkable foreachsum($set) s["Indices"] = @benchmarkable foreachsum($indices) s["HashIndices"] = @benchmarkable foreachsum($hash_indices) - s = suite["filter-map-reduce via generator ($n)"] = BenchmarkGroup() + s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x))) s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x))) s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) - s = suite["filter (most) ($n)"] = BenchmarkGroup() + s = suite_n["filter (most) ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable filter($pred1, $vec) s["Set"] = @benchmarkable filter($pred1, $set) s["Indices"] = @benchmarkable filter($pred1, $indices) s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) - s = suite["filter (half) ($n)"] = BenchmarkGroup() + s = suite_n["filter (half) ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable filter(iseven, $vec) s["Set"] = @benchmarkable filter(iseven, $set) s["Indices"] = @benchmarkable filter(iseven, $indices) s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) - s = suite["filter (few) ($n)"] = BenchmarkGroup() + s = suite_n["filter (few) ($n)"] = BenchmarkGroup() s["Vector"] = @benchmarkable filter($pred2, $vec) s["Set"] = @benchmarkable filter($pred2, $set) s["Indices"] = @benchmarkable filter($pred2, $indices) s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) - # s = suite["filter! (most) ($n)"] = BenchmarkGroup() + # s = suite_n["filter! 
(most) ($n)"] = BenchmarkGroup() # s["Vector"] = @benchmarkable filter($pred1, $(collect(r))) # s["Set"] = @benchmarkable filter($pred1, $(Set(r))) # #s["Indices"] = @benchmarkable filter($pred1, $(Indices(collect(r)))) # s["HashIndices"] = @benchmarkable filter($pred1, $(HashIndices(r))) - # s = suite["filter! (half) ($n)"] = BenchmarkGroup() + # s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() # s["Vector"] = @benchmarkable filter(iseven, $(collect(r))) # s["Set"] = @benchmarkable filter(iseven, $(Set(r))) # #s["Indices"] = @benchmarkable filter(iseven, $(Indices(collect(r)))) # s["HashIndices"] = @benchmarkable filter(iseven, $(HashIndices(r))) - # s = suite["filter! (few) ($n)"] = BenchmarkGroup() + # s = suite_n["filter! (few) ($n)"] = BenchmarkGroup() # s["Vector"] = @benchmarkable filter!($pred2, $(collect(r))) # s["Set"] = @benchmarkable filter!($pred2, $(Set(r))) # #s["Indices"] = @benchmarkable filter!($pred2, $(Indices(collect(r)))) @@ -140,47 +142,47 @@ for n in sizes even_hash_indices = HashIndices(2:2:n) odd_hash_indices = HashIndices(1:2:n) - s = suite["union ($n)"] = BenchmarkGroup() + s = suite_n["union ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable union($even_set, $odd_set) s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) - s = suite["intersect (empty) ($n)"] = BenchmarkGroup() + s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable intersect($even_set, $odd_set) s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) - s = suite["intersect (half) ($n)"] = BenchmarkGroup() + s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable intersect($even_set, $set) s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) - s = suite["intersect (whole) ($n)"] = BenchmarkGroup() + s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable intersect($set, $set) s["HashIndices"] = 
@benchmarkable intersect($hash_indices, $hash_indices) - s = suite["setdiff (whole) ($n)"] = BenchmarkGroup() + s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable setdiff($even_set, $odd_set) s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) - s = suite["setdiff (half) ($n)"] = BenchmarkGroup() + s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable setdiff($even_set, $set) s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) - s = suite["setdiff (empty) ($n)"] = BenchmarkGroup() + s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable setdiff($set, $set) s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) - s = suite["symdiff (whole) ($n)"] = BenchmarkGroup() + s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable symdiff($even_set, $odd_set) s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) - s = suite["symdiff (left half) ($n)"] = BenchmarkGroup() + s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable symdiff($set, $odd_set) s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) - s = suite["symdiff (right half) ($n)"] = BenchmarkGroup() + s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable symdiff($even_set, $set) s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) - s = suite["symdiff (empty) ($n)"] = BenchmarkGroup() + s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable symdiff($set, $set) s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) end From afa4c382d63229f935767eaa2afc5216a1dffbfb Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Sun, 31 May 2020 21:36:05 +1000 Subject: [PATCH 04/20] Improve CI, add tests, fix a deleted bug Still doesn't pass tests though... 
--- Project.toml | 2 +- src/HashIndices.jl | 27 +++++++++--------- test/HashDictionary.jl | 62 ++++++++++++++++++++++++++++++++++++++++++ test/HashIndices.jl | 48 ++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 14 deletions(-) diff --git a/Project.toml b/Project.toml index a30d161..4c9cdd9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ authors = ["Andy Ferris "] name = "Dictionaries" uuid = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4" -version = "0.2.1" +version = "0.3.0" [deps] Indexing = "313cdc1a-70c2-5d6a-ae34-0150d3930a38" diff --git a/src/HashIndices.jl b/src/HashIndices.jl index 72596fe..4a7dc63 100644 --- a/src/HashIndices.jl +++ b/src/HashIndices.jl @@ -179,10 +179,10 @@ function gettoken(indices::HashIndices{I}, i::I) where {I} trial_index = indices.slots[trial_slot] if trial_index == 0 return (false, (0, 0)) - end - - if full_hash === indices.hashes[trial_index] && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) - return (true, (trial_slot, trial_index)) + elseif trial_index > 0 + if full_hash === indices.hashes[trial_index] && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) + return (true, (trial_slot, trial_index)) + end end trial_slot = trial_slot & bit_mask @@ -213,15 +213,16 @@ function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I} trial_index = indices.slots[trial_slot] if trial_index == 0 break - end - if trial_index < 0 && deleted_slot == 0 - deleted_slot = trial_slot - end - - trial_hash = indices.hashes[trial_index] + elseif trial_index < 0 + if deleted_slot == 0 + deleted_slot = trial_slot + end + else + trial_hash = indices.hashes[trial_index] - if trial_hash === full_hash && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) - return (true, (trial_slot, 
trial_index)) + if trial_hash === full_hash && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) + return (true, (trial_slot, trial_index)) + end end trial_slot = trial_slot & bit_mask @@ -234,7 +235,7 @@ function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I} indices.slots[trial_slot] = new_index else # Use the deleted slot - indices.slots[trial_slot] = new_index + indices.slots[deleted_slot] = new_index indices.deleted -= 1 end push!(indices.hashes, full_hash) diff --git a/test/HashDictionary.jl b/test/HashDictionary.jl index 3b1f664..f1da44d 100644 --- a/test/HashDictionary.jl +++ b/test/HashDictionary.jl @@ -106,4 +106,66 @@ @test isempty(empty!(d)) # TODO token interface + + @testset "Dict tests from Base" begin + h = HashDictionary{Int, Int}() + + for i in 1:10000 + insert!(h, i, i+1) + end + for i in 1:10000 + @test h[i] == i+1 + end + for i in 1:2:10000 + delete!(h, i) + end + for i in 1:10000 + if iseven(i) + @test h[i] == i+1 + else + @test_throws IndexError h[i] + end + end + for i in 1:2:10000 + insert!(h, i, i+1) + end + for i in 1:10000 + @test h[i] == i+1 + end + for i in 1:10000 + delete!(h, i) + end + @test isempty(h) + insert!(h, 77, 100) + @test h[77] == 100 + for i in 1:10000 + set!(h, i, i+1) + end + for i in 1:10000 + @test h[i] == i+1 + end + for i in 1:2:10000 + delete!(h, i) + end + for i in 1:10000 + if iseven(i) + @test h[i] == i+1 + else + @test_throws IndexError h[i] + end + end + for i in 10001:20000 + insert!(h, i, i+1) + end + for i in 1:10000 + if iseven(i) + @test h[i] == i+1 + else + @test_throws IndexError h[i] + end + end + for i in 10000:20000 + @test h[i] == i+1 + end + end end \ No newline at end of file diff --git a/test/HashIndices.jl b/test/HashIndices.jl index b06dfba..9dc15b0 100644 --- a/test/HashIndices.jl +++ b/test/HashIndices.jl @@ -46,5 +46,53 @@ @test all(in(i, h) == iseven(i) for i in 2:1000) @test isempty(empty!(h)) + 
@testset "Adapated from Dict tests from Base" begin + h = HashIndices{Int}() + N = 10000 + + for i in 1:N + insert!(h, i) + end + for i in 1:N + @test i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + @test (i in h) == iseven(i) + end + for i in 1:2:N + insert!(h, i) + end + for i in 1:N + @test i in h + end + for i in 1:N + delete!(h, i) + end + @test isempty(h) + insert!(h, 77) + @test 77 in h + for i in 1:N + set!(h, i) + end + for i in 1:N + @test i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + @test (i in h) == iseven(i) + end + for i in N+1:2N + insert!(h, i) + end + for i in 1:2N + @test (i in h) == i > N || h == iseven(i) + end + end + # TODO: token interface end \ No newline at end of file From 0ae4d60be47494c9eafee09f7911c4f06b9bc414 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Mon, 1 Jun 2020 10:33:00 +1000 Subject: [PATCH 05/20] Fix the implementation :) --- src/HashIndices.jl | 55 +++++++++++++++++++++++---------------------- test/HashIndices.jl | 2 +- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/HashIndices.jl b/src/HashIndices.jl index 4a7dc63..88723ad 100644 --- a/src/HashIndices.jl +++ b/src/HashIndices.jl @@ -9,7 +9,7 @@ mutable struct HashIndices{I} <: AbstractIndices{I} hashes::Vector{UInt} # Deletion marker stored in high bit values::Vector{I} - deleted::Int + holes::Int # Number of "vacant" slots in hashes and values end HashIndices(; sizehint = 8) = HashIndices{Any}(; sizehint = sizehint) @@ -27,7 +27,6 @@ Construct a `HashIndices` with indices from iterable container `iter`. 
""" function HashIndices(iter) if Base.IteratorEltype(iter) === Base.EltypeUnknown() - # TODO: implement automatic widening from iterators of Base.EltypeUnkown iter = collect(iter) end @@ -61,10 +60,10 @@ function HashIndices{I}(values::Vector{I}) where {I} end function Base.copy(indices::HashIndices{I}) where {I} - if indices.deleted == 0 + if indices.holes == 0 return HashIndices{I}(copy(indices.slots), copy(indices.hashes), copy(indices.values), 0) else - out = HashIndices{I}(Vector{Int}(), copy(indices.hashes), copy(indices.values), indices.deleted) + out = HashIndices{I}(Vector{Int}(), copy(indices.hashes), copy(indices.values), indices.holes) newsize = Base._tablesz(3*length(indices) >> 0x01) rehash!(out, newsize) end @@ -76,7 +75,7 @@ function rehash!(indices::HashIndices{I}, newsize::Int, values = (), include_las fill!(slots, 0) bit_mask = newsize - 1 # newsize is a power of two - if indices.deleted == 0 + if indices.holes == 0 for (index, full_hash) in enumerate(indices.hashes) trial_slot = reinterpret(Int, full_hash) & bit_mask @inbounds while true @@ -87,11 +86,11 @@ function rehash!(indices::HashIndices{I}, newsize::Int, values = (), include_las else trial_slot = trial_slot & bit_mask end - # This is potentially an infinte loop and care must be taken by the callee not - # to overfill the container + # This is potentially an infinte loop and care must be taken not to overfill the container end end else + # Compactify indices.values, indices.hashes and the values while we are at it to_index = Ref(1) # Reassigning to to_index/from_index gives the closure capture boxing issue, so mutate a reference instead from_index = Ref(1) n_values = length(indices.values) @@ -106,6 +105,9 @@ function rehash!(indices::HashIndices{I}, newsize::Int, values = (), include_las indices.hashes[to_index[]] = indices.hashes[from_index[]] indices.values[to_index[]] = indices.values[from_index[]] if include_last_values || from_index[] < n_values + # Note - the last slot might end 
up with a random value (or + # GC'd reference). It's the callers responsibility to ensure the + # last slot is written to after this operation. map(values) do (vals) @inbounds vals[to_index[]] = vals[from_index[]] end @@ -121,18 +123,18 @@ function rehash!(indices::HashIndices{I}, newsize::Int, values = (), include_las from_index[] += 1 end - new_size = length(indices.values) - indices.deleted + new_size = n_values - indices.holes resize!(indices.values, new_size) resize!(indices.hashes, new_size) map(values) do (vals) resize!(vals, new_size) end - indices.deleted = 0 + indices.holes = 0 end return indices end -Base.length(indices::HashIndices) = length(indices.values) - indices.deleted +Base.length(indices::HashIndices) = length(indices.values) - indices.holes # Token interface istokenizable(::HashIndices) = true @@ -141,7 +143,7 @@ tokentype(::HashIndices) = Int # Duration iteration the token cannot be used for deletion - we do not worry about the slots @propagate_inbounds function iteratetoken(indices::HashIndices) - if indices.deleted == 0 + if indices.holes == 0 return length(indices) > 0 ? ((0, 1), 1) : nothing end index = 1 @@ -156,7 +158,7 @@ end @propagate_inbounds function iteratetoken(indices::HashIndices, index::Int) index += 1 - if indices.deleted == 0 # apparently this is enough to make it iterate as fast as `Vector` + if indices.holes == 0 # apparently this is enough to make it iterate as fast as `Vector` return index <= length(indices.values) ? 
((0, index), index) : nothing end @inbounds while index <= length(indices.hashes) @@ -203,7 +205,6 @@ function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I} n_slots = length(indices.slots) bit_mask = n_slots - 1 # n_slots is always a power of two n_values = length(indices.values) - new_index = n_values + 1 trial_slot = reinterpret(Int, full_hash) & bit_mask trial_index = 0 @@ -230,27 +231,30 @@ function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I} # to completely fill the container end + new_index = n_values + 1 if deleted_slot == 0 # Use the trail slot indices.slots[trial_slot] = new_index else # Use the deleted slot indices.slots[deleted_slot] = new_index - indices.deleted -= 1 end push!(indices.hashes, full_hash) push!(indices.values, i) map(values) do (vals) resize!(vals, length(vals) + 1) end - + # Expand the hash map when it reaches 2/3rd full if 3 * new_index > 2 * n_slots # Grow faster for small hash maps than for large ones newsize = n_slots > 16000 ? 
2 * n_slots : 4 * n_slots rehash!(indices, newsize, values, false) - # The slot almost certainly has changed + # The index has changed + new_index = length(indices.values) + + # The slot also has changed bit_mask = newsize - 1 trial_slot = reinterpret(Int, full_hash) & bit_mask @inbounds while true @@ -260,9 +264,6 @@ function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I} end trial_slot = trial_slot & bit_mask end - - # The index may have changed - new_index = length(indices.values) end return (false, (trial_slot, new_index)) @@ -275,13 +276,13 @@ end indices.slots[slot] = -index indices.hashes[index] = deletion_mask isbitstype(I) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), indices.values, index-1) - indices.deleted += 1 + indices.holes += 1 # Recreate the hash map when 1/3rd of the values are deletions - n_slots = length(indices.slots) - n_values = length(indices.values) - indices.deleted - if 3 * indices.deleted > n_values + n_values = length(indices.values) - indices.holes + if 3 * indices.holes > n_values # Halve if necessary + n_slots = length(indices.slots) halve = 4 * n_values < n_slots && n_slots > 8 rehash!(indices, halve ? 
n_slots >> 0x01 : n_slots, values) end @@ -293,7 +294,7 @@ function Base.empty!(indices::HashIndices{I}) where {I} indices.hashes = Vector{UInt}() indices.values = Vector{I}() indices.slots = fill(0, 8) - indices.deleted = 0 + indices.holes = 0 return indices end @@ -301,13 +302,13 @@ end # Accelerated filtering function Base.filter!(pred, indices::HashIndices) - _filter!(i -> pred(@inbounds indices.values[i]), keys(indices.values), indices.values, indices.hashes, ()) - indices.deleted = 0 + _filter!(i -> pred(@inbounds indices.values[i]), indices.values, indices.hashes, ()) + indices.holes = 0 newsize = Base._tablesz(3*length(indices.values) >> 0x01) rehash!(indices, newsize) end -@inline function _filter!(pred, range, indices, hashes, values = ()) +@inline function _filter!(pred, indices, hashes, values = ()) n = length(indices) i = Ref(0) j = Ref(0) diff --git a/test/HashIndices.jl b/test/HashIndices.jl index 9dc15b0..fec66fa 100644 --- a/test/HashIndices.jl +++ b/test/HashIndices.jl @@ -90,7 +90,7 @@ insert!(h, i) end for i in 1:2N - @test (i in h) == i > N || h == iseven(i) + @test (i in h) == (i > N || iseven(i)) end end From 320cd057c1da373f92e3c7bf73b3275d5061f7e5 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Mon, 1 Jun 2020 22:19:21 +1000 Subject: [PATCH 06/20] Fix benchmarks, add union etc. --- benchmark/bench_indices.jl | 198 +++++++++++++++++++++++++++---------- src/AbstractIndices.jl | 48 +++++++++ src/Indices.jl | 5 + 3 files changed, 200 insertions(+), 51 deletions(-) diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl index 7168536..2ed7e5e 100644 --- a/benchmark/bench_indices.jl +++ b/benchmark/bench_indices.jl @@ -6,7 +6,16 @@ using Dictionaries const suite = BenchmarkGroup() #sizes = [(8 .^ (0:8))...] 
-sizes = [10, 10_000, 10_000_000] +sizes = [10, 100, 1000, 10_000] #, 10_000, 10_000_000] +cutoff = 101 + +function build_vector_by_insertion(n) + out = Vector{Int}() + for i in 1:n + push!(out, i) + end + return out +end function build_set_by_insertion(n) out = Set{Int}() @@ -16,6 +25,14 @@ function build_set_by_insertion(n) return out end +function build_indices_by_insertion(n) + out = Indices{Int}() + for i in 1:n + insert!(out, i) + end + return out +end + function build_hashindices_by_insertion(n) out = HashIndices{Int}() for i in 1:n @@ -24,6 +41,21 @@ function build_hashindices_by_insertion(n) return out end +function build_old_hashindices_by_insertion(n) + out = Dictionaries.OldHashIndices{Int}() + for i in 1:n + insert!(out, i) + end + return out +end + +function empty_by_deletion(set::Vector, n) + for i in 1:n + pop!(set) + end + return set +end + function empty_by_deletion(set::Set, n) for i in 1:n delete!(set, i) @@ -31,7 +63,7 @@ function empty_by_deletion(set::Set, n) return set end -function empty_by_deletion(indices::HashIndices, n) +function empty_by_deletion(indices::AbstractIndices, n) for i in 1:n delete!(indices, i) end @@ -50,141 +82,205 @@ for n in sizes y = n ÷ 2 pred1(x) = x != y pred2(x) = x == y - vec = collect(r) + + if n < cutoff + vec = collect(r) + even_vec = collect(2:2:n) + odd_vec = collect(1:2:n) + + indices = Indices(collect(r)) + even_indices = Indices(collect(2:2:n)) + odd_indices = Indices(collect(1:2:n)) + end + set = Set(r) - indices = Indices(collect(r)) + even_set = Set(2:2:n) + odd_set = Set(1:2:n) + hash_indices = HashIndices(r) + even_hash_indices = HashIndices(2:2:n) + odd_hash_indices = HashIndices(1:2:n) + + old_hash_indices = Dictionaries.OldHashIndices(r) + even_old_hash_indices = Dictionaries.OldHashIndices(2:2:n) + odd_old_hash_indices = Dictionaries.OldHashIndices(1:2:n) suite_n = suite["$n"] = BenchmarkGroup() s = suite_n["constructor ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable Vector($r) + n < 
cutoff && (s["Vector"] = @benchmarkable Vector($r)) s["Set"] = @benchmarkable Set($r) - s["Indices"] = @benchmarkable Indices($r) + n < cutoff && (s["Indices"] = @benchmarkable Indices($r)) s["HashIndices"] = @benchmarkable HashIndices($r) + s["OldHashIndices"] = @benchmarkable HashIndices($r) s = suite_n["build by insertion ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector (push!)"] = @benchmarkable build_vector_by_insertion($n)) s["Set"] = @benchmarkable build_set_by_insertion($n) + n < cutoff && (s["Set"] = @benchmarkable build_indices_by_insertion($n)) s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) - - #s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() - #s["Set"] = @benchmarkable empty_by_deletion($(Set(r)), $n) - #s["HashIndices"] = @benchmarkable empty_by_deletion($(HashIndices(r)), $n) + s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) + + s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=collect($r))) + s["Set"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Set($r)) + n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Indices(collect($r)))) + s["HashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=HashIndices($r)) + s["OldHashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Dictionaries.OldHashIndices($r)) s = suite_n["in ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable in($y, $vec) + n < cutoff && (s["Vector"] = @benchmarkable in($y, $vec)) s["Set"] = @benchmarkable in($y, $set) - s["Indices"] = @benchmarkable in($y, $indices) + n < cutoff && (s["Indices"] = @benchmarkable in($y, $indices)) s["HashIndices"] = @benchmarkable in($y, $hash_indices) + s["OldHashIndices"] = @benchmarkable in($y, $old_hash_indices) s = suite_n["count ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable count(iseven, $vec) + n < cutoff && (s["Vector"] = 
@benchmarkable count(iseven, $vec)) s["Set"] = @benchmarkable count(iseven, $set) - s["Indices"] = @benchmarkable count(iseven, $indices) + n < cutoff && (s["Indices"] = @benchmarkable count(iseven, $indices)) s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) + s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) s = suite_n["sum ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable sum($vec) + n < cutoff && (s["Vector"] = @benchmarkable sum($vec)) s["Set"] = @benchmarkable sum($set) - s["Indices"] = @benchmarkable sum($indices) + n < cutoff && (s["Indices"] = @benchmarkable sum($indices)) s["HashIndices"] = @benchmarkable sum($hash_indices) + s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) s = suite_n["foreach ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable foreachsum($vec) + n < cutoff && (s["Vector"] = @benchmarkable foreachsum($vec)) s["Set"] = @benchmarkable foreachsum($set) - s["Indices"] = @benchmarkable foreachsum($indices) + n < cutoff && (s["Indices"] = @benchmarkable foreachsum($indices)) s["HashIndices"] = @benchmarkable foreachsum($hash_indices) + s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x))) + n < cutoff && (s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x)))) s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) - s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x))) + n < cutoff && (s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x)))) s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) + s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if isodd(x))) s = suite_n["filter (most) ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable filter($pred1, $vec) + n < cutoff && (s["Vector"] = @benchmarkable filter($pred1, $vec)) s["Set"] = 
@benchmarkable filter($pred1, $set) - s["Indices"] = @benchmarkable filter($pred1, $indices) + n < cutoff && (s["Indices"] = @benchmarkable filter($pred1, $indices)) s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) + s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) s = suite_n["filter (half) ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable filter(iseven, $vec) + n < cutoff && (s["Vector"] = @benchmarkable filter(iseven, $vec)) s["Set"] = @benchmarkable filter(iseven, $set) - s["Indices"] = @benchmarkable filter(iseven, $indices) + n < cutoff && (s["Indices"] = @benchmarkable filter(iseven, $indices)) s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) + s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) s = suite_n["filter (few) ($n)"] = BenchmarkGroup() - s["Vector"] = @benchmarkable filter($pred2, $vec) + n < cutoff && (s["Vector"] = @benchmarkable filter($pred2, $vec)) s["Set"] = @benchmarkable filter($pred2, $set) - s["Indices"] = @benchmarkable filter($pred2, $indices) + n < cutoff && (s["Indices"] = @benchmarkable filter($pred2, $indices)) s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) - - # s = suite_n["filter! (most) ($n)"] = BenchmarkGroup() - # s["Vector"] = @benchmarkable filter($pred1, $(collect(r))) - # s["Set"] = @benchmarkable filter($pred1, $(Set(r))) - # #s["Indices"] = @benchmarkable filter($pred1, $(Indices(collect(r)))) - # s["HashIndices"] = @benchmarkable filter($pred1, $(HashIndices(r))) - - # s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() - # s["Vector"] = @benchmarkable filter(iseven, $(collect(r))) - # s["Set"] = @benchmarkable filter(iseven, $(Set(r))) - # #s["Indices"] = @benchmarkable filter(iseven, $(Indices(collect(r)))) - # s["HashIndices"] = @benchmarkable filter(iseven, $(HashIndices(r))) - - # s = suite_n["filter! 
(few) ($n)"] = BenchmarkGroup() - # s["Vector"] = @benchmarkable filter!($pred2, $(collect(r))) - # s["Set"] = @benchmarkable filter!($pred2, $(Set(r))) - # #s["Indices"] = @benchmarkable filter!($pred2, $(Indices(collect(r)))) - # s["HashIndices"] = @benchmarkable filter!($pred2, $(HashIndices(r))) - - even_set = Set(2:2:n) - odd_set = Set(1:2:n) - even_hash_indices = HashIndices(2:2:n) - odd_hash_indices = HashIndices(1:2:n) + s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) + + s = suite_n["filter! (most) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, s) setup=(s=collect($r))) + s["Set"] = @benchmarkable filter!($pred1, s) setup=(s=Set($r)) + n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, s) setup=(s=Indices(collect($r)))) + s["HashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=HashIndices($r)) + s["OldHashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=Dictionaries.OldHashIndices($r)) + + s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter!($iseven, s) setup=(s=collect($r))) + s["Set"] = @benchmarkable filter!($iseven, s) setup=(s=Set($r)) + n < cutoff && (s["Indices"] = @benchmarkable filter!($iseven, s) setup=(s=Indices(collect($r)))) + s["HashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=HashIndices($r)) + s["OldHashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=Dictionaries.OldHashIndices($r)) + + s = suite_n["filter! 
(few) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, s) setup=(s=collect($r))) + s["Set"] = @benchmarkable filter!($pred2, s) setup=(s=Set($r)) + n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, s) setup=(s=Indices(collect($r)))) + s["HashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=HashIndices($r)) + s["OldHashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=Dictionaries.OldHashIndices($r)) s = suite_n["union ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) s["Set"] = @benchmarkable union($even_set, $odd_set) + n < cutoff && (s["Indices"] = @benchmarkable union($even_indices, $even_indices)) s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $odd_vec)) s["Set"] = @benchmarkable intersect($even_set, $odd_set) + n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $odd_indices)) s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $vec)) s["Set"] = @benchmarkable intersect($even_set, $set) + n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $indices)) s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $old_hash_indices) s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable intersect($vec, $vec)) s["Set"] = @benchmarkable intersect($set, $set) + n < cutoff && (s["Indices"] = 
@benchmarkable intersect($indices, $indices)) s["HashIndices"] = @benchmarkable intersect($hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $odd_vec)) s["Set"] = @benchmarkable setdiff($even_set, $odd_set) + n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $odd_indices)) s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $vec)) s["Set"] = @benchmarkable setdiff($even_set, $set) + n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $indices)) s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $old_hash_indices) s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable setdiff($vec, $vec)) s["Set"] = @benchmarkable setdiff($set, $set) + n < cutoff && (s["Indices"] = @benchmarkable setdiff($indices, $indices)) s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $odd_vec)) s["Set"] = @benchmarkable symdiff($even_set, $odd_set) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $odd_indices)) s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() + n < cutoff && 
(s["Vector"] = @benchmarkable symdiff($vec, $odd_vec)) s["Set"] = @benchmarkable symdiff($set, $odd_set) - s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $odd_indices)) + s["HashIndices"] = @benchmarkable symdiff($hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $vec)) s["Set"] = @benchmarkable symdiff($even_set, $set) - s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $indices)) + s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $vec)) s["Set"] = @benchmarkable symdiff($set, $set) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $indices)) s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) end end # module diff --git a/src/AbstractIndices.jl b/src/AbstractIndices.jl index 84b8a44..6296652 100644 --- a/src/AbstractIndices.jl +++ b/src/AbstractIndices.jl @@ -206,3 +206,51 @@ function Base.hash(inds::AbstractIndices, h::UInt) return hash(hash(UInt === UInt64 ? 
0x8955a87bc313a509 : 0xa9cff5d1, h1), h1) end + +function Base.union(i::AbstractIndices, itr) + if isinsertable(i) + out = copy(i) + union!(out, itr) + else + out = empty(i) + union!(out, i) + union!(out, itr) + end + return out +end + +function Base.intersect(i::AbstractIndices, itr) + if isinsertable(i) + out = copy(i) + intersect!(out, itr) + else + out = empty(i) + intersect!(out, i) + intersect!(out, itr) + end + return out +end + +function Base.setdiff(i::AbstractIndices, itr) + if isinsertable(i) + out = copy(i) + setdiff!(out, itr) + else + out = empty(i) + setdiff!(out, i) + setdiff!(out, itr) + end + return out +end + +function Base.symdiff(i::AbstractIndices, itr) + if isinsertable(i) + out = copy(i) + symdiff!(out, itr) + else + out = empty(i) + symdiff!(out, i) + symdiff!(out, itr) + end + return out +end diff --git a/src/Indices.jl b/src/Indices.jl index f6650ce..0a7855a 100644 --- a/src/Indices.jl +++ b/src/Indices.jl @@ -69,3 +69,8 @@ end Base.empty(inds::VectorIndices, ::Type{I}) where {I} = Indices{I, Vector{I}}(Vector{I}()) Base.copy(inds::VectorIndices) = Indices(copy(inds.inds)) + +function Base.filter!(pred, inds::VectorIndices) + filter!(pred, inds.inds) + return inds +end \ No newline at end of file From 2fe4c7a14aa94616fc9a87181d87f21f4aac032d Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Tue, 2 Jun 2020 10:50:58 +1000 Subject: [PATCH 07/20] Add some `@inbounds` --- .github/workflows/benchmark.yml | 2 +- benchmark/bench_indices.jl | 44 ++++++++++++++++----------------- src/HashIndices.jl | 6 ++--- src/insertion.jl | 18 +++++++------- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 78e3711..081759a 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -10,7 +10,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: 1.3 + version: 1.4 - name: Install dependencies run: julia -e 
'using Pkg; pkg"add PkgBenchmark BenchmarkCI@0.1"' - name: Run benchmarks diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl index 2ed7e5e..2533ed0 100644 --- a/benchmark/bench_indices.jl +++ b/benchmark/bench_indices.jl @@ -6,7 +6,7 @@ using Dictionaries const suite = BenchmarkGroup() #sizes = [(8 .^ (0:8))...] -sizes = [10, 100, 1000, 10_000] #, 10_000, 10_000_000] +sizes = [10 ]#, 100, 1000, 10_000] #, 10_000, 10_000_000] cutoff = 101 function build_vector_by_insertion(n) @@ -122,12 +122,12 @@ for n in sizes s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=collect($r))) - s["Set"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Set($r)) - n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Indices(collect($r)))) - s["HashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=HashIndices($r)) - s["OldHashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Dictionaries.OldHashIndices($r)) - + n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=collect($r)) evals=1) + s["Set"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Set($r)) evals=1 + n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Indices(collect($r))) evals=1) + s["HashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=HashIndices($r)) evals=1 + s["OldHashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 + s = suite_n["in ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable in($y, $vec)) s["Set"] = @benchmarkable in($y, $set) @@ -185,25 +185,25 @@ for n in sizes s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) s = suite_n["filter! 
(most) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, s) setup=(s=collect($r))) - s["Set"] = @benchmarkable filter!($pred1, s) setup=(s=Set($r)) - n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, s) setup=(s=Indices(collect($r)))) - s["HashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=HashIndices($r)) - s["OldHashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=Dictionaries.OldHashIndices($r)) + n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, s) setup=(s=collect($r)) evals=1) + s["Set"] = @benchmarkable filter!($pred1, s) setup=(s=Set($r)) evals=1 + n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, s) setup=(s=Indices(collect($r))) evals=1) + s["HashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=HashIndices($r)) evals=1 + s["OldHashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter!($iseven, s) setup=(s=collect($r))) - s["Set"] = @benchmarkable filter!($iseven, s) setup=(s=Set($r)) - n < cutoff && (s["Indices"] = @benchmarkable filter!($iseven, s) setup=(s=Indices(collect($r)))) - s["HashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=HashIndices($r)) - s["OldHashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=Dictionaries.OldHashIndices($r)) + n < cutoff && (s["Vector"] = @benchmarkable filter!($iseven, s) setup=(s=collect($r)) evals=1) + s["Set"] = @benchmarkable filter!($iseven, s) setup=(s=Set($r)) evals=1 + n < cutoff && (s["Indices"] = @benchmarkable filter!($iseven, s) setup=(s=Indices(collect($r))) evals=1) + s["HashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=HashIndices($r)) evals=1 + s["OldHashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 s = suite_n["filter! 
(few) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, s) setup=(s=collect($r))) - s["Set"] = @benchmarkable filter!($pred2, s) setup=(s=Set($r)) - n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, s) setup=(s=Indices(collect($r)))) - s["HashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=HashIndices($r)) - s["OldHashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=Dictionaries.OldHashIndices($r)) + n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, s) setup=(s=collect($r)) evals=1) + s["Set"] = @benchmarkable filter!($pred2, s) setup=(s=Set($r)) evals=1 + n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, s) setup=(s=Indices(collect($r))) evals=1) + s["HashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=HashIndices($r)) evals=1 + s["OldHashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 s = suite_n["union ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) diff --git a/src/HashIndices.jl b/src/HashIndices.jl index 88723ad..70a99b8 100644 --- a/src/HashIndices.jl +++ b/src/HashIndices.jl @@ -179,12 +179,12 @@ function gettoken(indices::HashIndices{I}, i::I) where {I} @inbounds while true trial_slot = (trial_slot + 1) trial_index = indices.slots[trial_slot] - if trial_index == 0 - return (false, (0, 0)) - elseif trial_index > 0 + if trial_index > 0 if full_hash === indices.hashes[trial_index] && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) return (true, (trial_slot, trial_index)) end + elseif trial_index === 0 + return (false, (0, 0)) end trial_slot = trial_slot & bit_mask diff --git a/src/insertion.jl b/src/insertion.jl index ee45cce..d9e7f54 100644 --- a/src/insertion.jl +++ b/src/insertion.jl @@ -232,9 +232,9 @@ end function Base.get!(d::AbstractDictionary{I, T}, i::I, default::T) where {I, T} 
(hadindex, token) = gettoken!(d, i) if hadindex - return gettokenvalue(d, token) + return @inbounds gettokenvalue(d, token) else - settokenvalue!(d, token, default) + @inbounds settokenvalue!(d, token, default) return default end end @@ -257,10 +257,10 @@ end function Base.get!(f::Callable, d::AbstractDictionary{I}, i::I) where {I} (hadindex, token) = gettoken!(d, i) if hadindex - return gettokenvalue(d, token) + return @inbounds gettokenvalue(d, token) else default = f() - settokenvalue!(d, token, default) + @inbounds settokenvalue!(d, token, default) return default end end @@ -290,7 +290,7 @@ function Base.delete!(d::AbstractDictionary{I}, i::I) where {I} if !hasindex throw(IndexError("Index doesn't exist: $i")) end - deletetoken!(d, token) + @inbounds deletetoken!(d, token) return d end @@ -317,7 +317,7 @@ end function unset!(d::AbstractDictionary{I}, i::I) where {I} (hasindex, token) = gettoken(d, i) if hasindex - deletetoken!(d, token) + @inbounds deletetoken!(d, token) end return d end @@ -330,9 +330,9 @@ function Base.merge!(combiner::Callable, d::AbstractDictionary, d2::AbstractDict for (i, v) in pairs(d2) (hasindex, token) = gettoken!(d, i) if hasindex - settokenvalue!(d, token, combiner(gettokenvalue(d, token), v)) + @inbounds settokenvalue!(d, token, combiner(gettokenvalue(d, token), v)) else - settokenvalue!(d, token, v) + @inbounds settokenvalue!(d, token, v) end end return d @@ -391,7 +391,7 @@ function Base.symdiff!(s1::AbstractIndices, s2::AbstractIndices) for i in s2 (hastoken, token) = gettoken!(s1, i) if hastoken - deletetoken!(s1, token) + @inbounds deletetoken!(s1, token) end end return s1 From 33608fdf6cf2c3b6a6a5d343f232530fd2b7c889 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Wed, 3 Jun 2020 10:23:57 +1000 Subject: [PATCH 08/20] Don't compare hashes, benchmarking progress. 
--- benchmark/Project.toml | 1 + benchmark/bench_indices.jl | 665 +++++++++++++++++++++++++++---------- src/HashIndices.jl | 9 +- 3 files changed, 496 insertions(+), 179 deletions(-) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 0963be5..49bfdf4 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -1,4 +1,5 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl index 2533ed0..0614228 100644 --- a/benchmark/bench_indices.jl +++ b/benchmark/bench_indices.jl @@ -2,11 +2,12 @@ module BenchHashIndices using BenchmarkTools using Dictionaries +using OrderedCollections const suite = BenchmarkGroup() #sizes = [(8 .^ (0:8))...] -sizes = [10 ]#, 100, 1000, 10_000] #, 10_000, 10_000_000] +sizes = [10, 100, 1000, 10_000] #, 10_000, 10_000_000] cutoff = 101 function build_vector_by_insertion(n) @@ -25,6 +26,14 @@ function build_set_by_insertion(n) return out end +function build_ordered_set_by_insertion(n) + out = OrderedSet{Int}() + for i in 1:n + push!(out, i) + end + return out +end + function build_indices_by_insertion(n) out = Indices{Int}() for i in 1:n @@ -63,6 +72,13 @@ function empty_by_deletion(set::Set, n) return set end +function empty_by_deletion(set::OrderedSet, n) + for i in 1:n + delete!(set, i) + end + return set +end + function empty_by_deletion(indices::AbstractIndices, n) for i in 1:n delete!(indices, i) @@ -76,6 +92,261 @@ function foreachsum(set) return count[] end +function all_in(set, n) + out = true + for i in 1:n + out &= i in set + end + return out +end + +function not_in(set, n) + out = true + for i in n+1:2n + out &= i in set + end + return out +end + +function basic_set_test(N) + h = Set{Int}() + out = true + for i in 1:N + push!(h, i) + end + for i in 1:N + out &= i in h + 
end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in 1:2:N + push!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:N + delete!(h, i) + end + out &= isempty(h) + push!(h, 7) + out &= 7 in h + for i in 1:N + push!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in N+1:2N + push!(h, i) + end + for i in 1:2N + out &= (i in h) == (i > N || iseven(i)) + end + return out +end + +function basic_ordered_set_test(N) + h = OrderedSet{Int}() + out = true + for i in 1:N + push!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in 1:2:N + push!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:N + delete!(h, i) + end + out &= isempty(h) + push!(h, 7) + out &= 7 in h + for i in 1:N + push!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in N+1:2N + push!(h, i) + end + for i in 1:2N + out &= (i in h) == (i > N || iseven(i)) + end + return out +end + +function basic_indices_test(N) + h = Indices{Int}() + out = true + for i in 1:N + insert!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in 1:2:N + insert!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:N + delete!(h, i) + end + out &= isempty(h) + insert!(h, 7) + out &= 7 in h + for i in 1:N + set!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in N+1:2N + insert!(h, i) + end + for i in 1:2N + out &= (i in h) == (i > N || iseven(i)) + end + return out +end + +function basic_hash_indices_test(N) + h = HashIndices{Int}() + out = true + for i in 1:N + 
insert!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in 1:2:N + insert!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:N + delete!(h, i) + end + out &= isempty(h) + insert!(h, 7) + out &= 7 in h + for i in 1:N + set!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in N+1:2N + insert!(h, i) + end + for i in 1:2N + out &= (i in h) == (i > N || iseven(i)) + end + return out +end + +function basic_old_hash_indices_test(N) + h = Dictionaries.OldHashIndices{Int}() + out = true + for i in 1:N + insert!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in 1:2:N + insert!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:N + delete!(h, i) + end + out &= isempty(h) + insert!(h, 7) + out &= 7 in h + for i in 1:N + set!(h, i) + end + for i in 1:N + out &= i in h + end + for i in 1:2:N + delete!(h, i) + end + for i in 1:N + out &= (i in h) == iseven(i) + end + for i in N+1:2N + insert!(h, i) + end + for i in 1:2N + out &= (i in h) == (i > N || iseven(i)) + end + return out +end for n in sizes r = 1:n @@ -97,6 +368,10 @@ for n in sizes even_set = Set(2:2:n) odd_set = Set(1:2:n) + ordered_set = OrderedSet(r) + even_ordered_set = OrderedSet(2:2:n) + odd_ordered_set = OrderedSet(1:2:n) + hash_indices = HashIndices(r) even_hash_indices = HashIndices(2:2:n) odd_hash_indices = HashIndices(1:2:n) @@ -107,180 +382,220 @@ for n in sizes suite_n = suite["$n"] = BenchmarkGroup() - s = suite_n["constructor ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable Vector($r)) - s["Set"] = @benchmarkable Set($r) - n < cutoff && (s["Indices"] = @benchmarkable Indices($r)) - s["HashIndices"] = @benchmarkable HashIndices($r) - s["OldHashIndices"] = 
@benchmarkable HashIndices($r) - - s = suite_n["build by insertion ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector (push!)"] = @benchmarkable build_vector_by_insertion($n)) - s["Set"] = @benchmarkable build_set_by_insertion($n) - n < cutoff && (s["Set"] = @benchmarkable build_indices_by_insertion($n)) - s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) - s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) - - s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=collect($r)) evals=1) - s["Set"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Set($r)) evals=1 - n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Indices(collect($r))) evals=1) - s["HashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=HashIndices($r)) evals=1 - s["OldHashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - s = suite_n["in ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable in($y, $vec)) - s["Set"] = @benchmarkable in($y, $set) - n < cutoff && (s["Indices"] = @benchmarkable in($y, $indices)) - s["HashIndices"] = @benchmarkable in($y, $hash_indices) - s["OldHashIndices"] = @benchmarkable in($y, $old_hash_indices) - - s = suite_n["count ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable count(iseven, $vec)) - s["Set"] = @benchmarkable count(iseven, $set) - n < cutoff && (s["Indices"] = @benchmarkable count(iseven, $indices)) - s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) - s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) - - s = suite_n["sum ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable sum($vec)) - s["Set"] = @benchmarkable sum($set) - n < cutoff && (s["Indices"] = @benchmarkable sum($indices)) - s["HashIndices"] = @benchmarkable sum($hash_indices) - 
s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) - - s = suite_n["foreach ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable foreachsum($vec)) - s["Set"] = @benchmarkable foreachsum($set) - n < cutoff && (s["Indices"] = @benchmarkable foreachsum($indices)) - s["HashIndices"] = @benchmarkable foreachsum($hash_indices) - s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) - - s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x)))) - s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) - n < cutoff && (s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x)))) - s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) - s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if isodd(x))) - - s = suite_n["filter (most) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter($pred1, $vec)) - s["Set"] = @benchmarkable filter($pred1, $set) - n < cutoff && (s["Indices"] = @benchmarkable filter($pred1, $indices)) - s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) - s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) - - s = suite_n["filter (half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter(iseven, $vec)) - s["Set"] = @benchmarkable filter(iseven, $set) - n < cutoff && (s["Indices"] = @benchmarkable filter(iseven, $indices)) - s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) - s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) - - s = suite_n["filter (few) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter($pred2, $vec)) - s["Set"] = @benchmarkable filter($pred2, $set) - n < cutoff && (s["Indices"] = @benchmarkable filter($pred2, $indices)) - s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) - 
s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) - - s = suite_n["filter! (most) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, s) setup=(s=collect($r)) evals=1) - s["Set"] = @benchmarkable filter!($pred1, s) setup=(s=Set($r)) evals=1 - n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, s) setup=(s=Indices(collect($r))) evals=1) - s["HashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=HashIndices($r)) evals=1 - s["OldHashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter!($iseven, s) setup=(s=collect($r)) evals=1) - s["Set"] = @benchmarkable filter!($iseven, s) setup=(s=Set($r)) evals=1 - n < cutoff && (s["Indices"] = @benchmarkable filter!($iseven, s) setup=(s=Indices(collect($r))) evals=1) - s["HashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=HashIndices($r)) evals=1 - s["OldHashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - s = suite_n["filter! 
(few) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, s) setup=(s=collect($r)) evals=1) - s["Set"] = @benchmarkable filter!($pred2, s) setup=(s=Set($r)) evals=1 - n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, s) setup=(s=Indices(collect($r))) evals=1) - s["HashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=HashIndices($r)) evals=1 - s["OldHashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - s = suite_n["union ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) - s["Set"] = @benchmarkable union($even_set, $odd_set) - n < cutoff && (s["Indices"] = @benchmarkable union($even_indices, $even_indices)) - s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) - - s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $odd_vec)) - s["Set"] = @benchmarkable intersect($even_set, $odd_set) - n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $odd_indices)) - s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) - - s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $vec)) - s["Set"] = @benchmarkable intersect($even_set, $set) - n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $indices)) - s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $old_hash_indices) - - s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable intersect($vec, $vec)) - s["Set"] = 
@benchmarkable intersect($set, $set) - n < cutoff && (s["Indices"] = @benchmarkable intersect($indices, $indices)) - s["HashIndices"] = @benchmarkable intersect($hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) - - s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $odd_vec)) - s["Set"] = @benchmarkable setdiff($even_set, $odd_set) - n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $odd_indices)) - s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) - - s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $vec)) - s["Set"] = @benchmarkable setdiff($even_set, $set) - n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $indices)) - s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $old_hash_indices) - - s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable setdiff($vec, $vec)) - s["Set"] = @benchmarkable setdiff($set, $set) - n < cutoff && (s["Indices"] = @benchmarkable setdiff($indices, $indices)) - s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) - - s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $odd_vec)) - s["Set"] = @benchmarkable symdiff($even_set, $odd_set) - n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $odd_indices)) - s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable 
symdiff($even_old_hash_indices, $odd_old_hash_indices) - - s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $odd_vec)) - s["Set"] = @benchmarkable symdiff($set, $odd_set) - n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $odd_indices)) - s["HashIndices"] = @benchmarkable symdiff($hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) - - s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $vec)) - s["Set"] = @benchmarkable symdiff($even_set, $set) - n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $indices)) - s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) - - s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() - n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $vec)) - s["Set"] = @benchmarkable symdiff($set, $set) - n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $indices)) - s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) + # s = suite_n["constructor ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable Vector($r)) + # s["Set"] = @benchmarkable Set($r) + # s["OrderedSet"] = @benchmarkable OrderedSet($r) + # n < cutoff && (s["Indices"] = @benchmarkable Indices($r)) + # s["HashIndices"] = @benchmarkable HashIndices($r) + # s["OldHashIndices"] = @benchmarkable HashIndices($r) + + # s = suite_n["build by insertion ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector (push!)"] = @benchmarkable build_vector_by_insertion($n)) + # s["Set"] = @benchmarkable build_set_by_insertion($n) + # s["OrderedSet"] = @benchmarkable build_ordered_set_by_insertion($n) + # n 
< cutoff && (s["Set"] = @benchmarkable build_indices_by_insertion($n)) + # s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) + # s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) + + # s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=collect($r)) evals=1) + # s["Set"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Set($r)) evals=1 + # s["OrderedSet"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=OrderedSet($r)) evals=1 + # n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Indices(collect($r))) evals=1) + # s["HashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=HashIndices($r)) evals=1 + # s["OldHashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 + + s = suite_n["insertion/deletion tests ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable basic_set_test($n) + s["OrderedSet"] = @benchmarkable basic_ordered_set_test($n) + n < cutoff && (s["Indices"] = @benchmarkable basic_indices_test($n)) + s["HashIndices"] = @benchmarkable basic_hash_indices_test($n) + s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) + + # s = suite_n["in ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable all_in($vec, $n)) + # s["Set"] = @benchmarkable all_in($set, $n) + # s["OrderedSet"] = @benchmarkable all_in($ordered_set, $n) + # n < cutoff && (s["Indices"] = @benchmarkable all_in($indices, $n)) + # s["HashIndices"] = @benchmarkable all_in($hash_indices, $n) + # s["OldHashIndices"] = @benchmarkable all_in($old_hash_indices, $n) + + # s = suite_n["not in ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable not_in($vec, $n)) + # s["Set"] = @benchmarkable not_in($set, $n) + # s["OrderedSet"] = @benchmarkable not_in($ordered_set, $n) + # n < cutoff && (s["Indices"] = @benchmarkable 
not_in($indices, $n)) + # s["HashIndices"] = @benchmarkable not_in($hash_indices, $n) + # s["OldHashIndices"] = @benchmarkable not_in($old_hash_indices, $n) + + # s = suite_n["count ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable count(iseven, $vec)) + # s["Set"] = @benchmarkable count(iseven, $set) + # s["OrderedSet"] = @benchmarkable count(iseven, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable count(iseven, $indices)) + # s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) + # s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) + + # s = suite_n["sum ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable sum($vec)) + # s["Set"] = @benchmarkable sum($set) + # s["OrderedSet"] = @benchmarkable sum($ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable sum($indices)) + # s["HashIndices"] = @benchmarkable sum($hash_indices) + # s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) + + # s = suite_n["foreach ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable foreachsum($vec)) + # s["Set"] = @benchmarkable foreachsum($set) + # s["OrderedSet"] = @benchmarkable foreachsum($ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable foreachsum($indices)) + # s["HashIndices"] = @benchmarkable foreachsum($hash_indices) + # s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) + + # s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x)))) + # s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) + # s["OrderedSet"] = @benchmarkable sum($(2x for x in ordered_set if isodd(x))) + # n < cutoff && (s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x)))) + # s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) + # s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if 
isodd(x))) + + # s = suite_n["filter (most) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable filter($pred1, $vec)) + # s["Set"] = @benchmarkable filter($pred1, $set) + # s["OrderedSet"] = @benchmarkable filter($pred1, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable filter($pred1, $indices)) + # s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) + # s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) + + # s = suite_n["filter (half) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable filter(iseven, $vec)) + # s["Set"] = @benchmarkable filter(iseven, $set) + # s["OrderedSet"] = @benchmarkable filter(iseven, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable filter(iseven, $indices)) + # s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) + # s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) + + # s = suite_n["filter (few) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable filter($pred2, $vec)) + # s["Set"] = @benchmarkable filter($pred2, $set) + # s["OrderedSet"] = @benchmarkable filter($pred2, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable filter($pred2, $indices)) + # s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) + # s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) + + # s = suite_n["filter! 
(most) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, s) setup=(s=collect($r)) evals=1) + # s["Set"] = @benchmarkable filter!($pred1, s) setup=(s=Set($r)) evals=1 + # s["OrderedSet"] = @benchmarkable filter!($pred1, s) setup=(s=OrderedSet($r)) evals=1 + # n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, s) setup=(s=Indices(collect($r))) evals=1) + # s["HashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=HashIndices($r)) evals=1 + # s["OldHashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 + + # s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable filter!($iseven, s) setup=(s=collect($r)) evals=1) + # s["Set"] = @benchmarkable filter!($iseven, s) setup=(s=Set($r)) evals=1 + # s["OrderedSet"] = @benchmarkable filter!($iseven, s) setup=(s=OrderedSet($r)) evals=1 + # n < cutoff && (s["Indices"] = @benchmarkable filter!($iseven, s) setup=(s=Indices(collect($r))) evals=1) + # s["HashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=HashIndices($r)) evals=1 + # s["OldHashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 + + # s = suite_n["filter! 
(few) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, s) setup=(s=collect($r)) evals=1) + # s["Set"] = @benchmarkable filter!($pred2, s) setup=(s=Set($r)) evals=1 + # s["OrderedSet"] = @benchmarkable filter!($pred2, s) setup=(s=OrderedSet($r)) evals=1 + # n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, s) setup=(s=Indices(collect($r))) evals=1) + # s["HashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=HashIndices($r)) evals=1 + # s["OldHashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 + + # s = suite_n["union ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) + # s["Set"] = @benchmarkable union($even_set, $odd_set) + # s["OrderedSet"] = @benchmarkable union($even_ordered_set, $odd_ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable union($even_indices, $even_indices)) + # s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) + # s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) + + # s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $odd_vec)) + # s["Set"] = @benchmarkable intersect($even_set, $odd_set) + # s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $odd_ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $odd_indices)) + # s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) + # s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) + + # s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $vec)) + # s["Set"] = @benchmarkable intersect($even_set, $set) + # s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $ordered_set) + # n < cutoff && (s["Indices"] 
= @benchmarkable intersect($even_indices, $indices)) + # s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) + # s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $old_hash_indices) + + # s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable intersect($vec, $vec)) + # s["Set"] = @benchmarkable intersect($set, $set) + # s["OrderedSet"] = @benchmarkable intersect($ordered_set, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable intersect($indices, $indices)) + # s["HashIndices"] = @benchmarkable intersect($hash_indices, $hash_indices) + # s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) + + # s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $odd_vec)) + # s["Set"] = @benchmarkable setdiff($even_set, $odd_set) + # s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $odd_ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $odd_indices)) + # s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) + # s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) + + # s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $vec)) + # s["Set"] = @benchmarkable setdiff($even_set, $set) + # s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $indices)) + # s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) + # s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $old_hash_indices) + + # s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable setdiff($vec, $vec)) + # s["Set"] = @benchmarkable setdiff($set, $set) + # 
s["OrderedSet"] = @benchmarkable setdiff($ordered_set, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable setdiff($indices, $indices)) + # s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) + # s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) + + # s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $odd_vec)) + # s["Set"] = @benchmarkable symdiff($even_set, $odd_set) + # s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $odd_ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $odd_indices)) + # s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) + # s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $odd_old_hash_indices) + + # s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $odd_vec)) + # s["Set"] = @benchmarkable symdiff($set, $odd_set) + # s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $odd_ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $odd_indices)) + # s["HashIndices"] = @benchmarkable symdiff($hash_indices, $odd_hash_indices) + # s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) + + # s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() + # n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $vec)) + # s["Set"] = @benchmarkable symdiff($even_set, $set) + # s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $indices)) + # s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $hash_indices) + # s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) + + # s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() + # n < cutoff && 
(s["Vector"] = @benchmarkable symdiff($vec, $vec)) + # s["Set"] = @benchmarkable symdiff($set, $set) + # s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $ordered_set) + # n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $indices)) + # s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) + # s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) end end # module diff --git a/src/HashIndices.jl b/src/HashIndices.jl index 70a99b8..80f0f48 100644 --- a/src/HashIndices.jl +++ b/src/HashIndices.jl @@ -51,6 +51,7 @@ function HashIndices{I}(iter) where {I} end function HashIndices{I}(values::Vector{I}) where {I} + # TODO Incrementally build the hashmap removing duplicates hashes = map(v -> hash(v) & hash_mask, values) slots = Vector{Int}() out = HashIndices{I}(slots, hashes, values, 0) @@ -180,7 +181,8 @@ function gettoken(indices::HashIndices{I}, i::I) where {I} trial_slot = (trial_slot + 1) trial_index = indices.slots[trial_slot] if trial_index > 0 - if full_hash === indices.hashes[trial_index] && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) + value = indices.values[trial_index] + if i === value || isequal(i, value) return (true, (trial_slot, trial_index)) end elseif trial_index === 0 @@ -219,9 +221,8 @@ function gettoken!(indices::HashIndices{I}, i::I, values = ()) where {I} deleted_slot = trial_slot end else - trial_hash = indices.hashes[trial_index] - - if trial_hash === full_hash && isequal(i, indices.values[trial_index]) # Note: the first bit also ensures the value wasn't deleted (and potentiall undefined) + value = indices.values[trial_index] + if i === value || isequal(i, value) return (true, (trial_slot, trial_index)) end end From 0e0a306bd094a588c3888167eb334b3a16648d37 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Mon, 8 Jun 2020 21:30:20 +1000 Subject: [PATCH 09/20] Many many changes --- README.md | 168 
+++++++------- benchmark/bench_indices.jl | 455 ++++++++++++++++++++----------------- src/AbstractDictionary.jl | 93 +++++++- src/AbstractIndices.jl | 67 +++++- src/Dictionaries.jl | 7 +- src/Dictionary.jl | 2 - src/HashDictionary.jl | 19 +- src/HashIndices.jl | 153 +++++++++++-- src/Indices.jl | 8 +- src/OldHashDictionary.jl | 2 +- src/OldHashIndices.jl | 2 +- src/foreach.jl | 18 +- src/insertion.jl | 21 +- src/map.jl | 53 ++++- 14 files changed, 732 insertions(+), 336 deletions(-) diff --git a/README.md b/README.md index 12c0f36..7fd1215 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,16 @@ In this package we aim to devise a cohesive interface for abstract dictionaries ## Getting started -Dictionaries share the common supertype `AbstractDictionary`, and the go-to container in this package is `HashDictionary` - which shares the same hash-based implementation as Julia's inbuilt `Dict` type (using `hash` and `isequal` for key lookup and comparison). You can construct one from a list of indices (or keys) and a list of values. +Dictionaries share the common supertype `AbstractDictionary`, and the go-to container in this package is `HashDictionary` - which is a new hash-based implementation that serves as a replacement of Julia's inbuilt `Dict` type (using `hash` and `isequal` for key lookup and comparison). The three main difference to `Dict` are that it preserves the order of elements, it iterates much faster, and it iterates values rather than key-value pairs. + +You can construct one from a list of indices (or keys) and a list of values. 
```julia julia> dict = HashDictionary(["a", "b", "c"], [1, 2, 3]) 3-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 1 + "b" │ 2 + "c" │ 3 julia> dict["a"] 1 @@ -32,13 +34,13 @@ If you prefer, you can use the `dictionary` function to create a dictionary from ```julia julia> dictionary(["a" => 1, "b" => 2, "c" => 3]) 3-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 1 + "b" │ 2 + "c" │ 3 ``` The values of `HashDictionary` are mutable, or "settable", and can be modified via `setindex!`. -However, just like for `Array`s, new indices (keys) are *never* created this way. +However, just like for `Array`s, new indices (keys) are *never* created or rearranged this way. ```julia julia> dict["a"] = 10 @@ -46,15 +48,15 @@ julia> dict["a"] = 10 julia> dict 3-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 10 + "b" │ 2 + "c" │ 3 julia> dict["d"] = 42 ERROR: IndexError("Dictionary does not contain index: d") Stacktrace: - [1] setindex!(::HashDictionary{String,Int64}, ::Int64, ::String) at /home/ferris/.julia/dev/Dictionaries/src/AbstractDictionary.jl:134 - [2] top-level scope at REPL[15]:1 + [1] setindex!(::HashDictionary{String,Int64}, ::Int64, ::String) at /home/ferris/.julia/dev/Dictionaries/src/AbstractDictionary.jl:347 + [2] top-level scope at REPL[7]:1 ``` The indices of `HashDictionary` are said to be "insertable" - indices can be added or removed with the `insert!` and `delete!` functions. @@ -62,16 +64,16 @@ The indices of `HashDictionary` are said to be "insertable" - indices can be add ``` julia> insert!(dict, "d", 42) 4-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 10 + "b" │ 2 + "c" │ 3 "d" │ 42 julia> delete!(dict, "d") 3-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 10 + "b" │ 2 + "c" │ 3 ``` Note that `insert!` and `delete!` are precise in the sense that `insert!` will error if the index already exists, and `delete!` will error if the index does not. 
The `set!` function provides "upsert" functionality ("update or insert") and `unset!` is useful for removing an index that may or may not exist. @@ -83,9 +85,9 @@ Dictionaries can be manipulated and transformed using a similar interface to Jul ```julia julia> dict = HashDictionary(["a", "b", "c"], [1, 2, 3]) 3-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 1 + "b" │ 2 + "c" │ 3 julia> sum(dict) 6 @@ -99,21 +101,21 @@ Mapping and broadcasting also function as-per arrays, preserving the indices and ```julia julia> map(iseven, dict) 3-element HashDictionary{String,Bool} - "c" │ false - "b" │ true "a" │ false + "b" │ true + "c" │ false julia> map(*, dict, dict) 3-element HashDictionary{String,Int64} - "c" │ 9 - "b" │ 4 "a" │ 1 + "b" │ 4 + "c" │ 9 julia> dict .+ 1 3-element HashDictionary{String,Int64} - "c" │ 4 - "b" │ 3 "a" │ 2 + "b" │ 3 + "c" │ 4 ``` There is a `mapview` function, which is the lazy version of the above. @@ -123,8 +125,8 @@ Filtering a dictionary also preserves the keys, dropping the remainder. ```julia julia> filter(isodd, dict) 2-element HashDictionary{String,Bool} - "c" │ 3 "a" │ 1 + "c" │ 3 ``` The `filterview` function is provided to lazily filter a dictionary, which may occassionally @@ -135,15 +137,15 @@ The `pairs` function allows access to both the index (key) and value when iterat ```julia julia> pairs(dict) 3-element Dictionaries.PairDictionary{String,Int64,HashDictionary{String,Int64}} - "c" │ "c" => 3 - "b" │ "b" => 2 "a" │ "a" => 1 + "b" │ "b" => 2 + "c" │ "c" => 3 julia> map(((k,v),) -> k^v, pairs(dict)) 3-element HashDictionary{String,String} - "c" │ "ccc" - "b" │ "bb" "a" │ "a" + "b" │ "bb" + "c" │ "ccc" ``` ### Indices @@ -153,9 +155,9 @@ The indices of a dictionary are unique, and form a set (in the mathematical sens ```julia julia> keys(dict) 3-element HashIndices{String} - "c" - "b" "a" + "b" + "c" ``` Whenever you call `keys(::AbstractDictionary)`, you always receive an `AbstractIndices` in return. 
@@ -164,26 +166,38 @@ Whenever you call `keys(::AbstractDictionary)`, you always receive an `AbstractI ```julia julia> inds = HashIndices(["a", "b", "c"]) 3-element HashIndices{String} - "c" - "b" "a" + "b" + "c" ``` +You can also use the `distinct` function, which is similar to `unique` from `Base`, to construct indices where the input may not be unique. + +```julia +julia> distinct([1,2,3,3]) +3-element HashIndices{Int64} + 1 + 2 + 3 +``` + +The `distinct` function may be considered as useful replacement of `unique` in many cases, as the `unique` function internally constructs a hashmap (`Set`) anyway before returning a `Vector`. However, a `HashIndices` iterates as fast as `Vector` and in many cases it can be useful to be able to `map` it into a dictionary. + `HashIndices` are insertable, so you can use `insert!` and `delete!` (or `set!` and `unset!`) to add and remove elements. ```julia julia> insert!(inds, "d") 4-element HashIndices{String} - "c" - "b" "a" + "b" + "c" "d" julia> delete!(inds, "d") 3-element HashIndices{String} - "c" - "b" "a" + "b" + "c" ``` One crucial property of `AbstractIndices` is that they are a subtype of `AbstractDictionary` (similar to how the `keys` of an `AbstractArray` are always `AbstractArray`s). But how can a set, or indices, be a dictionary? Under `getindex`, they form a map from each element to itself. @@ -193,7 +207,9 @@ julia> inds["b"] "b" ``` -Since all dictionaries have `keys`, even indices must have `keys` - and in this case `keys(inds) === inds`. +Thus, if you iterate an `AbstractIndices` you are guaranteed never to get the same value twice, and the collection is a set. All the usual set operations like `union`, `intersect`, `setdiff` and `symdiff` are defined, as well as a newly exported predicate function `disjoint(set1, set2)` which returns `true` if `set1` and `set2` do not intersect/overlap according to an elementwise `isequal` check, and `false` otherwise. 
+ +Since all dictionaries have `keys`, even indices must have `keys` - and in this case `keys(inds::AbstractIndices) === inds`. ### Working with indices @@ -204,15 +220,15 @@ If you wish to perform an operation on each element of a set, you can simply `ma ```julia julia> map(uppercase, inds) 3-element HashDictionary{String,String} - "c" │ "C" - "b" │ "B" "a" │ "A" + "b" │ "B" + "c" │ "C" julia> inds .* "at" 3-element HashDictionary{String,String} - "c" │ "cat" - "b" │ "bat" "a" │ "aat" + "b" │ "bat" + "c" │ "cat" ``` You can filter indices. @@ -220,8 +236,8 @@ You can filter indices. ```julia julia> filter(in(["a", "b"]), inds) 2-element HashIndices{String} - "b" "a" + "b" ``` To find the subset of dictionary indices/keys that satisfy some constraint on the values, use the `findall` function. @@ -229,14 +245,14 @@ To find the subset of dictionary indices/keys that satisfy some constraint on th ```julia julia> dict 3-element HashDictionary{String,Int64} - "c" │ 3 - "b" │ 2 "a" │ 1 + "b" │ 2 + "c" │ 3 julia> inds2 = findall(isodd, dict) 2-element HashIndices{String} - "c" "a" + "c" ``` And, finally, one useful thing you can do with indices is, well, *indexing*. Non-scalar indexing of dictionaries is a little more complicated than that of arrays, since there is an ambiguity on whether the indexer is a *single* index or a collection of indices (for arrays, the scalar indices are integers (or `CartesianIndex`es) so this ambiguity is less of a problem). The [Indexing.jl](https://github.com/andyferris/Indexing.jl) provides the `getindices` function to return a container with the same indices as the indexer, and this is re-exported here. @@ -244,8 +260,8 @@ And, finally, one useful thing you can do with indices is, well, *indexing*. 
Non ```julia julia> getindices(dict, inds2) 2-element HashDictionary{String,Int64} - "c" │ 3 "a" │ 1 + "c" │ 3 ``` It has [been suggested](https://github.com/JuliaLang/julia/issues/30845) to make the syntax `dict.[inds2]` available in Julia in the future for unambiguous non-scalar indexing. @@ -255,8 +271,8 @@ Lazy non-scalar indexing may be achieved, as usual, with the `view` function. ```julia julia> view(dict, inds2) 2-element DictionaryView{String,Int64,HashIndices{String},HashDictionary{String,Int64}} - "c" │ 3 "a" │ 1 + "c" │ 3 ``` Boolean or "logical" indexing is also ambiguous with scalar and non-scalar indexing. Luckily, the `findall` function is a convenient way to convert a Boolean-valued dictionary into indices, which we can use with `getindices`: @@ -264,14 +280,14 @@ Boolean or "logical" indexing is also ambiguous with scalar and non-scalar index ```julia julia> isodd.(dict) 3-element HashDictionary{String,Bool} - "c" │ true - "b" │ false "a" │ true + "b" │ false + "c" │ true julia> getindices(dict, findall(isodd.(dict))) 2-element HashDictionary{String,Int64} - "c" │ 3 "a" │ 1 + "c" │ 3 ``` (Who knows - maybe we need syntax for this, too?) @@ -280,8 +296,6 @@ julia> getindices(dict, findall(isodd.(dict))) The `Dictionary` container is a simple, iteration-based dictionary that may be faster for smaller collections. It's `keys` are the corresponding `Indices` type. By default these contain `Vector`s which support mutation, insertion and tokenization, but they can contain other iterables such as `Tuple`s (which make for good statically-sized dictionaries, with similarities with `Base.ImmutableDict` or [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl)). -It is planned to add new dictionary types that support an ordering (such as sorted by the values, or the columns of a `DataFrame`, similar to [OrderedCollections.jl](https://github.com/JuliaCollections/OrderedCollections.jl)). 
- Indices that are based on sort ordering instead of hashing (both in a dense sorted form and as a B-tree or similar) are also planned. ### Factories for dictionary creation @@ -293,9 +307,9 @@ The `similar` function is used to create a dictionary with defined indices, but ```julia julia> similar(dict, Vector{Int}) 3-element HashDictionary{String,Array{Int64,1}} - "c" │ #undef - "b" │ #undef "a" │ #undef + "b" │ #undef + "c" │ #undef ``` The behaviour is the same if `dict` is an `AbstractIndices` - you always get a dictionary with settable/mutable elements. Preserving the indices using `similar` and setting the values provides a huge performance advantage compared to iteratively constructing a new dictionary via insertion (see the bottom of this README). @@ -305,9 +319,9 @@ On the other hand, values can be initialized with the `fill(value, dict)` functi ```julia julia> fill(42, dict) 3-element HashDictionary{String,Int64} - "c" │ 42 - "b" │ 42 "a" │ 42 + "b" │ 42 + "c" │ 42 ``` The `fill` function can optionally define a wider type than the value, helpful for if you want to assign a default value like `missing` but allow this to be updated later. @@ -315,9 +329,9 @@ The `fill` function can optionally define a wider type than the value, helpful f ```julia julia> fill(missing, dict, Union{Missing, Int64}) 3-element HashDictionary{String,Union{Missing, Int64}} - "c" │ missing - "b" │ missing "a" │ missing + "b" │ missing + "c" │ missing ``` Functions `zeros`, `ones`, `falses` and `trues` are defined as a handy alternative to the above in common cases, as are `rand` and `randn`. 
@@ -325,15 +339,15 @@ Functions `zeros`, `ones`, `falses` and `trues` are defined as a handy alternati ```julia julia> zeros(dict) 3-element HashDictionary{String,Float64} - "c" │ 0.0 - "b" │ 0.0 "a" │ 0.0 + "b" │ 0.0 + "c" │ 0.0 julia> zeros(UInt8, dict) 3-element HashDictionary{String,UInt8} - "c" │ 0x00 - "b" │ 0x00 "a" │ 0x00 + "b" │ 0x00 + "c" │ 0x00 ``` Note that the *indices* of the output are not guaranteed to be mutable/insertable - in fact, in the current implementation inserting or deleting indices to the output of the above can corrupt the input container (Julia suffers similar restrictions with `AbstractArray`s with mutable indices, for example changing the size of the indices of a `SubArray` can lead to corruption and segfaults). This also holds true for the output of `map`, `broadcast`, `getindices`, `similar`, `zeros`, `ones`, `falses` and `trues`. If you want a new container with indices you can insert, by sure to `copy` the indices furst, or use `empty` instead. @@ -462,9 +476,7 @@ tokens are equivalent with a constant-time operation. When this is the case, the operation can skip lookup entirely, performing zero calls to `hash` and dealing with hash collisions. -A quick benchmark verifies the result. The `copy` below makes `keys(d1) !== keys(d2)`, -disabling token co-iteration (with results somewhat in line with typical usage of -`Base.Dict`). +A quick benchmark verifies the result. ```julia julia> using Dictionaries, BenchmarkTools julia> d1 = HashDictionary(1:10_000_000, 10_000_000:-1:1); julia> d2 = d1 .+ 1; julia> @btime map(+, d1, d2); - 155.299 ms (22 allocations: 256.00 MiB) + 23.362 ms (18 allocations: 76.29 MiB) +``` +The `copy` below makes `keys(d1) !== keys(d2)`, disabling token co-iteration (requiring +multiple hash-table lookups per element). 
+ +```julia julia> @btime map(+, d1, $(HashDictionary(copy(keys(d2)), d2))); - 343.394 ms (22 allocations: 256.00 MiB) + 1.485 s (18 allocations: 76.29 MiB) ``` For a comparitive baseline benchmark, we can try the same with dense vectors. ```julia -julia> v = collect(10_000_000:-1:1); +julia> v1 = collect(10_000_000:-1:1); + +julia> v2 = v1 .+ 1; -julia> @btime map(+, v, v); - 26.910 ms (5 allocations: 76.29 MiB) +julia> @btime map(+, v1, v2); + 25.449 ms (5 allocations: 76.29 MiB) ``` -Here, the operation uses SIMD, and the vector is densely packed whereas the values of the -dictionaries above are sparsely distributed into slots with a filling ratio of ~3.4. Thus, -the fact that the vector operation is 5.8x faster seems explainable (note that the gap may -narrow with more complex data types and mapping functions). +Here, the vector results are in line with the dictionary co-iteration! Using insertion, instead of preserving the existing indices, is comparitively slow. @@ -507,7 +523,7 @@ julia> function f(d1, d2) f (generic function with 1 method) julia> @btime f(d1, d2); - 2.161 s (10000073 allocations: 846.35 MiB) + 2.793 s (10000090 allocations: 668.42 MiB) ``` Unfortunately, insertion appears to be the idiomatic way of doing things with `Base.Dict`. @@ -526,7 +542,7 @@ julia> function g(d1, d2) g (generic function with 1 method) julia> @btime g(dict1, dict2); - 10.985 s (72 allocations: 541.17 MiB) + 9.362 s (72 allocations: 541.17 MiB) ``` The result is similar with generators, which is possibly the easiest way of dealing with @@ -537,5 +553,5 @@ julia> @btime Dict(i => dict1[i] + dict2[i] for i in keys(dict1)); 13.787 s (89996503 allocations: 2.02 GiB) ``` -This represents a 88x speedup between the first example with `HashDictionary` to this last +This represents a 590x speedup between the first example with `HashDictionary` to this last example with `Base.Dict`. 
diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl index 0614228..3bcaee2 100644 --- a/benchmark/bench_indices.jl +++ b/benchmark/bench_indices.jl @@ -353,6 +353,10 @@ for n in sizes y = n ÷ 2 pred1(x) = x != y pred2(x) = x == y + mostly_unique = rand(1:n, n) + sorted_mostly_unique = sort(mostly_unique) + rarely_unique = rand(1:floor(Int, sqrt(n)), n) + sorted_rarely_unique = sort(rarely_unique) if n < cutoff vec = collect(r) @@ -382,29 +386,37 @@ for n in sizes suite_n = suite["$n"] = BenchmarkGroup() - # s = suite_n["constructor ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable Vector($r)) - # s["Set"] = @benchmarkable Set($r) - # s["OrderedSet"] = @benchmarkable OrderedSet($r) - # n < cutoff && (s["Indices"] = @benchmarkable Indices($r)) - # s["HashIndices"] = @benchmarkable HashIndices($r) - # s["OldHashIndices"] = @benchmarkable HashIndices($r) - - # s = suite_n["build by insertion ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector (push!)"] = @benchmarkable build_vector_by_insertion($n)) - # s["Set"] = @benchmarkable build_set_by_insertion($n) - # s["OrderedSet"] = @benchmarkable build_ordered_set_by_insertion($n) - # n < cutoff && (s["Set"] = @benchmarkable build_indices_by_insertion($n)) - # s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) - # s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) - - # s = suite_n["empty by deletion ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=collect($r)) evals=1) - # s["Set"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Set($r)) evals=1 - # s["OrderedSet"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=OrderedSet($r)) evals=1 - # n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=Indices(collect($r))) evals=1) - # s["HashIndices"] = @benchmarkable empty_by_deletion(s, $n) setup=(s=HashIndices($r)) evals=1 - # s["OldHashIndices"] 
= @benchmarkable empty_by_deletion(s, $n) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 + s = suite_n["constructor ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable Vector($r)) + s["Set"] = @benchmarkable Set($r) + s["OrderedSet"] = @benchmarkable OrderedSet($r) + n < cutoff && (s["Indices"] = @benchmarkable Indices($r)) + s["HashIndices"] = @benchmarkable HashIndices($r) + s["OldHashIndices"] = @benchmarkable Dictionaries.OldHashIndices($r) + + s = suite_n["build by insertion ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector (push!)"] = @benchmarkable build_vector_by_insertion($n)) + s["Set"] = @benchmarkable build_set_by_insertion($n) + s["OrderedSet"] = @benchmarkable build_ordered_set_by_insertion($n) + n < cutoff && (s["Indices"] = @benchmarkable build_indices_by_insertion($n)) + s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) + s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) + + s = suite_n["copy ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable copy($vec)) + s["Set"] = @benchmarkable copy($set) + s["OrderedSet"] = @benchmarkable copy($ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable copy($indices)) + s["HashIndices"] = @benchmarkable copy($hash_indices) + s["OldHashIndices"] = @benchmarkable copy($old_hash_indices) + + s = suite_n["copy and empty by deletion ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(copy($vec), $n)) + s["Set"] = @benchmarkable empty_by_deletion(copy($set), $n) + s["OrderedSet"] = @benchmarkable empty_by_deletion(copy($ordered_set), $n) + n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(copy($indices), $n)) + s["HashIndices"] = @benchmarkable empty_by_deletion(copy($hash_indices), $n) + s["OldHashIndices"] = @benchmarkable empty_by_deletion(copy($old_hash_indices), $n) s = suite_n["insertion/deletion tests ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable basic_set_test($n) @@ 
-413,189 +425,220 @@ for n in sizes s["HashIndices"] = @benchmarkable basic_hash_indices_test($n) s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) - # s = suite_n["in ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable all_in($vec, $n)) - # s["Set"] = @benchmarkable all_in($set, $n) - # s["OrderedSet"] = @benchmarkable all_in($ordered_set, $n) - # n < cutoff && (s["Indices"] = @benchmarkable all_in($indices, $n)) - # s["HashIndices"] = @benchmarkable all_in($hash_indices, $n) - # s["OldHashIndices"] = @benchmarkable all_in($old_hash_indices, $n) - - # s = suite_n["not in ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable not_in($vec, $n)) - # s["Set"] = @benchmarkable not_in($set, $n) - # s["OrderedSet"] = @benchmarkable not_in($ordered_set, $n) - # n < cutoff && (s["Indices"] = @benchmarkable not_in($indices, $n)) - # s["HashIndices"] = @benchmarkable not_in($hash_indices, $n) - # s["OldHashIndices"] = @benchmarkable not_in($old_hash_indices, $n) - - # s = suite_n["count ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable count(iseven, $vec)) - # s["Set"] = @benchmarkable count(iseven, $set) - # s["OrderedSet"] = @benchmarkable count(iseven, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable count(iseven, $indices)) - # s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) - # s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) - - # s = suite_n["sum ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable sum($vec)) - # s["Set"] = @benchmarkable sum($set) - # s["OrderedSet"] = @benchmarkable sum($ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable sum($indices)) - # s["HashIndices"] = @benchmarkable sum($hash_indices) - # s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) - - # s = suite_n["foreach ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable foreachsum($vec)) - # s["Set"] = 
@benchmarkable foreachsum($set) - # s["OrderedSet"] = @benchmarkable foreachsum($ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable foreachsum($indices)) - # s["HashIndices"] = @benchmarkable foreachsum($hash_indices) - # s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) - - # s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x)))) - # s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) - # s["OrderedSet"] = @benchmarkable sum($(2x for x in ordered_set if isodd(x))) - # n < cutoff && (s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x)))) - # s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) - # s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if isodd(x))) - - # s = suite_n["filter (most) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable filter($pred1, $vec)) - # s["Set"] = @benchmarkable filter($pred1, $set) - # s["OrderedSet"] = @benchmarkable filter($pred1, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable filter($pred1, $indices)) - # s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) - # s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) - - # s = suite_n["filter (half) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable filter(iseven, $vec)) - # s["Set"] = @benchmarkable filter(iseven, $set) - # s["OrderedSet"] = @benchmarkable filter(iseven, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable filter(iseven, $indices)) - # s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) - # s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) - - # s = suite_n["filter (few) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable filter($pred2, $vec)) - # s["Set"] = @benchmarkable filter($pred2, $set) - # s["OrderedSet"] 
= @benchmarkable filter($pred2, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable filter($pred2, $indices)) - # s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) - # s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) - - # s = suite_n["filter! (most) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, s) setup=(s=collect($r)) evals=1) - # s["Set"] = @benchmarkable filter!($pred1, s) setup=(s=Set($r)) evals=1 - # s["OrderedSet"] = @benchmarkable filter!($pred1, s) setup=(s=OrderedSet($r)) evals=1 - # n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, s) setup=(s=Indices(collect($r))) evals=1) - # s["HashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=HashIndices($r)) evals=1 - # s["OldHashIndices"] = @benchmarkable filter!($pred1, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - # s = suite_n["filter! (half) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable filter!($iseven, s) setup=(s=collect($r)) evals=1) - # s["Set"] = @benchmarkable filter!($iseven, s) setup=(s=Set($r)) evals=1 - # s["OrderedSet"] = @benchmarkable filter!($iseven, s) setup=(s=OrderedSet($r)) evals=1 - # n < cutoff && (s["Indices"] = @benchmarkable filter!($iseven, s) setup=(s=Indices(collect($r))) evals=1) - # s["HashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=HashIndices($r)) evals=1 - # s["OldHashIndices"] = @benchmarkable filter!($iseven, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - # s = suite_n["filter! 
(few) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, s) setup=(s=collect($r)) evals=1) - # s["Set"] = @benchmarkable filter!($pred2, s) setup=(s=Set($r)) evals=1 - # s["OrderedSet"] = @benchmarkable filter!($pred2, s) setup=(s=OrderedSet($r)) evals=1 - # n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, s) setup=(s=Indices(collect($r))) evals=1) - # s["HashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=HashIndices($r)) evals=1 - # s["OldHashIndices"] = @benchmarkable filter!($pred2, s) setup=(s=Dictionaries.OldHashIndices($r)) evals=1 - - # s = suite_n["union ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) - # s["Set"] = @benchmarkable union($even_set, $odd_set) - # s["OrderedSet"] = @benchmarkable union($even_ordered_set, $odd_ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable union($even_indices, $even_indices)) - # s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) - # s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) - - # s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $odd_vec)) - # s["Set"] = @benchmarkable intersect($even_set, $odd_set) - # s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $odd_ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $odd_indices)) - # s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) - # s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) - - # s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $vec)) - # s["Set"] = @benchmarkable intersect($even_set, $set) - # s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $ordered_set) - # n < cutoff && (s["Indices"] 
= @benchmarkable intersect($even_indices, $indices)) - # s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) - # s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $old_hash_indices) - - # s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable intersect($vec, $vec)) - # s["Set"] = @benchmarkable intersect($set, $set) - # s["OrderedSet"] = @benchmarkable intersect($ordered_set, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable intersect($indices, $indices)) - # s["HashIndices"] = @benchmarkable intersect($hash_indices, $hash_indices) - # s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) - - # s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $odd_vec)) - # s["Set"] = @benchmarkable setdiff($even_set, $odd_set) - # s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $odd_ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $odd_indices)) - # s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) - # s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) - - # s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $vec)) - # s["Set"] = @benchmarkable setdiff($even_set, $set) - # s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $indices)) - # s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) - # s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $old_hash_indices) - - # s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable setdiff($vec, $vec)) - # s["Set"] = @benchmarkable setdiff($set, $set) - # 
s["OrderedSet"] = @benchmarkable setdiff($ordered_set, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable setdiff($indices, $indices)) - # s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) - # s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) - - # s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $odd_vec)) - # s["Set"] = @benchmarkable symdiff($even_set, $odd_set) - # s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $odd_ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $odd_indices)) - # s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) - # s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $odd_old_hash_indices) - - # s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $odd_vec)) - # s["Set"] = @benchmarkable symdiff($set, $odd_set) - # s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $odd_ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $odd_indices)) - # s["HashIndices"] = @benchmarkable symdiff($hash_indices, $odd_hash_indices) - # s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) - - # s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() - # n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $vec)) - # s["Set"] = @benchmarkable symdiff($even_set, $set) - # s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $indices)) - # s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $hash_indices) - # s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) - - # s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() - # n < cutoff && 
(s["Vector"] = @benchmarkable symdiff($vec, $vec)) - # s["Set"] = @benchmarkable symdiff($set, $set) - # s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $ordered_set) - # n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $indices)) - # s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) - # s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) + s = suite_n["insertion/deletion tests ($n)"] = BenchmarkGroup() + s["Set"] = @benchmarkable basic_set_test($n) + s["OrderedSet"] = @benchmarkable basic_ordered_set_test($n) + n < cutoff && (s["Indices"] = @benchmarkable basic_indices_test($n)) + s["HashIndices"] = @benchmarkable basic_hash_indices_test($n) + s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) + + s = suite_n["unique/distinct (high uniqueness, unsorted) ($n)"] = BenchmarkGroup() + s["Vector (unique)"] = @benchmarkable unique($mostly_unique) + s["Set"] = @benchmarkable Set($mostly_unique) + s["OrderedSet"] = @benchmarkable OrderedSet($mostly_unique) + s["HashIndices (distinct)"] = @benchmarkable distinct($mostly_unique) + + s = suite_n["unique/distinct (high uniqueness, sorted) ($n)"] = BenchmarkGroup() + s["Vector (unique)"] = @benchmarkable unique($sorted_mostly_unique) + s["Set"] = @benchmarkable Set($sorted_mostly_unique) + s["OrderedSet"] = @benchmarkable OrderedSet($sorted_mostly_unique) + s["HashIndices"] = @benchmarkable distinct($sorted_mostly_unique) + + s = suite_n["unique/distinct (low uniqueness, unsorted) ($n)"] = BenchmarkGroup() + s["Vector (unique)"] = @benchmarkable unique($rarely_unique) + s["Set"] = @benchmarkable Set($rarely_unique) + s["OrderedSet"] = @benchmarkable OrderedSet($rarely_unique) + s["HashIndices"] = @benchmarkable distinct($rarely_unique) + + s = suite_n["unique/distinct (low uniqueness, sorted) ($n)"] = BenchmarkGroup() + s["Vector (unique)"] = @benchmarkable unique($sorted_rarely_unique) + s["Set"] = @benchmarkable 
Set($sorted_rarely_unique) + s["OrderedSet"] = @benchmarkable OrderedSet($sorted_rarely_unique) + s["HashIndices"] = @benchmarkable distinct($sorted_rarely_unique) + + s = suite_n["in ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable all_in($vec, $n)) + s["Set"] = @benchmarkable all_in($set, $n) + s["OrderedSet"] = @benchmarkable all_in($ordered_set, $n) + n < cutoff && (s["Indices"] = @benchmarkable all_in($indices, $n)) + s["HashIndices"] = @benchmarkable all_in($hash_indices, $n) + s["OldHashIndices"] = @benchmarkable all_in($old_hash_indices, $n) + + s = suite_n["not in ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable not_in($vec, $n)) + s["Set"] = @benchmarkable not_in($set, $n) + s["OrderedSet"] = @benchmarkable not_in($ordered_set, $n) + n < cutoff && (s["Indices"] = @benchmarkable not_in($indices, $n)) + s["HashIndices"] = @benchmarkable not_in($hash_indices, $n) + s["OldHashIndices"] = @benchmarkable not_in($old_hash_indices, $n) + + s = suite_n["count ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable count(iseven, $vec)) + s["Set"] = @benchmarkable count(iseven, $set) + s["OrderedSet"] = @benchmarkable count(iseven, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable count(iseven, $indices)) + s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) + s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) + + s = suite_n["sum ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable sum($vec)) + s["Set"] = @benchmarkable sum($set) + s["OrderedSet"] = @benchmarkable sum($ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable sum($indices)) + s["HashIndices"] = @benchmarkable sum($hash_indices) + s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) + + s = suite_n["foreach ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable foreachsum($vec)) + s["Set"] = @benchmarkable foreachsum($set) + s["OrderedSet"] = 
@benchmarkable foreachsum($ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable foreachsum($indices)) + s["HashIndices"] = @benchmarkable foreachsum($hash_indices) + s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) + + s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x)))) + s["Set"] = @benchmarkable sum($(2x for x in set if isodd(x))) + s["OrderedSet"] = @benchmarkable sum($(2x for x in ordered_set if isodd(x))) + n < cutoff && (s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x)))) + s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) + s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if isodd(x))) + + s = suite_n["filter (most) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter($pred1, $vec)) + s["Set"] = @benchmarkable filter($pred1, $set) + s["OrderedSet"] = @benchmarkable filter($pred1, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable filter($pred1, $indices)) + s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) + s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) + + s = suite_n["filter (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter(iseven, $vec)) + s["Set"] = @benchmarkable filter(iseven, $set) + s["OrderedSet"] = @benchmarkable filter(iseven, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable filter(iseven, $indices)) + s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) + s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) + + s = suite_n["filter (few) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter($pred2, $vec)) + s["Set"] = @benchmarkable filter($pred2, $set) + s["OrderedSet"] = @benchmarkable filter($pred2, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable filter($pred2, 
$indices)) + s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) + s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) + + s = suite_n["copy and filter! (most) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, copy($vec))) + s["Set"] = @benchmarkable filter!($pred1, copy($set)) + s["OrderedSet"] = @benchmarkable filter!($pred1, copy($ordered_set)) + n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, copy($indices))) + s["HashIndices"] = @benchmarkable filter!($pred1, copy($hash_indices)) + s["OldHashIndices"] = @benchmarkable filter!($pred1, copy($old_hash_indices)) + + s = suite_n["copy and filter! (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter!(iseven, copy($vec))) + s["Set"] = @benchmarkable filter!(iseven, copy($set)) + s["OrderedSet"] = @benchmarkable filter!(iseven, copy($ordered_set)) + n < cutoff && (s["Indices"] = @benchmarkable filter!(iseven, copy($indices))) + s["HashIndices"] = @benchmarkable filter!(iseven, copy($hash_indices)) + s["OldHashIndices"] = @benchmarkable filter!(iseven, copy($old_hash_indices)) + + s = suite_n["copy and filter! 
(few) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, copy($vec))) + s["Set"] = @benchmarkable filter!($pred2, copy($set)) + s["OrderedSet"] = @benchmarkable filter!($pred2, copy($ordered_set)) + n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, copy($indices))) + s["HashIndices"] = @benchmarkable filter!($pred2, copy($hash_indices)) + s["OldHashIndices"] = @benchmarkable filter!($pred2, copy($old_hash_indices)) + + s = suite_n["union ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) + s["Set"] = @benchmarkable union($even_set, $odd_set) + s["OrderedSet"] = @benchmarkable union($even_ordered_set, $odd_ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable union($even_indices, $even_indices)) + s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) + + s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $odd_vec)) + s["Set"] = @benchmarkable intersect($even_set, $odd_set) + s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $odd_ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $odd_indices)) + s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) + + s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $vec)) + s["Set"] = @benchmarkable intersect($even_set, $set) + s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $indices)) + s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable 
intersect($even_old_hash_indices, $old_hash_indices) + + s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable intersect($vec, $vec)) + s["Set"] = @benchmarkable intersect($set, $set) + s["OrderedSet"] = @benchmarkable intersect($ordered_set, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable intersect($indices, $indices)) + s["HashIndices"] = @benchmarkable intersect($hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) + + s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $odd_vec)) + s["Set"] = @benchmarkable setdiff($even_set, $odd_set) + s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $odd_ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $odd_indices)) + s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) + + s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $vec)) + s["Set"] = @benchmarkable setdiff($even_set, $set) + s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $indices)) + s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $old_hash_indices) + + s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable setdiff($vec, $vec)) + s["Set"] = @benchmarkable setdiff($set, $set) + s["OrderedSet"] = @benchmarkable setdiff($ordered_set, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable setdiff($indices, $indices)) + s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) + 
s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) + + s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $odd_vec)) + s["Set"] = @benchmarkable symdiff($even_set, $odd_set) + s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $odd_ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $odd_indices)) + s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $odd_old_hash_indices) + + s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $odd_vec)) + s["Set"] = @benchmarkable symdiff($set, $odd_set) + s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $odd_ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $odd_indices)) + s["HashIndices"] = @benchmarkable symdiff($hash_indices, $odd_hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) + + s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $vec)) + s["Set"] = @benchmarkable symdiff($even_set, $set) + s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $indices)) + s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) + + s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() + n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $vec)) + s["Set"] = @benchmarkable symdiff($set, $set) + s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $ordered_set) + n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $indices)) + s["HashIndices"] = @benchmarkable 
symdiff($hash_indices, $hash_indices) + s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) end end # module diff --git a/src/AbstractDictionary.jl b/src/AbstractDictionary.jl index 7937371..15a0bab 100644 --- a/src/AbstractDictionary.jl +++ b/src/AbstractDictionary.jl @@ -235,6 +235,80 @@ function Base.unique(d::AbstractDictionary) return out end +# TODO think of a name for this. This matches Base.unique but ideally `distinct` could be +# be an abstract factory method, like `distinct(BTreeIndices{Int}, itr)`. +#= +""" + distinct(f, itr; to=HashDictionary) + +Collect the first element of iterator `itr` for each unique value produced by `f` applied to +elements of `itr` into a new collection, defaulting to `HashDictionary`. Similar to +`Base.unique`, except returning a dictionary instead of an array. + +# Example + +```julia +julia> distinct(first, ["Alice", "Bob", "Charlie"]) +3-element HashDictionary{Char,String} + 'A' │ "Alice" + 'B' │ "Bob" + 'C' │ "Charlie" + +julia> distinct(first, ["Alice", "Bob", "Charlie", "Chaz"]) +3-element HashDictionary{Char,String} + 'A' │ "Alice" + 'B' │ "Bob" + 'C' │ "Charlie" +``` +""" +distinct(f, itr) = _distinct(f, HashDictionary, itr) +=# + +function _distinct(f, ::Type{T}, itr) where T + out = T() + for x in itr + i = f(x) + (hadtoken, token) = gettoken!(out, x) + if !hadtoken + @inbounds settokenvalue!(out, token, i) + end + end + return out +end + +# An auto-widening AbstractDictionary constructor +function __distinct(f, dict, itr, s) + I = keytype(dict) + T = eltype(dict) + tmp = iterate(itr, s) + while tmp !== nothing + (x, s) = tmp + i = f(x) + if !(i isa I) + new_inds = copy(keys(dict), promote_type(I, typeof(i))) + new_dict = similar(new_inds, promote_type(T, typeof(x))) + (hadtoken, token) = gettoken!(new_dict, i) + if !hadtoken + @inbounds settokenvalue!(new_dict, token, x) + end + return __distinct(f, new_dict, itr, s) + elseif !(x isa T) + new_dict = copy(dict, promote_type(T, 
typeof(x))) + (hadtoken, token) = gettoken!(new_dict, i) + if !hadtoken + @inbounds settokenvalue!(new_dict, token, x) + end + return __distinct(f, new_dict, itr, s) + end + (hadtoken, token) = gettoken!(dict, i) + if !hadtoken + @inbounds settokenvalue!(dict, token, x) + end + tmp = iterate(itr, s) + end + return dict +end + ### Settable interface """ @@ -268,7 +342,7 @@ function Base.isassigned(dict::AbstractDictionary{I}, i::I) where {I} end end -function Base.setindex!(dict::AbstractDictionary{I, T}, value::T, i::I) where {I, T} +@propagate_inbounds function Base.setindex!(dict::AbstractDictionary{I, T}, value::T, i::I) where {I, T} if !(istokenizable(dict)) error("Every settable AbstractDictionary type must define a method for `setindex!`: $(typeof(dict))") end @@ -293,6 +367,10 @@ Construct a new `issettable` dictionary with identical `keys` as `d` and an elem Base.similar(d::AbstractDictionary) = similar(keys(d), eltype(d)) Base.similar(d::AbstractDictionary, ::Type{T}) where {T} = similar(keys(d), T) +function Base.similar(indices::AbstractIndices{I}, ::Type{T}) where {I, T} + return similar(convert(HashIndices{I}, indices), T) +end + # fill! and fill function Base.fill!(d::AbstractDictionary, value) @@ -381,8 +459,17 @@ end # Copying - note that this doesn't necessarily copy the indices! (`copy(keys(dict))` can do that) -function Base.copy(d::AbstractDictionary) - out = similar(d) +""" + copy(dict::AbstractDictionary) + copy(dict::AbstractDictionary, ::Type{T}) + +Create a shallow copy of the values of `dict`. Note that `keys(dict)` is not copied, and +therefore care must be taken that inserting/deleting elements. A new element type `T` can +optionally be specified. 
+""" +Base.copy(dict::AbstractDictionary) = copy(dict, eltype(dict)) +function Base.copy(d::AbstractDictionary, ::Type{T}) where {T} + out = similar(d, T) copyto!(out, d) return out end diff --git a/src/AbstractIndices.jl b/src/AbstractIndices.jl index 6296652..61d16e5 100644 --- a/src/AbstractIndices.jl +++ b/src/AbstractIndices.jl @@ -79,6 +79,49 @@ end Base.unique(i::AbstractIndices) = i +""" + copy(inds::AbstractIndices) + copy(inds::AbstractIndices, I::Type) + +Construct a shallow copy of `inds`, possibly specifying a new element type `I`. The output +container is not guaranteed to be the same type as the input. +""" +Base.copy(inds::AbstractIndices) = copy(inds, eltype(inds)) + +function Base.copy(inds::AbstractIndices, ::Type{I}) where I + out = empty(inds, I) + for i in inds + insert!(out, i) + end + return out +end + +Base.empty(::AbstractIndices, ::Type{I}) where {I} = HashIndices{I}() + +""" + distinct(itr) + +Collect the distinct elements of iterator `itr` into a new collection. Similar to +`Base.unique`, except returning a set (`HashIndices`) instead of an array. 
+ +# Example + +```julia +julia> distinct([1,2,3,3]) +3-element HashIndices{Int64} + 1 + 2 + 3 +``` +""" +distinct(itr) = _distinct(HashIndices, itr) + +function _distinct(::Type{T}, itr) where T + out = T() + union!(out, itr) + return out +end + struct IndexError <: Exception msg::String end @@ -225,7 +268,7 @@ function Base.intersect(i::AbstractIndices, itr) intersect!(out, itr) else out = empty(i) - intersect!(out, i) + union!(out, i) intersect!(out, itr) end return out @@ -237,7 +280,7 @@ function Base.setdiff(i::AbstractIndices, itr) setdiff!(out, itr) else out = empty(i) - setdiff!(out, i) + union!(out, i) setdiff!(out, itr) end return out @@ -249,8 +292,26 @@ function Base.symdiff(i::AbstractIndices, itr) symdiff!(out, itr) else out = empty(i) - symdiff!(out, i) + union!(out, i) symdiff!(out, itr) end return out end + +# issetequal and issubset(equal) should work already + +""" + disjoint(set1, set2) + +Return `true` if `set1` and `set2` are disjoint or `false`. Two sets are disjoint if no +elements of `set1` is in `set2`, and vice-versa. Somewhat equivalent to, but faster than, +`isempty(intersect(set1, set2))`. +""" +function disjoint(set1, set2) + for i in set1 + if i in set2 + return false + end + end + return true +end diff --git a/src/Dictionaries.jl b/src/Dictionaries.jl index d12d217..71dd505 100644 --- a/src/Dictionaries.jl +++ b/src/Dictionaries.jl @@ -1,20 +1,17 @@ module Dictionaries using Random - using Indexing - using Base: @propagate_inbounds, Callable export getindices, setindices! export AbstractDictionary, AbstractIndices, IndexError, Indices, HashIndices, HashDictionary, Dictionary, MappedDictionary, DictionaryView, FilteredDictionary, FilteredIndices, BroadcastedDictionary -export issettable, isinsertable, set!, unset!, dictionary +export dictionary, distinct, disjoint, filterview +export issettable, isinsertable, set!, unset! 
export istokenizable, tokentype, tokens, tokenized, gettoken, gettokenvalue, istokenassigned, settokenvalue!, gettoken!, deletetoken!, sharetokens -export filterview # TODO move to SplitApplyCombine.jl (and re-order project dependencies?) - include("AbstractDictionary.jl") include("AbstractIndices.jl") diff --git a/src/Dictionary.jl b/src/Dictionary.jl index 4e00478..d0ee71a 100644 --- a/src/Dictionary.jl +++ b/src/Dictionary.jl @@ -77,8 +77,6 @@ issettable(::VectorDictionary) = true return d end -Base.copy(d::VectorDictionary) = Dictionary(d.indices, copy(d.values)) - function Base.similar(inds::VectorIndices, ::Type{T}) where {T} return Dictionary(inds.inds, Vector{T}(undef, length(inds))) end diff --git a/src/HashDictionary.jl b/src/HashDictionary.jl index cd5da27..3543301 100644 --- a/src/HashDictionary.jl +++ b/src/HashDictionary.jl @@ -3,6 +3,7 @@ struct HashDictionary{I, T} <: AbstractDictionary{I, T} values::Vector{T} function HashDictionary{I, T}(inds::HashIndices{I}, values::Vector{T}) where {I, T} + # TODO make sure sizes match, deal with the fact that inds.holes might be nonzero return new(inds, values) end end @@ -164,8 +165,20 @@ end # Factories -Base.empty(::AbstractIndices, ::Type{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}() +function Base.similar(indices::HashIndices{I}, ::Type{T}) where {I, T} + return HashDictionary(indices, Vector{T}(undef, length(indices.values))) +end -function Base.similar(indices::AbstractIndices{I}, ::Type{T}) where {I, T} - return HashDictionary(indices, Vector{T}(undef, length(indices))) +function _distinct(f, ::Type{HashDictionary}, itr) + tmp = iterate(itr) + if tmp === nothing + T = Base.@default_eltype(itr) + I = Core.Compiler.return_type(f, Tuple{T}) + return HashDictionary{I, T}() + end + (x, s) = tmp + i = f(x) + dict = HashDictionary{typeof(i), typeof(x)}() + insert!(dict, i, x) + return __distinct(f, dict, itr, s) end diff --git a/src/HashIndices.jl b/src/HashIndices.jl index 80f0f48..2112345 100644 --- 
a/src/HashIndices.jl +++ b/src/HashIndices.jl @@ -24,6 +24,32 @@ end HashIndices{I}(iter) Construct a `HashIndices` with indices from iterable container `iter`. + +Note that the elements of `iter` must be distinct/unique. Instead, the `distinct` function +can be used for finding the unique elements. + +# Examples + +```julia +julia> HashIndices([1,2,3]) +3-element HashIndices{Int64} + 1 + 2 + 3 + +julia> HashIndices([1,2,3,3]) +ERROR: IndexError("Indices are not unique (inputs at positions 3 and 4) - consider using the distinct function") +Stacktrace: + [1] HashIndices{Int64}(::Array{Int64,1}) at /home/ferris/.julia/dev/Dictionaries/src/HashIndices.jl:92 + [2] HashIndices(::Array{Int64,1}) at /home/ferris/.julia/dev/Dictionaries/src/HashIndices.jl:53 + [3] top-level scope at REPL[12]:1 + +julia> distinct([1,2,3,3]) +3-element HashIndices{Int64} + 1 + 2 + 3 +``` """ function HashIndices(iter) if Base.IteratorEltype(iter) === Base.EltypeUnknown() @@ -40,31 +66,96 @@ function HashIndices{I}(iter) where {I} @inbounds for (i, value) in enumerate(iter) values[i] = value end - return HashIndices{I}(values) else - h = HashIndices{I}() - for i in iter - insert!(h, i) + values = Vector{I}() + @inbounds for value in iter + push!(values, value) end - return h end + return HashIndices{I}(values) end function HashIndices{I}(values::Vector{I}) where {I} - # TODO Incrementally build the hashmap removing duplicates + # The input must have unique elements (the constructor is not to be used in place of `distinct`) + hashes = map(v -> hash(v) & hash_mask, values) + + # Incrementally build the hashmap and throw if duplicates detected + newsize = Base._tablesz(3*length(values) >> 0x01) + bit_mask = newsize - 1 # newsize is a power of two + slots = zeros(Int, newsize) + @inbounds for index in keys(hashes) + full_hash = hashes[index] + trial_slot = reinterpret(Int, full_hash) & bit_mask + @inbounds while true + trial_slot = (trial_slot + 1) + if slots[trial_slot] == 0 + slots[trial_slot] 
= index + break + else + # TODO make this check optional + if isequal(values[index], values[slots[trial_slot]]) + throw(IndexError("Indices are not unique (inputs at positions $(slots[trial_slot]) and $index) - consider using the distinct function")) + end + end + trial_slot = trial_slot & bit_mask + # This is potentially an infinte loop and care must be taken not to overfill the container + end + end + return HashIndices{I}(slots, hashes, values, 0) +end + +Base.convert(::Type{HashIndices}, inds::AbstractIndices{I}) where {I} = convert(HashIndices{I}, inds) + +function Base.convert(::Type{HashIndices{I}}, inds::AbstractIndices) where {I} + # Fast path + if inds isa HashIndices && inds.holes == 0 + # Note: `convert` doesn't have copy semantics + return HashIndices{I}(inds.slots, inds.hashes, convert(Vector{I}, inds.values), 0) + end + + # The input is already unique + values = collect(I, inds) hashes = map(v -> hash(v) & hash_mask, values) - slots = Vector{Int}() - out = HashIndices{I}(slots, hashes, values, 0) + + # Incrementally build the hashmap newsize = Base._tablesz(3*length(values) >> 0x01) - rehash!(out, newsize) - return out + bit_mask = newsize - 1 # newsize is a power of two + slots = zeros(Int, newsize) + @inbounds for index in keys(hashes) + full_hash = hashes[index] + trial_slot = reinterpret(Int, full_hash) & bit_mask + @inbounds while true + trial_slot = (trial_slot + 1) + if slots[trial_slot] == 0 + slots[trial_slot] = index + break + else + # TODO make this check optional + if isequal(values[index], values[slots[trial_slot]]) + throw(IndexError("Indices are not unique (inputs at positions $(slots[trial_slot]) and $index)")) + end + end + trial_slot = trial_slot & bit_mask + # This is potentially an infinte loop and care must be taken not to overfill the container + end + end + return HashIndices{I}(slots, hashes, values, 0) end -function Base.copy(indices::HashIndices{I}) where {I} +""" + copy(inds::AbstractIndices) + copy(inds::AbstractIndices, 
::Type{I}) + +Create a shallow copy of the indices, optionally changing the element type. + +(Note that `copy` on a dictionary does not copy its indices). +""" +function Base.copy(indices::HashIndices, ::Type{I}) where {I} + _copy = I === eltype(indices) ? copy : identity # the constructor will call `convert` if indices.holes == 0 - return HashIndices{I}(copy(indices.slots), copy(indices.hashes), copy(indices.values), 0) + return HashIndices{I}(_copy(indices.slots), _copy(indices.hashes), _copy(indices.values), 0) else - out = HashIndices{I}(Vector{Int}(), copy(indices.hashes), copy(indices.values), indices.holes) + out = HashIndices{I}(Vector{Int}(), _copy(indices.hashes), _copy(indices.values), indices.holes) newsize = Base._tablesz(3*length(indices) >> 0x01) rehash!(out, newsize) end @@ -330,4 +421,38 @@ end # Factories -Base.empty(::AbstractIndices, ::Type{I}) where {I} = HashIndices{I}() +# TODO make this generic... maybe a type-based `empty`? +function _distinct(::Type{HashIndices}, itr) + if Base.IteratorEltype(itr) === Base.HasEltype() + return _distinct(HashIndices{eltype(itr)}, itr) + end + + tmp = iterate(itr) + if tmp === nothing + return HashIndices{Base.@default_eltype(itr)}() + end + (x, s) = tmp + indices = HashIndices{typeof(x)}() + insert!(indices, x) + return __distinct!(indices, itr, s, x) +end + +# An auto-widening constructor for insertable AbstractIndices +function __distinct!(indices::AbstractIndices, itr, s, x_old) + T = eltype(indices) + tmp = iterate(itr, s) + while tmp !== nothing + (x, s) = tmp + if !isequal(x, x_old) # Optimized for repeating elements of `itr`, e.g. 
if `itr` is sorted + if !(x isa T) && promote_type(typeof(x), T) != T + new_indices = copy(indices, promote_type(T, typeof(x))) + set!(new_indices, x) + return __distinct!(new_indices, itr, s, x) + end + set!(indices, x) + x_old = x + end + tmp = iterate(itr, s) + end + return indices +end diff --git a/src/Indices.jl b/src/Indices.jl index 0a7855a..6ae91ae 100644 --- a/src/Indices.jl +++ b/src/Indices.jl @@ -68,7 +68,13 @@ end Base.empty(inds::VectorIndices, ::Type{I}) where {I} = Indices{I, Vector{I}}(Vector{I}()) -Base.copy(inds::VectorIndices) = Indices(copy(inds.inds)) +function Base.copy(inds::VectorIndices, ::Type{I}) where {I} + if I === eltype(inds) + Indices{I}(copy(inds.inds)) + else + Indices{I}(inds.inds) # The constructor will call convert + end +end function Base.filter!(pred, inds::VectorIndices) filter!(pred, inds.inds) diff --git a/src/OldHashDictionary.jl b/src/OldHashDictionary.jl index 0f293bb..ae3389f 100644 --- a/src/OldHashDictionary.jl +++ b/src/OldHashDictionary.jl @@ -132,7 +132,7 @@ function gettoken!(d::OldHashDictionary{T}, key::T) where {T} end end -function Base.copy(d::OldHashDictionary{I, T}) where {I, T} +function Base.copy(d::OldHashDictionary{I, T}, ::Type{I}, ::Type{T}) where {I, T} return OldHashDictionary{I, T}(d.indices, copy(d.values), nothing) end diff --git a/src/OldHashIndices.jl b/src/OldHashIndices.jl index 858fbf3..4708104 100644 --- a/src/OldHashIndices.jl +++ b/src/OldHashIndices.jl @@ -57,7 +57,7 @@ function OldHashIndices{T}(iter) where {T} return h end -function Base.copy(h::OldHashIndices{T}) where {T} +function Base.copy(h::OldHashIndices{T}, ::Type{T}) where {T} return OldHashIndices{T}(copy(h.slots), copy(h.inds), h.ndel, h.count, h.idxfloor, h.maxprobe) end diff --git a/src/foreach.jl b/src/foreach.jl index af6296b..eac751c 100644 --- a/src/foreach.jl +++ b/src/foreach.jl @@ -4,9 +4,12 @@ function Base.foreach(f, d::AbstractDictionary, d2::AbstractDictionary, ds::Abst f(gettokenvalue(d, t), gettokenvalue(d2, 
t), map(x -> @inbounds(gettokenvalue(x, t)), ds)...) end else - @inbounds for i in keys(d) - f(d[i], d2[i], map(x -> @inbounds(x[i]), ds)...) - end + @boundscheck if !isequal(keys(d), keys(d2)) || any(dict -> !isequal(keys(d), keys(dict)), ds) + throw(IndexError("Indices do not match")) + end + @inbounds for xs in zip(d, d2, ds...) + f(xs...) + end end return nothing end @@ -18,9 +21,12 @@ function Base.foreach(f, d::AbstractDictionary, d2::AbstractDictionary) f(gettokenvalue(d, t), gettokenvalue(d2, t)) end else - @inbounds for i in keys(d) - f(d[i], d2[i]) - end + @boundscheck if !isequal(keys(d), keys(d2)) + throw(IndexError("Indices do not match")) + end + @inbounds for (x, x2) in zip(d, d2) + f(x, x2) + end end return nothing end diff --git a/src/insertion.jl b/src/insertion.jl index d9e7f54..ffe9724 100644 --- a/src/insertion.jl +++ b/src/insertion.jl @@ -369,9 +369,20 @@ end ### Indices ("sets") versions of above -function Base.union!(s1::AbstractIndices, s2::AbstractIndices) - for i in s2 - set!(s1, i) +function Base.union!(s1::AbstractIndices, itr) + # Optimized to handle repeated values in `itr` - e.g. if `itr` is already sorted + x = iterate(itr) + if x === nothing + return s1 + end + (i, s) = x + set!(s1, i) + i_old = i + while x !== nothing + (i, s) = x + !isequal(i, i_old) && set!(s1, i) + i_old = i + x = iterate(itr, s) end return s1 end @@ -462,4 +473,6 @@ elements of type `eltype(inds)`. 
""" Base.empty(d::AbstractDictionary) = empty(keys(d), keytype(d), eltype(d)) -Base.empty(d::AbstractDictionary, ::Type{I}) where {I} = empty(keys(d), I) \ No newline at end of file +Base.empty(d::AbstractDictionary, ::Type{I}) where {I} = empty(keys(d), I) + +Base.empty(::AbstractIndices, ::Type{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}() diff --git a/src/map.jl b/src/map.jl index e885c6c..7adf686 100644 --- a/src/map.jl +++ b/src/map.jl @@ -1,16 +1,27 @@ # Make `map!` fast if the inputs and output share tokens -# TODO consider if `map` should respect iteration order or indices?? - function Base.map!(f, out::AbstractDictionary, d::AbstractDictionary, d2::AbstractDictionary, ds::AbstractDictionary...) if sharetokens(out, d, d2, ds...) @inbounds for t in tokens(out) settokenvalue!(out, t, f(gettokenvalue(d, t), gettokenvalue(d2, t), map(x -> @inbounds(gettokenvalue(x, t)), ds)...)) end + elseif istokenizable(out) + @boundscheck if !isequal(keys(out), keys(d)) || !isequal(keys(out), keys(d2)) || any(dict -> !isequal(keys(out), keys(dict)), ds) + throw(IndexError("Indices do not match")) + end + @inbounds for txs in zip(tokens(out), d, d2, ds...) + t = txs[1] + xs = Base.tail(txs) + settokenvalue!(out, t, f(xs...)) + end else - @boundscheck nothing # TODO check that indices match - @inbounds for i in keys(out) - out[i] = f(d[i], d2[i], map(x -> @inbounds(x[i]), ds)...) + @boundscheck if !isequal(keys(out), keys(d)) || !isequal(keys(out), keys(d2)) || any(dict -> !isequal(keys(out), keys(dict)), ds) + throw(IndexError("Indices do not match")) + end + @inbounds for ixs in zip(keys(out), d, d2, ds...) + i = ixs[1] + xs = Base.tail(ixs) + out[i] = f(xs...) 
end end return out @@ -22,8 +33,17 @@ function Base.map!(f, out::AbstractDictionary, d::AbstractDictionary, d2::Abstra @inbounds for t in tokens(out) settokenvalue!(out, t, f(gettokenvalue(d, t), gettokenvalue(d2, t))) end + elseif istokenizable(out) + @boundscheck if !isequal(keys(out), keys(d)) || !isequal(keys(out), keys(d2)) + throw(IndexError("Indices do not match")) + end + @inbounds for (t, x, x2) in zip(tokens(out), d, d2) + settokenvalue!(out, t, f(x, x2)) + end else - @boundscheck nothing # TODO check that indices match + @boundscheck if !isequal(keys(out), keys(d)) || !isequal(keys(out), keys(d2)) + throw(IndexError("Indices do not match")) + end @inbounds for i in keys(out) out[i] = f(d[i], d2[i]) end @@ -36,10 +56,19 @@ function Base.map!(f, out::AbstractDictionary, d::AbstractDictionary) @inbounds for t in tokens(out) settokenvalue!(out, t, f(gettokenvalue(d, t))) end + elseif istokenizable(out) + @boundscheck if !isequal(keys(out), keys(d)) + throw(IndexError("Indices do not match")) + end + @inbounds for (t, x) in zip(tokens(out), d) + settokenvalue!(out, t, f(x)) + end else - @boundscheck nothing # TODO check that indices match - @inbounds for i in keys(out) - out[i] = f(d[i]) + @boundscheck if !isequal(keys(out), keys(d)) + throw(IndexError("Indices do not match")) + end + @inbounds for (i, x) in zip(keys(out), d) + out[i] = f(x) end end return out @@ -60,12 +89,14 @@ end function Base.map(f, d::AbstractDictionary) out = similar(d, Core.Compiler.return_type(f, Tuple{eltype(d)})) - map!(f, out, d) + @inbounds map!(f, out, d) return out end function Base.map(f, d::AbstractDictionary, ds::AbstractDictionary...) out = similar(d, Core.Compiler.return_type(f, Tuple{eltype(d), map(eltype, ds)...})) - map!(f, out, d, ds...) + @inbounds map!(f, out, d, ds...) 
return out end + +# TODO mapreduce (mapfoldl) From cf9d94544788e45423515da6974dbdeab16b4dfd Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Wed, 10 Jun 2020 12:49:25 +1000 Subject: [PATCH 10/20] Many changes --- README.md | 29 ++- {src => contrib}/DenseHashDictionary.jl | 38 ++-- src/Dictionaries.jl | 2 +- src/Dictionary.jl | 14 +- src/HashDictionary.jl | 230 +++++++++++++++++++----- src/MappedDictionary.jl | 2 +- src/OldHashDictionary.jl | 2 +- test/HashDictionary.jl | 14 ++ test/HashIndices.jl | 7 + 9 files changed, 264 insertions(+), 74 deletions(-) rename {src => contrib}/DenseHashDictionary.jl (79%) diff --git a/README.md b/README.md index 7fd1215..b2a19f1 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ In this package we aim to devise a cohesive interface for abstract dictionaries Dictionaries share the common supertype `AbstractDictionary`, and the go-to container in this package is `HashDictionary` - which is a new hash-based implementation that serves as a replacement of Julia's inbuilt `Dict` type (using `hash` and `isequal` for key lookup and comparison). The three main difference to `Dict` are that it preserves the order of elements, it iterates much faster, and it iterates values rather than key-value pairs. +### Constructing dictionaries + You can construct one from a list of indices (or keys) and a list of values. ```julia @@ -29,7 +31,19 @@ julia> dict = HashDictionary(["a", "b", "c"], [1, 2, 3]) julia> dict["a"] 1 ``` -If you prefer, you can use the `dictionary` function to create a dictionary from something that iterates key-value pairs (note: this includes `Dict`s). + +The constructor also accepts any indexable container, preserving the keys and values. 
+```julia +julia> HashDictionary(Dict("a"=>1, "b"=>2, "c"=>3)) +3-element HashDictionary{String,Int64} + "c" │ 3 + "b" │ 2 + "a" │ 1 +``` + +If you prefer, you can use the `dictionary` function to create a dictionary from something +that iterates key-value pairs (either as a `Pair` or a two-tuple, etc), somewhat like a +`Dict` constructor. ```julia julia> dictionary(["a" => 1, "b" => 2, "c" => 3]) @@ -39,6 +53,19 @@ julia> dictionary(["a" => 1, "b" => 2, "c" => 3]) "c" │ 3 ``` +One final way to construct a dictionary is using the `index` function, which accepts a +function that constructs a "key" for each element in the collection. + +```julia +julia> index(first, ["Alice", "Bob", "Charlie"]) +3-element HashDictionary{Char,String} + 'A' │ "Alice" + 'B' │ "Bob" + 'C' │ "Charlie" +``` + +### Accessing dictionaries + The values of `HashDictionary` are mutable, or "settable", and can be modified via `setindex!`. However, just like for `Array`s, new indices (keys) are *never* created or rearranged this way. 
diff --git a/src/DenseHashDictionary.jl b/contrib/DenseHashDictionary.jl similarity index 79% rename from src/DenseHashDictionary.jl rename to contrib/DenseHashDictionary.jl index 23efffa..a270801 100644 --- a/src/DenseHashDictionary.jl +++ b/contrib/DenseHashDictionary.jl @@ -1,3 +1,7 @@ +module DenseHashDictionaries + +import Base: @propagate_inbounds +using Dictionaries export DenseHashIndices, DenseHashDictionary perfect_hash(::Any) = false @@ -41,19 +45,19 @@ end Base.length(indices::DenseHashIndices) = length(indices.values) # Token interface -istokenizable(::DenseHashIndices) = true +Dictionaries.istokenizable(::DenseHashIndices) = true -tokentype(::DenseHashIndices) = Int +Dictionaries.tokentype(::DenseHashIndices) = Int # Duration iteration the token cannot be used for deletion - we do not worry about the slots -@propagate_inbounds function iteratetoken(indices::DenseHashIndices) +@propagate_inbounds function Dictionaries.iteratetoken(indices::DenseHashIndices) if isempty(indices.values) return nothing end return ((0, 1), 1) end -@propagate_inbounds function iteratetoken(indices::DenseHashIndices, index::Int) +@propagate_inbounds function Dictionaries.iteratetoken(indices::DenseHashIndices, index::Int) if index == length(indices.values) return nothing end @@ -62,7 +66,7 @@ end end -function gettoken(indices::DenseHashIndices{I}, i::I) where {I} +function Dictionaries.gettoken(indices::DenseHashIndices{I}, i::I) where {I} full_hash = hash(i) n_slots = length(indices.slots) bit_mask = n_slots - 1 # n_slots is always a power of two @@ -85,14 +89,14 @@ function gettoken(indices::DenseHashIndices{I}, i::I) where {I} end end -@propagate_inbounds function gettokenvalue(indices::DenseHashIndices, (_slot, index)) +@propagate_inbounds function Dictionaries.gettokenvalue(indices::DenseHashIndices, (_slot, index)) return indices.values[index] end # Insertion interface -isinsertable(::DenseHashIndices) = true +Dictionaries.isinsertable(::DenseHashIndices) = true 
-function gettoken!(indices::DenseHashIndices{I}, i::I) where {I} +function Dictionaries.gettoken!(indices::DenseHashIndices{I}, i::I) where {I} full_hash = hash(i) n_slots = length(indices.slots) bit_mask = n_slots - 1 # n_slots is always a power of two @@ -131,7 +135,7 @@ function gettoken!(indices::DenseHashIndices{I}, i::I) where {I} return (false, (trial_slot, n_values + 1)) end -@propagate_inbounds function deletetoken!(indices::DenseHashIndices, (slot, index)) +@propagate_inbounds function Dictionaries.deletetoken!(indices::DenseHashIndices, (slot, index)) indices.slots[slot] = -1 splice!(indices.hashes, index) splice!(indices.values, index) @@ -168,28 +172,28 @@ Base.keys(dict::DenseHashDictionary) = dict.indices # tokens -tokenized(dict::DenseHashDictionary) = dict.values +Dictionaries.tokenized(dict::DenseHashDictionary) = dict.values # values -function istokenassigned(dict::DenseHashDictionary, (_slot, index)) +function Dictionaries.istokenassigned(dict::DenseHashDictionary, (_slot, index)) return isassigned(dict.values, index) end -@propagate_inbounds function gettokenvalue(dict::DenseHashDictionary, (_slot, index)) +@propagate_inbounds function Dictionaries.gettokenvalue(dict::DenseHashDictionary, (_slot, index)) return dict.values[index] end -issettable(::DenseHashDictionary) = true +Dictionaries.issettable(::DenseHashDictionary) = true -@propagate_inbounds function settokenvalue!(dict::DenseHashDictionary{<:Any, T}, (_slot, index), value::T) where {T} +@propagate_inbounds function Dictionaries.settokenvalue!(dict::DenseHashDictionary{<:Any, T}, (_slot, index), value::T) where {T} dict.values[index] = value return dict end # insertion -function gettoken!(dict::DenseHashDictionary{I}, i::I) where {I} +function Dictionaries.gettoken!(dict::DenseHashDictionary{I}, i::I) where {I} (hadtoken, (slot, index)) = gettoken!(keys(dict), i) if !hadtoken resize!(dict.values, length(dict.values) + 1) @@ -197,7 +201,7 @@ function 
gettoken!(dict::DenseHashDictionary{I}, i::I) where {I} return (hadtoken, (slot, index)) end -function deletetoken!(dict::DenseHashDictionary, (slot, index)) +function Dictionaries.deletetoken!(dict::DenseHashDictionary, (slot, index)) deletetoken!(dict.indices, (slot, index)) splice!(dict.values, index) return dict @@ -211,3 +215,5 @@ Base.empty(::DenseHashIndices, ::Type{I}, ::Type{T}) where {I, T} = DenseHashDic function Base.similar(indices::DenseHashIndices{I}, ::Type{T}) where {I, T} return DenseHashDictionary(indices, Vector{T}(undef, length(indices))) end + +end # module \ No newline at end of file diff --git a/src/Dictionaries.jl b/src/Dictionaries.jl index 71dd505..a0b0c64 100644 --- a/src/Dictionaries.jl +++ b/src/Dictionaries.jl @@ -8,7 +8,7 @@ export getindices, setindices! export AbstractDictionary, AbstractIndices, IndexError, Indices, HashIndices, HashDictionary, Dictionary, MappedDictionary, DictionaryView, FilteredDictionary, FilteredIndices, BroadcastedDictionary -export dictionary, distinct, disjoint, filterview +export dictionary, index, distinct, disjoint, filterview export issettable, isinsertable, set!, unset! export istokenizable, tokentype, tokens, tokenized, gettoken, gettokenvalue, istokenassigned, settokenvalue!, gettoken!, deletetoken!, sharetokens diff --git a/src/Dictionary.jl b/src/Dictionary.jl index d0ee71a..6478ec2 100644 --- a/src/Dictionary.jl +++ b/src/Dictionary.jl @@ -38,13 +38,15 @@ undefined/unitialized. Dictionary{I, T}(inds, ::UndefInitializer) where {I, T} = Dictionary{I, T}(inds, Vector{T}(undef, length(inds))) """ - Dictionary(dict::AbstractDictionary) + Dictionary(indexable) -Construct a `Dictionary` copy of `dict` with the same keys and values. +Construct a `Dictionary` from an indexable container `indexable` with the same `keys` and +`values`, equivalent to `Dictionary(keys(indexable), values(indexable))`. Note that +`indexable` may not be copied. 
""" -Dictionary(dict::AbstractDictionary) = Dictionary(keys(dict), dict) -Dictionary{I}(dict::AbstractDictionary) where {I} = Dictionary{I}(keys(dict), dict) -Dictionary{I, T}(dict::AbstractDictionary) where {I, T} = Dictionary{I, T}(keys(dict), dict) +Dictionary(indexable) = Dictionary(keys(indexable), values(indexable)) +Dictionary{I}(indexable) where {I} = Dictionary{I}(keys(indexable), values(indexable)) +Dictionary{I, T}(indexable) where {I, T} = Dictionary{I, T}(keys(indexable), values(indexable)) function Base.keys(d::Dictionary{I}) where {I} @@ -66,7 +68,7 @@ const VectorDictionary{I, T} = Dictionary{I, T, Vector{I}, Vector{T}} # token interface istokenizable(::VectorDictionary) = true -istokenassigned(d::VectorDictionary, t::Int) = isassigned(d.values, t) +istokenassigned(d::VectorDictionary, t) = isassigned(d.values, t) @propagate_inbounds gettokenvalue(d::VectorDictionary{<:Any, T}, t::Int) where {T} = d.values[t]::T # settable interface diff --git a/src/HashDictionary.jl b/src/HashDictionary.jl index 3543301..b003646 100644 --- a/src/HashDictionary.jl +++ b/src/HashDictionary.jl @@ -2,19 +2,89 @@ struct HashDictionary{I, T} <: AbstractDictionary{I, T} indices::HashIndices{I} values::Vector{T} - function HashDictionary{I, T}(inds::HashIndices{I}, values::Vector{T}) where {I, T} - # TODO make sure sizes match, deal with the fact that inds.holes might be nonzero + function HashDictionary{I, T}(inds::HashIndices{I}, values::Vector{T}, ::Nothing) where {I, T} + @assert length(values) == length(inds.values) return new(inds, values) end end +""" + HashDictionary{I,T}(;sizehint = 8) + +Construct an empty hash-based dictionary. `I` and `T` default to `Any` if not specified. A +`sizehint` may be specified to set the initial size of the hash table, which may speed up +subsequent `insert!` operations. 
+ +# Example + +```julia +julia> d = HashDictionary{Int, Int}() +0-element HashDictionary{Int64,Int64} +``` +""" HashDictionary(; sizehint = 8) = HashDictionary{Any, Any}(; sizehint = sizehint) HashDictionary{I}(; sizehint = 8) where {I} = HashDictionary{I, Any}(; sizehint = sizehint) function HashDictionary{I, T}(; sizehint = 8) where {I, T} - HashDictionary{I, T}(HashIndices{I}(; sizehint = sizehint), Vector{T}()) + HashDictionary{I, T}(HashIndices{I}(; sizehint = sizehint), Vector{T}(), nothing) end +""" + HashDictionary(indexable) + HashDictionary{I}(indexable) + HashDictionary{I,T}(indexable) + +Construct a hash-based dictionary from an indexable input `indexable`, equivalent to +`HashDictionary(keys(indexable), values(indexable))`. The input might not be copied. + +Note: to construct a dictionary from `Pair`s use the `dictionary` function. See also the +`index` function. + +# Examples + +```julia +julia> HashDictionary(Dict(:a=>1, :b=>2)) +2-element HashDictionary{Symbol,Int64} + :a │ 1 + :b │ 2 + +julia> HashDictionary(3:-1:1) +3-element HashDictionary{Int64,Int64} + 1 │ 3 + 2 │ 2 + 3 │ 1 +``` +""" +HashDictionary(indexable) = HashDictionary(keys(indexable), values(indexable)) +HashDictionary{I}(indexable) where {I} = HashDictionary{I}(keys(indexable), values(indexable)) +HashDictionary{I, T}(indexable) where {I, T} = HashDictionary{I, T}(keys(indexable), values(indexable)) + +""" + HashDictionary(inds, values) + HashDictionary{I}(inds, values) + HashDictionary{I, T}(inds, values) + +Construct a hash-based dictionary from two iterable inputs `inds` and `values`. The first +value of `inds` will be the index for the first value of `values`. The input might not be +copied. + +Note: the values of `inds` must be distinct. Consider using `dictionary(zip(inds, values))` +if they are not. See also the `index` function. 
+ +# Example + +julia> HashDictionary(["a", "b", "c"], [1, 2, 3]) +3-element HashDictionary{String,Int64} + "a" │ 1 + "b" │ 2 + "c" │ 3 + +julia> HashDictionary{String, Float64}(["a", "b", "c"], [1, 2, 3]) +3-element HashDictionary{String,Float6464} + "a" │ 1.0 + "b" │ 2.0 + "c" │ 3.0 +""" function HashDictionary(inds, values) return HashDictionary(HashIndices(inds), values) end @@ -40,63 +110,140 @@ function HashDictionary{I, T}(inds, values) where {I, T} end function HashDictionary{I, T}(inds::HashIndices{I}, values) where {I, T} + if inds.holes != 0 + inds = copy(inds) + end + iter_size = Base.IteratorSize(values) if iter_size isa Union{Base.HasLength, Base.HasShape} vs = Vector{T}(undef, length(values)) @inbounds for (i, v) in enumerate(values) vs[i] = v end - return HashDictionary{I, T}(inds, vs) + return HashDictionary{I, T}(inds, vs, nothing) else vs = Vector{T}() for v in values push!(vs, v) end - return HashDictionary{I, T}(inds, vs) + return HashDictionary{I, T}(inds, vs, nothing) end end """ dictionary(iter) -Construct a new `AbstractDictionary` from an iterable `iter` of key-value `Pair`s. The -default container type is `HashDictionary`. +Construct a new `AbstractDictionary` from an iterable `iter` of key-value `Pair`s (or other +iterables of two elements, such as a two-tuples). The default container type is +`HashDictionary`. If duplicate keys are detected, the first encountered value is retained. + +See also the `index` function. 
+ +# Examples + +```julia +julia> dictionary(["a"=>1, "b"=>2, "c"=>3]) +3-element HashDictionary{String,Int64} + "a" │ 1 + "b" │ 2 + "c" │ 3 + +julia> dictionary(["a"=>1, "b"=>2, "c"=>3, "a"=>4]) +3-element HashDictionary{String,Int64} + "a" │ 1 + "b" │ 2 + "c" │ 3 + +julia> dictionary(zip(["a","b","c"], [1,2,3])) +3-element HashDictionary{String,Int64} + "a" │ 1 + "b" │ 2 + "c" │ 3 +``` """ function dictionary(iter) - if Base.IteratorEltype(iter) === Base.EltypeUnknown() - # TODO: implement automatic widening from iterators of Base.EltypeUnkown - iter = collect(iter) - end - _dictionary(eltype(iter), iter) + return _dictionary(first, last, HashDictionary, iter) end -dictionary(p1::Pair, p2::Pair...) = dictionary((p1, p2...)) +# An auto-widening HashDictionary constructor +function _dictionary(key, value, ::Type{HashDictionary}, iter) + tmp = iterate(iter) + if tmp === nothing + IT = Base.@default_eltype(iter) + I = Core.Compiler.return_type(first, Tuple{IT}) + T = Core.Compiler.return_type(last, Tuple{IT}) + return HashDictionary{I, T}() + end + (x, s) = tmp + i = key(x) + v = value(x) + dict = HashDictionary{typeof(i), typeof(v)}() + insert!(dict, i, v) + return __dictionary(key, value, dict, iter, s) +end -function _dictionary(::Type{Pair{I, T}}, iter) where {I, T} - iter_size = Base.IteratorSize(iter) - if iter_size isa Union{Base.HasLength, Base.HasShape} - n = length(iter) - inds = Vector{I}(undef, n) - vals = Vector{T}(undef, n) - j = 1 - @inbounds for (i, v) in iter - inds[j] = i - vals[j] = v - j += 1 +# An auto-widening AbstractDictionary constructor +function __dictionary(key, value, dict, iter, s) + I = keytype(dict) + T = eltype(dict) + tmp = iterate(iter, s) + while tmp !== nothing + (x, s) = tmp + i = key(x) + v = value(x) + if !(i isa I) + new_inds = copy(keys(dict), promote_type(I, typeof(i))) + new_dict = similar(new_inds, promote_type(T, typeof(v))) + (hadtoken, token) = gettoken!(new_dict, i) + if !hadtoken + @inbounds settokenvalue!(new_dict, 
token, v) + end + return __dictionary(key, value, new_dict, iter, s) + elseif !(v isa T) + new_dict = copy(dict, promote_type(T, typeof(v))) + (hadtoken, token) = gettoken!(new_dict, i) + if !hadtoken + @inbounds settokenvalue!(new_dict, token, v) + end + return __dictionary(key, value, new_dict, iter, s) end - return HashDictionary{I, T}(inds, vals) - else - inds = Vector{I}() - vals = Vector{T}() - @inbounds for (i, v) in iter - push!(inds, i) - push!(vals, v) - end - return HashDictionary{I, T}(inds, vals) + (hadtoken, token) = gettoken!(dict, i) + if !hadtoken + @inbounds settokenvalue!(dict, token, v) + end + tmp = iterate(iter, s) end + return dict +end + +""" + index(f, iter) + +Return a dictionary associating the values `x` of iterable collection `iter` with the key +`f(x)`. If keys are repeated, only the first is kept. Somewhat similar to `unique(f, iter)` + +See also the `dictionary` function. + +# Examples + +```julia +julia> index(first, ["Alice", "Bob", "Charlie"]) +3-element HashDictionary{Char,String} + 'A' │ "Alice" + 'B' │ "Bob" + 'C' │ "Charlie" + +julia> index(iseven, 1:10) +2-element HashDictionary{Bool,Int64} + false │ 1 + true │ 2 +``` +""" +function index(f, iter) + _dictionary(f, identity, HashDictionary, iter) end -# indices +# indicesi Base.keys(dict::HashDictionary) = dict.indices @@ -166,19 +313,6 @@ end # Factories function Base.similar(indices::HashIndices{I}, ::Type{T}) where {I, T} - return HashDictionary(indices, Vector{T}(undef, length(indices.values))) + return HashDictionary{I, T}(indices, Vector{T}(undef, length(indices.values)), nothing) end -function _distinct(f, ::Type{HashDictionary}, itr) - tmp = iterate(itr) - if tmp === nothing - T = Base.@default_eltype(itr) - I = Core.Compiler.return_type(f, Tuple{T}) - return HashDictionary{I, T}() - end - (x, s) = tmp - i = f(x) - dict = HashDictionary{typeof(i), typeof(x)}() - insert!(dict, i, x) - return __distinct(f, dict, itr, s) -end diff --git a/src/MappedDictionary.jl 
b/src/MappedDictionary.jl index 7df76a1..71ef4a9 100644 --- a/src/MappedDictionary.jl +++ b/src/MappedDictionary.jl @@ -26,7 +26,7 @@ end # TODO FIXME what do about tokens when there is more than one mapped dictioanry? For now, we disable them... istokenizable(d::MappedDictionary) = false function istokenizable(d::MappedDictionary{I, T, <:Any, <:Tuple{AbstractDictionary{<:I}}}) where {I, T} - return istokenizable(d.maps[1]) + return istokenizable(d.dicts[1]) end @propagate_inbounds function gettokenvalue(d::MappedDictionary{I, T, <:Any, <:Tuple{AbstractDictionary{<:I}}}, t) where {I, T} diff --git a/src/OldHashDictionary.jl b/src/OldHashDictionary.jl index ae3389f..dc39c85 100644 --- a/src/OldHashDictionary.jl +++ b/src/OldHashDictionary.jl @@ -156,7 +156,7 @@ function Base.sizehint!(d::OldHashDictionary, sz::Int) return d end -function Base.rehash!(d::OldHashDictionary, newsz::Int = length(d.inds)) +function Base.rehash!(d::OldHashDictionary, newsz::Int = length(d.indices)) _rehash!(d.indices, d.values, newsz) return d end diff --git a/test/HashDictionary.jl b/test/HashDictionary.jl index f1da44d..5d45eba 100644 --- a/test/HashDictionary.jl +++ b/test/HashDictionary.jl @@ -168,4 +168,18 @@ @test h[i] == i+1 end end + + @testset "dictionary" begin + res = HashDictionary(['a','b','c'], [1,2,3]) + @test isequal(dictionary(pairs(res)), res) + @test isequal(dictionary(['a'=>1, 'b'=>2, 'c'=>3]), res) + @test isequal(dictionary(['a'=>1, 'b'=>2, 'c'=>3, 'a'=>4]), res) + @test isequal(dictionary((k,v) for (k,v) in pairs(res)), res) + end + + @testset "index" begin + res = HashDictionary(['A','B','C'], ["Alice","Bob","Charlie"]) + @test isequal(index(first, ["Alice", "Bob", "Charlie"]), res) + @test isequal(index(first, ["Alice", "Bob", "Charlie", "Conner"]), res) + end end \ No newline at end of file diff --git a/test/HashIndices.jl b/test/HashIndices.jl index fec66fa..c99bdc2 100644 --- a/test/HashIndices.jl +++ b/test/HashIndices.jl @@ -94,5 +94,12 @@ end end + @testset 
"distinct" begin + res = HashIndices([1,2,3]) + @test distinct(res) === res + @test isequal(distinct([1,2,3]), res) + @test isequal(distinct([1,2,3,1]), res) + @test isequal(distinct([1,2,3]), res) + end # TODO: token interface end \ No newline at end of file From 0dbbbf5f1b005bd8a9b744dc2bec88872e78e6ec Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Wed, 10 Jun 2020 13:26:44 +1000 Subject: [PATCH 11/20] Fix co-iteration --- README.md | 14 +++++++------- src/HashDictionary.jl | 4 ++-- src/HashIndices.jl | 4 ++++ src/tokens.jl | 3 ++- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index b2a19f1..4e81282 100644 --- a/README.md +++ b/README.md @@ -513,7 +513,7 @@ julia> d1 = HashDictionary(1:10_000_000, 10_000_000:-1:1); julia> d2 = d1 .+ 1; julia> @btime map(+, d1, d2); - 23.362 ms (18 allocations: 76.29 MiB) + 25.712 ms (20 allocations: 76.29 MiB) ``` The `copy` below makes `keys(d1) !== keys(d2)`, disabling token co-iteration (requiring @@ -521,7 +521,7 @@ mulitple hash-table lookups per element). ```julia julia> @btime map(+, d1, $(HashDictionary(copy(keys(d2)), d2))); - 1.485 s (18 allocations: 76.29 MiB) + 61.615 ms (20 allocations: 76.29 MiB) ``` For a comparitive baseline benchmark, we can try the same with dense vectors. @@ -532,7 +532,7 @@ julia> v1 = collect(10_000_000:-1:1); julia> v2 = v1 .+ 1; julia> @btime map(+, v1, v2); - 25.449 ms (5 allocations: 76.29 MiB) + 27.587 ms (5 allocations: 76.29 MiB) ``` Here, the vector results are in line with the dictionary co-iteration! @@ -550,7 +550,7 @@ julia> function f(d1, d2) f (generic function with 1 method) julia> @btime f(d1, d2); - 2.793 s (10000090 allocations: 668.42 MiB) + 2.819 s (10000091 allocations: 668.42 MiB) ``` Unfortunately, insertion appears to be the idiomatic way of doing things with `Base.Dict`. 
@@ -569,7 +569,7 @@ julia> function g(d1, d2) g (generic function with 1 method) julia> @btime g(dict1, dict2); - 9.362 s (72 allocations: 541.17 MiB) + 9.507 s (72 allocations: 541.17 MiB) ``` The result is similar with generators, which is possibly the easiest way of dealing with @@ -577,8 +577,8 @@ The result is similar with generators, which is possibly the easiest way of deal ```julia julia> @btime Dict(i => dict1[i] + dict2[i] for i in keys(dict1)); - 13.787 s (89996503 allocations: 2.02 GiB) + 13.046 s (89996503 allocations: 2.02 GiB) ``` -This represents a 590x speedup between the first example with `HashDictionary` to this last +This represents a 500x speedup between the first example with `HashDictionary` to this last example with `Base.Dict`. diff --git a/src/HashDictionary.jl b/src/HashDictionary.jl index b003646..ce86caf 100644 --- a/src/HashDictionary.jl +++ b/src/HashDictionary.jl @@ -3,8 +3,8 @@ struct HashDictionary{I, T} <: AbstractDictionary{I, T} values::Vector{T} function HashDictionary{I, T}(inds::HashIndices{I}, values::Vector{T}, ::Nothing) where {I, T} - @assert length(values) == length(inds.values) - return new(inds, values) + @assert length(values) == length(inds.values) + return new{I,T}(inds, values) end end diff --git a/src/HashIndices.jl b/src/HashIndices.jl index 2112345..bb062bd 100644 --- a/src/HashIndices.jl +++ b/src/HashIndices.jl @@ -456,3 +456,7 @@ function __distinct!(indices::AbstractIndices, itr, s, x_old) end return indices end + +# CAUTION: I have observed Julia 1.4.2 fail to preserve the object identity of HashIndices +# (or perhaps there is a coding error I don't understand that causes it to be recreated) +sharetokens(i1::HashIndices, i2::HashIndices) = i1.slots === i2.slots \ No newline at end of file diff --git a/src/tokens.jl b/src/tokens.jl index 2bdd003..1b6686d 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -222,5 +222,6 @@ performed quickly (e.g. O(1) rather than O(N)). Return `false` otherwise. 
Note: the test may not be precise, this defaults to `tokens(dict1) === tokens(dict2)`. """ -sharetokens(d1, d2) = istokenizable(d1) && istokenizable(d2) && tokens(d1) === tokens(d2) +sharetokens(i1::AbstractIndices, i2::AbstractIndices) = istokenizable(i1) && istokenizable(i2) && i1 === i2 +sharetokens(d1, d2) = sharetokens(keys(d1), keys(d2)) sharetokens(d1, d2, ds...) = sharetokens(d1, d2) && sharetokens(d1, ds...) From 2426c3065b515a74b5102ecedcd74e6117986f0b Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Wed, 10 Jun 2020 15:05:38 +1000 Subject: [PATCH 12/20] Fix a test --- src/AbstractIndices.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AbstractIndices.jl b/src/AbstractIndices.jl index 61d16e5..08db8e6 100644 --- a/src/AbstractIndices.jl +++ b/src/AbstractIndices.jl @@ -115,6 +115,7 @@ julia> distinct([1,2,3,3]) ``` """ distinct(itr) = _distinct(HashIndices, itr) +distinct(inds::AbstractIndices) = inds function _distinct(::Type{T}, itr) where T out = T() From cb905c678e00db144a34da23eb0a34e588ea8bdd Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 12:53:33 +1000 Subject: [PATCH 13/20] Move OldHashIndices/OldHashDictionaries to contrib/ --- benchmark/bench_indices.jl | 177 +++++++-------- ...Dictionary.jl => DenseHashDictionaries.jl} | 0 .../OldHashDictionaries.jl | 204 +++++++++++++++++- src/Dictionaries.jl | 8 +- src/OldHashDictionary.jl | 173 --------------- test/group.jl | 39 ---- 6 files changed, 285 insertions(+), 316 deletions(-) rename contrib/{DenseHashDictionary.jl => DenseHashDictionaries.jl} (100%) rename src/OldHashIndices.jl => contrib/OldHashDictionaries.jl (56%) delete mode 100644 src/OldHashDictionary.jl delete mode 100644 test/group.jl diff --git a/benchmark/bench_indices.jl b/benchmark/bench_indices.jl index 3bcaee2..c168fb8 100644 --- a/benchmark/bench_indices.jl +++ b/benchmark/bench_indices.jl @@ -3,11 +3,12 @@ module BenchHashIndices using BenchmarkTools using Dictionaries using OrderedCollections +# 
include("../contrib/OldHashDictionaries.jl") +# using .OldHashDictionaries const suite = BenchmarkGroup() -#sizes = [(8 .^ (0:8))...] -sizes = [10, 100, 1000, 10_000] #, 10_000, 10_000_000] +sizes = [10, 100, 1000, 10_000] cutoff = 101 function build_vector_by_insertion(n) @@ -50,13 +51,13 @@ function build_hashindices_by_insertion(n) return out end -function build_old_hashindices_by_insertion(n) - out = Dictionaries.OldHashIndices{Int}() - for i in 1:n - insert!(out, i) - end - return out -end +# function build_old_hashindices_by_insertion(n) +# out = OldHashIndices{Int}() +# for i in 1:n +# insert!(out, i) +# end +# return out +# end function empty_by_deletion(set::Vector, n) for i in 1:n @@ -300,53 +301,53 @@ function basic_hash_indices_test(N) return out end -function basic_old_hash_indices_test(N) - h = Dictionaries.OldHashIndices{Int}() - out = true - for i in 1:N - insert!(h, i) - end - for i in 1:N - out &= i in h - end - for i in 1:2:N - delete!(h, i) - end - for i in 1:N - out &= (i in h) == iseven(i) - end - for i in 1:2:N - insert!(h, i) - end - for i in 1:N - out &= i in h - end - for i in 1:N - delete!(h, i) - end - out &= isempty(h) - insert!(h, 7) - out &= 7 in h - for i in 1:N - set!(h, i) - end - for i in 1:N - out &= i in h - end - for i in 1:2:N - delete!(h, i) - end - for i in 1:N - out &= (i in h) == iseven(i) - end - for i in N+1:2N - insert!(h, i) - end - for i in 1:2N - out &= (i in h) == (i > N || iseven(i)) - end - return out -end +# function basic_old_hash_indices_test(N) +# h = OldHashIndices{Int}() +# out = true +# for i in 1:N +# insert!(h, i) +# end +# for i in 1:N +# out &= i in h +# end +# for i in 1:2:N +# delete!(h, i) +# end +# for i in 1:N +# out &= (i in h) == iseven(i) +# end +# for i in 1:2:N +# insert!(h, i) +# end +# for i in 1:N +# out &= i in h +# end +# for i in 1:N +# delete!(h, i) +# end +# out &= isempty(h) +# insert!(h, 7) +# out &= 7 in h +# for i in 1:N +# set!(h, i) +# end +# for i in 1:N +# out &= i in h +# end 
+# for i in 1:2:N +# delete!(h, i) +# end +# for i in 1:N +# out &= (i in h) == iseven(i) +# end +# for i in N+1:2N +# insert!(h, i) +# end +# for i in 1:2N +# out &= (i in h) == (i > N || iseven(i)) +# end +# return out +# end for n in sizes r = 1:n @@ -380,9 +381,9 @@ for n in sizes even_hash_indices = HashIndices(2:2:n) odd_hash_indices = HashIndices(1:2:n) - old_hash_indices = Dictionaries.OldHashIndices(r) - even_old_hash_indices = Dictionaries.OldHashIndices(2:2:n) - odd_old_hash_indices = Dictionaries.OldHashIndices(1:2:n) + # old_hash_indices = OldHashIndices(r) + # even_old_hash_indices = OldHashIndices(2:2:n) + # odd_old_hash_indices = OldHashIndices(1:2:n) suite_n = suite["$n"] = BenchmarkGroup() @@ -392,7 +393,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable OrderedSet($r) n < cutoff && (s["Indices"] = @benchmarkable Indices($r)) s["HashIndices"] = @benchmarkable HashIndices($r) - s["OldHashIndices"] = @benchmarkable HashIndices($r) + #s["OldHashIndices"] = @benchmarkable HashIndices($r) s = suite_n["build by insertion ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector (push!)"] = @benchmarkable build_set_by_insertion($n)) @@ -400,7 +401,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable build_ordered_set_by_insertion($n) n < cutoff && (s["Set"] = @benchmarkable build_indices_by_insertion($n)) s["HashIndices"] = @benchmarkable build_hashindices_by_insertion($n) - s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) + #s["OldHashIndices"] = @benchmarkable build_old_hashindices_by_insertion($n) s = suite_n["copy ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector (pop!)"] = @benchmarkable copy($vec)) @@ -408,7 +409,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable copy($ordered_set) n < cutoff && (s["Indices"] = @benchmarkable copy($indices)) s["HashIndices"] = @benchmarkable copy($hash_indices) - s["OldHashIndices"] = @benchmarkable copy($old_hash_indices) + #s["OldHashIndices"] = @benchmarkable copy($old_hash_indices) s = 
suite_n["copy and empty by deletion ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector (pop!)"] = @benchmarkable empty_by_deletion(copy($vec), $n)) @@ -416,21 +417,21 @@ for n in sizes s["OrderedSet"] = @benchmarkable empty_by_deletion(copy($ordered_set), $n) n < cutoff && (s["Indices"] = @benchmarkable empty_by_deletion(copy($indices), $n)) s["HashIndices"] = @benchmarkable empty_by_deletion(copy($hash_indices), $n) - s["OldHashIndices"] = @benchmarkable empty_by_deletion(copy($old_hash_indices), $n) + #s["OldHashIndices"] = @benchmarkable empty_by_deletion(copy($old_hash_indices), $n) s = suite_n["insertion/deletion tests ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable basic_set_test($n) s["OrderedSet"] = @benchmarkable basic_ordered_set_test($n) n < cutoff && (s["Indices"] = @benchmarkable basic_indices_test($n)) s["HashIndices"] = @benchmarkable basic_hash_indices_test($n) - s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) + #s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) s = suite_n["insertion/deletion tests ($n)"] = BenchmarkGroup() s["Set"] = @benchmarkable basic_set_test($n) s["OrderedSet"] = @benchmarkable basic_ordered_set_test($n) n < cutoff && (s["Indices"] = @benchmarkable basic_indices_test($n)) s["HashIndices"] = @benchmarkable basic_hash_indices_test($n) - s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) + #s["OldHashIndices"] = @benchmarkable basic_old_hash_indices_test($n) s = suite_n["unique/distinct (high uniqueness, unsorted) ($n)"] = BenchmarkGroup() s["Vector (unique)"] = @benchmarkable unique($mostly_unique) @@ -462,7 +463,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable all_in($ordered_set, $n) n < cutoff && (s["Indices"] = @benchmarkable all_in($indices, $n)) s["HashIndices"] = @benchmarkable all_in($hash_indices, $n) - s["OldHashIndices"] = @benchmarkable all_in($old_hash_indices, $n) + #s["OldHashIndices"] = @benchmarkable all_in($old_hash_indices, $n) s = suite_n["not 
in ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable not_in($vec, $n)) @@ -470,7 +471,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable not_in($ordered_set, $n) n < cutoff && (s["Indices"] = @benchmarkable not_in($indices, $n)) s["HashIndices"] = @benchmarkable not_in($hash_indices, $n) - s["OldHashIndices"] = @benchmarkable not_in($old_hash_indices, $n) + #s["OldHashIndices"] = @benchmarkable not_in($old_hash_indices, $n) s = suite_n["count ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable count(iseven, $vec)) @@ -478,7 +479,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable count(iseven, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable count(iseven, $indices)) s["HashIndices"] = @benchmarkable count(iseven, $hash_indices) - s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable count(iseven, $old_hash_indices) s = suite_n["sum ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable sum($vec)) @@ -486,7 +487,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable sum($ordered_set) n < cutoff && (s["Indices"] = @benchmarkable sum($indices)) s["HashIndices"] = @benchmarkable sum($hash_indices) - s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) + #s["OldHashIndices"] = @benchmarkable sum($old_hash_indices) s = suite_n["foreach ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable foreachsum($vec)) @@ -494,7 +495,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable foreachsum($ordered_set) n < cutoff && (s["Indices"] = @benchmarkable foreachsum($indices)) s["HashIndices"] = @benchmarkable foreachsum($hash_indices) - s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) + #s["OldHashIndices"] = @benchmarkable foreachsum($old_hash_indices) s = suite_n["filter-map-reduce via generator ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable sum($(2x for x in vec if isodd(x)))) @@ -502,7 +503,7 @@ for 
n in sizes s["OrderedSet"] = @benchmarkable sum($(2x for x in ordered_set if isodd(x))) n < cutoff && (s["Indices"] = @benchmarkable sum($(2x for x in indices if isodd(x)))) s["HashIndices"] = @benchmarkable sum($(2x for x in hash_indices if isodd(x))) - s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if isodd(x))) + #s["OldHashIndices"] = @benchmarkable sum($(2x for x in old_hash_indices if isodd(x))) s = suite_n["filter (most) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable filter($pred1, $vec)) @@ -510,7 +511,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable filter($pred1, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable filter($pred1, $indices)) s["HashIndices"] = @benchmarkable filter($pred1, $hash_indices) - s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable filter($pred1, $old_hash_indices) s = suite_n["filter (half) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable filter(iseven, $vec)) @@ -518,7 +519,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable filter(iseven, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable filter(iseven, $indices)) s["HashIndices"] = @benchmarkable filter(iseven, $hash_indices) - s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable filter(iseven, $old_hash_indices) s = suite_n["filter (few) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable filter($pred2, $vec)) @@ -526,7 +527,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable filter($pred2, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable filter($pred2, $indices)) s["HashIndices"] = @benchmarkable filter($pred2, $hash_indices) - s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable filter($pred2, $old_hash_indices) s = suite_n["copy and filter! 
(most) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable filter!($pred1, copy($vec))) @@ -534,7 +535,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable filter!($pred1, copy($ordered_set)) n < cutoff && (s["Indices"] = @benchmarkable filter!($pred1, copy($indices))) s["HashIndices"] = @benchmarkable filter!($pred1, copy($hash_indices)) - s["OldHashIndices"] = @benchmarkable filter!($pred1, copy($old_hash_indices)) + #s["OldHashIndices"] = @benchmarkable filter!($pred1, copy($old_hash_indices)) s = suite_n["copy and filter! (half) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable filter!(iseven, copy($vec))) @@ -542,7 +543,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable filter!(iseven, copy($ordered_set)) n < cutoff && (s["Indices"] = @benchmarkable filter!(iseven, copy($indices))) s["HashIndices"] = @benchmarkable filter!(iseven, copy($hash_indices)) - s["OldHashIndices"] = @benchmarkable filter!(iseven, copy($old_hash_indices)) + #s["OldHashIndices"] = @benchmarkable filter!(iseven, copy($old_hash_indices)) s = suite_n["copy and filter! 
(few) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable filter!($pred2, copy($vec))) @@ -550,7 +551,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable filter!($pred2, copy($ordered_set)) n < cutoff && (s["Indices"] = @benchmarkable filter!($pred2, copy($indices))) s["HashIndices"] = @benchmarkable filter!($pred2, copy($hash_indices)) - s["OldHashIndices"] = @benchmarkable filter!($pred2, copy($old_hash_indices)) + #s["OldHashIndices"] = @benchmarkable filter!($pred2, copy($old_hash_indices)) s = suite_n["union ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable union($even_vec, $odd_vec)) @@ -558,7 +559,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable union($even_ordered_set, $odd_ordered_set) n < cutoff && (s["Indices"] = @benchmarkable union($even_indices, $even_indices)) s["HashIndices"] = @benchmarkable union($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) + #s["OldHashIndices"] = @benchmarkable union($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["intersect (empty) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $odd_vec)) @@ -566,7 +567,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $odd_ordered_set) n < cutoff && (s["Indices"] = @benchmarkable intersect($even_indices, $odd_indices)) s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) + #s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["intersect (half) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable intersect($even_vec, $vec)) @@ -574,7 +575,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable intersect($even_ordered_set, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable 
intersect($even_indices, $indices)) s["HashIndices"] = @benchmarkable intersect($even_hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable intersect($even_old_hash_indices, $old_hash_indices) s = suite_n["intersect (whole) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable intersect($vec, $vec)) @@ -582,7 +583,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable intersect($ordered_set, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable intersect($indices, $indices)) s["HashIndices"] = @benchmarkable intersect($hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable intersect($old_hash_indices, $old_hash_indices) s = suite_n["setdiff (whole) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $odd_vec)) @@ -590,7 +591,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $odd_ordered_set) n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $odd_indices)) s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) + #s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["setdiff (half) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable setdiff($even_vec, $vec)) @@ -598,7 +599,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable setdiff($even_ordered_set, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable setdiff($even_indices, $indices)) s["HashIndices"] = @benchmarkable setdiff($even_hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable setdiff($even_old_hash_indices, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable 
setdiff($even_old_hash_indices, $old_hash_indices) s = suite_n["setdiff (empty) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable setdiff($vec, $vec)) @@ -606,7 +607,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable setdiff($ordered_set, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable setdiff($indices, $indices)) s["HashIndices"] = @benchmarkable setdiff($hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable setdiff($old_hash_indices, $old_hash_indices) s = suite_n["symdiff (whole) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $odd_vec)) @@ -614,7 +615,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $odd_ordered_set) n < cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $odd_indices)) s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $odd_old_hash_indices) + #s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $odd_old_hash_indices) s = suite_n["symdiff (left half) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $odd_vec)) @@ -622,7 +623,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $odd_ordered_set) n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $odd_indices)) s["HashIndices"] = @benchmarkable symdiff($hash_indices, $odd_hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) + #s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $odd_old_hash_indices) s = suite_n["symdiff (right half) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable symdiff($even_vec, $vec)) @@ -630,7 +631,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable symdiff($even_ordered_set, $ordered_set) n < 
cutoff && (s["Indices"] = @benchmarkable symdiff($even_indices, $indices)) s["HashIndices"] = @benchmarkable symdiff($even_hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable symdiff($even_old_hash_indices, $old_hash_indices) s = suite_n["symdiff (empty) ($n)"] = BenchmarkGroup() n < cutoff && (s["Vector"] = @benchmarkable symdiff($vec, $vec)) @@ -638,7 +639,7 @@ for n in sizes s["OrderedSet"] = @benchmarkable symdiff($ordered_set, $ordered_set) n < cutoff && (s["Indices"] = @benchmarkable symdiff($indices, $indices)) s["HashIndices"] = @benchmarkable symdiff($hash_indices, $hash_indices) - s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) + #s["OldHashIndices"] = @benchmarkable symdiff($old_hash_indices, $old_hash_indices) end end # module diff --git a/contrib/DenseHashDictionary.jl b/contrib/DenseHashDictionaries.jl similarity index 100% rename from contrib/DenseHashDictionary.jl rename to contrib/DenseHashDictionaries.jl diff --git a/src/OldHashIndices.jl b/contrib/OldHashDictionaries.jl similarity index 56% rename from src/OldHashIndices.jl rename to contrib/OldHashDictionaries.jl index 4708104..b45521e 100644 --- a/src/OldHashIndices.jl +++ b/contrib/OldHashDictionaries.jl @@ -1,3 +1,10 @@ +module OldHashDictionaries + +using Dictionaries +using Base: @propagate_inbounds + +export OldHashIndices, OldHashDictionary + # These can be changed, to trade off better performance for space const global maxallowedprobe = 16 const global maxprobeshift = 6 @@ -67,8 +74,8 @@ Base.length(h::OldHashIndices) = h.count ## Token interface -istokenizable(::OldHashIndices) = true -tokentype(::OldHashIndices) = Int +Dictionaries.istokenizable(::OldHashIndices) = true +Dictionaries.tokentype(::OldHashIndices) = Int @propagate_inbounds isslotempty(h::OldHashIndices, i::Int) = h.slots[i] == 0x0 @propagate_inbounds isslotfilled(h::OldHashIndices, 
i::Int) = h.slots[i] == 0x1 @@ -86,7 +93,7 @@ function skip_deleted(h::OldHashIndices, i) return i end -@propagate_inbounds function iteratetoken(h::OldHashIndices{T}) where {T} +@propagate_inbounds function Dictionaries.iteratetoken(h::OldHashIndices{T}) where {T} idx = skip_deleted(h, h.idxfloor) h.idxfloor = idx # An optimization to skip unnecessary elements when iterating multiple times @@ -97,7 +104,7 @@ end end end -@propagate_inbounds function iteratetoken(h::OldHashIndices{T}, idx::Int) where {T} +@propagate_inbounds function Dictionaries.iteratetoken(h::OldHashIndices{T}, idx::Int) where {T} idx = skip_deleted(h, idx) if idx > length(h.inds) @@ -113,7 +120,7 @@ function hashtoken(key, sz::Int) (((hash(key)%Int) & (sz-1)) + 1)::Int end -function gettoken(h::OldHashIndices{T}, key::T) where {T} +function Dictionaries.gettoken(h::OldHashIndices{T}, key::T) where {T} sz = length(h.inds) iter = 0 maxprobe = h.maxprobe @@ -136,13 +143,13 @@ function gettoken(h::OldHashIndices{T}, key::T) where {T} end # gettokenvalue -@propagate_inbounds function gettokenvalue(h::OldHashIndices, token::Int) +@propagate_inbounds function Dictionaries.gettokenvalue(h::OldHashIndices, token::Int) return h.inds[token] end # insertable interface -isinsertable(::OldHashIndices) = true +Dictionaries.isinsertable(::OldHashIndices) = true function Base.empty!(h::OldHashIndices{T}) where {T} fill!(h.slots, 0x0) # It should be OK to reduce this back to some smaller size. 
@@ -155,7 +162,7 @@ function Base.empty!(h::OldHashIndices{T}) where {T} return h end -function Base.rehash!(h::OldHashIndices, newsz::Int = length(h.inds)) +function rehash!(h::OldHashIndices, newsz::Int = length(h.inds)) _rehash!(h, nothing, newsz) return h end @@ -226,7 +233,7 @@ end -function gettoken!(h::OldHashIndices{T}, key::T) where {T} +function Dictionaries.gettoken!(h::OldHashIndices{T}, key::T) where {T} (token, _) = _gettoken!(h, nothing, key) # This will make sure a slot is available at `token` (or `-token` if it is new) if token < 0 @@ -316,7 +323,7 @@ end end -function deletetoken!(h::OldHashIndices{T}, token::Int) where {T} +function Dictionaries.deletetoken!(h::OldHashIndices{T}, token::Int) where {T} h.slots[token] = 0x2 isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.inds, token-1) @@ -330,3 +337,180 @@ Base.filter!(pred, h::OldHashIndices) = Base.unsafe_filter!(pred, h) # The default insertable indices Base.empty(d::OldHashIndices, ::Type{T}) where {T} = OldHashIndices{T}() + + +mutable struct OldHashDictionary{I,T} <: AbstractDictionary{I, T} + indices::OldHashIndices{I} + values::Vector{T} + + OldHashDictionary{I, T}(indices::OldHashIndices{I}, values::Vector{T}, ::Nothing) where {I, T} = new(indices, values) +end + +""" + OldHashDictionary{I, T}() + +Construct an empty `OldHashDictionary` with index type `I` and element type `T`. This type of +dictionary uses hashes for fast lookup and insertion, and is both mutable and insertable. +(See `issettable` and `isinsertable`). 
+""" +function OldHashDictionary{I, T}(; sizehint::Int = 16) where {I, T} + indices = OldHashIndices{I}(; sizehint=sizehint) + OldHashDictionary{I, T}(indices, Vector{T}(undef, length(indices.slots)), nothing) +end +OldHashDictionary{I}() where {I} = OldHashDictionary{I, Any}() +OldHashDictionary() = OldHashDictionary{Any}() + +""" + OldHashDictionary{I, T}(indices, undef::UndefInitializer) + +Construct a `OldHashDictionary` with index type `I` and element type `T`. The container is +initialized with `keys` that match the values of `indices`, but the values are unintialized. +""" +function OldHashDictionary{I, T}(indices, ::UndefInitializer) where {I, T} + return OldHashDictionary{I, T}(OldHashIndices{I}(indices), undef) +end + +function OldHashDictionary{I, T}(h::OldHashIndices{I}, ::UndefInitializer) where {I, T} + return OldHashDictionary{I, T}(h, Vector{T}(undef, length(h.slots)), nothing) +end + +function OldHashDictionary{I, T}(indices::OldHashIndices{I}, values) where {I, T} + vals = Vector{T}(undef, length(indices.slots)) + d = OldHashDictionary{I, T}(indices, vals, nothing) + + @inbounds for (i, v) in zip(tokens(indices), values) + vals[i] = v + end + + return d +end + +""" + OldHashDictionary(indices, values) + OldHashDictionary{I}(indices, values) + OldHashDictionary{I, T}(indices, values) + +Construct a `OldHashDictionary` with indices from `indices` and values from `values`, matched +in iteration order. 
+""" +function OldHashDictionary{I, T}(indices, values) where {I, T} + iter_size = Base.IteratorSize(indices) + if iter_size isa Union{Base.HasLength, Base.HasShape} + d = OldHashDictionary{I, T}(; sizehint = length(indices)*2) + else + d = OldHashDictionary{I, T}() + end + + for (i, v) in zip(indices, values) + insert!(d, i, v) + end + + return d +end +function OldHashDictionary{I}(indices, values) where {I} + if Base.IteratorEltype(values) === Base.EltypeUnknown() + # TODO: implement automatic widening from iterators of Base.EltypeUnkown + values = collect(values) + end + + return OldHashDictionary{I, eltype(values)}(indices, values) +end + +function OldHashDictionary(indices, values) + if Base.IteratorEltype(indices) === Base.EltypeUnknown() + # TODO: implement automatic widening from iterators of Base.EltypeUnkown + indices = collect(indices) + end + + return OldHashDictionary{eltype(indices)}(indices, values) +end + +""" + OldHashDictionary(dict::AbstractDictionary) + OldHashDictionary{I}(dict::AbstractDictionary) + OldHashDictionary{I, T}(dict::AbstractDictionary) + +Construct a copy of `dict` with the same keys and values. +(For copying an `AbstractDict` or other iterable of `Pair`s, see `dictionary`). 
+""" +OldHashDictionary(dict::AbstractDictionary) = OldHashDictionary(keys(dict), dict) +OldHashDictionary{I}(dict::AbstractDictionary) where {I} = OldHashDictionary{I}(keys(dict), dict) +OldHashDictionary{I, T}(dict::AbstractDictionary) where {I, T} = OldHashDictionary{I, T}(keys(dict), dict) + +## Implementation + +Base.keys(d::OldHashDictionary) = d.indices +Dictionaries.isinsertable(d::OldHashDictionary) = true +Dictionaries.issettable(d::OldHashDictionary) = true + +@propagate_inbounds function Dictionaries.gettoken(d::OldHashDictionary{I}, i::I) where {I} + return gettoken(keys(d), i) +end + +@inline function Dictionaries.gettokenvalue(d::OldHashDictionary, token) + return @inbounds d.values[token] +end + +function Dictionaries.istokenassigned(d::OldHashDictionary, token) + return isassigned(d.values, token) +end + +@inline function Dictionaries.settokenvalue!(d::OldHashDictionary{I, T}, token, value::T) where {I, T} + @inbounds d.values[token] = value + return d +end + +function Dictionaries.gettoken!(d::OldHashDictionary{T}, key::T) where {T} + indices = keys(d) + (token, values) = _gettoken!(indices, d.values, key) + if token < 0 + (token, values) = _insert!(indices, values, key, -token) + d.values = values + return (false, token) + else + d.values = values + return (true, token) + end +end + +function Base.copy(d::OldHashDictionary{I, T}, ::Type{I}, ::Type{T}) where {I, T} + return OldHashDictionary{I, T}(d.indices, copy(d.values), nothing) +end + +Dictionaries.tokenized(d::OldHashDictionary) = d.values + +function Base.empty!(d::OldHashDictionary) + empty!(d.indices) + empty!(d.values) + resize!(d.values, length(keys(d).slots)) + return d +end + +function Dictionaries.deletetoken!(d::OldHashDictionary{I, T}, token) where {I, T} + deletetoken!(keys(d), token) + isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), d.values, token-1) + return d +end + +function Base.sizehint!(d::OldHashDictionary, sz::Int) + d.values = _sizehint!(d.indices, d.values, 
sz) + return d +end + +function Base.rehash!(d::OldHashDictionary, newsz::Int = length(d.indices)) + _rehash!(d.indices, d.values, newsz) + return d +end + +Base.filter!(pred, d::OldHashDictionary) = Base.unsafe_filter!(pred, d) + +# For `OldHashIndices` we don't copy the indices, we allow the `keys` to remain identical (`===`) +function Base.similar(indices::OldHashIndices{I}, ::Type{T}) where {I, T} + return OldHashDictionary{I, T}(indices, undef) +end + +function Base.empty(indices::OldHashIndices, ::Type{I}, ::Type{T}) where {I, T} + return OldHashDictionary{I, T}() +end + +end # module \ No newline at end of file diff --git a/src/Dictionaries.jl b/src/Dictionaries.jl index a0b0c64..92c978c 100644 --- a/src/Dictionaries.jl +++ b/src/Dictionaries.jl @@ -32,20 +32,16 @@ include("HashIndices.jl") include("HashDictionary.jl") include("MappedDictionary.jl") -include("OldHashIndices.jl") -include("OldHashDictionary.jl") - end # module # # TODO # # * Improved printing - don't calculate length (beyond some cutoff) if it is `SizeUnknown` and limit=true, fix indentiation problems for wider values -# * `hash` and `isless` -# * TODO: have `delete!` return next element, `deletetoken!` return next token. +# * TODO: have `delete!` return next key, `deletetoken!` return next token. # For these kinds of algorithms, probably need: firstindex, firsttoken, nextind, prevind, # nexttoken, prevtoken, lastindex, lasttoken. # * A surface interface for updates like https://github.com/JuliaLang/julia/pull/31367 -# * Soon we will have the concept of "ordered" indices/sets (sort-based dictionaries and +# * More operations for "ordered" indices/sets (sort-based dictionaries and # B-trees). We can probably formalize an interface around a trait here. Certain operations # like slicing out an interval or performing a sort-merge co-iteration for `merge` become # feasible. 
diff --git a/src/OldHashDictionary.jl b/src/OldHashDictionary.jl deleted file mode 100644 index dc39c85..0000000 --- a/src/OldHashDictionary.jl +++ /dev/null @@ -1,173 +0,0 @@ -mutable struct OldHashDictionary{I,T} <: AbstractDictionary{I, T} - indices::OldHashIndices{I} - values::Vector{T} - - OldHashDictionary{I, T}(indices::OldHashIndices{I}, values::Vector{T}, ::Nothing) where {I, T} = new(indices, values) -end - -""" - OldHashDictionary{I, T}() - -Construct an empty `OldHashDictionary` with index type `I` and element type `T`. This type of -dictionary uses hashes for fast lookup and insertion, and is both mutable and insertable. -(See `issettable` and `isinsertable`). -""" -function OldHashDictionary{I, T}(; sizehint::Int = 16) where {I, T} - indices = OldHashIndices{I}(; sizehint=sizehint) - OldHashDictionary{I, T}(indices, Vector{T}(undef, length(indices.slots)), nothing) -end -OldHashDictionary{I}() where {I} = OldHashDictionary{I, Any}() -OldHashDictionary() = OldHashDictionary{Any}() - -""" - OldHashDictionary{I, T}(indices, undef::UndefInitializer) - -Construct a `OldHashDictionary` with index type `I` and element type `T`. The container is -initialized with `keys` that match the values of `indices`, but the values are unintialized. 
-""" -function OldHashDictionary{I, T}(indices, ::UndefInitializer) where {I, T} - return OldHashDictionary{I, T}(OldHashIndices{I}(indices), undef) -end - -function OldHashDictionary{I, T}(h::OldHashIndices{I}, ::UndefInitializer) where {I, T} - return OldHashDictionary{I, T}(h, Vector{T}(undef, length(h.slots)), nothing) -end - -function OldHashDictionary{I, T}(indices::OldHashIndices{I}, values) where {I, T} - vals = Vector{T}(undef, length(indices.slots)) - d = OldHashDictionary{I, T}(indices, vals, nothing) - - @inbounds for (i, v) in zip(tokens(indices), values) - vals[i] = v - end - - return d -end - -""" - OldHashDictionary(indices, values) - OldHashDictionary{I}(indices, values) - OldHashDictionary{I, T}(indices, values) - -Construct a `OldHashDictionary` with indices from `indices` and values from `values`, matched -in iteration order. -""" -function OldHashDictionary{I, T}(indices, values) where {I, T} - iter_size = Base.IteratorSize(indices) - if iter_size isa Union{Base.HasLength, Base.HasShape} - d = OldHashDictionary{I, T}(; sizehint = length(indices)*2) - else - d = OldHashDictionary{I, T}() - end - - for (i, v) in zip(indices, values) - insert!(d, i, v) - end - - return d -end -function OldHashDictionary{I}(indices, values) where {I} - if Base.IteratorEltype(values) === Base.EltypeUnknown() - # TODO: implement automatic widening from iterators of Base.EltypeUnkown - values = collect(values) - end - - return OldHashDictionary{I, eltype(values)}(indices, values) -end - -function OldHashDictionary(indices, values) - if Base.IteratorEltype(indices) === Base.EltypeUnknown() - # TODO: implement automatic widening from iterators of Base.EltypeUnkown - indices = collect(indices) - end - - return OldHashDictionary{eltype(indices)}(indices, values) -end - -""" - OldHashDictionary(dict::AbstractDictionary) - OldHashDictionary{I}(dict::AbstractDictionary) - OldHashDictionary{I, T}(dict::AbstractDictionary) - -Construct a copy of `dict` with the same keys and 
values. -(For copying an `AbstractDict` or other iterable of `Pair`s, see `dictionary`). -""" -OldHashDictionary(dict::AbstractDictionary) = OldHashDictionary(keys(dict), dict) -OldHashDictionary{I}(dict::AbstractDictionary) where {I} = OldHashDictionary{I}(keys(dict), dict) -OldHashDictionary{I, T}(dict::AbstractDictionary) where {I, T} = OldHashDictionary{I, T}(keys(dict), dict) - -## Implementation - -Base.keys(d::OldHashDictionary) = d.indices -isinsertable(d::OldHashDictionary) = true -issettable(d::OldHashDictionary) = true - -@propagate_inbounds function gettoken(d::OldHashDictionary{I}, i::I) where {I} - return gettoken(keys(d), i) -end - -@inline function gettokenvalue(d::OldHashDictionary, token) - return @inbounds d.values[token] -end - -function istokenassigned(d::OldHashDictionary, token) - return isassigned(d.values, token) -end - -@inline function settokenvalue!(d::OldHashDictionary{I, T}, token, value::T) where {I, T} - @inbounds d.values[token] = value - return d -end - -function gettoken!(d::OldHashDictionary{T}, key::T) where {T} - indices = keys(d) - (token, values) = _gettoken!(indices, d.values, key) - if token < 0 - (token, values) = _insert!(indices, values, key, -token) - d.values = values - return (false, token) - else - d.values = values - return (true, token) - end -end - -function Base.copy(d::OldHashDictionary{I, T}, ::Type{I}, ::Type{T}) where {I, T} - return OldHashDictionary{I, T}(d.indices, copy(d.values), nothing) -end - -tokenized(d::OldHashDictionary) = d.values - -function Base.empty!(d::OldHashDictionary) - empty!(d.indices) - empty!(d.values) - resize!(d.values, length(keys(d).slots)) - return d -end - -function deletetoken!(d::OldHashDictionary{I, T}, token) where {I, T} - deletetoken!(keys(d), token) - isbitstype(T) || ccall(:jl_arrayunset, Cvoid, (Any, UInt), d.values, token-1) - return d -end - -function Base.sizehint!(d::OldHashDictionary, sz::Int) - d.values = _sizehint!(d.indices, d.values, sz) - return d -end - 
-function Base.rehash!(d::OldHashDictionary, newsz::Int = length(d.indices)) - _rehash!(d.indices, d.values, newsz) - return d -end - -Base.filter!(pred, d::OldHashDictionary) = Base.unsafe_filter!(pred, d) - -# For `OldHashIndices` we don't copy the indices, we allow the `keys` to remain identical (`===`) -function Base.similar(indices::OldHashIndices{I}, ::Type{T}) where {I, T} - return OldHashDictionary{I, T}(indices, undef) -end - -function Base.empty(indices::OldHashIndices, ::Type{I}, ::Type{T}) where {I, T} - return OldHashDictionary{I, T}() -end \ No newline at end of file diff --git a/test/group.jl b/test/group.jl deleted file mode 100644 index 77f1e78..0000000 --- a/test/group.jl +++ /dev/null @@ -1,39 +0,0 @@ -@testset "group" begin - @test Dictionaries.group(identity, 11:20) == HashDictionary(11:20, (x->[x]).(11:20)) - @test Dictionaries.group(iseven, 1:10) == dictionary(true => [2,4,6,8,10], false => [1,3,5,7,9]) - - @test Dictionaries.group(iseven, x -> x*2, 1:10) == dictionary(true => [4,8,12,16,20], false => [2,6,10,14,18]) - - @test Dictionaries.group((x,y) -> iseven(x+y), (x,y) -> x, 1:10, [1,3,4,2,5,6,4,2,3,9]) == dictionary(true => [1,4,5,6,8,9], false => [2,3,7,10]) -end - -#@testset "groupunique" begin - #@test Dictionaries.groupunique(identity, 11:20)::HashIndices{Int} == HashIndices(11:20) - #@test Dictionaries.groupunique(identity, 11:20)::HashIndices{Int} == HashIndices(11:20) -#end -#= -@testset "groupinds" begin - @test Dictionaries.groupinds(identity, 11:20) == dictionary(Pair.(11:20, (x->[x]).(1:10))) - @test Dictionaries.groupinds(iseven, 11:20) == dictionary(true => [2,4,6,8,10], false => [1,3,5,7,9]) -end - -@testset "groupview" begin - @test Dictionaries.groupview(identity, 11:20)::Groups == group(identity, 11:20)::dictionary - @test Dictionaries.groupview(iseven, 11:20)::Groups == group(iseven, 11:20)::dictionary -end -=# -@testset "groupreduce" begin - @test Dictionaries.groupreduce(identity, +, 1:10) == dictionary(Pair.(1:10, 
1:10)) - @test Dictionaries.groupreduce(iseven, +, 1:10) == dictionary(true => 30, false => 25) - - @test Dictionaries.groupreduce(iseven, x -> x*2, +, 1:10) == dictionary(true => 60, false => 50) - - @test Dictionaries.groupreduce(iseven, x -> x*2, +, 1:10; init=10) == dictionary(true => 70, false => 60) - - @test Dictionaries.groupreduce((x,y) -> iseven(x+y), (x,y) -> x+y, +, 1:10, 1:10; init=10) == dictionary(true => 120) - @test Dictionaries.groupreduce((x,y) -> iseven(x+y), (x,y) -> x+y, +, 1:10, [1,3,4,2,5,6,4,2,3,9]; init=10) == dictionary(true => 62, false => 52) - - @test Dictionaries.groupcount(iseven, 1:10) == dictionary(true => 5, false => 5) - @test Dictionaries.groupsum(iseven, 1:10) == dictionary(true => 30, false => 25) - @test Dictionaries.groupprod(iseven, 1:10) == dictionary(true => 2*4*6*8*10, false => 1*3*5*7*9) -end \ No newline at end of file From 5bc53c53ed9bfa72001044762da3c38e2c119f41 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 14:54:50 +1000 Subject: [PATCH 14/20] More tests --- LICENSE.md | 2 +- README.md | 2 +- src/AbstractDictionary.jl | 42 ++++++------------------- src/HashDictionary.jl | 8 ++--- src/insertion.jl | 2 +- test/HashDictionary.jl | 55 ++++++++++++++++++++++++++++---- test/HashIndices.jl | 66 +++++++++++++++++++++++++++++++++++++-- test/filter.jl | 22 ++++++++----- 8 files changed, 142 insertions(+), 57 deletions(-) diff --git a/LICENSE.md b/LICENSE.md index ecd7077..68713a4 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ The Dictionaries.jl package is licensed under the MIT "Expat" License: -> Copyright (c) 2018-2019: Andy Ferris. +> Copyright (c) 2018-2020: Andy Ferris. 
> > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 4e81282..4a75cde 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ![Test Status](https://github.com/andyferris/Dictionaries.jl/workflows/Test/badge.svg) [![Codecov](https://codecov.io/gh/andyferris/Dictionaries.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/andyferris/Dictionaries.jl) -This package is still quite young - new features are being added and some (low-level) interfaces may be tweaked in the future, but things should be stable enough for general usage. Contributions welcome - please submit an issue or PR! +This package is somewhat young - new features are being added and some (low-level) interfaces may be tweaked in the future, but things should be stable enough for general usage. Contributions welcome - please submit an issue or PR! ## Motivation diff --git a/src/AbstractDictionary.jl b/src/AbstractDictionary.jl index 15a0bab..8617c8e 100644 --- a/src/AbstractDictionary.jl +++ b/src/AbstractDictionary.jl @@ -276,38 +276,6 @@ function _distinct(f, ::Type{T}, itr) where T return out end -# An auto-widening AbstractDictionary constructor -function __distinct(f, dict, itr, s) - I = keytype(dict) - T = eltype(dict) - tmp = iterate(itr, s) - while tmp !== nothing - (x, s) = tmp - i = f(x) - if !(i isa I) - new_inds = copy(keys(dict), promote_type(I, typeof(i))) - new_dict = similar(new_inds, promote_type(T, typeof(x))) - (hadtoken, token) = gettoken!(new_dict, i) - if !hadtoken - @inbounds settokenvalue!(new_dict, token, x) - end - return __distinct(f, new_dict, itr, s) - elseif !(x isa T) - new_dict = copy(dict, promote_type(T, typeof(x))) - (hadtoken, token) = gettoken!(new_dict, i) - if !hadtoken - @inbounds settokenvalue!(new_dict, token, x) - end - return __distinct(f, new_dict, itr, s) - end - (hadtoken, token) = gettoken!(dict, i) 
- if !hadtoken - @inbounds settokenvalue!(dict, token, x) - end - tmp = iterate(itr, s) - end - return dict -end ### Settable interface @@ -362,7 +330,7 @@ end similar(d::AbstractDictionary, [T=eltype(d)]) Construct a new `issettable` dictionary with identical `keys` as `d` and an element type of -`T`. The initial values are unitialized/undefined. +`T`. The initial values are uninitialized/undefined. """ Base.similar(d::AbstractDictionary) = similar(keys(d), eltype(d)) Base.similar(d::AbstractDictionary, ::Type{T}) where {T} = similar(keys(d), T) @@ -371,6 +339,14 @@ function Base.similar(indices::AbstractIndices{I}, ::Type{T}) where {I, T} return similar(convert(HashIndices{I}, indices), T) end +function Base.merge(d1::AbstractDictionary, d2::AbstractDictionary) + # Note: need to copy the keys + out = similar(copy(keys(d1)), eltype(d1)) + copyto!(out, d1) + merge!(out, d2) + return out +end + # fill! and fill function Base.fill!(d::AbstractDictionary, value) diff --git a/src/HashDictionary.jl b/src/HashDictionary.jl index ce86caf..3f61c4f 100644 --- a/src/HashDictionary.jl +++ b/src/HashDictionary.jl @@ -293,8 +293,8 @@ end function Base.filter!(pred, dict::HashDictionary) indices = keys(dict) - _filter!(i -> pred(@inbounds dict.values[i]), keys(indices.values), indices.values, indices.hashes, (dict.values,)) - indices.deleted = 0 + _filter!(i -> pred(@inbounds dict.values[i]), indices.values, indices.hashes, (dict.values,)) + indices.holes = 0 newsize = Base._tablesz(3*length(indices.values) >> 0x01) rehash!(indices, newsize, (dict.values,)) return dict @@ -303,8 +303,8 @@ end function Base.filter!(pred, dict::PairDictionary{<:Any, <:Any, <:HashDictionary}) d = dict.d indices = keys(d) - _filter!(i -> pred(@inbounds indices.values[i] => d.values[i]), keys(indices.values), indices.values, indices.hashes, (d.values,)) - indices.deleted = 0 + _filter!(i -> pred(@inbounds indices.values[i] => d.values[i]), indices.values, indices.hashes, (d.values,)) + indices.holes = 
0 newsize = Base._tablesz(3*length(indices.values) >> 0x01) rehash!(indices, newsize, (d.values,)) return dict diff --git a/src/insertion.jl b/src/insertion.jl index ffe9724..ca6f3af 100644 --- a/src/insertion.jl +++ b/src/insertion.jl @@ -475,4 +475,4 @@ Base.empty(d::AbstractDictionary) = empty(keys(d), keytype(d), eltype(d)) Base.empty(d::AbstractDictionary, ::Type{I}) where {I} = empty(keys(d), I) -Base.empty(::AbstractIndices, ::Type{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}() +Base.empty(::AbstractDictionary, ::Type{I}, ::Type{T}) where {I, T} = HashDictionary{I, T}() diff --git a/test/HashDictionary.jl b/test/HashDictionary.jl index 5d45eba..702ebc9 100644 --- a/test/HashDictionary.jl +++ b/test/HashDictionary.jl @@ -31,7 +31,7 @@ @test_throws IndexError d[10] = 11 @test_throws IndexError delete!(d, 10) - insert!(d, 10, 11) + insert!(d, 10.0, 11.0) @test d[10] == 11 @test get(d, 10, 15) == 11 @@ -77,11 +77,11 @@ @test cmp(fill(0, copy(keys(d))), d) == -1 @test cmp(d, fill(0, copy(keys(d)))) == 1 @test_throws IndexError insert!(d, 10, 12) - @test d[10] == 11 - set!(d, 10, 12) + @test d[10.0] == 11 + set!(d, 10.0, 12.0) @test length(d) == 1 @test d[10] == 12 - d[10] = 13 + d[10.0] = 13.0 @test length(d) == 1 @test d[10] == 13 io = IOBuffer(); print(io, d); @test String(take!(io)) == "{10 │ 13}" @@ -90,10 +90,10 @@ @test isequal(d, copy(d)) @test isempty(empty(d)) - delete!(d, 10) + delete!(d, 10.0) @test isequal(d, HashDictionary{Int64, Int64}()) - @test get!(d, 10, 14) == 14 + @test get!(d, 10, 14.0) == 14 @test d[10] == 14 delete!(d, 10) @@ -105,6 +105,29 @@ @test all(in(i, keys(d)) == iseven(i) for i in 2:2:1000) @test isempty(empty!(d)) + @test get!(() -> 15, d, 10) == 15 + @test get!(() -> 16, d, 10) == 15 + + d = HashDictionary([:a, :b], [1, 2]) + d2 = HashDictionary((a=1, b=2)) + @test isequal(d, d2) + d3 = dictionary([:a=>1, :b=>2]) + @test isequal(d, d3) + d4 = dictionary(zip([:a, :b], [1, 2])) + @test isequal(d, d4) + @test !isless(d, d4) + 
@test !isless(d4, d) + @test hash(d) == hash(d4) + + @test isequal(merge(d, d), d) + @test isequal(merge(d, d2), d) + + @test isequal(merge(d, HashDictionary([:c], [3])), HashDictionary([:a, :b, :c], [1, 2, 3])) + @test isequal(merge(d, HashDictionary([:b, :c], [4, 3])), HashDictionary([:a, :b, :c], [1, 4, 3])) + + @test isequal(index(first, ["Alice", "Bob", "Charlie"]), HashDictionary(['A', 'B', 'C'], ["Alice", "Bob", "Charlie"])) + @test isequal(index(first, ["Alice", "Bob", "Charlie", "Conner"]), HashDictionary(['A', 'B', 'C'], ["Alice", "Bob", "Charlie"])) + # TODO token interface @testset "Dict tests from Base" begin @@ -182,4 +205,24 @@ @test isequal(index(first, ["Alice", "Bob", "Charlie"]), res) @test isequal(index(first, ["Alice", "Bob", "Charlie", "Conner"]), res) end + + @testset "Factories" begin + d = HashDictionary(['a','b','c'], [1,2,3]) + @test similar(d) isa HashDictionary{Char, Int} + @test similar(d, Float64) isa HashDictionary{Char, Float64} + @test sharetokens(d, similar(d)) + + @test isempty(empty(d)::HashDictionary{Char, Int}) + @test isempty(empty(d, Float64)::HashIndices{Float64}) + @test isempty(empty(d, String, Float64)::HashDictionary{String, Float64}) + + @test isequal(zeros(d)::HashDictionary{Char, Float64}, HashDictionary(['a','b','c'],[0.0,0.0,0.0])) + @test isequal(zeros(Int64, d)::HashDictionary{Char, Int64}, HashDictionary(['a','b','c'],[0,0,0])) + + @test isequal(ones(d)::HashDictionary{Char, Float64}, HashDictionary(['a','b','c'],[1.0,1.0,1.0])) + @test isequal(ones(Int64, d)::HashDictionary{Char, Int64}, HashDictionary(['a','b','c'],[1,1,1])) + + @test isequal(keys(rand(1:10, d)::HashDictionary{Char, Int}), HashIndices(['a','b','c'])) + @test isequal(keys(randn(d)::HashDictionary{Char, Float64}), HashIndices(['a','b','c'])) + end end \ No newline at end of file diff --git a/test/HashIndices.jl b/test/HashIndices.jl index c99bdc2..de28fa5 100644 --- a/test/HashIndices.jl +++ b/test/HashIndices.jl @@ -16,7 +16,7 @@ io = 
IOBuffer(); show(io, MIME"text/plain"(), h); @test String(take!(io)) == "0-element HashIndices{Int64}" @test_throws IndexError delete!(h, 10) - insert!(h, 10) + insert!(h, 10.0) @test length(h) == 1 @test keys(h) === h @@ -25,7 +25,7 @@ @test h == copy(h) @test !isempty(h) @test isequal(copy(h), h) - @test h[10] == 10 + @test h[10.0] == 10 @test_throws IndexError insert!(h, 10) @test length(set!(h, 10)) == 1 @test_throws IndexError insert!(h, 10) @@ -35,7 +35,7 @@ @test isequal(h, copy(h)) @test isempty(empty(h)) - delete!(h, 10) + delete!(h, 10.0) @test isequal(h, HashIndices{Int64}()) @@ -46,6 +46,39 @@ @test all(in(i, h) == iseven(i) for i in 2:1000) @test isempty(empty!(h)) + # set + @test length(set!(h, 1)) == 1 + @test length(set!(h, 2, 2)) == 2 + @test length(set!(h, 3.0, 3.0)) == 3 + @test_throws ErrorException set!(h, 4, 5) + + @testset "Comparison" begin + i1 = HashIndices([1,2,3]) + i2 = HashIndices([1,2]) + i3 = HashIndices([1,2,3,4]) + i4 = HashIndices([3,2,1]) + + @test isequal(i1, i1) + @test hash(i1) == hash(copy(i1)) + + @test isless(i2, i1) + @test !isless(i1, i2) + @test !isequal(i1, i2) + @test hash(i1) != hash(i2) + + @test isless(i2, i1) + @test !isless(i1, i2) + @test !isequal(i1, i2) + + @test isless(i1, i3) + @test !isless(i3, i1) + @test !isequal(i1, i3) + + @test isless(i1, i4) + @test !isless(i4, i1) + @test !isequal(i1, i4) + end + @testset "Adapated from Dict tests from Base" begin h = HashIndices{Int}() N = 10000 @@ -101,5 +134,32 @@ @test isequal(distinct([1,2,3,1]), res) @test isequal(distinct([1,2,3]), res) end + + @testset "set logic" begin + i1 = HashIndices([1,2]) + i2 = HashIndices([2,3]) + i3 = HashIndices([3,4]) + i4 = HashIndices([2]) + + @test !issetequal(i1, i2) + @test !(i1 ⊆ i2) + @test i4 ⊆ i1 + + @test !disjoint(i1, i2) + @test disjoint(i1, i3) + + @test isequal(union(i1, i2), HashIndices([1,2,3])) + @test isequal(union(i2, i1), HashIndices([2,3,1])) + @test isequal(union(i1, i3), HashIndices([1,2,3,4])) + + @test 
isequal(intersect(i1, i2), HashIndices([2])) + @test isequal(intersect(i1, i3), HashIndices([])) + + @test isequal(setdiff(i1, i2), HashIndices([1])) + @test isequal(setdiff(i1, i3), HashIndices([1, 2])) + + @test isequal(symdiff(i1, i2), HashIndices([1, 3])) + @test isequal(symdiff(i1, i3), HashIndices([1, 2, 3, 4])) + end # TODO: token interface end \ No newline at end of file diff --git a/test/filter.jl b/test/filter.jl index ae70b40..e4b899d 100644 --- a/test/filter.jl +++ b/test/filter.jl @@ -1,17 +1,23 @@ @testset "filter" begin i = HashIndices([1,2,3,4,5]) - @test issetequal(filter(iseven, i)::HashIndices, [2, 4]) - @test issetequal(filter(isodd, i)::HashIndices, [1, 3, 5]) + @test isequal(filter(iseven, i)::HashIndices, HashIndices([2, 4])) + @test isequal(filter(isodd, i)::HashIndices, HashIndices([1, 3, 5])) - @test issetequal(filterview(iseven, i)::Dictionaries.FilteredIndices, [2, 4]) - @test issetequal(filterview(isodd, i)::Dictionaries.FilteredIndices, [1, 3, 5]) + @test isequal(filterview(iseven, i)::Dictionaries.FilteredIndices, HashIndices([2, 4])) + @test isequal(filterview(isodd, i)::Dictionaries.FilteredIndices, HashIndices([1, 3, 5])) + + filter!(iseven, i) + @test isequal(i, HashIndices([2, 4])) d = HashDictionary([1,2,3,4,5], [1,3,2,4,5]) - @test issetequal(pairs(filter(iseven, d)::HashDictionary), [3=>2, 4=>4]) - @test issetequal(pairs(filter(isodd, d)::HashDictionary), [1=>1, 2=>3, 5=>5]) + @test isequal(filter(iseven, d)::HashDictionary, dictionary([3=>2, 4=>4])) + @test isequal(filter(isodd, d)::HashDictionary, dictionary([1=>1, 2=>3, 5=>5])) + + @test isequal(filterview(iseven, d)::Dictionaries.FilteredDictionary, dictionary([3=>2, 4=>4])) + @test isequal(filterview(isodd, d)::Dictionaries.FilteredDictionary, dictionary([1=>1, 2=>3, 5=>5])) - @test issetequal(pairs(filterview(iseven, d)::Dictionaries.FilteredDictionary), [3=>2, 4=>4]) - @test issetequal(pairs(filterview(isodd, d)::Dictionaries.FilteredDictionary), [1=>1, 2=>3, 5=>5]) + 
filter!(iseven, d) + @test isequal(d, HashDictionary([3,4],[2,4])) end \ No newline at end of file From bdc07ed0ec2afde3ea0e22b7f8d0fa8073e7a0bc Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 15:22:57 +1000 Subject: [PATCH 15/20] Forbid IteratorSize of HasShape for Indices Fixes #20 --- src/AbstractIndices.jl | 2 +- src/Indices.jl | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/AbstractIndices.jl b/src/AbstractIndices.jl index 08db8e6..27f5236 100644 --- a/src/AbstractIndices.jl +++ b/src/AbstractIndices.jl @@ -63,7 +63,7 @@ function Base.in(i, indices::AbstractIndices{I}) where I end # Match the default setting from Base - the majority of containers will know their size -Base.IteratorSize(indices::AbstractIndices) = Base.HasLength() +Base.IteratorSize(::AbstractIndices) = Base.HasLength() function Base.length(indices::AbstractIndices) if Base.IteratorSize(indices) isa Base.SizeUnknown diff --git a/src/Indices.jl b/src/Indices.jl index 6ae91ae..0025b5f 100644 --- a/src/Indices.jl +++ b/src/Indices.jl @@ -26,7 +26,13 @@ Indices{I}(iter) where {I} = Indices{I, typeof(iter)}(iter) # There is a corner end Base.in(i::I, inds::Indices{I}) where {I} = in(i, inds.inds) -Base.IteratorSize(i::Indices) = Base.IteratorSize(i.inds) +function Base.IteratorSize(i::Indices) + out = Base.IteratorSize(i.inds) + if out isa Base.HasShape + return Base.HasLength() + end + return out +end Base.length(i::Indices) = length(i.inds) # Specialize for `Vector` elements. 
Satisfy the tokenization and insertion interface From a9a98667e22660ae2bd12ac8f8f5b249d5af7518 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 15:31:32 +1000 Subject: [PATCH 16/20] Fix merge / mergewith Closes #18 --- src/AbstractDictionary.jl | 10 +++++++++ src/insertion.jl | 43 +++++++++++---------------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/src/AbstractDictionary.jl b/src/AbstractDictionary.jl index 8617c8e..ab254cf 100644 --- a/src/AbstractDictionary.jl +++ b/src/AbstractDictionary.jl @@ -347,6 +347,16 @@ function Base.merge(d1::AbstractDictionary, d2::AbstractDictionary) return out end +if isdefined(Base, :mergewith) # Julia 1.5+ + function Base.mergewith(combiner, d1::AbstractDictionary, d2::AbstractDictionary) + # Note: need to copy the keys + out = similar(copy(keys(d1)), eltype(d1)) + copyto!(out, d1) + mergewith!(combiner, out, d2) + return out + end +end + # fill! and fill function Base.fill!(d::AbstractDictionary, value) diff --git a/src/insertion.jl b/src/insertion.jl index ca6f3af..ee7752e 100644 --- a/src/insertion.jl +++ b/src/insertion.jl @@ -323,44 +323,25 @@ function unset!(d::AbstractDictionary{I}, i::I) where {I} end ### Non-scalar insertion/deletion 
- -function Base.merge!(combiner::Callable, d::AbstractDictionary, d2::AbstractDictionary) - for (i, v) in pairs(d2) - (hasindex, token) = gettoken!(d, i) - if hasindex - @inbounds settokenvalue!(d, token, combiner(gettokenvalue(d, token), v)) - else - @inbounds settokenvalue!(d, token, v) - end - end - return d -end - -# TODO `last` is incorrect, it should be `latter(x,y) = y` -function Base.merge!(::typeof(last), d::AbstractDictionary, d2::AbstractDictionary) +function Base.merge!(d::AbstractDictionary, d2::AbstractDictionary) for (i, v) in pairs(d2) set!(d, i, v) end return d end -# TODO `first` is incorrect, it should be `former(x,y) = y` -function Base.merge!(::typeof(first), d::AbstractDictionary, d2::AbstractDictionary) - for (i, v) in pairs(d2) - get!(d, i, v) +if isdefined(Base, :mergewith) # Julia 1.5+ + function Base.mergewith!(combiner::Callable, d::AbstractDictionary, d2::AbstractDictionary) + for (i, v) in pairs(d2) + (hasindex, token) = gettoken!(d, i) + if hasindex + @inbounds settokenvalue!(d, token, combiner(gettokenvalue(d, token), v)) + else + @inbounds settokenvalue!(d, token, v) + end + end + return d end - return d -end - -function Base.merge!(combiner::Callable, d::AbstractDictionary, d2::AbstractDictionary, ds::AbstractDictionary...) - merge!(combiner, merge!(combiner, d, d2), ds...) 
-end - -function Base.merge!(combiner::Callable, d::AbstractIndices, d2::AbstractIndices) - # Hopefully no-one provides a bad combiner - union!(d, d2) end # TODO some kind of exclusive merge (throw on key clash like `insert!`) From f891d95238e3cf8bfcb0cd1625d3655846f68cc3 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 16:06:22 +1000 Subject: [PATCH 17/20] Finalize == semantics Fixes #11 --- src/AbstractDictionary.jl | 76 ++++++++++++++++++++++++++++++--------- src/AbstractIndices.jl | 24 ++++++++++--- src/Dictionaries.jl | 2 +- test/HashDictionary.jl | 17 +++++++++ test/HashIndices.jl | 6 ++++ 5 files changed, 103 insertions(+), 22 deletions(-) diff --git a/src/AbstractDictionary.jl b/src/AbstractDictionary.jl index ab254cf..9117d7d 100644 --- a/src/AbstractDictionary.jl +++ b/src/AbstractDictionary.jl @@ -94,41 +94,85 @@ function Base.isequal(d1::AbstractDictionary, d2::AbstractDictionary) return true end -# `==` doesn't care about the iteration order. Keys must be `isequal` and values `==` +# The indices must be isequal and the values ==, same ordering function Base.:(==)(d1::AbstractDictionary, d2::AbstractDictionary) - if d1 === d2 - return true - end + out = true if sharetokens(d1, d2) @inbounds for t in tokens(d1) - if gettokenvalue(d1, t) != gettokenvalue(d2, t) + out &= gettokenvalue(d1, t) == gettokenvalue(d2, t) + if out === false return false end end - return true + return out end if length(d1) != length(d2) return false end - if istokenizable(d2) - for (i,v) in pairs(d1) - (hastoken, token) = gettoken(d2, i) - if !hastoken || v != gettokenvalue(d2, token) - return false - end + for ((i1,x1), (i2,x2)) in zip(pairs(d1), pairs(d2)) + if !isequal(i1, i2) + return false end - else - for (i,v) in pairs(d1) - if !haskey(d2, i) || v != d2[i] + out &= x1 == x2 # make sure it works for `missing` + if out === false + return false + end + end + + return out +end + +""" + isdictequal(d1, d2) + +Determine if two dictionaries are equivalent. 
Dictionaries `d1` and `d2` are equivalent if +`issetequal(keys(d1), keys(d2))` and for each key `i`, `d1[i] == d2[i]`. + +Example + +```julia +julia> isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['b','a'],[2,1])) +true + +julia> isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['b','a'],[2,3])) +false + +julia> isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['b','a'],[2,missing])) +missing +``` +""" +function isdictequal(d1::AbstractDictionary, d2::AbstractDictionary) + out = true + + if sharetokens(d1, d2) + @inbounds for t in tokens(d1) + out &= gettokenvalue(d1, t) == gettokenvalue(d2, t) + if out === false return false end end + return out end - return true + if length(d1) != length(d2) + return false + end + + for (i,x1) in pairs(d1) + (hastoken, t) = gettoken(d2, i) + if !hastoken + return false + end + out &= x1 == gettokenvalue(d2, t) # make sure it works for `missing` + if out === false + return false + end + end + + return out end # Lexical ordering based on iteration (of pairs - lesser key takes priority over lesser value, as implmeneted in `cmp(::Pair)`) diff --git a/src/AbstractIndices.jl b/src/AbstractIndices.jl index 27f5236..eba4cce 100644 --- a/src/AbstractIndices.jl +++ b/src/AbstractIndices.jl @@ -159,23 +159,37 @@ function Base.isequal(i1::AbstractIndices, i2::AbstractIndices) return true end -# Use `issetequal` semantics +# The indices must be isequal and the values ==, same ordering function Base.:(==)(i1::AbstractIndices, i2::AbstractIndices) + out = true + if sharetokens(i1, i2) - return true + # TODO - can we get rid of this loop for reflexive == element types? 
+ @inbounds for t in tokens(i1) + # make sure it works for `missing` + out &= gettokenvalue(i1, t) == gettokenvalue(i2, t) + if out === false + return false + end + end + return out end if length(i1) != length(i2) return false end - for i in i1 - if !(i in i2) + for (j1, j2) in zip(i1, i2) + if !isequal(j1, j2) + return false + end + out &= j1 == j2 # make sure it works for `missing` + if out === false return false end end - return true + return out end # Lexical ordering based on iteration diff --git a/src/Dictionaries.jl b/src/Dictionaries.jl index 92c978c..bb8117a 100644 --- a/src/Dictionaries.jl +++ b/src/Dictionaries.jl @@ -8,7 +8,7 @@ export getindices, setindices! export AbstractDictionary, AbstractIndices, IndexError, Indices, HashIndices, HashDictionary, Dictionary, MappedDictionary, DictionaryView, FilteredDictionary, FilteredIndices, BroadcastedDictionary -export dictionary, index, distinct, disjoint, filterview +export dictionary, index, distinct, disjoint, isdictequal, filterview export issettable, isinsertable, set!, unset! 
export istokenizable, tokentype, tokens, tokenized, gettoken, gettokenvalue, istokenassigned, settokenvalue!, gettoken!, deletetoken!, sharetokens diff --git a/test/HashDictionary.jl b/test/HashDictionary.jl index 702ebc9..e384c4d 100644 --- a/test/HashDictionary.jl +++ b/test/HashDictionary.jl @@ -109,8 +109,12 @@ @test get!(() -> 16, d, 10) == 15 d = HashDictionary([:a, :b], [1, 2]) + @test isequal(d, d) + @test d == d + @test !isless(d, d) d2 = HashDictionary((a=1, b=2)) @test isequal(d, d2) + @test d == d2 d3 = dictionary([:a=>1, :b=>2]) @test isequal(d, d3) d4 = dictionary(zip([:a, :b], [1, 2])) @@ -119,6 +123,19 @@ @test !isless(d4, d) @test hash(d) == hash(d4) + @test isdictequal(d, copy(d)) + @test isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['b','a'],[2,1])) + @test !isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['b','c'],[2,1])) + @test !isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['a','b','c'],[1,2,3])) + @test !isdictequal(HashDictionary(['a','b'],[1,2]), HashDictionary(['b','a'],[2,3])) + + d5 = HashDictionary(['a','b'],[1,missing]) + @test isdictequal(d5, d5) === missing + @test (d5 == d5) === missing + d6 = HashDictionary(['a','b'],[1,missing]) + @test isdictequal(d5, d6) === missing + @test (d5 == d6) === missing + @test isequal(merge(d, d), d) @test isequal(merge(d, d2), d) diff --git a/test/HashIndices.jl b/test/HashIndices.jl index de28fa5..ad219da 100644 --- a/test/HashIndices.jl +++ b/test/HashIndices.jl @@ -77,6 +77,12 @@ @test isless(i1, i4) @test !isless(i4, i1) @test !isequal(i1, i4) + + i5 = HashIndices([1,2,missing]) + @test isequal(i5, i5) + @test !isless(i5, i5) + @test (i5 == i5) === missing + @test (i5 == HashIndices([1,2,missing])) === missing end @testset "Adapated from Dict tests from Base" begin From 456b90ad41f6ee0607923a04a6b17618678cdc9e Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 16:34:23 +1000 Subject: [PATCH 18/20] Auto-convert settokenvalue! 
--- src/tokens.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tokens.jl b/src/tokens.jl index 1b6686d..d1a5f3c 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -198,7 +198,11 @@ end return isassigned(d, token) end -@propagate_inbounds function settokenvalue!(d::AbstractDictionary, i, value) +@propagate_inbounds function settokenvalue!(d::AbstractDictionary{<:Any,T}, t, value::T) where {T} + return settokenvalue!(d, t, convert(T, value)) +end + +@propagate_inbounds function settokenvalue!(d::AbstractDictionary{<:Any,T}, i, value::T) where {T} if !issettable(d) error("Cannot mutate values of dictionary: $(typeof(d))") end From ba4bd052123076d6cfb763a7fb784c363892b728 Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 16:35:08 +1000 Subject: [PATCH 19/20] Small cleanup --- src/tokens.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokens.jl b/src/tokens.jl index d1a5f3c..50c3beb 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -226,6 +226,6 @@ performed quickly (e.g. O(1) rather than O(N)). Return `false` otherwise. Note: the test may not be precise, this defaults to `tokens(dict1) === tokens(dict2)`. """ -sharetokens(i1::AbstractIndices, i2::AbstractIndices) = istokenizable(i1) && istokenizable(i2) && i1 === i2 +sharetokens(i1::AbstractIndices, i2::AbstractIndices) = istokenizable(i1) && i1 === i2 sharetokens(d1, d2) = sharetokens(keys(d1), keys(d2)) sharetokens(d1, d2, ds...) = sharetokens(d1, d2) && sharetokens(d1, ds...) From 0d34363b6220102271e7a636fd7032e4441462ee Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 11 Jun 2020 16:44:41 +1000 Subject: [PATCH 20/20] Fix settokenvalue! 
--- src/tokens.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokens.jl b/src/tokens.jl index 50c3beb..e36c60c 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -198,7 +198,7 @@ end return isassigned(d, token) end -@propagate_inbounds function settokenvalue!(d::AbstractDictionary{<:Any,T}, t, value::T) where {T} +@propagate_inbounds function settokenvalue!(d::AbstractDictionary{<:Any,T}, t, value) where {T} return settokenvalue!(d, t, convert(T, value)) end