Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Base.Ordering for heap, and other performance improvements #547

Merged
merged 5 commits into from
Aug 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ os:
- linux
- osx
julia:
- 0.7
- 1.0
- nightly
notifications:
Expand Down
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.0-DEV"

[deps]
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"

[compat]
Compat = "3.0.0"
OrderedCollections = "1.1.0"
julia = "1"

Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
environment:
matrix:
- julia_version: 0.7
- julia_version: 1.0
- julia_version: latest

platform:
Expand Down
44 changes: 14 additions & 30 deletions benchmark/bench_heap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,12 @@ heaptypes = [BinaryHeap, MutableBinaryHeap]
aexps = [1,3]
datatypes = [Int, Float64]
baseorderings = Dict(
"Min" => DataStructures.LessThan,
#"Max" => DataStructures.GreaterThan,
"Min" => Base.ForwardOrdering,
#"Max" => Base.ReverseOrdering,
)
fastfloatorderings = Dict(
# These will be enabled upon reordering change
#"FastMin" => DataStructures.FasterForward(),
#"FastMax" => DataStructures.FasterReverse(),
"Min" => DataStructures.FasterForward,
"Max" => DataStructures.FasterReverse,
)

for heap in heaptypes
Expand All @@ -41,7 +40,8 @@ for heap in heaptypes
Random.seed!(0)
a = rand(dt, 10^aexp)

orderings = baseorderings
# Dict types to force use of abstract type if containing single value
orderings = Dict{String, DataType}(baseorderings)
if dt == Float64
# swap to faster ordering operation
for (k,v) in orderings
Expand All @@ -66,38 +66,22 @@ for heap in heaptypes
end
end

# Quick check to ensure no Float regressions with Min/Max convenience functions
# These don't fit in well with the above loop, since ordering is hardcoded.
heapalias = Dict(
"BinaryMinHeap" => BinaryMinHeap,
"BinaryMaxHeap" => BinaryMaxHeap,
"BinaryMinMaxHeap" => BinaryMinMaxHeap, # <- no alias issue
)
for (heapname, heap) in heapalias
for aexp in aexps
for dt in [Float64]
Random.seed!(0)
a = rand(dt, 10^aexp)
prepath = [heapname]
postpath = [string(dt), "10^"*string(aexp)]
suite[vcat(prepath, ["make"], postpath)] =
@benchmarkable $(heap)($a)
suite[vcat(prepath, ["push"], postpath)] =
@benchmarkable push_heap(h, $a) setup=(h=$(heap){$dt}())
suite[vcat(prepath, ["pop"], postpath)] =
@benchmarkable pop_heap(h) setup=(h=$(heap)($a))
end
end
end
fast_extreme_orderings = Dict(
nsmallest => DataStructures.FasterForward(),
nlargest => DataStructures.FasterReverse(),
)
oxinabox marked this conversation as resolved.
Show resolved Hide resolved

for func in [nlargest, nsmallest]
fastord = fast_extreme_orderings[func]
for aexp in [4]
Random.seed!(0);
a = rand(10^aexp);
for nexp in [2]
n = 10^nexp
suite[[string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
suite[["Slow " * string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
@benchmarkable $(func)($n, $a)
suite[[string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
@benchmarkable DataStructures.nextreme($fastord, $n, $a)
end
end
end
Expand Down
42 changes: 34 additions & 8 deletions docs/src/heaps.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,32 @@ All heaps in this package are derived from `AbstractHeap`, and provide
the following interface:

```julia
# Let h be a heap, i be a handle, and v be a value.
# Let `h` be a heap, `v` be a value, and `n` be an integer size

length(h) # returns the number of elements
length(h) # returns the number of elements

isempty(h) # returns whether the heap is empty
isempty(h) # returns whether the heap is empty

push!(h, v) # add a value to the heap
push!(h, v) # add a value to the heap

first(h) # return the first (top) value of a heap
first(h) # return the first (top) value of a heap

pop!(h) # removes the first (top) value, and returns it
pop!(h) # removes the first (top) value, and returns it

extract_all!(h) # removes all elements and returns sorted array

extract_all_rev!(h) # removes all elements and returns reverse sorted array

sizehint!(h, n) # reserve capacity for at least `n` elements
```

Mutable heaps (values can be changed after being pushed to a heap) are
derived from `AbstractMutableHeap <: AbstractHeap`, and additionally
provides the following interface:

```julia
# Let `h` be a heap, `i` be a handle, and `v` be a value.

i = push!(h, v) # adds a value to the heap and returns a handle to v

update!(h, i, v) # updates the value of an element (referred to by the handle i)
Expand Down Expand Up @@ -54,6 +61,21 @@ h = MutableBinaryMinHeap([1,4,3,2])
h = MutableBinaryMaxHeap([1,4,3,2]) # create a mutable min/max heap from a vector
```

Heaps may be constructed with a custom ordering. One use case for custom orderings
is to achieve faster performance with floating-point elements, at the risk of an arbitrary ordering
if any element is `NaN`. The provided `DataStructures.FasterForward` and
`DataStructures.FasterReverse` orderings are optimized for this purpose.
Custom orderings may also be used to define the order of structs stored as heap elements.
```julia
h = BinaryHeap{Float64, DataStructures.FasterForward}() # faster min heap
h = BinaryHeap{Float64, DataStructures.FasterReverse}() # faster max heap

h = MutableBinaryHeap{Float64, DataStructures.FasterForward}() # faster mutable min heap
h = MutableBinaryHeap{Float64, DataStructures.FasterReverse}() # faster mutable max heap

h = BinaryHeap{MyStruct, MyStructOrdering}() # heap containing custom struct
```

## Min-max heaps
Min-max heaps maintain the minimum _and_ the maximum of a set,
allowing both to be retrieved in constant (`O(1)`) time.
Expand Down Expand Up @@ -97,5 +119,9 @@ nlargest(3, [0,21,-12,68,-25,14]) # => [68,21,14]
nsmallest(3, [0,21,-12,68,-25,14]) # => [-25,-12,0]
```

`nlargest(n, a)` is equivalent to `sort(a, lt = >)[1:min(n, end)]`, and
`nsmallest(n, a)` is equivalent to `sort(a, lt = <)[1:min(n, end)]`.
Note that if the array contains floats and is free of NaN values,
then the following alternatives may be used to achieve a 2x performance boost.
```
DataStructures.nextreme(DataStructures.FasterReverse(), n, a) # faster nlargest(n, a)
DataStructures.nextreme(DataStructures.FasterForward(), n, a) # faster nsmallest(n, a)
```
3 changes: 2 additions & 1 deletion src/DataStructures.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module DataStructures
zero, checkbounds


using Compat # Provides Base.Order.ReverseOrdering(). May remove this line with julia 1.4
using OrderedCollections
import OrderedCollections: filter, filter!, isordered
export OrderedDict, OrderedSet, LittleDict
Expand All @@ -30,7 +31,7 @@ module DataStructures
export IntDisjointSets, DisjointSets, num_groups, find_root!, in_same_set, root_union!
export FenwickTree, length, inc!, dec!, incdec!, prefixsum

export AbstractHeap, compare, extract_all!
export AbstractHeap, compare, extract_all!, extract_all_rev!
export BinaryHeap, BinaryMinHeap, BinaryMaxHeap, nlargest, nsmallest
export MutableBinaryHeap, MutableBinaryMinHeap, MutableBinaryMaxHeap
export heapify!, heapify, heappop!, heappush!, isheap
Expand Down
83 changes: 51 additions & 32 deletions src/heaps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#
# - sizehint!(h, s) set size hint to a heap
#
# - first(h) return the first (top) value of a heap
# - first(h) return the first (top) value of a heap
#
# - pop!(h) removes the first (top) value, and
# returns it
Expand All @@ -37,7 +37,7 @@
# - update!(h, i, v) updates the value of an element
# (referred to by the handle i)
#
# - delete!(h, i) deletes the node with
# - delete!(h, i) deletes the node with
# handle i from the heap
#
# - top_with_handle(h) return the top value of a heap
Expand All @@ -55,28 +55,24 @@ abstract type AbstractMutableHeap{VT,HT} <: AbstractHeap{VT} end

abstract type AbstractMinMaxHeap{VT} <: AbstractHeap{VT} end

# comparer

struct LessThan
end

struct GreaterThan
end

compare(c::LessThan, x, y) = x < y
compare(c::GreaterThan, x, y) = x > y

# heap implementations

include("heaps/binary_heap.jl")
include("heaps/mutable_binary_heap.jl")
include("heaps/arrays_as_heaps.jl")
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
include("heaps/minmax_heap.jl")

# generic functions

Base.eltype(::Type{<:AbstractHeap{T}}) where T = T

"""
extract_all!(h)

Return an array of heap elements in sorted order (heap head at first index).

Note that for simple heaps (not mutable or minmax)
sorting the internal array of elements in-place is faster.
"""
function extract_all!(h::AbstractHeap{VT}) where VT
n = length(h)
r = Vector{VT}(undef, n)
Expand All @@ -86,6 +82,14 @@ function extract_all!(h::AbstractHeap{VT}) where VT
return r
end

"""
extract_all_rev!(h)

Return an array of heap elements in reverse sorted order (heap head at last index).

Note that for simple heaps (not mutable or minmax)
sorting the internal array of elements in-place is faster.
"""
function extract_all_rev!(h::AbstractHeap{VT}) where VT
n = length(h)
r = Vector{VT}(undef, n)
Expand All @@ -97,50 +101,65 @@ end

# Array functions using heaps

function nextreme(comp::Comp, n::Int, arr::AbstractVector{T}) where {T, Comp}
"""
nextreme(ord, n, arr)

Return an array of the first `n` values of `arr` sorted by `ord`.
"""
function nextreme(ord::Base.Ordering, n::Int, arr::AbstractVector{T}) where T
if n <= 0
return T[] # sort(arr)[1:n] returns [] for n <= 0
elseif n >= length(arr)
return sort(arr, lt = (x, y) -> compare(comp, y, x))
return sort(arr, order = ord)
end

buffer = BinaryHeap{T,Comp}()
rev = Base.ReverseOrdering(ord)

for i = 1 : n
@inbounds xi = arr[i]
push!(buffer, xi)
end
buffer = heapify(arr[1:n], rev)

for i = n + 1 : length(arr)
@inbounds xi = arr[i]
if compare(comp, first(buffer), xi)
# This could use a pushpop method
pop!(buffer)
push!(buffer, xi)
if Base.lt(rev, buffer[1], xi)
buffer[1] = xi
percolate_down!(buffer, 1, rev)
end
end

return extract_all_rev!(buffer)
return sort!(buffer, order = ord)
end

"""
nlargest(n, arr)

Return the `n` largest elements of the array `arr`.

Equivalent to `sort(arr, lt = >)[1:min(n, end)]`
Equivalent to:
sort(arr, order = Base.Reverse)[1:min(n, end)]

Note that if `arr` contains floats and is free of NaN values,
then the following alternative may be used to achieve 2x performance.
DataStructures.nextreme(DataStructures.FasterReverse(), n, arr)
This faster version is equivalent to:
sort(arr, lt = >)[1:min(n, end)]
"""
function nlargest(n::Int, arr::AbstractVector{T}) where T
return nextreme(LessThan(), n, arr)
function nlargest(n::Int, arr::AbstractVector)
return nextreme(Base.Reverse, n, arr)
end

"""
nsmallest(n, arr)

Return the `n` smallest elements of the array `arr`.

Equivalent to `sort(arr, lt = <)[1:min(n, end)]`
Equivalent to:
sort(arr, order = Base.Forward)[1:min(n, end)]

Note that if `arr` contains floats and is free of NaN values,
then the following alternative may be used to achieve 2x performance.
DataStructures.nextreme(DataStructures.FasterForward(), n, arr)
This faster version is equivalent to:
sort(arr, lt = <)[1:min(n, end)]
"""
function nsmallest(n::Int, arr::AbstractVector{T}) where T
return nextreme(GreaterThan(), n, arr)
function nsmallest(n::Int, arr::AbstractVector)
return nextreme(Base.Forward, n, arr)
end
7 changes: 4 additions & 3 deletions src/heaps/arrays_as_heaps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function percolate_up!(xs::AbstractArray, i::Integer, x=xs[i], o::Ordering=Forwa
xs[i] = x
end

percolate_up!(xs::AbstractArray{T}, i::Integer, o::Ordering) where {T} = percolate_up!(xs, i, xs[i], o)
percolate_up!(xs::AbstractArray, i::Integer, o::Ordering) = percolate_up!(xs, i, xs[i], o)

"""
heappop!(v, [ord])
Expand All @@ -69,12 +69,12 @@ For efficiency, this function does not check that the array is indeed heap-order
"""
function heappush!(xs::AbstractArray, x, o::Ordering=Forward)
push!(xs, x)
percolate_up!(xs, length(xs), x, o)
percolate_up!(xs, length(xs), o)
return xs
end


# Turn an arbitrary array into a binary min-heap in linear time.
# Turn an arbitrary array into a binary min-heap (by default) in linear time.
"""
heapify!(v, ord::Ordering=Forward)

Expand Down Expand Up @@ -111,6 +111,7 @@ julia> heapify(a, Base.Order.Reverse)
2
```
"""
# Todo, benchmarking shows copy(xs) outperforms copyto!(similar(xs), xs) for 10^6 Float64
heapify(xs::AbstractArray, o::Ordering=Forward) = heapify!(copyto!(similar(xs), xs), o)

"""
Expand Down
Loading