Skip to content

Commit

Permalink
Use Base.Ordering for heap, and other performance changes
Browse files Browse the repository at this point in the history
  • Loading branch information
milesfrain committed Oct 25, 2019
1 parent bd57ace commit beae028
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 219 deletions.
29 changes: 12 additions & 17 deletions benchmark/bench_heap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,12 @@ heaptypes = [BinaryHeap, MutableBinaryHeap]
aexps = [1,3]
datatypes = [Int, Float64]
baseorderings = Dict(
"Min" => DataStructures.LessThan,
#"Max" => DataStructures.GreaterThan,
"Min" => Base.Forward,
#"Max" => Base.Reverse,
)
fastfloatorderings = Dict(
# These will be enabled upon reordering change
#"FastMin" => DataStructures.FasterForward(),
#"FastMax" => DataStructures.FasterReverse(),
"Min" => DataStructures.FasterForward(),
"Max" => DataStructures.FasterReverse(),
)

for heap in heaptypes
Expand All @@ -41,7 +40,8 @@ for heap in heaptypes
Random.seed!(0)
a = rand(dt, 10^aexp)

orderings = baseorderings
# Give the Dict an explicit abstract value type (Base.Ordering); otherwise a
# single-entry Dict would be typed by the concrete type of that one value
orderings = Dict{String, Base.Ordering}(baseorderings)
if dt == Float64
# swap to faster ordering operation
for (k,v) in orderings
Expand All @@ -56,34 +56,29 @@ for heap in heaptypes
prepath = [string(heap)]
postpath = [string(dt), "10^"*string(aexp), ord_str]
suite[vcat(prepath, ["make"], postpath)] =
@benchmarkable $(heap){$dt,$ord}($a)
@benchmarkable $(heap)($a, $ord)
suite[vcat(prepath, ["push"], postpath)] =
@benchmarkable push_heap(h, $a) setup=(h=$(heap){$dt,$ord}())
@benchmarkable push_heap(h, $a) setup=(h=$(heap)($dt, $ord))
suite[vcat(prepath, ["pop"], postpath)] =
@benchmarkable pop_heap(h) setup=(h=$(heap){$dt,$ord}($a))
@benchmarkable pop_heap(h) setup=(h=$(heap)($a, $ord))
end
end
end
end

# Quick check to ensure no Float regressions with Min/Max convenience functions
# These don't fit in well with the above loop, since ordering is hardcoded.
heapalias = Dict(
"BinaryMinHeap" => BinaryMinHeap,
"BinaryMaxHeap" => BinaryMaxHeap,
"BinaryMinMaxHeap" => BinaryMinMaxHeap, # <- no alias issue
)
for (heapname, heap) in heapalias
for heap in [BinaryMinHeap, BinaryMaxHeap, BinaryMinMaxHeap]
for aexp in aexps
for dt in [Float64]
Random.seed!(0)
a = rand(dt, 10^aexp)
prepath = [heapname]
prepath = [string(heap)]
postpath = [string(dt), "10^"*string(aexp)]
suite[vcat(prepath, ["make"], postpath)] =
@benchmarkable $(heap)($a)
suite[vcat(prepath, ["push"], postpath)] =
@benchmarkable push_heap(h, $a) setup=(h=$(heap){$dt}())
@benchmarkable push_heap(h, $a) setup=(h=$(heap)($dt))
suite[vcat(prepath, ["pop"], postpath)] =
@benchmarkable pop_heap(h) setup=(h=$(heap)($a))
end
Expand Down
46 changes: 17 additions & 29 deletions src/heaps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,28 +55,22 @@ abstract type AbstractMutableHeap{VT,HT} <: AbstractHeap{VT} end

abstract type AbstractMinMaxHeap{VT} <: AbstractHeap{VT} end

# Comparator tags: empty singleton types whose only purpose is to select
# a `compare` method.

struct LessThan end
struct GreaterThan end

# `compare(c, x, y)` is `x < y` for `LessThan` and `x > y` for `GreaterThan`.
compare(::LessThan, x, y) = x < y
compare(::GreaterThan, x, y) = x > y

# heap implementations

include("heaps/binary_heap.jl")
include("heaps/mutable_binary_heap.jl")
include("heaps/arrays_as_heaps.jl")
include("heaps/minmax_heap.jl")

# generic functions

# The element type of any heap type is its value-type parameter `T`.
Base.eltype(::Type{<:AbstractHeap{T}}) where T = T

#=
Note that extract_all! and extract_all_rev! are slower than
sorting the array of values in-place.
These functions are kept here for use in testing.
=#

function extract_all!(h::AbstractHeap{VT}) where VT
n = length(h)
r = Vector{VT}(undef, n)
Expand All @@ -97,30 +91,24 @@ end

# Array functions using heaps

function nextreme(comp::Comp, n::Int, arr::AbstractVector{T}) where {T, Comp}
function nextreme(ord::Base.Ordering, n::Int, arr::AbstractVector{T}) where T
if n <= 0
return T[] # sort(arr)[1:n] returns [] for n <= 0
elseif n >= length(arr)
return sort(arr, lt = (x, y) -> compare(comp, y, x))
return sort(arr, order = Base.ReverseOrdering(ord))
end

buffer = BinaryHeap{T,Comp}()

for i = 1 : n
@inbounds xi = arr[i]
push!(buffer, xi)
end
buffer = heapify(arr[1:n], ord)

for i = n + 1 : length(arr)
@inbounds xi = arr[i]
if compare(comp, top(buffer), xi)
# This could use a pushpop method
pop!(buffer)
push!(buffer, xi)
if Base.lt(ord, buffer[1], xi)
buffer[1] = xi
percolate_down!(buffer, 1, ord)
end
end

return extract_all_rev!(buffer)
return sort!(buffer, order = Base.ReverseOrdering(ord))
end

"""
Expand All @@ -130,8 +118,8 @@ Return the `n` largest elements of the array `arr`.
Equivalent to `sort(arr, lt = >)[1:min(n, end)]`
"""
function nlargest(n::Int, arr::AbstractVector{T}) where T
return nextreme(LessThan(), n, arr)
function nlargest(n::Int, arr::AbstractVector)
return nextreme(FasterForward(), n, arr)
end

"""
Expand All @@ -141,6 +129,6 @@ Return the `n` smallest elements of the array `arr`.
Equivalent to `sort(arr, lt = <)[1:min(n, end)]`
"""
function nsmallest(n::Int, arr::AbstractVector{T}) where T
return nextreme(GreaterThan(), n, arr)
function nsmallest(n::Int, arr::AbstractVector)
return nextreme(FasterReverse(), n, arr)
end
7 changes: 4 additions & 3 deletions src/heaps/arrays_as_heaps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function percolate_up!(xs::AbstractArray, i::Integer, x=xs[i], o::Ordering=Forwa
xs[i] = x
end

percolate_up!(xs::AbstractArray{T}, i::Integer, o::Ordering) where {T} = percolate_up!(xs, i, xs[i], o)
percolate_up!(xs::AbstractArray, i::Integer, o::Ordering) = percolate_up!(xs, i, xs[i], o)

"""
heappop!(v, [ord])
Expand All @@ -69,12 +69,12 @@ For efficiency, this function does not check that the array is indeed heap-order
"""
function heappush!(xs::AbstractArray, x, o::Ordering=Forward)
push!(xs, x)
percolate_up!(xs, length(xs), x, o)
percolate_up!(xs, length(xs), o)
xs
end


# Turn an arbitrary array into a binary min-heap in linear time.
# Turn an arbitrary array into a binary min-heap (by default) in linear time.
"""
heapify!(v, ord::Ordering=Forward)
Expand Down Expand Up @@ -111,6 +111,7 @@ julia> heapify(a, Base.Order.Reverse)
2
```
"""
# Todo, benchmarking shows copy(xs) outperforms copyto!(similar(xs), xs) for 10^6 Float64
heapify(xs::AbstractArray, o::Ordering=Forward) = heapify!(copyto!(similar(xs), xs), o)

"""
Expand Down
142 changes: 32 additions & 110 deletions src/heaps/binary_heap.jl
Original file line number Diff line number Diff line change
@@ -1,124 +1,41 @@
# Binary heap (non-mutable)

#################################################
#
# core implementation
#
#################################################

"""
    _heap_bubble_up!(comp, valtree, i)

Sift the value stored at index `i` of `valtree` toward the root (index 1).

While `compare(comp, value, parent)` holds, the parent (at index `pos >> 1`)
is moved down into the current slot. The value itself is written back once,
and only if it actually moved.
"""
function _heap_bubble_up!(comp::Comp, valtree::Array{T}, i::Int) where {Comp,T}
    start::Int = i
    @inbounds item = valtree[start]

    pos = start
    while pos > 1  # stop once the root is reached
        parent = pos >> 1
        @inbounds parentval = valtree[parent]

        # nothing more to do at this level: the item may not pass its parent
        compare(comp, item, parentval) || break

        # shift the parent down and continue from its slot
        @inbounds valtree[pos] = parentval
        pos = parent
    end

    if pos != start
        @inbounds valtree[pos] = item
    end
end

"""
    _heap_bubble_down!(comp, valtree, i)

Sift the value stored at index `i` of `valtree` toward the leaves.

At each level the preferred child is chosen (the right child when
`compare(comp, right, left)` holds, otherwise the left child). If
`compare(comp, child, value)` holds, the child is moved up into the current
slot and the walk continues from the child's position; otherwise the walk stops.
The value is finally stored at the position where the walk ended.
"""
function _heap_bubble_down!(comp::Comp, valtree::Array{T}, i::Int) where {Comp,T}
    @inbounds item::T = valtree[i]
    n = length(valtree)
    last_parent = n >> 1  # nodes past this index have no children

    while i <= last_parent
        # start with the left child as the candidate
        child = i << 1
        @inbounds childval = valtree[child]

        if child < n  # a right child exists as well
            right = child + 1
            @inbounds rightval = valtree[right]
            if compare(comp, rightval, childval)
                child = right
                childval = rightval
            end
        end

        # the candidate may not pass the item: final position found
        compare(comp, childval, item) || break

        @inbounds valtree[i] = childval
        i = child
    end

    valtree[i] = item
end


"""
    _binary_heap_pop!(comp, valtree)

Remove and return the root (index 1) of the heap stored in `valtree`.

When more than one element is present, the last element is moved to the root
and, if at least two elements remain, sifted down with `_heap_bubble_down!`.
"""
function _binary_heap_pop!(comp::Comp, valtree::Array{T}) where {Comp,T}
    root = valtree[1]

    # single element: the heap simply becomes empty
    if length(valtree) == 1
        empty!(valtree)
        return root
    end

    # move the last element to the root, then restore heap order if needed
    valtree[1] = pop!(valtree)
    length(valtree) > 1 && _heap_bubble_down!(comp, valtree, 1)

    return root
end


"""
    _make_binary_heap(comp, ty, xs)

Return a heap-ordered copy of `xs`; `xs` itself is not modified.

The copy is put in heap order by bubbling up each element from index 2 to the
last index in turn.
"""
function _make_binary_heap(comp::Comp, ty::Type{T}, xs) where {Comp,T}
    heap = copy(xs)
    for idx in 2:length(xs)
        _heap_bubble_up!(comp, heap, idx)
    end
    return heap
end

include("arrays_as_heaps.jl")

#################################################
#
# heap type and constructors
#
#################################################

mutable struct BinaryHeap{T,Comp} <: AbstractHeap{T}
comparer::Comp
#=
Lightweight orderings that compare with plain `<` and `>`.
Base.Forward and Base.Reverse may be substituted for these structs, but float
comparison will then be 2x slower, because those orderings preserve a
consistent ordering with NaN values.
=#
struct FasterForward <: Base.Ordering end
struct FasterReverse <: Base.Ordering end

Base.lt(::FasterForward, a, b) = a < b
Base.lt(::FasterReverse, a, b) = a > b

mutable struct BinaryHeap{T, O <: Base.Ordering} <: AbstractHeap{T}
valtree::Vector{T}
ordering::O

BinaryHeap{T,Comp}() where {T,Comp} = new{T,Comp}(Comp(), Vector{T}())
# min heap by default
function BinaryHeap(::Type{T}, ordering::O = FasterForward()) where {T,O}
new{T,O}(Vector{T}(), ordering)
end

function BinaryHeap{T,Comp}(xs::AbstractVector{T}) where {T,Comp}
valtree = _make_binary_heap(Comp(), T, xs)
new{T,Comp}(Comp(), valtree)
function BinaryHeap(xs::AbstractVector{T}, ordering::O = FasterForward()) where {T,O}
valtree = heapify(xs, ordering)
new{T,O}(valtree, ordering)
end
end

const BinaryMinHeap{T} = BinaryHeap{T, LessThan}
const BinaryMaxHeap{T} = BinaryHeap{T, GreaterThan}

BinaryMinHeap(xs::AbstractVector{T}) where T = BinaryMinHeap{T}(xs)
BinaryMaxHeap(xs::AbstractVector{T}) where T = BinaryMaxHeap{T}(xs)
# Convenience constructors: the "Min" variants build a BinaryHeap with
# FasterForward(), the "Max" variants with FasterReverse().

# Heap built from the elements of `xs`.
function BinaryMinHeap(xs::AbstractVector)
    return BinaryHeap(xs, FasterForward())
end

function BinaryMaxHeap(xs::AbstractVector)
    return BinaryHeap(xs, FasterReverse())
end

# Empty heap for elements of type `T`.
function BinaryMinHeap(::Type{T}) where T
    return BinaryHeap(T, FasterForward())
end

function BinaryMaxHeap(::Type{T}) where T
    return BinaryHeap(T, FasterReverse())
end


#################################################
Expand All @@ -127,14 +44,14 @@ BinaryMaxHeap(xs::AbstractVector{T}) where T = BinaryMaxHeap{T}(xs)
#
#################################################

# Todo document and reorder these

# Number of elements currently stored in the heap's backing vector.
length(h::BinaryHeap) = length(h.valtree)

# True when the heap's backing vector holds no elements.
isempty(h::BinaryHeap) = isempty(h.valtree)

function push!(h::BinaryHeap, v)
valtree = h.valtree
push!(valtree, v)
_heap_bubble_up!(h.comparer, valtree, length(valtree))
heappush!(h.valtree, v, h.ordering)
h
end

Expand All @@ -150,4 +67,9 @@ Returns the element at the top of the heap `h`.
"""
@inline top(h::BinaryHeap) = h.valtree[1]

pop!(h::BinaryHeap{T}) where {T} = _binary_heap_pop!(h.comparer, h.valtree)
"""
    pop!(h::BinaryHeap)

Removes and returns the element at the top of the heap `h`.
"""
pop!(h::BinaryHeap) = heappop!(h.valtree, h.ordering)
4 changes: 2 additions & 2 deletions src/heaps/minmax_heap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
mutable struct BinaryMinMaxHeap{T} <: AbstractMinMaxHeap{T}
valtree::Vector{T}

BinaryMinMaxHeap{T}() where {T} = new{T}(Vector{T}())
BinaryMinMaxHeap(::Type{T}) where T = new{T}(Vector{T}())

function BinaryMinMaxHeap(xs::AbstractVector{T}) where {T}
function BinaryMinMaxHeap(xs::AbstractVector{T}) where T
valtree = _make_binary_minmax_heap(xs)
new{T}(valtree)
end
Expand Down
Loading

0 comments on commit beae028

Please sign in to comment.