From 3b6221fa07505eac363028d46cb5120ed9bad629 Mon Sep 17 00:00:00 2001 From: Kevin Squire Date: Tue, 14 Jul 2015 23:25:41 -0700 Subject: [PATCH] Update quicksort algorithms * Break out select_pivot!, partition!, functions * Share these among different quicksort variants * Switch select to use PartialQuickSort --- base/sort.jl | 210 ++++++++++++++++++++------------------------ doc/stdlib/sort.rst | 13 ++- 2 files changed, 105 insertions(+), 118 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 6d4832aa2863e..87938fda9f360 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -58,73 +58,17 @@ issorted(itr; lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) = issorted(itr, ord(lt,by,rev,order)) -function select!(v::AbstractVector, k::Int, lo::Int, hi::Int, o::Ordering) - lo <= k <= hi || throw(ArgumentError("select index $k is out of range $lo:$hi")) - @inbounds while lo < hi - if hi-lo == 1 - if lt(o, v[hi], v[lo]) - v[lo], v[hi] = v[hi], v[lo] - end - return v[k] - end - pivot = v[(lo+hi)>>>1] - i, j = lo, hi - while true - while lt(o, v[i], pivot); i += 1; end - while lt(o, pivot, v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 - end - if k <= j - hi = j - elseif i <= k - lo = i - else - return pivot - end - end - return v[lo] +function select!(v::AbstractVector, k::Union{Int,OrdinalRange}, o::Ordering) + sort!(v, 1, length(v), PartialQuickSort(k), o) + v[k] end - -function select!(v::AbstractVector, r::OrdinalRange, lo::Int, hi::Int, o::Ordering) - isempty(r) && (return v[r]) - a, b = extrema(r) - lo <= a <= b <= hi || throw(ArgumentError("selection $r is out of range $lo:$hi")) - @inbounds while true - if lo == a && hi == b - sort!(v, lo, hi, DEFAULT_UNSTABLE, o) - return v[r] - end - pivot = v[(lo+hi)>>>1] - i, j = lo, hi - while true - while lt(o, v[i], pivot); i += 1; end - while lt(o, pivot, v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 - end - if b <= j - hi = j - elseif i <= a - lo = i - else - a <= j && select!(v, a, lo, j, o) - b >= i && select!(v, b, i, hi, o) - sort!(v, a, b, DEFAULT_UNSTABLE, o) - return v[r] - end - end -end - -select!(v::AbstractVector, k::Union{Int,OrdinalRange}, o::Ordering) = select!(v,k,1,length(v),o) select!(v::AbstractVector, k::Union{Int,OrdinalRange}; lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) = select!(v, k, ord(lt,by,rev,order)) select(v::AbstractVector, k::Union{Int,OrdinalRange}; kws...) = select!(copy(v), k; kws...) + # reference on sorted binary search: # http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary @@ -250,13 +194,10 @@ abstract Algorithm immutable InsertionSortAlg <: Algorithm end immutable QuickSortAlg <: Algorithm end immutable MergeSortAlg <: Algorithm end -immutable PartialQuickSort <: Algorithm - k::Int -end - -# partially sort until the end of the range -PartialQuickSort(r::OrdinalRange) = PartialQuickSort(last(r)) +immutable PartialQuickSort{T <: Union(Int,OrdinalRange)} <: Algorithm + k::T +end const InsertionSort = InsertionSortAlg() const QuickSort = QuickSortAlg() @@ -284,10 +225,20 @@ function sort!(v::AbstractVector, lo::Int, hi::Int, ::InsertionSortAlg, o::Order return v end -function sort!(v::AbstractVector, lo::Int, hi::Int, a::QuickSortAlg, o::Ordering) - @inbounds while lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) +# selectpivot! +# +# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and +# choose the middle value as a pivot +# +# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be +# greater than the pivot + +@inline function selectpivot!(v::AbstractVector, lo::Int, hi::Int, o::Ordering) + @inbounds begin mi = (lo+hi)>>>1 + + # sort the values in v[lo], v[mi], v[hi] + if lt(o, v[mi], v[lo]) v[mi], v[lo] = v[lo], v[mi] end @@ -298,17 +249,43 @@ function sort!(v::AbstractVector, lo::Int, hi::Int, a::QuickSortAlg, o::Ordering v[hi], v[mi] = v[mi], v[hi] end end - v[mi], v[lo] = v[lo], v[mi] - i, j = lo, hi + + # move v[mi] to v[lo] and use it as the pivot + v[lo], v[mi] = v[mi], v[lo] pivot = v[lo] - while true - i += 1; j -= 1; - while lt(o, v[i], pivot); i += 1; end; - while lt(o, pivot, v[j]); j -= 1; end; - i >= j && break - v[i], v[j] = v[j], v[i] - end - v[j], v[lo] = v[lo], v[j] + end + + # return the pivot + return pivot +end + +# partition! +# +# select a pivot, and partition v according to the pivot + +function partition!(v::AbstractVector, lo::Int, hi::Int, o::Ordering) + pivot = selectpivot!(v, lo, hi, o) + # pivot == v[lo], v[hi] > pivot + i, j = lo, hi + @inbounds while true + i += 1; j -= 1 + while lt(o, v[i], pivot); i += 1; end; + while lt(o, pivot, v[j]); j -= 1; end; + i >= j && break + v[i], v[j] = v[j], v[i] + end + v[j], v[lo] = pivot, v[j] + + # v[j] == pivot + # v[k] >= pivot for k > j + # v[i] <= pivot for i < j + return j +end + +function sort!(v::AbstractVector, lo::Int, hi::Int, a::QuickSortAlg, o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) if j-lo < hi-j # recurse on the smaller chunk # this is necessary to preserve O(log(n)) @@ -361,38 +338,53 @@ function sort!(v::AbstractVector, lo::Int, hi::Int, a::MergeSortAlg, o::Ordering return v end -function sort!(v::AbstractVector, lo::Int, hi::Int, a::PartialQuickSort, +function sort!(v::AbstractVector, lo::Int, hi::Int, a::PartialQuickSort{Int}, o::Ordering) - k = a.k - while lo < hi + @inbounds while lo < hi hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - pivot = v[(lo+hi)>>>1] - i, j = lo, hi - while true - while lt(o, v[i], pivot); i += 1; end - while lt(o, pivot, v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 + j = partition!(v, lo, hi, o) + if j >= a.k + # we don't need to sort anything bigger than j + hi = j-1 + elseif j-lo < hi-j + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + (j+1) < hi && sort!(v, j+1, hi, a, o) + hi = j-1 end - if lo < j - if j - lo <= k - sort!(v, lo, j, QuickSort, o) + end + return v +end + + +function sort!{T<:OrdinalRange}(v::AbstractVector, lo::Int, hi::Int, a::PartialQuickSort{T}, + o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + + if j <= first(a.k) + lo = j+1 + elseif j >= last(a.k) + hi = j-1 + else + if j-lo < hi-j + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 else - sort!(v, lo, j, PartialQuickSort(k), o) + hi > (j+1) && sort!(v, j+1, hi, a, o) + hi = j-1 end end - jk = min(j, lo + k - 1) - if (i - lo + 1) <= k - k -= j - lo + 1 - lo = i - else - break - end end return v end + ## generic sorting methods ## defalg(v::AbstractArray) = DEFAULT_STABLE @@ -414,14 +406,8 @@ sort(v::AbstractVector; kws...) = sort!(copy(v); kws...) ## selectperm: the permutation to sort the first k elements of an array ## -function selectperm(v::AbstractVector, - k::Union(Int,OrdinalRange); - lt::Function=isless, - by::Function=identity, - rev::Bool=false, - order::Ordering=Base.Order.Forward) - select!(collect(1:length(v)), k, Perm(ord(lt, by, rev, order), v)) -end +selectperm(v::AbstractVector, k::Union(Integer,OrdinalRange); kwargs...) = + selectperm!(Vector{eltype(k)}(length(v)), v, k; kwargs..., initialized=false) function selectperm!{I<:Integer}(ix::AbstractVector{I}, v::AbstractVector, k::Union(Int, OrdinalRange); @@ -438,10 +424,6 @@ function selectperm!{I<:Integer}(ix::AbstractVector{I}, v::AbstractVector, # do partial quicksort sort!(ix, PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) - - # TODO: Not type stable. If k is an int, this will return an Int, of it is - # an OrdinalRange it will return a Vector{Int}. This, however, seems - # to be the same behavior as as `select` return ix[k] end @@ -576,7 +558,7 @@ function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering) end -fpsort!(v::AbstractVector, a::PartialQuickSort, o::Ordering) = +fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) = sort!(v, 1, length(v), a, o) sort!{T<:Floats}(v::AbstractVector{T}, a::Algorithm, o::DirectOrdering) = fpsort!(v,a,o) diff --git a/doc/stdlib/sort.rst b/doc/stdlib/sort.rst index cf550b0ed6f57..a0a9f538891c9 100644 --- a/doc/stdlib/sort.rst +++ b/doc/stdlib/sort.rst @@ -245,15 +245,20 @@ appeared in the array to be sorted. ``QuickSort`` is the default algorithm for numeric values, including integers and floats. ``PartialQuickSort(k)`` is similar to ``QuickSort``, but the output array -is only sorted up to index ``k``. For example:: +is only sorted up to index ``k`` if ``k`` is an integer, or in the range +of ``k`` if ``k`` is an ``OrdinalRange``. For example:: x = rand(1:500, 100) k = 50 + k2 = 50:100 s = sort(x; alg=QuickSort) ps = sort(x; alg=PartialQuickSort(k)) - map(issorted, (s, ps)) # => (true, false) - map(x->issorted(x[1:k]), (s, ps)) # => (true, true) - s[1:k] == ps[1:k] # => true + qs = sort(x; alg=PartialQuickSort(k2)) + map(issorted, (s, ps, qs)) # => (true, false, false) + map(x->issorted(x[1:k]), (s, ps, qs)) # => (true, true, false) + map(x->issorted(x[k2]), (s, ps, qs)) # => (true, false, true) + s[1:k] == ps[1:k] # => true + s[k2] == qs[k2] # => true ``MergeSort`` is an O(n log n) stable sorting algorithm but is not in-place – it requires a temporary array of half the size of the