From cc25f4295590cf74e350e307b8bea63c33007c49 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 3 Dec 2022 11:52:31 +0600 Subject: [PATCH] put back the old QuickSort, PartialQuickSort, and MergeSort algorithms... ...as they were in 1.8 and rename the new PartialQuickSort to QuickerSort Also improve the documentation and API for constructing QuickerSort and test the API --- base/sort.jl | 241 ++++++++++++++++++++++++++++++++++++------------ test/sorting.jl | 43 ++++++--- 2 files changed, 212 insertions(+), 72 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 932da36b9e1d6..5ee591f965e3a 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - _sort!(v, _PartialQuickSort(k), o, (;)) + _sort!(v, QuickerSort(k), o, (;)) maybeview(v, k) end @@ -931,49 +931,40 @@ end """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm + QuickerSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm + QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm -Indicate that a sorting function should use the partial quick sort algorithm. +Use the `QuickerSort` algorithm with the `next` algorithm as a base case. -Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using -[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks +`QuickerSort` is like `QuickSort`, but utilizes scratch space to operate faster and allow +for the possibility of maintaining stability. + +If `lo` and `hi` are provided, finds and sorts the elements in the range `lo:hi`, reordering +but not necessarily sorting other elements in the process. If `lo` or `hi` is `missing`, it +is treated as the first or last index of the input, respectively. 
+ +`lo` and `hi` may be specified together as an `AbstractUnitRange`. Characteristics: * *stable*: preserves the ordering of elements which compare equal (e.g. "a" and "A" in a sort of letters which ignores case). * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref). + * *linear runtime* if `length(lo:hi)` is constant + * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) """ -struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm +struct QuickerSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm lo::L hi::H next::T end -PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM) -PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM) -_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) -_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) - -""" - QuickSort - -Indicate that a sorting function should use the quick sort algorithm. +QuickerSort(next::Algorithm=SMALL_ALGORITHM) = QuickerSort(missing, missing, next) +QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) = QuickerSort(lo, hi, SMALL_ALGORITHM) +QuickerSort(lo::Union{Integer, Missing}, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(lo, lo, next) +QuickerSort(r::OrdinalRange, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(first(r), last(r), next) -Quick sort picks a pivot element, partitions the array based on the pivot, -and then sorts the elements before and after the pivot recursively. - -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). 
- * *good performance* for almost all large collections. - * *quadratic worst case runtime* in pathological cases - (vanishingly rare for non-malicious input) -""" -const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM) - -# select a pivot for QuickSort +# select a pivot for QuickerSort # # This method is redefined to rand(lo:hi) in Random.jl # We can't use rand here because it is not available in Core.Compiler and @@ -1013,7 +1004,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer pivot, lo-offset end -function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; +function _sort!(v::AbstractVector, a::QuickerSort, o::Ordering, kw; t=nothing, offset=nothing, swap=false, rev=false) @getkw lo hi scratch @@ -1029,7 +1020,7 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; @inbounds v[j] = pivot swap = !swap - # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped + # For QuickerSort(), a.lo === a.hi === missing, so the first two branches get skipped if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part swap && copyto!(v, lo, t, lo+offset, j-lo) rev && reverse!(v, lo, j-1) @@ -1225,7 +1216,7 @@ the initial optimizations because they can change the input vector's type and or make them `UIntMappable`. If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch -to [`QuickSort`](@ref). +to [`QuickerSort`](@ref). Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then perform a presorted check ([`CheckSorted`](@ref)). @@ -1257,7 +1248,7 @@ Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that r stage. Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and -otherwise we dispatch to [`QuickSort`](@ref). +otherwise we dispatch to [`QuickerSort`](@ref). 
""" const DEFAULT_STABLE = InitialOptimizations( IsUIntMappable( @@ -1267,9 +1258,9 @@ const DEFAULT_STABLE = InitialOptimizations( ConsiderCountingSort( ConsiderRadixSort( Small{80}( - QuickSort)))))), + QuickerSort())))))), StableCheckSorted( - QuickSort))) + QuickerSort()))) """ DEFAULT_UNSTABLE @@ -1483,7 +1474,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;)) + _sort!(ix, QuickerSort(k), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1863,18 +1854,53 @@ end ### Unused constructs for backward compatibility ### -struct MergeSortAlg{T <: Algorithm} <: Algorithm - next::T +## Old algorithms ## + +struct QuickSortAlg <: Algorithm end +struct MergeSortAlg <: Algorithm end + +""" + PartialQuickSort{T <: Union{Integer,OrdinalRange}} + +Indicate that a sorting function should use the partial quick sort +algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest +to largest, finding them and sorting them using [`QuickSort`](@ref). + +Characteristics: + * *not stable*: does not preserve the ordering of elements which + compare equal (e.g. "a" and "A" in a sort of letters which + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +""" +struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm + k::T end """ - MergeSort + QuickSort -Indicate that a sorting function should use the merge sort algorithm. +Indicate that a sorting function should use the quick sort +algorithm, which is *not* stable. -Merge sort divides the collection into subcollections and -repeatedly merges them, sorting each subcollection at each step, -until the entire collection has been recombined in sorted form. +Characteristics: + * *not stable*: does not preserve the ordering of elements which + compare equal (e.g. 
"a" and "A" in a sort of letters which + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for large collections. +""" +const QuickSort = QuickSortAlg() + +""" + MergeSort + +Indicate that a sorting function should use the merge sort +algorithm. Merge sort divides the collection into +subcollections and repeatedly merges them, sorting each +subcollection at each step, until the entire +collection has been recombined in sorted form. Characteristics: * *stable*: preserves the ordering of elements which compare @@ -1883,21 +1909,94 @@ Characteristics: * *not in-place* in memory. * *divide-and-conquer* sort strategy. """ -const MergeSort = MergeSortAlg(SMALL_ALGORITHM) +const MergeSort = MergeSortAlg() -function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing) - @getkw lo hi scratch +# selectpivot! +# +# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi] and +# choose the middle value as a pivot +# +# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be +# not less than the pivot + +@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + @inbounds begin + mi = midpoint(lo, hi) + + # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place + if lt(o, v[lo], v[mi]) + v[mi], v[lo] = v[lo], v[mi] + end + + if lt(o, v[hi], v[lo]) + if lt(o, v[hi], v[mi]) + v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] + else + v[hi], v[lo] = v[lo], v[hi] + end + end + + # return the pivot + return v[lo] + end +end + +# partition!
+# +# select a pivot, and partition v according to the pivot + +function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + pivot = selectpivot!(v, lo, hi, o) + # pivot == v[lo], v[hi] >= pivot + i, j = lo, hi + @inbounds while true + i += 1; j -= 1 + while lt(o, v[i], pivot); i += 1; end; + while lt(o, pivot, v[j]); j -= 1; end; + i >= j && break + v[i], v[j] = v[j], v[i] + end + v[j], v[lo] = pivot, v[j] + + # v[j] == pivot + # v[k] >= pivot for k > j + # v[i] <= pivot for i < j + return j +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + if j-lo < hi-j + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + j+1 < hi && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + return v +end + +sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, t0::Vector{T}) where T = + invoke(sort!, Tuple{typeof.((v, lo, hi, a, o))..., AbstractVector{T}}, v, lo, hi, a, o, t0) # For disambiguation +function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, + t0::Union{AbstractVector{T}, Nothing}=nothing) where T @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) m = midpoint(lo, hi) - if t === nothing - scratch, t = make_scratch(scratch, eltype(v), m-lo+1) - end + t = t0 === nothing ?
similar(v, m-lo+1) : t0 + length(t) < m-lo+1 && resize!(t, m-lo+1) + Base.require_one_based_indexing(t) - _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) - _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset) + sort!(v, lo, m, a, o, t) + sort!(v, m+1, hi, a, o, t) i, j = 1, lo while j <= m @@ -1924,9 +2023,37 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, end end - scratch + return v +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, + o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + + if j <= first(a.k) + lo = j+1 + elseif j >= last(a.k) + hi = j-1 + else + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + if j-lo < hi-j + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + hi > (j+1) && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + end + return v end +## Old extensibility mechanisms ## + # Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) @@ -1952,8 +2079,4 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) end end -# Keep old internal types so that people can keep dispatching with -# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... 
-const QuickSortAlg = typeof(QuickSort) - end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index 37bad7d23c94b..14762e40018f4 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -79,9 +79,8 @@ end end @testset "stability" begin - for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, - PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM), - PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)] + for Alg in [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.DEFAULT_STABLE, + Base.Sort.QuickerSort(missing, 1729), Base.Sort.QuickerSort(1729, missing)] @test issorted(sort(1:2000, alg=Alg, by=x->0)) @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) end @@ -334,7 +333,7 @@ end @test c == v # stable algorithms - for alg in [MergeSort, QuickSort, PartialQuickSort(1:n), Base.DEFAULT_STABLE] + for alg in [MergeSort, Base.Sort.QuickerSort(), Base.Sort.QuickerSort(1:n), Base.DEFAULT_STABLE] p = sortperm(v, alg=alg, rev=rev) p2 = sortperm(float(v), alg=alg, rev=rev) @test p == p2 @@ -382,7 +381,7 @@ end end v = randn_with_nans(n,0.1) - for alg in [InsertionSort, MergeSort, QuickSort, PartialQuickSort(n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], + for alg in [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.Sort.QuickerSort(1, n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], rev in [false,true] alg === InsertionSort && n >= 3000 && continue # test float sorting with NaNs @@ -589,7 +588,7 @@ end @testset "fallback" begin @test adaptive_sort_test(rand(1:typemax(Int32), len), by=x->x^2)# fallback - @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=QuickSort) + @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=Base.Sort.QuickerSort()) end @test adaptive_sort_test(rand(Int, 20)) # InsertionSort @@ -691,15 +690,16 @@ end @testset "invalid lt (#11429)" begin # lt must be a total linear order (e.g. < not <=) so this usage is # not allowed. 
Consequently, none of the behavior tested in this - # testset is gaurunteed to work in future minor versions of Julia. + # testset is guaranteed to work in future minor versions of Julia. + + safe_algs = [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] n = 1000 v = rand(1:5, n); s = sort(v); # Nevertheless, it still works... - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort(v, alg=alg, lt = <=) == s end @test partialsort(v, 172, lt = <=) == s[172] @@ -709,16 +709,14 @@ end # where i < j if and only if lt(o, v[j], v[i]). This invariant holds even for # this invalid lt order. perm = reverse(sortperm(v, rev=true)) - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] @test partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415] # lt can be very poorly behaved and sort will still permute its input in some way. 
- for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s end @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 @@ -899,6 +897,25 @@ end @test issorted(sort(rand(typemax(Int)-100:typemax(Int), 1000))) end +@testset "QuickerSort API" begin + bsqs = Base.Sort.QuickerSort + @test bsqs(1, 2, MergeSort) === bsqs(1, 2, MergeSort) + @test bsqs(missing, 2, MergeSort) === bsqs(missing, 2, MergeSort) + @test bsqs(1, missing, MergeSort) === bsqs(1, missing, MergeSort) + @test bsqs(missing, missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(1, MergeSort) === bsqs(1, 1, MergeSort) + @test bsqs(missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(MergeSort) === bsqs(missing, missing, MergeSort) + + @test bsqs(1, 2) === bsqs(1, 2, InsertionSort) + @test bsqs(missing, 2) === bsqs(missing, 2, InsertionSort) + @test bsqs(1, missing) === bsqs(1, missing, InsertionSort) + @test bsqs(missing, missing) === bsqs(missing, missing, InsertionSort) + @test bsqs(1) === bsqs(1, 1, InsertionSort) + @test bsqs(missing) === bsqs(missing, missing, InsertionSort) + @test bsqs() === bsqs(missing, missing, InsertionSort) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128,