From e97096ac6ef4017de017d757881f05d9acc25c66 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Sat, 24 Jul 2021 18:12:05 +0900 Subject: [PATCH] Add stable sort implementation to `Slice` and `Array` (#10163) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Julien Reichardt Co-authored-by: Johannes Müller --- spec/std/array_spec.cr | 172 +++++++++--- spec/std/slice_spec.cr | 207 ++++++++++----- src/array.cr | 24 +- src/compiler/crystal/tools/doc/type.cr | 12 +- src/slice.cr | 32 ++- src/slice/sort.cr | 353 +++++++++++++++++++++++++ 6 files changed, 669 insertions(+), 131 deletions(-) diff --git a/spec/std/array_spec.cr b/spec/std/array_spec.cr index 38f5b5b78dcb..5d28004fcfae 100644 --- a/spec/std/array_spec.cr +++ b/spec/std/array_spec.cr @@ -3,6 +3,40 @@ require "spec/helpers/iterate" private alias RecursiveArray = Array(RecursiveArray) +private class Spaceship + getter value : Float64 + + def initialize(@value : Float64, @return_nil = false) + end + + def <=>(other : Spaceship) + return nil if @return_nil + + value <=> other.value + end +end + +private def is_stable_sort(*, mutable, &block) + n = 42 + # [Spaceship.new(0), ..., Spaceship.new(n - 1), Spaceship.new(0), ..., Spaceship.new(n - 1)] + arr = Array.new(n * 2) { |i| Spaceship.new((i % n).to_f) } + # [Spaceship.new(0), Spaceship.new(0), ..., Spaceship.new(n - 1), Spaceship.new(n - 1)] + expected = Array.new(n * 2) { |i| arr[i % 2 * n + i // 2] } + + if mutable + yield arr + result = arr + else + result = yield arr + result.should_not eq(arr) + end + + result.size.should eq(expected.size) + expected.zip(result) do |exp, res| + res.should be(exp) # reference-equality is necessary to check sorting is stable. 
+ end +end + describe "Array" do describe "new" do it "creates with default value" do @@ -1237,63 +1271,127 @@ describe "Array" do end describe "sort" do - it "sort without block" do - a = [3, 4, 1, 2, 5, 6] - b = a.sort - b.should eq([1, 2, 3, 4, 5, 6]) - a.should_not eq(b) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sort without block" do + a = [3, 4, 1, 2, 5, 6] + b = a.sort(stable: stable) + b.should eq([1, 2, 3, 4, 5, 6]) + a.should_not eq(b) + end + + it "sort with a block" do + a = ["foo", "a", "hello"] + b = a.sort(stable: stable) { |x, y| x.size <=> y.size } + b.should eq(["a", "foo", "hello"]) + a.should_not eq(b) + end + end + end + + it "stable sort without block" do + is_stable_sort(mutable: false, &.sort(stable: true)) + end + + it "stable sort with a block" do + is_stable_sort(mutable: false, &.sort(stable: true) { |a, b| a.value <=> b.value }) + end + + it "default is stable (without block)" do + is_stable_sort(mutable: false, &.sort) end - it "sort with a block" do - a = ["foo", "a", "hello"] - b = a.sort { |x, y| x.size <=> y.size } - b.should eq(["a", "foo", "hello"]) - a.should_not eq(b) + it "default is stable (with a block)" do + is_stable_sort(mutable: false, &.sort { |a, b| a.value <=> b.value }) end end describe "sort!" do - it "sort! without block" do - a = [3, 4, 1, 2, 5, 6] - a.sort! - a.should eq([1, 2, 3, 4, 5, 6]) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sort! without block" do + a = [3, 4, 1, 2, 5, 6] + a.sort!(stable: stable) + a.should eq([1, 2, 3, 4, 5, 6]) + end + + it "sort! with a block" do + a = ["foo", "a", "hello"] + a.sort!(stable: stable) { |x, y| x.size <=> y.size } + a.should eq(["a", "foo", "hello"]) + end + end + end + + it "stable sort! without block" do + is_stable_sort(mutable: true, &.sort!(stable: true)) + end + + it "stable sort! with a block" do + is_stable_sort(mutable: true, &.sort!(stable: true) { |a, b| a.value <=> b.value }) end - it "sort! 
with a block" do - a = ["foo", "a", "hello"] - a.sort! { |x, y| x.size <=> y.size } - a.should eq(["a", "foo", "hello"]) + it "default is stable (without block)" do + is_stable_sort(mutable: true, &.sort!) + end + + it "default is stable (with a block)" do + is_stable_sort(mutable: true, &.sort! { |a, b| a.value <=> b.value }) end end describe "sort_by" do - it "sorts by" do - a = ["foo", "a", "hello"] - b = a.sort_by &.size - b.should eq(["a", "foo", "hello"]) - a.should_not eq(b) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sorts by" do + a = ["foo", "a", "hello"] + b = a.sort_by(stable: stable, &.size) + b.should eq(["a", "foo", "hello"]) + a.should_not eq(b) + end + + it "unpacks tuple" do + a = [{"d", 4}, {"a", 1}, {"c", 3}, {"e", 5}, {"b", 2}] + b = a.sort_by(stable: stable) { |x, y| y } + b.should eq([{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}]) + a.should_not eq(b) + end + end end - it "unpacks tuple" do - a = [{"d", 4}, {"a", 1}, {"c", 3}, {"e", 5}, {"b", 2}] - b = a.sort_by { |x, y| y } - b.should eq([{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}]) - a.should_not eq(b) + it "stable sort by" do + is_stable_sort(mutable: false, &.sort_by(stable: true, &.value)) + end + + it "default is stable" do + is_stable_sort(mutable: false, &.sort_by(&.value)) end end describe "sort_by!" do - it "sorts by!" do - a = ["foo", "a", "hello"] - a.sort_by! &.size - a.should eq(["a", "foo", "hello"]) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sorts by!" do + a = ["foo", "a", "hello"] + a.sort_by!(stable: stable, &.size) + a.should eq(["a", "foo", "hello"]) + end + + it "calls given block exactly once for each element" do + calls = Hash(String, Int32).new(0) + a = ["foo", "a", "hello"] + a.sort_by!(stable: stable) { |e| calls[e] += 1; e.size } + calls.should eq({"foo" => 1, "a" => 1, "hello" => 1}) + end + end + end + + it "stable sort by!" 
do + is_stable_sort(mutable: true, &.sort_by!(stable: true, &.value)) end - it "calls given block exactly once for each element" do - calls = Hash(String, Int32).new(0) - a = ["foo", "a", "hello"] - a.sort_by! { |e| calls[e] += 1; e.size } - calls.should eq({"foo" => 1, "a" => 1, "hello" => 1}) + it "default is stable" do + is_stable_sort(mutable: true, &.sort_by!(&.value)) end end diff --git a/spec/std/slice_spec.cr b/spec/std/slice_spec.cr index 4db8d9c2ba1f..86b2a0dcd837 100644 --- a/spec/std/slice_spec.cr +++ b/spec/std/slice_spec.cr @@ -22,6 +22,27 @@ private class Spaceship end end +private def is_stable_sort(mutable, &block) + n = 42 + # [Spaceship.new(0), ..., Spaceship.new(n - 1), Spaceship.new(0), ..., Spaceship.new(n - 1)] + slice = Slice.new(n * 2) { |i| Spaceship.new((i % n).to_f) } + # [Spaceship.new(0), Spaceship.new(0), ..., Spaceship.new(n - 1), Spaceship.new(n - 1)] + expected = Slice.new(n * 2) { |i| slice[i % 2 * n + i // 2] } + + if mutable + yield slice + result = slice + else + result = yield slice + result.should_not eq(slice) + end + + result.size.should eq(expected.size) + expected.zip(result) do |exp, res| + res.should be(exp) # reference-equality is necessary to check sorting is stable. 
+ end +end + describe "Slice" do it "gets pointer and size" do pointer = Pointer.malloc(1, 0) @@ -559,93 +580,157 @@ describe "Slice" do end describe "sort" do - it "sort without block" do - slice = Slice[3, 4, 1, 2, 5, 6] - sorted_slice = slice.sort - sorted_slice.to_a.should eq([1, 2, 3, 4, 5, 6]) - slice.should_not eq(sorted_slice) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sort without block" do + slice = Slice[3, 4, 1, 2, 5, 6] + sorted_slice = slice.sort(stable: stable) + sorted_slice.to_a.should eq([1, 2, 3, 4, 5, 6]) + slice.should_not eq(sorted_slice) + end + + it "sort with a block" do + a = Slice["foo", "a", "hello"] + b = a.sort(stable: stable) { |x, y| x.size <=> y.size } + b.to_a.should eq(["a", "foo", "hello"]) + a.should_not eq(b) + end + end end - it "sort with a block" do - a = Slice["foo", "a", "hello"] - b = a.sort { |x, y| x.size <=> y.size } - b.to_a.should eq(["a", "foo", "hello"]) - a.should_not eq(b) + it "stable sort without block" do + is_stable_sort(mutable: false, &.sort(stable: true)) end - end - describe "sort!" do - it "sort! without block" do - a = [3, 4, 1, 2, 5, 6] - a.sort! - a.should eq([1, 2, 3, 4, 5, 6]) + it "stable sort with a block" do + is_stable_sort(mutable: false, &.sort(stable: true) { |a, b| a.value <=> b.value }) end - it "sort! with a block" do - a = ["foo", "a", "hello"] - a.sort! { |x, y| x.size <=> y.size } - a.should eq(["a", "foo", "hello"]) + it "default is stable (without block)" do + is_stable_sort(mutable: false, &.sort) end - it "sorts with invalid block (#4379)" do - a = [1] * 17 - b = a.sort { -1 } - a.should eq(b) + it "default is stable (with a block)" do + is_stable_sort(mutable: false, &.sort { |a, b| a.value <=> b.value }) end + end - it "can sort! just by using <=> (#6608)" do - spaceships = Slice[ - Spaceship.new(2), - Spaceship.new(0), - Spaceship.new(1), - Spaceship.new(3), - ] - - spaceships.sort! 
- 4.times do |i| - spaceships[i].value.should eq(i) + describe "sort!" do + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sort! without block" do + a = [3, 4, 1, 2, 5, 6] + a.sort!(stable: stable) + a.should eq([1, 2, 3, 4, 5, 6]) + end + + it "sort! with a block" do + a = ["foo", "a", "hello"] + a.sort!(stable: stable) { |x, y| x.size <=> y.size } + a.should eq(["a", "foo", "hello"]) + end + + it "sorts with invalid block (#4379)" do + a = [1] * 17 + b = a.sort(stable: stable) { -1 } + a.should eq(b) + end + + it "can sort! just by using <=> (#6608)" do + spaceships = Slice[ + Spaceship.new(2), + Spaceship.new(0), + Spaceship.new(1), + Spaceship.new(3), + ] + + spaceships.sort!(stable: stable) + 4.times do |i| + spaceships[i].value.should eq(i) + end + end + + it "raises if <=> returns nil" do + spaceships = Slice[ + Spaceship.new(2, return_nil: true), + Spaceship.new(0, return_nil: true), + ] + + expect_raises(ArgumentError) do + spaceships.sort!(stable: stable) + end + end + + it "raises if sort! block returns nil" do + expect_raises(ArgumentError) do + Slice[1, 2].sort!(stable: stable) { nil } + end + end end end - it "raises if <=> returns nil" do - spaceships = Slice[ - Spaceship.new(2, return_nil: true), - Spaceship.new(0, return_nil: true), - ] + it "stable sort! without block" do + is_stable_sort(mutable: true, &.sort!(stable: true)) + end - expect_raises(ArgumentError) do - spaceships.sort! - end + it "stable sort! with a block" do + is_stable_sort(mutable: true, &.sort!(stable: true) { |a, b| a.value <=> b.value }) end - it "raises if sort! block returns nil" do - expect_raises(ArgumentError) do - Slice[1, 2].sort! { nil } - end + it "default is stable (without block)" do + is_stable_sort(mutable: true, &.sort!) + end + + it "default is stable (with a block)" do + is_stable_sort(mutable: true, &.sort! 
{ |a, b| a.value <=> b.value }) end end describe "sort_by" do - it "sorts by" do - a = Slice["foo", "a", "hello"] - b = a.sort_by &.size - b.to_a.should eq(["a", "foo", "hello"]) - a.should_not eq(b) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sorts by" do + a = Slice["foo", "a", "hello"] + b = a.sort_by(stable: stable, &.size) + b.to_a.should eq(["a", "foo", "hello"]) + a.should_not eq(b) + end + end + end + + it "stable sort by" do + is_stable_sort(mutable: false, &.sort_by(stable: true, &.value)) + end + + it "default is stable" do + is_stable_sort(mutable: false, &.sort_by(&.value)) end end describe "sort_by!" do - it "sorts by!" do - a = Slice["foo", "a", "hello"] - a.sort_by! &.size - a.to_a.should eq(["a", "foo", "hello"]) + [true, false].each do |stable| + describe "stable: #{stable}" do + it "sorts by!" do + a = Slice["foo", "a", "hello"] + a.sort_by!(stable: stable, &.size) + a.to_a.should eq(["a", "foo", "hello"]) + end + + it "calls given block exactly once for each element" do + calls = Hash(String, Int32).new(0) + a = Slice["foo", "a", "hello"] + a.sort_by!(stable: stable) { |e| calls[e] += 1; e.size } + calls.should eq({"foo" => 1, "a" => 1, "hello" => 1}) + end + end + end + + it "stable sort by!" do + is_stable_sort(mutable: true, &.sort_by!(stable: true, &.value)) end - it "calls given block exactly once for each element" do - calls = Hash(String, Int32).new(0) - a = Slice["foo", "a", "hello"] - a.sort_by! { |e| calls[e] += 1; e.size } - calls.should eq({"foo" => 1, "a" => 1, "hello" => 1}) + it "default is stable" do + is_stable_sort(mutable: true, &.sort_by!(&.value)) end end diff --git a/src/array.cr b/src/array.cr index 617c10a46d58..066077c5ecff 100644 --- a/src/array.cr +++ b/src/array.cr @@ -1704,8 +1704,8 @@ class Array(T) # a.sort # => [1, 2, 3] # a # => [3, 1, 2] # ``` - def sort : Array(T) - dup.sort! 
+ def sort(*, stable : Bool = true) : Array(T) + dup.sort!(stable: stable) end # Returns a new array with all elements sorted based on the comparator in the @@ -1722,12 +1722,12 @@ class Array(T) # b # => [3, 2, 1] # a # => [3, 1, 2] # ``` - def sort(&block : T, T -> U) : Array(T) forall U + def sort(*, stable : Bool = true, &block : T, T -> U) : Array(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - dup.sort! &block + dup.sort!(stable: stable, &block) end # Modifies `self` by sorting all elements based on the return value of their @@ -1738,8 +1738,8 @@ class Array(T) # a.sort! # a # => [1, 2, 3] # ``` - def sort! : Array(T) - Slice.new(to_unsafe, size).sort! + def sort!(*, stable : Bool = true) : Array(T) + Slice.new(to_unsafe, size).sort!(stable: stable) self end @@ -1756,12 +1756,12 @@ class Array(T) # a.sort! { |a, b| b <=> a } # a # => [3, 2, 1] # ``` - def sort!(&block : T, T -> U) : Array(T) forall U + def sort!(*, stable : Bool = true, &block : T, T -> U) : Array(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - Slice.new(to_unsafe, size).sort!(&block) + Slice.new(to_unsafe, size).sort!(stable: stable, &block) self end @@ -1775,8 +1775,8 @@ class Array(T) # b # => ["fig", "pear", "apple"] # a # => ["apple", "pear", "fig"] # ``` - def sort_by(&block : T -> _) : Array(T) - dup.sort_by! { |e| yield(e) } + def sort_by(*, stable : Bool = true, &block : T -> _) : Array(T) + dup.sort_by!(stable: stable) { |e| yield(e) } end # Modifies `self` by sorting all elements. The given block is called for @@ -1788,8 +1788,8 @@ class Array(T) # a.sort_by! { |word| word.size } # a # => ["fig", "pear", "apple"] # ``` - def sort_by!(&block : T -> _) : Array(T) - sorted = map { |e| {e, yield(e)} }.sort! 
{ |x, y| x[1] <=> y[1] } + def sort_by!(*, stable : Bool = true, &block : T -> _) : Array(T) + sorted = map { |e| {e, yield(e)} }.sort!(stable: stable) { |x, y| x[1] <=> y[1] } @size.times do |i| @buffer[i] = sorted.to_unsafe[i][0] end diff --git a/src/compiler/crystal/tools/doc/type.cr b/src/compiler/crystal/tools/doc/type.cr index 5b866670f390..faa09f51bf50 100644 --- a/src/compiler/crystal/tools/doc/type.cr +++ b/src/compiler/crystal/tools/doc/type.cr @@ -176,7 +176,7 @@ class Crystal::Doc::Type defs << method(def_with_metadata.def, false) end end - stable_sort! defs, &.name.downcase + defs.sort_by!(stable: true, &.name.downcase) end end end @@ -201,7 +201,7 @@ class Crystal::Doc::Type end end end - stable_sort! class_methods, &.name.downcase + class_methods.sort_by!(stable: true, &.name.downcase) end end @@ -225,7 +225,7 @@ class Crystal::Doc::Type end end end - stable_sort! macros, &.name.downcase + macros.sort_by!(stable: true, &.name.downcase) end end @@ -844,10 +844,4 @@ class Crystal::Doc::Type def annotations(annotation_type) @type.annotations(annotation_type) end - - private def stable_sort!(list) - # TODO: use #10163 instead - i = 0 - list.sort_by! { |elem| {yield(elem), i += 1} } - end end diff --git a/src/slice.cr b/src/slice.cr index e2ab2a010c02..e33c1e40b7ae 100644 --- a/src/slice.cr +++ b/src/slice.cr @@ -690,8 +690,8 @@ struct Slice(T) # a.sort # => Slice[1, 2, 3] # a # => Slice[3, 1, 2] # ``` - def sort : Slice(T) - dup.sort! + def sort(*, stable : Bool = true) : Slice(T) + dup.sort!(stable: stable) end # Returns a new slice with all elements sorted based on the comparator in the @@ -708,12 +708,12 @@ struct Slice(T) # b # => Slice[3, 2, 1] # a # => Slice[3, 1, 2] # ``` - def sort(&block : T, T -> U) : Slice(T) forall U + def sort(*, stable : Bool = true, &block : T, T -> U) : Slice(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - dup.sort! 
&block + dup.sort!(stable: stable, &block) end # Modifies `self` by sorting all elements based on the return value of their @@ -724,8 +724,12 @@ struct Slice(T) # a.sort! # a # => Slice[1, 2, 3] # ``` - def sort! : Slice(T) - Slice.intro_sort!(to_unsafe, size) + def sort!(*, stable : Bool = true) : Slice(T) + if stable + Slice.merge_sort!(self) + else + Slice.intro_sort!(to_unsafe, size) + end self end @@ -742,12 +746,16 @@ struct Slice(T) # a.sort! { |a, b| b <=> a } # a # => Slice[3, 2, 1] # ``` - def sort!(&block : T, T -> U) : Slice(T) forall U + def sort!(*, stable : Bool = true, &block : T, T -> U) : Slice(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - Slice.intro_sort!(to_unsafe, size, block) + if stable + Slice.merge_sort!(self, block) + else + Slice.intro_sort!(to_unsafe, size, block) + end self end @@ -761,8 +769,8 @@ struct Slice(T) # b # => Slice["fig", "pear", "apple"] # a # => Slice["apple", "pear", "fig"] # ``` - def sort_by(&block : T -> _) : Slice(T) - dup.sort_by! { |e| yield(e) } + def sort_by(*, stable : Bool = true, &block : T -> _) : Slice(T) + dup.sort_by!(stable: stable) { |e| yield(e) } end # Modifies `self` by sorting all elements. The given block is called for @@ -774,8 +782,8 @@ struct Slice(T) # a.sort_by! { |word| word.size } # a # => Slice["fig", "pear", "apple"] # ``` - def sort_by!(&block : T -> _) : Slice(T) - sorted = map { |e| {e, yield(e)} }.sort! { |x, y| x[1] <=> y[1] } + def sort_by!(*, stable : Bool = true, &block : T -> _) : Slice(T) + sorted = map { |e| {e, yield(e)} }.sort!(stable: stable) { |x, y| x[1] <=> y[1] } size.times do |i| to_unsafe[i] = sorted.to_unsafe[i][0] end diff --git a/src/slice/sort.cr b/src/slice/sort.cr index 6c9d9e249994..a25057af94c7 100644 --- a/src/slice/sort.cr +++ b/src/slice/sort.cr @@ -206,4 +206,357 @@ struct Slice(T) raise ArgumentError.new("Comparison of #{v1} and #{v2} failed") if v.nil? 
v end + + # The stable sort implementation is ported from Rust. + # https://github.com/rust-lang/rust/blob/507bff92fadf1f25a830da5065a5a87113345163/library/alloc/src/slice.rs + # + # Rust License (MIT): + # + # Permission is hereby granted, free of charge, to any + # person obtaining a copy of this software and associated + # documentation files (the "Software"), to deal in the + # Software without restriction, including without + # limitation the rights to use, copy, modify, merge, + # publish, distribute, sublicense, and/or sell copies of + # the Software, and to permit persons to whom the Software + # is furnished to do so, subject to the following + # conditions: + # + # The above copyright notice and this permission notice + # shall be included in all copies or substantial portions + # of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + # ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + # TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + # SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + # IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + # DEALINGS IN THE SOFTWARE. + + # Slices of up to this length get sorted using insertion sort. + private MAX_INSERTION = 10 + + # Very short runs are extended using insertion sort to span at least this many elements. + private MIN_RUN = 10 + + # This merge sort borrows some (but not all) ideas from TimSort, which is described in detail + # [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt). + # + # The algorithm identifies strictly descending and non-descending subsequences, which are called + # natural runs. There is a stack of pending runs yet to be merged. 
Each newly found run is pushed + # onto the stack, and then some pairs of adjacent runs are merged until these two invariants are + # satisfied: + # + # 1. for every `i` in `1...runs.size`: `runs[i - 1].size > runs[i].size` + # 2. for every `i` in `2...runs.size`: `runs[i - 2].size > runs[i - 1].size + runs[i].size` + # + # The invariants ensure that the total running time is `O(n * log(n))` worst-case. + protected def self.merge_sort!(v : Slice(T)) forall T + size = v.size + + # Short arrays get sorted in-place via insertion sort to avoid allocations. + if size <= MAX_INSERTION + if size >= 2 + (size - 1).downto(0) { |i| insert_head!(v[i..]) } + end + return + end + + # Allocate a buffer to use as scratch memory. (The Rust original keeps this buffer's length at 0 + # so that destructors never run on the shallow copies stored in it if the comparison panics; + # that concern does not apply to the raw `Pointer(T)` used here.) When merging two sorted runs, this buffer holds a copy of the shorter run, + # which will always have length at most `size // 2`. + buf = Pointer(T).malloc(size // 2) + + # In order to identify natural runs in `v`, we traverse it backwards. That might seem like a + # strange decision, but consider the fact that merges more often go in the opposite direction + # (forwards). According to benchmarks, merging forwards is slightly faster than merging + # backwards. To conclude, identifying runs by traversing backwards improves performance. + runs = [] of Range(Int32, Int32) + last = size + while last > 0 + # Find the next natural run, and reverse it if it's strictly descending. + start = last - 1 + if start > 0 + start -= 1 + if cmp(v[start + 1], v[start]) < 0 + while start > 0 && cmp(v[start], v[start - 1]) < 0 + start -= 1 + end + v[start...last].reverse! + else + while start > 0 && cmp(v[start], v[start - 1]) > 0 + start -= 1 + end + end + end + + # Insert some more elements into the run if it's too short. Insertion sort is faster than + # merge sort on short sequences, so this significantly improves performance. 
+ while start > 0 && last - start < MIN_RUN + start -= 1 + insert_head!(v[start...last]) + end + + # Push this run onto the stack. + runs.push(start...last) + last = start + + # Merge some pairs of adjacent runs to satisfy the invariants. + while r = collapse(runs) + left = runs[r + 1] + right = runs[r] + merge!(v[left.begin...right.end], left.size, buf) + runs[r] = left.begin...right.end + runs.delete_at(r + 1) + end + end + end + + # Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted. + # + # This is the integral subroutine of insertion sort. + protected def self.insert_head!(v) + if v.size >= 2 && cmp(v[1], v[0]) < 0 + x, v[0] = v[0], v[1] + (2...v.size).each do |i| + if cmp(v[i], x) < 0 + v[i - 1] = v[i] + else + v[i - 1] = x + return + end + end + v[v.size - 1] = x + end + end + + # Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and + # stores the result into `v[..]`. + protected def self.merge!(v, mid, buf) + size = v.size + + if mid <= size - mid + # The left run is shorter. + buf.copy_from(v.to_unsafe, mid) + + left = 0 + right = mid + out = v.to_unsafe + + while left < mid && right < size + # Consume the lesser side. + # If equal, prefer the left run to maintain stability. + if cmp(v[right], buf[left]) < 0 + out.value = v[right] + out += 1 + right += 1 + else + out.value = buf[left] + out += 1 + left += 1 + end + end + + out.copy_from(buf + left, mid - left) + else + # The right run is shorter. + buf.copy_from((v + mid).to_unsafe, size - mid) + + left = mid + right = size - mid + out = v.to_unsafe + size + + while left > 0 && right > 0 + # Consume the greater side. + # If equal, prefer the right run to maintain stability. 
+ if cmp(buf[right - 1], v[left - 1]) < 0 + left -= 1 + out -= 1 + out.value = v[left] + else + right -= 1 + out -= 1 + out.value = buf[right] + end + end + + (v + left).copy_from(buf, right) + end + end + + # This merge sort borrows some (but not all) ideas from TimSort, which is described in detail + # [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt). + # + # The algorithm identifies strictly descending and non-descending subsequences, which are called + # natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed + # onto the stack, and then some pairs of adjacent runs are merged until these two invariants are + # satisfied: + # + # 1. for every `i` in `1...runs.size`: `runs[i - 1].size > runs[i].size` + # 2. for every `i` in `2...runs.size`: `runs[i - 2].size > runs[i - 1].size + runs[i].size` + # + # The invariants ensure that the total running time is `O(n * log(n))` worst-case. + protected def self.merge_sort!(v : Slice(T), comp) forall T + size = v.size + + # Short arrays get sorted in-place via insertion sort to avoid allocations. + if size <= MAX_INSERTION + if size >= 2 + (size - 1).downto(0) { |i| insert_head!(v[i..], comp) } + end + return + end + + # Allocate a buffer to use as scratch memory. (The Rust original keeps this buffer's length at 0 + # so that destructors never run on the shallow copies stored in it if the comparison panics; + # that concern does not apply to the raw `Pointer(T)` used here.) When merging two sorted runs, this buffer holds a copy of the shorter run, + # which will always have length at most `size // 2`. + buf = Pointer(T).malloc(size // 2) + + # In order to identify natural runs in `v`, we traverse it backwards. That might seem like a + # strange decision, but consider the fact that merges more often go in the opposite direction + # (forwards). According to benchmarks, merging forwards is slightly faster than merging + # backwards. To conclude, identifying runs by traversing backwards improves performance. 
+ runs = [] of Range(Int32, Int32) + last = size + while last > 0 + # Find the next natural run, and reverse it if it's strictly descending. + start = last - 1 + if start > 0 + start -= 1 + if cmp(v[start + 1], v[start], comp) < 0 + while start > 0 && cmp(v[start], v[start - 1], comp) < 0 + start -= 1 + end + v[start...last].reverse! + else + while start > 0 && cmp(v[start], v[start - 1], comp) > 0 + start -= 1 + end + end + end + + # Insert some more elements into the run if it's too short. Insertion sort is faster than + # merge sort on short sequences, so this significantly improves performance. + while start > 0 && last - start < MIN_RUN + start -= 1 + insert_head!(v[start...last], comp) + end + + # Push this run onto the stack. + runs.push(start...last) + last = start + + # Merge some pairs of adjacent runs to satisfy the invariants. + while r = collapse(runs) + left = runs[r + 1] + right = runs[r] + merge!(v[left.begin...right.end], left.size, buf, comp) + runs[r] = left.begin...right.end + runs.delete_at(r + 1) + end + end + end + + # Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted. + # + # This is the integral subroutine of insertion sort. + protected def self.insert_head!(v, comp) + if v.size >= 2 && cmp(v[1], v[0], comp) < 0 + x, v[0] = v[0], v[1] + (2...v.size).each do |i| + if cmp(v[i], x, comp) < 0 + v[i - 1] = v[i] + else + v[i - 1] = x + return + end + end + v[v.size - 1] = x + end + end + + # Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and + # stores the result into `v[..]`. + protected def self.merge!(v, mid, buf, comp) + size = v.size + + if mid <= size - mid + # The left run is shorter. + buf.copy_from(v.to_unsafe, mid) + + left = 0 + right = mid + out = v.to_unsafe + + while left < mid && right < size + # Consume the lesser side. + # If equal, prefer the left run to maintain stability. 
+ if cmp(v[right], buf[left], comp) < 0 + out.value = v[right] + out += 1 + right += 1 + else + out.value = buf[left] + out += 1 + left += 1 + end + end + + out.copy_from(buf + left, mid - left) + else + # The right run is shorter. + buf.copy_from((v + mid).to_unsafe, size - mid) + + left = mid + right = size - mid + out = v.to_unsafe + size + + while left > 0 && right > 0 + # Consume the greater side. + # If equal, prefer the right run to maintain stability. + if cmp(buf[right - 1], v[left - 1], comp) < 0 + left -= 1 + out -= 1 + out.value = v[left] + else + right -= 1 + out -= 1 + out.value = buf[right] + end + end + + (v + left).copy_from(buf, right) + end + end + + # Examines the stack of runs and identifies the next pair of runs to merge. More specifically, + # if `r` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the + # algorithm should continue building a new run instead, `nil` is returned. + # + # TimSort is infamous for its buggy implementations, as described here: + # http://envisage-project.eu/timsort-specification-and-verification/ + # + # The gist of the story is: we must enforce the invariants on the top four runs on the stack. + # Enforcing them on just top three is not sufficient to ensure that the invariants will still + # hold for *all* runs in the stack. + # + # This function correctly checks invariants for the top four runs. Additionally, if the top + # run starts at index 0, it will always demand a merge operation until the stack is fully + # collapsed, in order to complete the sort. + @[AlwaysInline] + protected def self.collapse(runs) + n = runs.size + if n >= 2 && + (runs[n - 1].begin == 0 || + runs[n - 2].size <= runs[n - 1].size || + (n >= 3 && runs[n - 3].size <= runs[n - 2].size + runs[n - 1].size) || + (n >= 4 && runs[n - 4].size <= runs[n - 3].size + runs[n - 2].size)) + n >= 3 && runs[n - 3].size < runs[n - 1].size ? n - 3 : n - 2 + end + end end