From 6ed2eb4309667aa3db63b02588b14331d94fb0a4 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Sat, 24 Feb 2018 15:39:52 +0100 Subject: [PATCH 01/10] Add first version of RFC "Add `is_sorted`" --- text/0000-is-sorted.md | 253 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 text/0000-is-sorted.md diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md new file mode 100644 index 00000000000..c96f644139e --- /dev/null +++ b/text/0000-is-sorted.md @@ -0,0 +1,253 @@ +- Feature Name: is_sorted +- Start Date: 2018-02-24 +- RFC PR: (leave this empty) +- Rust Issue: (leave this empty) + +# Summary +[summary]: #summary + +Add the methods `is_sorted`, `is_sorted_by` and `is_sorted_by_key` to `[T]` and +`Iterator`. + +# Motivation +[motivation]: #motivation + +In quite a few situations, one needs to check whether a sequence of elements +is sorted. The most important use cases are probably **unit tests** and +**pre-/post-condition checks**. + +The lack of an `is_sorted()` function in Rust's standard library has led to +[countless crates implementing their own](https://github.com/search?l=Rust&q=%22fn+is_sorted%22&type=Code&utf8=%E2%9C%93). +While it is possible to write a one-liner using iterators (e.g. +`(0..arr.len() - 1).all(|i| arr[i] < arr[i - 1])`), it is still unnecessary +overhead while writing *and* reading the code. + +In [the corresponding issue on the main repository](https://github.com/rust-lang/rust/issues/44370) +(from which I will reference a few comments) everyone seems to agree on the +basic premise: we want such a function. + +Having `is_sorted()` and friends in the standard library would: +- prevent people from spending time on writing their own, +- improve readbility of the code by clearly showing the author's intent, +- and encourage to write more unit tests and/or pre-/post-condition checks. + +Another proof of the usefulness of such a function is the inclusion in the +standard library of many other languages: +C++'s [`std::is_sorted`](http://en.cppreference.com/w/cpp/algorithm/is_sorted), +Go's [`sort.IsSorted`](https://golang.org/pkg/sort/#IsSorted), +D's [`std.algorithm.sorting.is_sorted`](https://dlang.org/library/std/algorithm/sorting/is_sorted.html) +and others. (Curiously, many (mostly) more high-level programming language – +like Ruby, Javascript, Java, Haskell and Python – seem to lack such a function.) + + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +Possible documentation of the three new methods of `Iterator`: + +> ```rust +> fn is_sorted(self) -> bool +> where +> Self::Item: Ord; +> ``` +> Checks if the elements of this iterator are sorted. +> +> That is, for each element `a` and its following element `b`, `a <= b` +> must hold. If the iterator yields exactly zero or one element, `true` +> is returned. +> +> ## Example +> +> ```rust +> assert!([1, 2, 2, 9].iter().is_sorted()); +> assert!(![1, 3, 2, 4).iter().is_sorted()); +> assert!([0].iter().is_sorted()); +> assert!(std::iter::empty::().is_sorted()); +> ``` +> --- +> +> ```rust +> fn is_sorted_by(self, compare: F) -> bool +> where +> F: FnMut(&Self::Item, &Self::Item) -> Ordering; +> ``` +> Checks if the elements of this iterator are sorted using the given +> comparator function. +> +> Instead of using `Ord::cmp`, this function uses the given `compare` +> function to determine the ordering of two elements. Apart from that, +> it's equivalent to `is_sorted`; see its documentation for more +> information. +> +> --- +> +> ```rust +> fn is_sorted_by_key(self, f: F) -> bool +> where +> F: FnMut(&Self::Item) -> K, +> K: Ord; +> ``` +> Checks if the elements of this iterator are sorted using the given +> key extraction function. +> +> Instead of comparing the iterator's elements directly, this function +> compares the keys of the elements, as determined by `f`. Apart from +> that, it's equivalent to `is_sorted`; see its documentation for more +> information. +> +> ## Example +> +> ```rust +> assert!(["c", "bb", "aaa"].iter().is_sorted_by_key(|s| s.len())); +> assert!(![-2i32, -1, 0, 3].iter().is_sorted_by_key(|n| n.abs())); +> ``` + +The methods for `[T]` will have similar documentations. + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +This RFC proposes to add the following six methods: + +```rust +impl [T] { + fn is_sorted(&self) -> bool + where + T: Ord, + { ... } + + fn is_sorted_by(&self, compare: F) -> bool + where + F: FnMut(&T, &T) -> Ordering, + { ... } + + fn is_sorted_by_key(&self, f: F) -> bool + where + F: FnMut(&Self::Item) -> K, + K: Ord, + { ... } +} + +trait Iterator { + fn is_sorted(self) -> bool + where + Self::Item: Ord, + { ... } + + fn is_sorted_by(mut self, compare: F) -> bool + where + F: FnMut(&Self::Item, &Self::Item) -> Ordering, + { ... } + + fn is_sorted_by_key(self, mut f: F) -> bool + where + F: FnMut(&Self::Item) -> K, + K: Ord, + { ... } +} +``` + +In addition to the changes shown above, the three methods should also be added +to `core::slice::SliceExt` as they don't require heap allocations. + +To repeat the exact semantics from the prior section: the methods return `true` +if and only if for each element `a` and its following element `b`, the +condition `a <= b` holds. For slices/iterators with zero or one element, `true` +is returned. + +A note about implementation: it's sufficient to only do real work in +`Iterator::is_sorted_by`. All other methods can be simply implemented by +(directly or indirectly) using `Iterator::is_sorted_by`. A sample +implementation can be found [here](https://play.rust-lang.org/?gist=8ad7ece1c99e68ee2f5c3ec2de7767b2&version=stable). + + +# Drawbacks +[drawbacks]: #drawbacks + +It increases the size of the standard library by a tiny bit. + +# Rationale and alternatives +[alternatives]: #alternatives + +### Only add the three methods to `Iterator`, but not to `[T]` +Not being able to call `is_sorted()` on a slice directly is not terrible: from +a slice, one can easily obtain an iterator via `iter()`. So instead of +`v.is_sorted()`, one would need to write `v.iter().is_sorted()`. + +This always works for `is_sorted()` because of the `Ord` blanket impl which +implements `Ord` for all references to an `Ord` type. For `is_sorted_by` and +`is_sorted_by_key` it would introduce an additional reference to the closures' +arguments (i.e. `v.iter().is_sorted_by_key(|x| ...))` where `x` is `&&T`). But +again, this is not a disaster. + +However, being able to call those three methods on slices (and everything that +can be dereferences to a slice) directly, is a lot more ergonomic (especially +given the popularity of slice-like data structures, like `Vec`). +Additionally, the `sort` method and friends is defined for slices, thus one +might expect the `is_sorted()` methode there, too. + + +### Add the three methods to additional data structures (like `LinkedList`) as well +Adding these methods to every data structure in the standard libary is a lot of +duplicate code. Optimally, we would have a trait that represents sequential +data structures and would only add `is_sorted` and friends to said trait. We +don't have such a trait right now; `Iterator` is the next best thing. Slices +deserve a special treatment due to the reasons mentioned above (popularity and +`sort()`). + + +### `Iterator::while_sorted`, `is_sorted_until`, `sorted_prefix`, `num_sorted`, ... +[In the issue on the main repository](https://github.com/rust-lang/rust/issues/44370), +concerns about completely consuming the iterator were raised. Some alternatives, +such as [`while_sorted`](https://github.com/rust-lang/rust/issues/44370#issuecomment-327873139), +were suggested. However, consuming the iterator is not really uncommon nor a +problem. Methods like `count()`, `max()` and many more consume the iterator, +too. [One comment](https://github.com/rust-lang/rust/issues/44370#issuecomment-344516366) mentions: + +> I am a bit skeptical of the equivalent on Iterator just because the return +> value does not seem actionable -- you aren't going to "sort" the iterator +> after you find out it is not already sorted. What are some use cases for this +> in real code that does not involve iterating over a slice? + +As mentioned above, `Iterator` is the next best thing to a trait representing +sequential data structures. So to check if a `LinkedList`, `VecDeque` or +another sequential data structure is sorted, one would simply call +`collection.iter().is_sorted()`. It's likely that this is the main usage for +`Iterator`'s `is_sorted` methods. Additionally, code like +`if v.is_sorted() { v.sort(); }` is not very useful: `sort()` runs in O(n) for +already sorted arrays. + +Suggestions like `is_sorted_until` are not really useful either: one can easily +get a subslice or a part of an iterator (via `.take()`) and call `is_sorted()` +on that part. + + +# Unresolved questions +[unresolved]: #unresolved-questions + + +### Is `Iterator::is_sorted_by_key` useless? +[One comment in the corresponding issue](https://github.com/rust-lang/rust/issues/44370#issuecomment-327740685) +mentions that `Iterator::is_sorted_by_key` is not really necessary, given that +you can simply call `map()` beforehand. This is true, but it migh still be +favourable to include said function for consistency and ease of use. The +standard library already hosts a number of sorting-related functions which all +come in three flavours: *raw*, `_by` and `_by_key`. By now, programmers would +probably expect there to be an `is_sorted_by_key` as well. + +### Add `std::cmp::is_sorted` instead +As suggested [here](https://github.com/rust-lang/rust/issues/44370#issuecomment-345495831), +one could also add this free function (plus the `_by` and `_by_key` versions) +to `std::cmp`: + +```rust +fn is_sorted(collection: C) -> bool +where + C: IntoIterator, + C::Item: Ord, +``` + +This can be seen as an better design as it avoids the question about which data +structure should get `is_sorted` methods. However, it might have the +disadvantage of being less discoverable and also less convenient (long path or +import). From 04e6c33a03d3432ec47056c08f331e9ab1f36138 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Sat, 24 Feb 2018 19:20:02 +0100 Subject: [PATCH 02/10] Minor improvements to "add-is-sorted" RFC --- text/0000-is-sorted.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index c96f644139e..80096db46c0 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -17,9 +17,9 @@ is sorted. The most important use cases are probably **unit tests** and **pre-/post-condition checks**. The lack of an `is_sorted()` function in Rust's standard library has led to -[countless crates implementing their own](https://github.com/search?l=Rust&q=%22fn+is_sorted%22&type=Code&utf8=%E2%9C%93). +[countless programmers implementing their own](https://github.com/search?l=Rust&q=%22fn+is_sorted%22&type=Code&utf8=%E2%9C%93). While it is possible to write a one-liner using iterators (e.g. -`(0..arr.len() - 1).all(|i| arr[i] < arr[i - 1])`), it is still unnecessary +`(0..arr.len() - 1).all(|i| arr[i] < arr[i + 1])`), it is still unnecessary overhead while writing *and* reading the code. In [the corresponding issue on the main repository](https://github.com/rust-lang/rust/issues/44370) @@ -48,7 +48,7 @@ Possible documentation of the three new methods of `Iterator`: > ```rust > fn is_sorted(self) -> bool > where -> Self::Item: Ord; +> Self::Item: Ord, > ``` > Checks if the elements of this iterator are sorted. > @@ -69,7 +69,7 @@ Possible documentation of the three new methods of `Iterator`: > ```rust > fn is_sorted_by(self, compare: F) -> bool > where -> F: FnMut(&Self::Item, &Self::Item) -> Ordering; +> F: FnMut(&Self::Item, &Self::Item) -> Ordering, > ``` > Checks if the elements of this iterator are sorted using the given > comparator function. @@ -85,7 +85,7 @@ Possible documentation of the three new methods of `Iterator`: > fn is_sorted_by_key(self, f: F) -> bool > where > F: FnMut(&Self::Item) -> K, -> K: Ord; +> K: Ord, > ``` > Checks if the elements of this iterator are sorted using the given > key extraction function. @@ -107,7 +107,7 @@ The methods for `[T]` will have similar documentations. # Reference-level explanation [reference-level-explanation]: #reference-level-explanation -This RFC proposes to add the following six methods: +This RFC proposes to add the following three methods to `[T]` (slices) and `Iterator`: ```rust impl [T] { @@ -156,7 +156,7 @@ condition `a <= b` holds. For slices/iterators with zero or one element, `true` is returned. A note about implementation: it's sufficient to only do real work in -`Iterator::is_sorted_by`. All other methods can be simply implemented by +`Iterator::is_sorted_by`. All other methods can simply be implemented by (directly or indirectly) using `Iterator::is_sorted_by`. A sample implementation can be found [here](https://play.rust-lang.org/?gist=8ad7ece1c99e68ee2f5c3ec2de7767b2&version=stable). @@ -183,7 +183,7 @@ again, this is not a disaster. However, being able to call those three methods on slices (and everything that can be dereferences to a slice) directly, is a lot more ergonomic (especially given the popularity of slice-like data structures, like `Vec`). -Additionally, the `sort` method and friends is defined for slices, thus one +Additionally, the `sort` method and friends are defined for slices, thus one might expect the `is_sorted()` methode there, too. @@ -191,7 +191,7 @@ might expect the `is_sorted()` methode there, too. Adding these methods to every data structure in the standard libary is a lot of duplicate code. Optimally, we would have a trait that represents sequential data structures and would only add `is_sorted` and friends to said trait. We -don't have such a trait right now; `Iterator` is the next best thing. Slices +don't have such a trait right now; so `Iterator` is the next best thing. Slices deserve a special treatment due to the reasons mentioned above (popularity and `sort()`). @@ -214,8 +214,8 @@ sequential data structures. So to check if a `LinkedList`, `VecDeque` or another sequential data structure is sorted, one would simply call `collection.iter().is_sorted()`. It's likely that this is the main usage for `Iterator`'s `is_sorted` methods. Additionally, code like -`if v.is_sorted() { v.sort(); }` is not very useful: `sort()` runs in O(n) for -already sorted arrays. +`if v.is_sorted() { v.sort(); }` is not very useful: `sort()` already runs in +O(n) for already sorted arrays. Suggestions like `is_sorted_until` are not really useful either: one can easily get a subslice or a part of an iterator (via `.take()`) and call `is_sorted()` @@ -231,8 +231,8 @@ on that part. mentions that `Iterator::is_sorted_by_key` is not really necessary, given that you can simply call `map()` beforehand. This is true, but it migh still be favourable to include said function for consistency and ease of use. The -standard library already hosts a number of sorting-related functions which all -come in three flavours: *raw*, `_by` and `_by_key`. By now, programmers would +standard library already hosts a number of sorting-related functions all of +which come in three flavours: *raw*, `_by` and `_by_key`. By now, programmers would probably expect there to be an `is_sorted_by_key` as well. ### Add `std::cmp::is_sorted` instead From 65199fa85d1b013105cf0923e55fc99862fa696f Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Sun, 25 Feb 2018 12:45:50 +0100 Subject: [PATCH 03/10] Additional fixes and clarifications (RFC "add is_sorted") --- text/0000-is-sorted.md | 53 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index 80096db46c0..e0546d7e72c 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -31,7 +31,7 @@ Having `is_sorted()` and friends in the standard library would: - improve readbility of the code by clearly showing the author's intent, - and encourage to write more unit tests and/or pre-/post-condition checks. -Another proof of the usefulness of such a function is the inclusion in the +Another proof of this functions' usefulness is the inclusion in the standard library of many other languages: C++'s [`std::is_sorted`](http://en.cppreference.com/w/cpp/algorithm/is_sorted), Go's [`sort.IsSorted`](https://golang.org/pkg/sort/#IsSorted), @@ -102,7 +102,7 @@ Possible documentation of the three new methods of `Iterator`: > assert!(![-2i32, -1, 0, 3].iter().is_sorted_by_key(|n| n.abs())); > ``` -The methods for `[T]` will have similar documentations. +The methods for `[T]` will have analogous documentations. # Reference-level explanation [reference-level-explanation]: #reference-level-explanation @@ -123,7 +123,7 @@ impl [T] { fn is_sorted_by_key(&self, f: F) -> bool where - F: FnMut(&Self::Item) -> K, + F: FnMut(&T) -> K, K: Ord, { ... } } @@ -170,29 +170,29 @@ It increases the size of the standard library by a tiny bit. [alternatives]: #alternatives ### Only add the three methods to `Iterator`, but not to `[T]` -Not being able to call `is_sorted()` on a slice directly is not terrible: from -a slice, one can easily obtain an iterator via `iter()`. So instead of +Without `is_sorted()` defined for slices directly, one can still fairly easily +test if a slice is sorted by obtaining an iterator via `iter()`. So instead of `v.is_sorted()`, one would need to write `v.iter().is_sorted()`. This always works for `is_sorted()` because of the `Ord` blanket impl which implements `Ord` for all references to an `Ord` type. For `is_sorted_by` and `is_sorted_by_key` it would introduce an additional reference to the closures' -arguments (i.e. `v.iter().is_sorted_by_key(|x| ...))` where `x` is `&&T`). But -again, this is not a disaster. +arguments (i.e. `v.iter().is_sorted_by_key(|x| ...))` where `x` is `&&T`). -However, being able to call those three methods on slices (and everything that -can be dereferences to a slice) directly, is a lot more ergonomic (especially -given the popularity of slice-like data structures, like `Vec`). -Additionally, the `sort` method and friends are defined for slices, thus one -might expect the `is_sorted()` methode there, too. +While these two inconveniences are not deal-breakers, being able to call those +three methods on slices (and all `Deref` types) directly, could be +favourable for many programmers (especially given the popularity of slice-like +data structures, like `Vec`). Additionally, the `sort` method and friends +are defined for slices, thus one might expect the `is_sorted()` method there, +too. ### Add the three methods to additional data structures (like `LinkedList`) as well Adding these methods to every data structure in the standard libary is a lot of duplicate code. Optimally, we would have a trait that represents sequential data structures and would only add `is_sorted` and friends to said trait. We -don't have such a trait right now; so `Iterator` is the next best thing. Slices -deserve a special treatment due to the reasons mentioned above (popularity and +don't have such a trait as of now; so `Iterator` is the next best thing. Slices +deserve special treatment due to the reasons mentioned above (popularity and `sort()`). @@ -200,7 +200,7 @@ deserve a special treatment due to the reasons mentioned above (popularity and [In the issue on the main repository](https://github.com/rust-lang/rust/issues/44370), concerns about completely consuming the iterator were raised. Some alternatives, such as [`while_sorted`](https://github.com/rust-lang/rust/issues/44370#issuecomment-327873139), -were suggested. However, consuming the iterator is not really uncommon nor a +were suggested. However, consuming the iterator is neither uncommon nor a problem. Methods like `count()`, `max()` and many more consume the iterator, too. [One comment](https://github.com/rust-lang/rust/issues/44370#issuecomment-344516366) mentions: @@ -227,15 +227,18 @@ on that part. ### Is `Iterator::is_sorted_by_key` useless? + [One comment in the corresponding issue](https://github.com/rust-lang/rust/issues/44370#issuecomment-327740685) mentions that `Iterator::is_sorted_by_key` is not really necessary, given that -you can simply call `map()` beforehand. This is true, but it migh still be +you can simply call `map()` beforehand. This is true, but it might still be favourable to include said function for consistency and ease of use. The standard library already hosts a number of sorting-related functions all of which come in three flavours: *raw*, `_by` and `_by_key`. By now, programmers would probably expect there to be an `is_sorted_by_key` as well. + ### Add `std::cmp::is_sorted` instead + As suggested [here](https://github.com/rust-lang/rust/issues/44370#issuecomment-345495831), one could also add this free function (plus the `_by` and `_by_key` versions) to `std::cmp`: @@ -247,7 +250,23 @@ where C::Item: Ord, ``` -This can be seen as an better design as it avoids the question about which data +This can be seen as a better design as it avoids the question about which data structure should get `is_sorted` methods. However, it might have the disadvantage of being less discoverable and also less convenient (long path or import). + + +### About the closure of `Iterator::is_sorted_by_key` + +The method `Iterator::is_sorted_by_key` as proposed above takes a closure +`F: FnMut(&Self::Item) -> K`. Since the iterator is consumed and – in theory – +one only needs to extract the key once per element, the closure could take +`Self::Item` by value instead of by reference. It is not immediately clear, +whether this would have any real advantages. + +It has the disadvantage of being a bit special: `is_sorted_by`'s closure *has +to* receive its arguments by reference, as do the closures of `[T]::is_sorted_by` +and `[T]::is_sorted_by_key`. Additionally, when taking `Self::Item` by value, +one can no longer implement `Iterator::is_sorted_by_key` with +`Iterator::is_sorted_by` but would have to write a new implementation, taking +care to call the key extraction method only once for each element. From 77310a3530c4451e011f029637d22ff5bceaa881 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Mon, 26 Feb 2018 13:26:18 +0100 Subject: [PATCH 04/10] Improve formulation (remove "I") --- text/0000-is-sorted.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index e0546d7e72c..970860849ed 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -23,7 +23,7 @@ While it is possible to write a one-liner using iterators (e.g. overhead while writing *and* reading the code. In [the corresponding issue on the main repository](https://github.com/rust-lang/rust/issues/44370) -(from which I will reference a few comments) everyone seems to agree on the +(from which a few comments are referenced) everyone seems to agree on the basic premise: we want such a function. Having `is_sorted()` and friends in the standard library would: From 1a4bfdc2b3479360da5aa9ca9f304832c292b8d7 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 1 Mar 2018 16:08:54 +0100 Subject: [PATCH 05/10] Update the "is_sorted" RFC to only require PartialOrd (instead of Ord) --- text/0000-is-sorted.md | 64 +++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index 970860849ed..e23247dfb06 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -48,7 +48,7 @@ Possible documentation of the three new methods of `Iterator`: > ```rust > fn is_sorted(self) -> bool > where -> Self::Item: Ord, +> Self::Item: PartialOrd, > ``` > Checks if the elements of this iterator are sorted. > @@ -56,6 +56,10 @@ Possible documentation of the three new methods of `Iterator`: > must hold. If the iterator yields exactly zero or one element, `true` > is returned. > +> Note that if `Self::Item` is only `PartialOrd`, but not `Ord`, the above +> definition implies that this function returns `false` if any two +> consecutive items are not comparable. +> > ## Example > > ```rust @@ -63,20 +67,21 @@ Possible documentation of the three new methods of `Iterator`: > assert!(![1, 3, 2, 4).iter().is_sorted()); > assert!([0].iter().is_sorted()); > assert!(std::iter::empty::().is_sorted()); +> assert!(![0.0, 1.0, std::f32::NAN].iter().is_sorted()); > ``` > --- > > ```rust > fn is_sorted_by(self, compare: F) -> bool > where -> F: FnMut(&Self::Item, &Self::Item) -> Ordering, +> F: FnMut(&Self::Item, &Self::Item) -> Option, > ``` > Checks if the elements of this iterator are sorted using the given > comparator function. > -> Instead of using `Ord::cmp`, this function uses the given `compare` -> function to determine the ordering of two elements. Apart from that, -> it's equivalent to `is_sorted`; see its documentation for more +> Instead of using `PartialOrd::partial_cmp`, this function uses the given +> `compare` function to determine the ordering of two elements. Apart from +> that, it's equivalent to `is_sorted`; see its documentation for more > information. > > --- @@ -85,7 +90,7 @@ Possible documentation of the three new methods of `Iterator`: > fn is_sorted_by_key(self, f: F) -> bool > where > F: FnMut(&Self::Item) -> K, -> K: Ord, +> K: PartialOrd, > ``` > Checks if the elements of this iterator are sorted using the given > key extraction function. @@ -113,36 +118,36 @@ This RFC proposes to add the following three methods to `[T]` (slices) and `Iter impl [T] { fn is_sorted(&self) -> bool where - T: Ord, + T: PartialOrd, { ... } fn is_sorted_by(&self, compare: F) -> bool where - F: FnMut(&T, &T) -> Ordering, + F: FnMut(&T, &T) -> Option, { ... } fn is_sorted_by_key(&self, f: F) -> bool where F: FnMut(&T) -> K, - K: Ord, + K: PartialOrd, { ... } } trait Iterator { fn is_sorted(self) -> bool where - Self::Item: Ord, + Self::Item: PartialOrd, { ... } fn is_sorted_by(mut self, compare: F) -> bool where - F: FnMut(&Self::Item, &Self::Item) -> Ordering, + F: FnMut(&Self::Item, &Self::Item) -> Option, { ... } fn is_sorted_by_key(self, mut f: F) -> bool where F: FnMut(&Self::Item) -> K, - K: Ord, + K: PartialOrd, { ... } } ``` @@ -150,15 +155,17 @@ trait Iterator { In addition to the changes shown above, the three methods should also be added to `core::slice::SliceExt` as they don't require heap allocations. -To repeat the exact semantics from the prior section: the methods return `true` -if and only if for each element `a` and its following element `b`, the -condition `a <= b` holds. For slices/iterators with zero or one element, `true` -is returned. +To repeat the exact semantics from the prior section: the methods return +`true` if and only if for each element `a` and its following element `b`, the +condition `a <= b` holds. For slices/iterators with zero or one element, +`true` is returned. For elements which implement `PartialOrd`, but not `Ord`, +the function returns `false` if any two consecutive elements are not +comparable (this is an implication of the `a <= b` condition from above). A note about implementation: it's sufficient to only do real work in `Iterator::is_sorted_by`. All other methods can simply be implemented by (directly or indirectly) using `Iterator::is_sorted_by`. A sample -implementation can be found [here](https://play.rust-lang.org/?gist=8ad7ece1c99e68ee2f5c3ec2de7767b2&version=stable). +implementation can be found [here](https://play.rust-lang.org/?gist=431ff42fe8ba5980fcf9250c8bc4492b&version=stable). # Drawbacks @@ -270,3 +277,26 @@ and `[T]::is_sorted_by_key`. Additionally, when taking `Self::Item` by value, one can no longer implement `Iterator::is_sorted_by_key` with `Iterator::is_sorted_by` but would have to write a new implementation, taking care to call the key extraction method only once for each element. + + +### Require `Ord` instead of only `PartialOrd` + +As proposed in this RFC, `is_sorted` only requires its elements to be +`PartialOrd`. If two non-comparable elements are encountered, `false` is +returned. This is probably the only useful way to define the function for +partially orderable elements. + +While it's convenient to call `is_sorted()` on slices containing only +partially orderable elements (like floats), we might want to use the stronger +`Ord` bound: + +- Firstly, for most programmers it's probably not *immediately* clear how the + function is defined for partially ordered elements (the documentation should + be sufficient as explanation, though). +- Secondly, being able to call `is_sorted` on something will probably make + most programmers think, that calling `sort` on the same thing is possible, + too. Having different bounds for `is_sorted` and `sort` thus might lead to + confusion. +- Lastly, the `is_sorted_by` function currently uses a closure which returns + `Option`. This differs from the closure for `sort_by` and looks a + bit more complicated than necessary for most cases. From 22d12d92bfb54375918a2994de1bbd01456610c0 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 12 Apr 2018 10:31:44 +0200 Subject: [PATCH 06/10] Change `is_sorted` RFC to not add `Iterator::is_sorted_by_key` --- text/0000-is-sorted.md | 82 ++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index e23247dfb06..68ac67b2c67 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -6,8 +6,8 @@ # Summary [summary]: #summary -Add the methods `is_sorted`, `is_sorted_by` and `is_sorted_by_key` to `[T]` and -`Iterator`. +Add the methods `is_sorted`, `is_sorted_by` and `is_sorted_by_key` to `[T]`; +add the methods `is_sorted` and `is_sorted_by` to `Iterator`. # Motivation [motivation]: #motivation @@ -43,7 +43,8 @@ like Ruby, Javascript, Java, Haskell and Python – seem to lack such a function # Guide-level explanation [guide-level-explanation]: #guide-level-explanation -Possible documentation of the three new methods of `Iterator`: +Possible documentation of the two new methods of `Iterator` as well as +`[T]::is_sorted_by_key`: > ```rust > fn is_sorted(self) -> bool @@ -86,16 +87,18 @@ Possible documentation of the three new methods of `Iterator`: > > --- > +> (*for `[T]`*) +> > ```rust -> fn is_sorted_by_key(self, f: F) -> bool +> fn is_sorted_by_key(&self, f: F) -> bool > where -> F: FnMut(&Self::Item) -> K, +> F: FnMut(&T) -> K, > K: PartialOrd, > ``` -> Checks if the elements of this iterator are sorted using the given +> Checks if the elements of this slice are sorted using the given > key extraction function. > -> Instead of comparing the iterator's elements directly, this function +> Instead of comparing the slice's elements directly, this function > compares the keys of the elements, as determined by `f`. Apart from > that, it's equivalent to `is_sorted`; see its documentation for more > information. @@ -103,16 +106,18 @@ Possible documentation of the three new methods of `Iterator`: > ## Example > > ```rust -> assert!(["c", "bb", "aaa"].iter().is_sorted_by_key(|s| s.len())); -> assert!(![-2i32, -1, 0, 3].iter().is_sorted_by_key(|n| n.abs())); +> assert!(["c", "bb", "aaa"].is_sorted_by_key(|s| s.len())); +> assert!(![-2i32, -1, 0, 3].is_sorted_by_key(|n| n.abs())); > ``` -The methods for `[T]` will have analogous documentations. +The methods `[T]::is_sorted` and `[T]::is_sorted_by` will have analogous +documentations to the ones shown above. # Reference-level explanation [reference-level-explanation]: #reference-level-explanation -This RFC proposes to add the following three methods to `[T]` (slices) and `Iterator`: +This RFC proposes to add the following methods to `[T]` (slices) and +`Iterator`: ```rust impl [T] { @@ -143,17 +148,12 @@ trait Iterator { where F: FnMut(&Self::Item, &Self::Item) -> Option, { ... } - - fn is_sorted_by_key(self, mut f: F) -> bool - where - F: FnMut(&Self::Item) -> K, - K: PartialOrd, - { ... } } ``` -In addition to the changes shown above, the three methods should also be added -to `core::slice::SliceExt` as they don't require heap allocations. +In addition to the changes shown above, the three methods added to `[T]` should +also be added to `core::slice::SliceExt` as they don't require heap +allocations. To repeat the exact semantics from the prior section: the methods return `true` if and only if for each element `a` and its following element `b`, the @@ -176,15 +176,16 @@ It increases the size of the standard library by a tiny bit. # Rationale and alternatives [alternatives]: #alternatives -### Only add the three methods to `Iterator`, but not to `[T]` +### Only add the methods to `Iterator`, but not to `[T]` Without `is_sorted()` defined for slices directly, one can still fairly easily test if a slice is sorted by obtaining an iterator via `iter()`. So instead of `v.is_sorted()`, one would need to write `v.iter().is_sorted()`. -This always works for `is_sorted()` because of the `Ord` blanket impl which -implements `Ord` for all references to an `Ord` type. For `is_sorted_by` and -`is_sorted_by_key` it would introduce an additional reference to the closures' -arguments (i.e. `v.iter().is_sorted_by_key(|x| ...))` where `x` is `&&T`). +This always works for `is_sorted()` because of the `PartialOrd` blanket impl +which implements `PartialOrd` for all references to an `PartialOrd` type. For +`is_sorted_by` it would introduce an additional reference to the closures' +arguments (i.e. `v.iter().is_sorted_by(|a, b| ...))` where `a` and `b` are +`&&T`). While these two inconveniences are not deal-breakers, being able to call those three methods on slices (and all `Deref` types) directly, could be @@ -233,15 +234,16 @@ on that part. [unresolved]: #unresolved-questions -### Is `Iterator::is_sorted_by_key` useless? +### Should `Iterator::is_sorted_by_key` be added as well? -[One comment in the corresponding issue](https://github.com/rust-lang/rust/issues/44370#issuecomment-327740685) -mentions that `Iterator::is_sorted_by_key` is not really necessary, given that -you can simply call `map()` beforehand. This is true, but it might still be -favourable to include said function for consistency and ease of use. The -standard library already hosts a number of sorting-related functions all of -which come in three flavours: *raw*, `_by` and `_by_key`. By now, programmers would -probably expect there to be an `is_sorted_by_key` as well. +This RFC proposes to add `is_sorted_by_key` only to `[T]` but not to +`Iterator`. The latter addition wouldn't be too useful since once could easily +achieve the same effect as `.is_sorted_by_key(...)` by calling +`.map(...).is_sorted()`. It might still be favourable to include said function +for consistency and ease of use. The standard library already hosts a number of +sorting-related functions all of which come in three flavours: *raw*, `_by` and +`_by_key`. By now, programmers could expect there to be an `is_sorted_by_key` +as well. ### Add `std::cmp::is_sorted` instead @@ -263,22 +265,6 @@ disadvantage of being less discoverable and also less convenient (long path or import). -### About the closure of `Iterator::is_sorted_by_key` - -The method `Iterator::is_sorted_by_key` as proposed above takes a closure -`F: FnMut(&Self::Item) -> K`. Since the iterator is consumed and – in theory – -one only needs to extract the key once per element, the closure could take -`Self::Item` by value instead of by reference. It is not immediately clear, -whether this would have any real advantages. - -It has the disadvantage of being a bit special: `is_sorted_by`'s closure *has -to* receive its arguments by reference, as do the closures of `[T]::is_sorted_by` -and `[T]::is_sorted_by_key`. Additionally, when taking `Self::Item` by value, -one can no longer implement `Iterator::is_sorted_by_key` with -`Iterator::is_sorted_by` but would have to write a new implementation, taking -care to call the key extraction method only once for each element. - - ### Require `Ord` instead of only `PartialOrd` As proposed in this RFC, `is_sorted` only requires its elements to be From 85c3b5fb2cc0cea5dd38f6a32a1a02af170d899b Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 12 Apr 2018 10:36:45 +0200 Subject: [PATCH 07/10] Mention SIMD impl of `is_sorted` in the motivation section --- text/0000-is-sorted.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index 68ac67b2c67..889ca1aef33 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -40,6 +40,16 @@ and others. (Curiously, many (mostly) more high-level programming language – like Ruby, Javascript, Java, Haskell and Python – seem to lack such a function.) +## Fast Implementation via SIMD + +Lastly, it is possible to implement `is_sorted` for many common types with SIMD +instructions which improves speed significantly. It is unlikely that many +programmers will take the time to write SIMD code themselves, thus everyone +would benefit if this rather difficult implementation work is done in the +standard library. + + + # Guide-level explanation [guide-level-explanation]: #guide-level-explanation @@ -162,10 +172,8 @@ condition `a <= b` holds. For slices/iterators with zero or one element, the function returns `false` if any two consecutive elements are not comparable (this is an implication of the `a <= b` condition from above). -A note about implementation: it's sufficient to only do real work in -`Iterator::is_sorted_by`. All other methods can simply be implemented by -(directly or indirectly) using `Iterator::is_sorted_by`. A sample -implementation can be found [here](https://play.rust-lang.org/?gist=431ff42fe8ba5980fcf9250c8bc4492b&version=stable). +A sample implementation can be found +[here](https://play.rust-lang.org/?gist=431ff42fe8ba5980fcf9250c8bc4492b&version=stable). # Drawbacks From 970ef5c26f549a9eddf2fb538ee73c7dd03d8b1d Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Wed, 8 Aug 2018 21:13:55 +0200 Subject: [PATCH 08/10] Fix subtle bug in RFC "Add `is_sorted`" --- text/0000-is-sorted.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index 889ca1aef33..122c5c2106e 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -19,7 +19,7 @@ is sorted. The most important use cases are probably **unit tests** and The lack of an `is_sorted()` function in Rust's standard library has led to [countless programmers implementing their own](https://github.com/search?l=Rust&q=%22fn+is_sorted%22&type=Code&utf8=%E2%9C%93). While it is possible to write a one-liner using iterators (e.g. -`(0..arr.len() - 1).all(|i| arr[i] < arr[i + 1])`), it is still unnecessary +`(0..arr.len() - 1).all(|i| arr[i] <= arr[i + 1])`), it is still unnecessary overhead while writing *and* reading the code. In [the corresponding issue on the main repository](https://github.com/rust-lang/rust/issues/44370) From e35ec1832a3b6856d47ca0c5e6498198b356c766 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 9 Aug 2018 17:13:32 +0200 Subject: [PATCH 09/10] Add note about bug in initial version of "is_sorted" RFC --- text/0000-is-sorted.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/text/0000-is-sorted.md b/text/0000-is-sorted.md index 122c5c2106e..5884b44255a 100644 --- a/text/0000-is-sorted.md +++ b/text/0000-is-sorted.md @@ -19,8 +19,8 @@ is sorted. The most important use cases are probably **unit tests** and The lack of an `is_sorted()` function in Rust's standard library has led to [countless programmers implementing their own](https://github.com/search?l=Rust&q=%22fn+is_sorted%22&type=Code&utf8=%E2%9C%93). While it is possible to write a one-liner using iterators (e.g. -`(0..arr.len() - 1).all(|i| arr[i] <= arr[i + 1])`), it is still unnecessary -overhead while writing *and* reading the code. +`(0..arr.len() - 1).all(|i| arr[i] <= arr[i + 1])`¹), it is still unnecessary +mental overhead while writing *and* reading the code. In [the corresponding issue on the main repository](https://github.com/rust-lang/rust/issues/44370) (from which a few comments are referenced) everyone seems to agree on the @@ -39,6 +39,12 @@ D's [`std.algorithm.sorting.is_sorted`](https://dlang.org/library/std/algorithm/ and others. (Curiously, many (mostly) more high-level programming language – like Ruby, Javascript, Java, Haskell and Python – seem to lack such a function.) +¹ In the initial version of this RFC, this code snippet contained a bug +(`<` instead of `<=`). This subtle mistake happens very often: in this RFC, +[in the discussion thread about this RFC](https://github.com/rust-lang/rfcs/pull/2351#issuecomment-370126518), +in [this StackOverflow answer](https://stackoverflow.com/posts/51272639/revisions) +and in many more places. Thus, avoiding this common bug is another good +reason to add `is_sorted()`. ## Fast Implementation via SIMD From 1aa75abdc25df534d20942f9daea206e880332a2 Mon Sep 17 00:00:00 2001 From: Mazdak Farrokhzad Date: Sun, 19 Aug 2018 10:44:38 +0200 Subject: [PATCH 10/10] RFC 2351 --- text/{0000-is-sorted.md => 2351-is-sorted.md} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename text/{0000-is-sorted.md => 2351-is-sorted.md} (98%) diff --git a/text/0000-is-sorted.md b/text/2351-is-sorted.md similarity index 98% rename from text/0000-is-sorted.md rename to text/2351-is-sorted.md index 5884b44255a..03be680ca5f 100644 --- a/text/0000-is-sorted.md +++ b/text/2351-is-sorted.md @@ -1,7 +1,7 @@ -- Feature Name: is_sorted +- Feature Name: `is_sorted` - Start Date: 2018-02-24 -- RFC PR: (leave this empty) -- Rust Issue: (leave this empty) +- RFC PR: [rust-lang/rfcs#2351](https://github.com/rust-lang/rfcs/pull/2351) +- Rust Issue: [rust-lang/rust#53485](https://github.com/rust-lang/rust/issues/53485) # Summary [summary]: #summary