From 7946b9bea75fc59d47a5ac8baa8cc261b15376e8 Mon Sep 17 00:00:00 2001
From: Chris Zwicker
Date: Tue, 30 Apr 2024 15:05:25 +0200
Subject: [PATCH] sum-of-multiples: update "deep dive" (#1915)

---
 .../.approaches/from-factors/content.md  | 32 +++++++------------
 .../.approaches/from-factors/snippet.txt | 10 +++---
 .../.approaches/introduction.md          | 17 +++++-----
 3 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/exercises/practice/sum-of-multiples/.approaches/from-factors/content.md b/exercises/practice/sum-of-multiples/.approaches/from-factors/content.md
index af71f4433..0300b5fa0 100644
--- a/exercises/practice/sum-of-multiples/.approaches/from-factors/content.md
+++ b/exercises/practice/sum-of-multiples/.approaches/from-factors/content.md
@@ -2,22 +2,20 @@
 
 ```rust
 pub fn sum_of_multiples_from_factors(limit: u32, factors: &[u32]) -> u32 {
-    let mut multiples: Vec<_> = factors
+    factors
         .iter()
         .filter(|&&factor| factor != 0)
         .flat_map(|&factor| (factor..limit).step_by(factor as usize))
-        .collect();
-    multiples.sort();
-    multiples.dedup();
-    multiples.iter().sum()
+        .collect::<HashSet<_>>()
+        .iter()
+        .sum()
 }
 ```
 
 This approach implements the exact steps outlined in the exercise description:
 
 1. For each non-zero factor, find all multiples of that factor that are less than the `limit`
-2. Collect all multiples in a [`Vec`][vec]
-3. Remove duplicate multiples
+2. Collect all multiples in a [`HashSet`][hash_set]
 3. Calculate the sum of all unique multiples
 
 In order to compute the list of multiples for a factor, we create a [`Range`][range] from the factor (inclusive) to the `limit` (exclusive), then use [`step_by`][iterator-step_by] with the same factor.
@@ -25,24 +23,16 @@ In order to compute the list of multiples for a factor, we create a [`Range`][ra
 To combine the multiples of all factors, we iterate the list of factors and use [`flat_map`][iterator-flat_map] on each factor's multiples.
 [`flat_map`][iterator-flat_map] is a combination of [`map`][iterator-map] and [`flatten`][iterator-flatten]; it maps each factor into its multiples, then flattens them all in a single sequence.
 
-Since we need to have unique multiples to compute the proper sum, we [`collect`][iterator-collect] the multiples into a [`Vec`][vec], which allows us to then [`sort`][slice-sort][^1] them and use [`dedup`][vec-dedup] to remove the duplicates.
-[`collect`][iterator-collect] is a powerful function that can collect the data in a sequence and store it in any kind of collection - however, because of this, the compiler is not able to infer the type of collection you want as the output.
-To solve this problem, we type the `multiples` variable explicitly.
-
-Finally, calculating the sum of the remaining unique multiples in the set is easy: we can simply call [`sum`][iterator-sum].
-
-[^1]: There is another method available to sort a slice: [`sort_unstable`][slice-sort_unstable]. Usually, using [`sort_unstable`][slice-sort_unstable] is recommended if we do not need to keep the ordering of duplicate elements (which is our case). However, [`sort`][slice-sort] has the advantage because of its implementation. From the documentation:
-
-    > Current implementation
-    >
-    > The current algorithm is an adaptive, iterative merge sort inspired by timsort. It is designed to be very fast in cases where the slice is nearly sorted, or consists of two or more sorted sequences concatenated one after another.
-
-    The last part is key, because this is exactly our use case: we concatenate sequences of _sorted_ multiples.
+Since we need to have unique multiples to compute the proper sum, we [`collect`][iterator-collect] the multiples into a [`HashSet`][hash_set], which only keeps one of each of its entries, thus removing duplicates.
+[`collect`][iterator-collect] is a powerful function that can collect the data in a sequence and store it in any kind of collection - however, because of this, the compiler in this case is not able to infer the type of collection you want as the output.
+To solve this problem, we specify the type `HashSet<_>` explicitly.
 
-    Running a benchmark using the two methods shows that in our scenario, [`sort`][slice-sort] is about twice as fast as [`sort_unstable`][slice-sort_unstable].
+Finally, calculating the sum of the remaining unique multiples in the set is easy: we can simply get an [Iterator][iterator] and call [`sum`][iterator-sum].
 
 [vec]: https://doc.rust-lang.org/std/vec/struct.Vec.html
+[hash_set]: https://doc.rust-lang.org/std/collections/struct.HashSet.html
 [range]: https://doc.rust-lang.org/std/ops/struct.Range.html
+[iterator]: https://doc.rust-lang.org/std/iter/trait.Iterator.html
 [iterator-step_by]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.step_by
 [iterator-flat_map]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.flat_map
 [iterator-map]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.map
diff --git a/exercises/practice/sum-of-multiples/.approaches/from-factors/snippet.txt b/exercises/practice/sum-of-multiples/.approaches/from-factors/snippet.txt
index 511d22574..105d1bd45 100644
--- a/exercises/practice/sum-of-multiples/.approaches/from-factors/snippet.txt
+++ b/exercises/practice/sum-of-multiples/.approaches/from-factors/snippet.txt
@@ -1,8 +1,8 @@
 pub fn sum_of_multiples_from_factors(limit: u32, factors: &[u32]) -> u32 {
-    let mut multiples: Vec<_> = factors.iter()
+    factors
+        .iter()
         .filter(|&&factor| factor != 0)
         .flat_map(|&factor| (factor..limit).step_by(factor as usize))
-        .collect();
-    multiples.sort();
-    multiples.dedup();
-    multiples.iter().sum()
+        .collect::<HashSet<_>>()
+        .iter()
+        .sum()
\ No newline at end of file
diff --git a/exercises/practice/sum-of-multiples/.approaches/introduction.md b/exercises/practice/sum-of-multiples/.approaches/introduction.md
index a942e2dd4..dd5bf42d8 100644
--- a/exercises/practice/sum-of-multiples/.approaches/introduction.md
+++ b/exercises/practice/sum-of-multiples/.approaches/introduction.md
@@ -14,14 +14,13 @@ It is also possible to find the multiples by simple addition, starting from the
 
 ```rust
 pub fn sum_of_multiples_from_factors(limit: u32, factors: &[u32]) -> u32 {
-    let mut multiples: Vec<_> = factors
+    factors
         .iter()
         .filter(|&&factor| factor != 0)
         .flat_map(|&factor| (factor..limit).step_by(factor as usize))
-        .collect();
-    multiples.sort();
-    multiples.dedup();
-    multiples.iter().sum()
+        .collect::<HashSet<_>>()
+        .iter()
+        .sum()
 }
 ```
 
@@ -41,10 +40,10 @@ For more information, check the [Sum by iterating the whole range approach][appr
 
 ## Which approach to use?
 
-- Computing the sum from factors can be efficient if we have a small number of factors and/or if they are large compared to the limit, because this will result in a small number of multiples to deduplicate.
-  However, as the number of multiples grows, this approach can result in a lot of work to deduplicate them.
-- Computing the sum by iterating the whole range is less efficient for large ranges when the number of factors is small and/or when they are large.
-  However, this approach has the advantage of having stable complexity that is only dependent on the limit and the number of factors, since there is no deduplication involved.
+- Computing the sum from factors can be efficient if we have a small number of `factors` and/or if they are large compared to the `limit`, because this will result in only a small number of hashes being computed "in vain".
+  However, as the number of multiples grows, this approach can result in a lot of effort updating the `HashSet` to eliminate duplicates.
+- Computing the sum by iterating the whole range can be efficient if we have a small range (low `limit`) and a comparatively large number of `factors`.
+  Additionally, this approach has the advantage of having stable complexity that is only dependent on the limit and the number of factors, since there is no deduplication involved.
   It also avoids any additional memory allocation.
 
 Without proper benchmarks, the second approach may be preferred since it offers a more stable level of complexity (e.g. its performances varies less when the size of the input changes).
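To see the updated `from-factors` code in action outside the approach document, here is a minimal, self-contained sketch. It assumes the `use std::collections::HashSet;` import that the documentation snippet leaves implicit, and the test case (limit 20, factors 3 and 5, expected sum 78) is an illustrative addition rather than part of the patch.

```rust
use std::collections::HashSet;

pub fn sum_of_multiples_from_factors(limit: u32, factors: &[u32]) -> u32 {
    factors
        .iter()
        // Skip 0: it has no multiples below the limit, and `step_by(0)` would panic.
        .filter(|&&factor| factor != 0)
        // For each factor, enumerate its multiples below the limit.
        .flat_map(|&factor| (factor..limit).step_by(factor as usize))
        // Collecting into a HashSet keeps each multiple only once.
        .collect::<HashSet<_>>()
        .iter()
        .sum()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn multiples_of_3_and_5_below_20() {
        // 3 + 5 + 6 + 9 + 10 + 12 + 15 + 18 = 78, with 15 counted only once.
        assert_eq!(sum_of_multiples_from_factors(20, &[3, 5]), 78);
    }
}
```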
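The introduction contrasts this with summing by iterating the whole range. That approach document is not touched by this patch, so the following is only a sketch of the general idea it refers to (the function name and exact shape are assumed): every candidate below the `limit` is tested against the `factors`, which needs no deduplication and no extra allocation, at the cost of work proportional to the `limit`.

```rust
// Sketch of the "iterate the whole range" idea; assumed shape, not the code
// from the linked approach document.
pub fn sum_of_multiples_by_range(limit: u32, factors: &[u32]) -> u32 {
    (1..limit)
        // Keep a candidate if it is a multiple of at least one non-zero factor.
        .filter(|&candidate| {
            factors
                .iter()
                .any(|&factor| factor != 0 && candidate % factor == 0)
        })
        .sum()
}
```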