Bump version

rust-ml · Mar 11, 2021 · 0822fa4 · 0822fa4
1 parent f0eca13
commit 0822fa4
Show file tree

Hide file tree

Showing 18 changed files with 136 additions and 41 deletions.
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,4 @@ poetry.lock
 
 # Generated artifacts of website (with Zola)
 docs/website/public/*
+docs/website/static/rustdocs/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,46 @@
+Version 0.3.1 - 2021-03-11
+========================
+
+In this release of Linfa the documentation is extended, new examples are added and the functionality of datasets improved. No new algorithms were added.
+
+The meta-issue [#82](https://github.com/rust-ml/linfa/issues/82) gives a good overview of the necessary documentation improvements, and testing/documentation/examples were considerably extended in this release. 
+
+Further new functionality was added to datasets and multi-target datasets are introduced. Bootstrapping is now possible for features and samples and you can cross-validate your model with k-folding. We polished various bits in the kernel machines and simplified the interface there.
+
+The trait structure of regression metrics is simplified and the silhouette score is introduced for easier testing of K-Means and other algorithms.
+
+Changes
+-----------
+ * improve documentation in all algorithms, various commits
+ * add a website to the infrastructure (c8acc785b)
+ * add k-folding with and without copying (b0af80546f8)
+ * add feature naming and Pearson's cross correlation (71989627f)
+ * improve ergonomics when handling kernels (1a7982b973)
+ * improve TikZ generator in `linfa-trees` (9d71f603bbe)
+ * introduce multi-target datasets (b231118629)
+ * simplify regression metrics and add cluster metrics (d0363a1fa8ef)
+
+Version 0.3.0 - 2021-01-21
+=========================
+
+New Algorithms
+-----------
+
+ * Approximated DBSCAN has been added to `linfa-clustering` by [@Sauro98]
+ * Gaussian Naive Bayes has been added to `linfa-bayes` by [@VasanthakumarV]
+ * Elastic Net linear regression has been added to `linfa-elasticnet` by [@paulkoerbitz] and [@bytesnake]
+
+Changes
+----------
+
+ * Added benchmark to gaussian mixture models (a3eede55)
+ * Fixed bugs in linear decision trees, added generator for TikZ trees (bfa5aebe7)
+ * Implemented serde for all crates behind feature flag (4f0b63bb)
+ * Implemented new backend features (7296c9ec4)
+ * Introduced `linfa-datasets` for easier testing (3cec12b4f)
+ * Rename `Dataset` to `DatasetBase` and introduce `Dataset` and `DatasetView` (21dd579cf)
+ * Improve kernel tests and documentation (8e81a6d)
+
 Version 0.2.0 - 2020-11-26
 ==========================
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa"
-version = "0.3.0"
+version = "0.3.1"
 authors = [
     "Luca Palmieri <[email protected]>",
     "Lorenz Schmidt <[email protected]>",
@@ -60,7 +60,7 @@ features = ["cblas"]
 ndarray-rand = "0.11"
 approx = { version = "0.3", default-features = false, features = ["std"] }
 
-linfa-datasets = { version = "0.3.0", path = "datasets", features = ["winequality", "iris", "diabetes", "linnerud"] }
+linfa-datasets = { path = "datasets", features = ["winequality", "iris", "diabetes"] }
 
 [workspace]
 members = [

diff --git a/algorithms/linfa-bayes/Cargo.toml b/algorithms/linfa-bayes/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-bayes"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["VasanthakumarV <[email protected]>"]
 description = "Collection of Naive Bayes Algorithms"
 edition = "2018"
@@ -15,8 +15,8 @@ ndarray = { version = "0.13" , features = ["blas", "approx"]}
 ndarray-stats = "0.3"
 thiserror = "1"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 
 [dev-dependencies]
 approx = "0.3"
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["winequality"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality"] }
diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-clustering"
-version = "0.3.0"
+version = "0.3.1"
 edition = "2018"
 authors = [
     "Luca Palmieri <[email protected]>",
@@ -36,7 +36,7 @@ sprs = "0.7"
 num-traits = "0.1.32"
 rand_isaac = "0.2.0"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 partitions = "0.2.4"
 
 [dev-dependencies]

diff --git a/algorithms/linfa-elasticnet/Cargo.toml b/algorithms/linfa-elasticnet/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-elasticnet"
-version = "0.3.0"
+version = "0.3.1"
 authors = [
     "Paul Körbitz / Google <[email protected]>",
     "Lorenz Schmidt <[email protected]>"
@@ -35,9 +35,9 @@ num-traits = "0.2"
 approx = "0.3.2"
 thiserror = "1"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["diabetes"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes"] }
 ndarray-rand = "0.11"
 rand_isaac = "0.2"
diff --git a/algorithms/linfa-hierarchical/Cargo.toml b/algorithms/linfa-hierarchical/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-hierarchical"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["Lorenz Schmidt <[email protected]>"]
 edition = "2018"
 
@@ -17,10 +17,10 @@ categories = ["algorithms", "mathematics", "science"]
 ndarray = { version = "0.13", default-features = false }
 kodama = "0.2"
 
-linfa = { version = "0.3.0", path = "../.." }
-linfa-kernel = { version = "0.3.0", path = "../linfa-kernel" }
+linfa = { version = "0.3.1", path = "../.." }
+linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
 
 [dev-dependencies]
 rand = "0.7"
 ndarray-rand = "0.11"
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["iris"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-ica"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["VasanthakumarV <[email protected]>"]
 description = "A collection of Independent Component Analysis (ICA) algorithms"
 edition = "2018"
@@ -31,7 +31,7 @@ ndarray-stats = "0.3"
 num-traits = "0.2"
 rand_isaac = "0.2.0"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 
 [dev-dependencies]
 ndarray-npy = { version = "0.5", default-features = false }

diff --git a/algorithms/linfa-kernel/Cargo.toml b/algorithms/linfa-kernel/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-kernel"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["Lorenz Schmidt <[email protected]>"]
 description = "Kernel methods for non-linear algorithms"
 edition = "2018"
@@ -29,4 +29,4 @@ sprs = { version = "0.9.3", default-features = false }
 hnsw = "0.6"
 space = "0.10"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
diff --git a/algorithms/linfa-linear/Cargo.toml b/algorithms/linfa-linear/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-linear"
-version = "0.3.0"
+version = "0.3.1"
 authors = [
     "Paul Körbitz / Google <[email protected]>",
     "VasanthakumarV <[email protected]>"
@@ -25,8 +25,8 @@ argmin = {version="0.3.1", features=["ndarrayl"]}
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 thiserror = "1"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["diabetes"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes"] }
 approx = "0.3.2"
diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-logistic"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["Paul Körbitz / Google <[email protected]>"]
 
 description = "A Machine Learning framework for Rust"
@@ -20,8 +20,8 @@ num-traits = "0.2"
 argmin = {version="0.3.1", features=["ndarrayl"]}
 serde = "1.0"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 
 [dev-dependencies]
 approx = "0.3.2"
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["winequality"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality"] }
diff --git a/algorithms/linfa-reduction/Cargo.toml b/algorithms/linfa-reduction/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-reduction"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["Lorenz Schmidt <[email protected]>"]
 description = "A collection of dimensionality reduction techniques"
 edition = "2018"
@@ -30,11 +30,11 @@ ndarray-linalg = "0.12"
 ndarray-rand = "0.11"
 num-traits = "0.2"
 
-linfa = { version = "0.3.0", path = "../.." }
-linfa-kernel = { version = "0.3.0", path = "../linfa-kernel" }
+linfa = { version = "0.3.1", path = "../.." }
+linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
 
 [dev-dependencies]
 rand = { version = "0.7", features = ["small_rng"] }
 ndarray-npy = { version = "0.5", default-features = false }
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["iris"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
 approx = { version = "0.3", default-features = false, features = ["std"] }
diff --git a/algorithms/linfa-svm/Cargo.toml b/algorithms/linfa-svm/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-svm"
-version = "0.3.0"
+version = "0.3.1"
 edition = "2018"
 authors = ["Lorenz Schmidt <[email protected]>"]
 description = "Support Vector Machines"
@@ -29,9 +29,9 @@ ndarray-rand = "0.11"
 num-traits = "0.1.32"
 thiserror = "1"
 
-linfa = { version = "0.3.0", path = "../.." }
-linfa-kernel = { version = "0.3.0", path = "../linfa-kernel" }
+linfa = { version = "0.3.1", path = "../.." }
+linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.0", path = "../../datasets", features = ["winequality", "diabetes"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality", "diabetes"] }
 rand_isaac = "0.2"
diff --git a/algorithms/linfa-trees/Cargo.toml b/algorithms/linfa-trees/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-trees"
-version = "0.3.0"
+version = "0.3.1"
 edition = "2018"
 authors = ["Moss Ebeling <[email protected]>"]
 description = "A collection of tree-based algorithms"
@@ -27,14 +27,14 @@ features = ["std", "derive"]
 ndarray = { version = "0.13" , features = ["rayon", "approx"]}
 ndarray-rand = "0.11"
 
-linfa = { version = "0.3.0", path = "../.." }
+linfa = { version = "0.3.1", path = "../.." }
 
 [dev-dependencies]
 rand = { version = "0.7", features = ["small_rng"] }
 criterion = "0.3"
 approx = "0.3"
 
-linfa-datasets = { version = "0.3.0", path = "../../datasets/", features = ["iris"] }
+linfa-datasets = { version = "0.3.1", path = "../../datasets/", features = ["iris"] }
 
 [[bench]]
 name = "decision_tree"

diff --git a/datasets/Cargo.toml b/datasets/Cargo.toml
@@ -1,14 +1,14 @@
 [package]
 name = "linfa-datasets"
-version = "0.3.0"
+version = "0.3.1"
 authors = ["Lorenz Schmidt <[email protected]>"]
 description = "Collection of small datasets for Linfa"
 edition = "2018"
 license = "MIT/Apache-2.0"
 repository = "https://github.com/rust-ml/linfa"
 
 [dependencies]
-linfa = { version = "0.3.0", path = ".." }
+linfa = { version = "0.3.1", path = ".." }
 ndarray = { version = "0.13", default-features = false }
 ndarray-csv = "0.4"
 csv = "1.1"

diff --git a/datasets/README.md b/datasets/README.md
@@ -18,13 +18,13 @@ Currently the following datasets are provided:
 | linnerud | The linnerud dataset contains samples from 20 middle-aged men in a fitness club. Their physical capability, as well as biological measures are related. | 20, 3, 3 | Regression | [here](https://core.ac.uk/download/pdf/20641325.pdf) |
 
 The purpose of this crate is to facilitate dataset loading and make it as simple as possible. Loaded datasets are returned as a 
-[`linfa::Dataset`](https://docs.rs/linfa/0.3.0/linfa/dataset/type.Dataset.html) structure with named features.
+[`linfa::Dataset`](https://docs.rs/linfa/latest/linfa/dataset/type.Dataset.html) structure with named features.
 
 ## Using a dataset
 
 To use one of the provided datasets in your project add the `linfa-datasets` crate to your `Cargo.toml` and enable the corresponding feature:
 ```
-linfa-datasets = { version = "0.3.0", features = ["winequality"] }
+linfa-datasets = { version = "0.3.1", features = ["winequality"] }
 ```
 You can then use the dataset in your working code:
 ```rust

diff --git a/datasets/src/lib.rs b/datasets/src/lib.rs
@@ -18,13 +18,13 @@
 //! | linnerud | The linnerud dataset contains samples from 20 middle-aged men in a fitness club. Their physical capability, as well as biological measures are related. | 20, 3, 3 | Regression | [here](https://core.ac.uk/download/pdf/20641325.pdf) |
 //!
 //! The purpose of this crate is to facilitate dataset loading and make it as simple as possible. Loaded datasets are returned as a
-//! [`linfa::Dataset`](https://docs.rs/linfa/0.3.0/linfa/dataset/type.Dataset.html) structure with named features.
+//! [linfa::Dataset] structure with named features.
 //!
 //! ## Using a dataset
 //!
 //! To use one of the provided datasets in your project add the `linfa-datasets` crate to your `Cargo.toml` and enable the corresponding feature:
 //! ```ignore
-//! linfa-datasets = { version = "0.3.0", features = ["winequality"] }
+//! linfa-datasets = { version = "0.3.1", features = ["winequality"] }
 //! ```
 //!
 //! You can then use the dataset in your working code:

diff --git a/docs/website/content/news/release_031.md b/docs/website/content/news/release_031.md
@@ -0,0 +1,51 @@
++++
+title = "Release 0.3.1"
+date = "2021-03-11"
++++
+
+In this release of Linfa the documentation is extended, new examples are added and the functionality of datasets improved. No new algorithms were added.
+
+<!-- more -->
+
+The meta-issue [#82](https://github.com/rust-ml/linfa/issues/82) gives a good overview of the necessary documentation improvements, and testing/documentation/examples were considerably extended in this release. 
+
+Further new functionality was added to datasets and multi-target datasets are introduced. Bootstrapping is now possible for features and samples and you can cross-validate your model with k-folding. We polished various bits in the kernel machines and simplified the interface there.
+
+The trait structure of regression metrics is simplified and the silhouette score is introduced for easier testing of K-Means and other algorithms.
+
+
+# Changes
+
+ * improve documentation in all algorithms, various commits
+ * add a website to the infrastructure (c8acc785b)
+ * add k-folding with and without copying (b0af80546f8)
+ * add feature naming and Pearson's cross correlation (71989627f)
+ * improve ergonomics when handling kernels (1a7982b973)
+ * improve TikZ generator in `linfa-trees` (9d71f603bbe)
+ * introduce multi-target datasets (b231118629)
+ * simplify regression metrics and add cluster metrics (d0363a1fa8ef)
+
+# Example
+
+You can now perform cross-validation with k-folding. @Sauro98 actually implemented two versions, one which copies the dataset into k folds and one which avoids excessive memory operations by copying only the validation dataset around. For example, to test a model with 8-folding:
+
+```rust
+// perform cross-validation with the F1 score
+let f1_runs = dataset
+    .iter_fold(8, |v| params.fit(&v).unwrap())
+    .map(|(model, valid)| {
+        let cm = model
+            .predict(&valid)
+            .mapv(|x| x > Pr::even())
+            .confusion_matrix(&valid).unwrap();
+
+          cm.f1_score()
+    })  
+    .collect::<Array1<_>>();
+
+// calculate mean and standard deviation
+println!("F1 score: {}±{}",
+    f1_runs.mean().unwrap(),
+    f1_runs.std_axis(Axis(0), 0.0),
+); 
+```