From 7b95ab1222dfd58719b4d17ec7003f318ecd73ce Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 9 Mar 2023 11:46:40 -0800
Subject: [PATCH 01/13] Add codespell as a linter

Similar to https://github.com/rapidsai/cudf/pull/12097, this adds codespell
(https://github.com/codespell-project/codespell) as a linter to the
pre-commit config, and fixes the spelling errors it reports.
---
 .pre-commit-config.yaml                       |  7 ++++++
 BUILD.md                                      |  4 ++--
 CONTRIBUTING.md                               |  4 ++--
 ci/checks/black_lists.sh                      |  4 ++--
 cpp/CMakeLists.txt                            |  4 ++--
 cpp/bench/sg/dataset.cuh                      |  4 ++--
 cpp/examples/symreg/symreg_example.cpp        |  4 ++--
 cpp/include/cuml/cluster/hdbscan.hpp          |  4 ++--
 cpp/include/cuml/ensemble/randomforest.hpp    |  4 ++--
 cpp/include/cuml/fil/multi_sum.cuh            |  4 ++--
 cpp/include/cuml/genetic/genetic.h            |  6 ++---
 cpp/include/cuml/genetic/program.h            |  4 ++--
 cpp/include/cuml/manifold/tsne.h              | 10 ++++-----
 cpp/include/cuml/manifold/umap.hpp            | 16 +++++++-------
 cpp/include/cuml/metrics/metrics.hpp          |  6 ++---
 cpp/include/cuml/neighbors/knn.hpp            |  4 ++--
 cpp/include/cuml/tree/decisiontree.hpp        |  4 ++--
 cpp/include/cuml/tsa/arima_common.h           |  4 ++--
 cpp/scripts/gitutils.py                       |  8 +++----
 cpp/src/arima/batched_arima.cu                |  6 ++---
 cpp/src/common/cumlHandle.hpp                 |  6 ++---
 cpp/src/dbscan/vertexdeg/pack.h               |  4 ++--
 .../batched-levelalgo/builder.cuh             |  4 ++--
 .../kernels/builder_kernels.cuh               |  8 +++----
 .../kernels/builder_kernels_impl.cuh          |  4 ++--
 cpp/src/decisiontree/decisiontree.cuh         |  4 ++--
 cpp/src/explainer/kernel_shap.cu              |  4 ++--
 cpp/src/explainer/permutation_shap.cu         |  4 ++--
 cpp/src/explainer/tree_shap.cu                |  6 ++---
 cpp/src/fil/infer.cu                          |  4 ++--
 cpp/src/fil/internal.cuh                      |  6 ++---
 cpp/src/fil/treelite_import.cu                |  6 ++---
 cpp/src/genetic/genetic.cu                    |  4 ++--
 cpp/src/glm/qn/glm_base.cuh                   |  4 ++--
 cpp/src/glm/qn/qn.cuh                         |  4 ++--
 cpp/src/glm/qn/qn_solvers.cuh                 | 14 ++++++------
 cpp/src/hdbscan/detail/extract.cuh            |  4 ++--
 cpp/src/hdbscan/detail/select.cuh             |  6 ++---
 cpp/src/hdbscan/detail/stabilities.cuh        |  4 ++--
 cpp/src/holtwinters/internal/hw_decompose.cuh |  4 ++--
 cpp/src/kmeans/kmeans_mg_impl.cuh             |  8 +++----
 cpp/src/knn/knn_opg_common.cuh                | 22 +++++++++----------
 cpp/src/solver/lars_impl.cuh                  | 12 +++++-----
 cpp/src/solver/sgd.cuh                        |  4 ++--
 cpp/src/svm/kernelcache.cuh                   |  4 ++--
 cpp/src/svm/linear.cu                         |  4 ++--
 cpp/src/svm/results.cuh                       |  4 ++--
 cpp/src/svm/smoblocksolve.cuh                 | 10 ++++-----
 cpp/src/svm/smosolver.cuh                     |  4 ++--
 cpp/src/tsa/auto_arima.cuh                    |  4 ++--
 cpp/src/tsne/barnes_hut_kernels.cuh           | 18 +++++++--------
 cpp/src/tsne/barnes_hut_tsne.cuh              |  4 ++--
 cpp/src/tsne/cannylab/bh.cu                   |  2 +-
 cpp/src/tsne/exact_kernels.cuh                |  8 +++----
 cpp/src/tsne/fft_tsne.cuh                     |  6 ++---
 cpp/src/umap/fuzzy_simpl_set/naive.cuh        |  4 ++--
 cpp/src/umap/simpl_set_embed/algo.cuh         |  4 ++--
 cpp/src_prims/linalg/batched/gemv.cuh         |  4 ++--
 cpp/src_prims/linalg/batched/matrix.cuh       |  6 ++---
 cpp/src_prims/sparse/batched/csr.cuh          |  4 ++--
 cpp/test/prims/batched/matrix.cu              |  4 ++--
 cpp/test/sg/fil_test.cu                       |  4 ++--
 cpp/test/sg/lars_test.cu                      |  4 ++--
 cpp/test/sg/linear_svm_test.cu                |  6 ++---
 cpp/test/sg/rf_test.cu                        |  4 ++--
 cpp/test/sg/shap_kernel.cu                    |  4 ++--
 cpp/test/sg/svc_test.cu                       |  8 +++----
 cpp/test/sg/tsne_test.cu                      |  4 ++--
 docs/source/index.rst                         |  2 +-
 notebooks/random_forest_mnmg_demo.ipynb       |  2 +-
 notebooks/target_encoder_walkthrough.ipynb    |  4 ++--
 pyproject.toml                                |  9 ++++++++
 python/CMakeLists.txt                         |  4 ++--
 .../sklearn/preprocessing/_data.py            |  6 ++---
 .../sklearn/preprocessing/_imputation.py      |  2 +-
 .../cuml/benchmark/automated/utils/utils.py   |  6 ++---
 python/cuml/cluster/hdbscan/hdbscan.pyx       |  6 ++---
 python/cuml/dask/common/dask_df_utils.py      |  2 +-
 python/cuml/dask/common/input_utils.py        |  2 +-
 python/cuml/dask/common/utils.py              |  2 +-
 python/cuml/dask/datasets/regression.py       |  6 ++---
 python/cuml/dask/ensemble/__init__.py         |  2 +-
 .../dask/extended/linear_model/__init__.py    |  2 +-
 python/cuml/dask/linear_model/__init__.py     |  2 +-
 .../dask/linear_model/linear_regression.py    |  2 +-
 python/cuml/dask/manifold/__init__.py         |  2 +-
 python/cuml/dask/metrics/__init__.py          |  2 +-
 python/cuml/dask/neighbors/__init__.py        |  2 +-
 python/cuml/dask/solvers/__init__.py          |  2 +-
 python/cuml/decomposition/pca.pyx             |  2 +-
 .../cuml/ensemble/randomforestclassifier.pyx  |  6 ++---
 .../hyperopt_utils/plotting_utils.py          |  8 +++----
 .../cuml/experimental/linear_model/lars.pyx   |  4 ++--
 python/cuml/explainer/kernel_shap.pyx         |  6 ++---
 python/cuml/explainer/permutation_shap.pyx    |  4 ++--
 python/cuml/feature_extraction/_tfidf.py      |  2 +-
 python/cuml/fil/fil.pyx                       | 16 +++++++-------
 python/cuml/internals/base.pyx                |  4 ++--
 python/cuml/internals/base_return_types.py    |  2 +-
 python/cuml/internals/memory_utils.py         |  4 ++--
 .../cuml/linear_model/linear_regression.pyx   |  4 ++--
 python/cuml/metrics/accuracy.pyx              |  4 ++--
 python/cuml/metrics/kl_divergence.pyx         |  6 ++---
 python/cuml/metrics/pairwise_distances.pyx    |  6 ++---
 python/cuml/naive_bayes/naive_bayes.py        |  6 ++---
 python/cuml/neighbors/kernel_density.py       |  4 ++--
 python/cuml/neighbors/nearest_neighbors.pyx   |  6 ++---
 python/cuml/preprocessing/TargetEncoder.py    |  4 ++--
 .../preprocessing/text/stem/porter_stemmer.py | 14 ++++++------
 python/cuml/solvers/qn.pyx                    |  4 ++--
 python/cuml/svm/svc.pyx                       | 10 ++++-----
 python/cuml/svm/svm_base.pyx                  | 14 ++++++------
 python/cuml/svm/svr.pyx                       | 10 ++++-----
 .../cuml/testing/plugins/quick_run_plugin.py  |  2 +-
 python/cuml/testing/utils.py                  |  6 ++---
 .../cuml/tests/stemmer_tests/test_stemmer.py  |  2 +-
 python/cuml/tests/stemmer_tests/test_steps.py |  8 +++----
 python/cuml/tests/test_api.py                 |  2 +-
 python/cuml/tests/test_base.py                |  2 +-
 python/cuml/tests/test_linear_svm.py          |  6 ++---
 python/cuml/tests/test_metrics.py             |  2 +-
 python/cuml/tests/test_random_forest.py       |  2 +-
 python/cuml/tests/test_svm.py                 |  2 +-
 .../tests/test_text_feature_extraction.py     |  2 +-
 python/cuml/tests/test_train_test_split.py    |  2 +-
 python/cuml/thirdparty_adapters/adapters.py   |  2 +-
 python/cuml/tsa/batched_lbfgs.py              |  2 +-
 wiki/cpp/DEVELOPER_GUIDE.md                   |  2 +-
 wiki/mnmg/Using_Infiniband_for_MNMG.md        |  2 +-
 wiki/python/ESTIMATOR_GUIDE.md                | 10 ++++-----
 130 files changed, 340 insertions(+), 324 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9b33cecede..1a4b2c87af 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,6 +18,13 @@ repos:
             types_or: [python, cython]
             exclude: thirdparty
             additional_dependencies: [flake8-force]
+    - repo: https://github.com/codespell-project/codespell
+      rev: v2.2.2
+      hooks:
+          - id: codespell
+            additional_dependencies: [tomli]
+            args: ["--toml", "pyproject.toml"]
+            exclude: (?x)^(^CHANGELOG.md$)
     - repo: local
       hooks:
           - id: no-deprecationwarning
diff --git a/BUILD.md b/BUILD.md
index 615af933af..9f1044c168 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -61,7 +61,7 @@ $ ./build.sh cuml --singlegpu          # build the cuML python package without M
 $ ./build.sh --ccache                  # use ccache to cache compilations, speeding up subsequent builds
 ```
 
-By default, Ninja is used as the cmake generator. To override this and use (e.g.) `make`, define the `CMAKE_GENERATOR` environment variable accodingly:
+By default, Ninja is used as the cmake generator. To override this and use (e.g.) `make`, define the `CMAKE_GENERATOR` environment variable accordingly:
 ```bash
 CMAKE_GENERATOR='Unix Makefiles' ./build.sh
 ```
@@ -123,7 +123,7 @@ If using a conda environment (recommended), then cmake can be configured appropr
 $ cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX
 ```
 
-Note: The following warning message is dependent upon the version of cmake and the `CMAKE_INSTALL_PREFIX` used. If this warning is displayed, the build should still run succesfully. We are currently working to resolve this open issue. You can silence this warning by adding `-DCMAKE_IGNORE_PATH=$CONDA_PREFIX/lib` to your `cmake` command.
+Note: The following warning message is dependent upon the version of cmake and the `CMAKE_INSTALL_PREFIX` used. If this warning is displayed, the build should still run successfully. We are currently working to resolve this open issue. You can silence this warning by adding `-DCMAKE_IGNORE_PATH=$CONDA_PREFIX/lib` to your `cmake` command.
 ```
 Cannot generate a safe runtime search path for target ml_test because files
 in some directories may conflict with libraries in implicit directories:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e0167530e3..8f59c15780 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -29,9 +29,9 @@ into three categories:
 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/rapidsai/cuml/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
     or [help wanted](https://github.com/rapidsai/cuml/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels
 3. Comment on the issue saying you are going to work on it.
-4. Get familar with the developer guide relevant for you:
+4. Get familiar with the developer guide relevant for you:
     * For C++ developers it is available here [DEVELOPER_GUIDE.md](wiki/cpp/DEVELOPER_GUIDE.md)
-    * For Python developers, a [Python DEVELOPER_GUIDE.md](wiki/python/DEVELOPER_GUIDE.md) is availabe as well.
+    * For Python developers, a [Python DEVELOPER_GUIDE.md](wiki/python/DEVELOPER_GUIDE.md) is available as well.
 5. Code! Make sure to update unit tests!
 6. When done, [create your pull request](https://github.com/rapidsai/cuml/compare).
 7. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/), or fix if needed.
diff --git a/ci/checks/black_lists.sh b/ci/checks/black_lists.sh
index 2ed13a2135..85435cf856 100755
--- a/ci/checks/black_lists.sh
+++ b/ci/checks/black_lists.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 ##########################################
 # cuML black listed function call Tester #
 ##########################################
 
-# PR_TARGET_BRANCH is set by the CI enviroment
+# PR_TARGET_BRANCH is set by the CI environment
 
 git checkout --quiet $PR_TARGET_BRANCH
 
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 00603aea53..5025b05fde 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -99,7 +99,7 @@ message(VERBOSE "CUML_CPP: Build and statically link FAISS library: ${CUML_USE_F
 message(VERBOSE "CUML_CPP: Build and statically link Treelite library: ${CUML_USE_TREELITE_STATIC}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Experimental: Choose which algorithms are built into libcuml++.so. Can specify individual algorithms or groups in a semicolon-separated list.")
-message(VERBOSE "CUML_CPP: Building libcuml++ with algoriths: '${CUML_ALGORITHMS}'.")
+message(VERBOSE "CUML_CPP: Building libcuml++ with algorithms: '${CUML_ALGORITHMS}'.")
 
 # Set RMM logging level
 set(RMM_LOGGING_LEVEL "INFO" CACHE STRING "Choose the logging level.")
diff --git a/cpp/bench/sg/dataset.cuh b/cpp/bench/sg/dataset.cuh
index 5b15535fbc..fd0e25b9fa 100644
--- a/cpp/bench/sg/dataset.cuh
+++ b/cpp/bench/sg/dataset.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ namespace Bench {
  * by every Benchmark's Params structure.
  */
 struct DatasetParams {
-  /** number of rows in the datset */
+  /** number of rows in the dataset */
   int nrows;
   /** number of cols in the dataset */
   int ncols;
diff --git a/cpp/examples/symreg/symreg_example.cpp b/cpp/examples/symreg/symreg_example.cpp
index a7043f278c..7e8359efe9 100644
--- a/cpp/examples/symreg/symreg_example.cpp
+++ b/cpp/examples/symreg/symreg_example.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,7 +32,7 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
-// Namspace alias
+// Namespace alias
 namespace cg = cuml::genetic;
 
 #ifndef CUDA_RT_CALL
diff --git a/cpp/include/cuml/cluster/hdbscan.hpp b/cpp/include/cuml/cluster/hdbscan.hpp
index caf4132784..d218b4040b 100644
--- a/cpp/include/cuml/cluster/hdbscan.hpp
+++ b/cpp/include/cuml/cluster/hdbscan.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -497,7 +497,7 @@ void compute_core_dists(const raft::handle_t& handle,
  * @brief Compute the map from final, normalize labels to the labels in the CondensedHierarchy
  *
  * @param[in] handle raft handle for resource reuse
- * @param[in] condensed_tree the Condensed Hiearchy object
+ * @param[in] condensed_tree the Condensed Hierarchy object
  * @param[in] n_leaves number of leaves in the input data
  * @param[in] cluster_selection_method cluster selection method
  * @param[out] inverse_label_map rmm::device_uvector of size 0. It will be resized during the
diff --git a/cpp/include/cuml/ensemble/randomforest.hpp b/cpp/include/cuml/ensemble/randomforest.hpp
index 7ce6b8fda2..bccc02bac2 100644
--- a/cpp/include/cuml/ensemble/randomforest.hpp
+++ b/cpp/include/cuml/ensemble/randomforest.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -71,7 +71,7 @@ struct RF_params {
    * round(max_samples * n_samples) number of samples with replacement. More on
    * bootstrapping:
    *     https://en.wikipedia.org/wiki/Bootstrap_aggregating
-   * If boostrapping is set to false, whole dataset is used to build each
+   * If bootstrapping is set to false, whole dataset is used to build each
    * tree.
    */
   bool bootstrap;
diff --git a/cpp/include/cuml/fil/multi_sum.cuh b/cpp/include/cuml/fil/multi_sum.cuh
index 180134a494..f69a6f8287 100644
--- a/cpp/include/cuml/fil/multi_sum.cuh
+++ b/cpp/include/cuml/fil/multi_sum.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@
  template parameters: data [T]ype, reduction [R]adix
  function parameters:
  @data[] holds one value per thread in shared memory
- @n_groups is the number of indendent reductions
+ @n_groups is the number of independent reductions
  @n_values is the size of each individual reduction,
    that is the number of values to be reduced to a single value
  function returns: one sum per thread, for @n_groups first threads.
diff --git a/cpp/include/cuml/genetic/genetic.h b/cpp/include/cuml/genetic/genetic.h
index 207502bc77..5a65eb8ad1 100644
--- a/cpp/include/cuml/genetic/genetic.h
+++ b/cpp/include/cuml/genetic/genetic.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -87,7 +87,7 @@ void symRegPredict(const raft::handle_t& handle,
  * @param handle      cuML handle
  * @param input       device pointer to feature matrix
  * @param n_rows      number of rows of the feature matrix
- * @param params      host struct containg training hyperparameters
+ * @param params      host struct containing training hyperparameters
  * @param best_prog   The best program obtained during training. Inferences are made using this
  * @param output      device pointer to output probability(in col major format)
  */
@@ -104,7 +104,7 @@ void symClfPredictProbs(const raft::handle_t& handle,
  * @param handle      cuML handle
  * @param input       device pointer to feature matrix
  * @param n_rows      number of rows of the feature matrix
- * @param params      host struct containg training hyperparameters
+ * @param params      host struct containing training hyperparameters
  * @param best_prog   Best program obtained after training
  * @param output      Device pointer to output predictions
  */
diff --git a/cpp/include/cuml/genetic/program.h b/cpp/include/cuml/genetic/program.h
index 42fcdb4eac..c4179868b1 100644
--- a/cpp/include/cuml/genetic/program.h
+++ b/cpp/include/cuml/genetic/program.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ struct program {
    * Now take the resulting 1D array and reverse it.
    *
    * @note The pointed memory buffer is NOT owned by this class and further it
-   *       is assumed to be a zero-copy (aka pinned memory) buffer, atleast in
+   *       is assumed to be a zero-copy (aka pinned memory) buffer, at least in
    *       this initial version
    */
 
diff --git a/cpp/include/cuml/manifold/tsne.h b/cpp/include/cuml/manifold/tsne.h
index d4c5cd1b02..c07b72f486 100644
--- a/cpp/include/cuml/manifold/tsne.h
+++ b/cpp/include/cuml/manifold/tsne.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -121,8 +121,8 @@ struct TSNEParams {
  * @param[out] Y                   The column-major final embedding in device memory
  * @param[in]  n                   Number of rows in data X.
  * @param[in]  p                   Number of columns in data X.
- * @param[in]  knn_indices         Array containing nearest neighors indices.
- * @param[in]  knn_dists           Array containing nearest neighors distances.
+ * @param[in]  knn_indices         Array containing nearest neighbors indices.
+ * @param[in]  knn_dists           Array containing nearest neighbors distances.
  * @param[in]  params              Parameters for TSNE model
  * @param[out] kl_div              (optional) KL divergence output
  *
@@ -155,8 +155,8 @@ void TSNE_fit(const raft::handle_t& handle,
  * @param[in]  nnz                 The number of non-zero entries in the CSR.
  * @param[in]  n                   Number of rows in data X.
  * @param[in]  p                   Number of columns in data X.
- * @param[in]  knn_indices         Array containing nearest neighors indices.
- * @param[in]  knn_dists           Array containing nearest neighors distances.
+ * @param[in]  knn_indices         Array containing nearest neighbors indices.
+ * @param[in]  knn_dists           Array containing nearest neighbors distances.
  * @param[in]  params              Parameters for TSNE model
  * @param[out] kl_div              (optional) KL divergence output
  *
diff --git a/cpp/include/cuml/manifold/umap.hpp b/cpp/include/cuml/manifold/umap.hpp
index a160464577..da119df636 100644
--- a/cpp/include/cuml/manifold/umap.hpp
+++ b/cpp/include/cuml/manifold/umap.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -143,9 +143,9 @@ void fit_sparse(const raft::handle_t& handle,
  * Dense transform
  *
  * @param[in] handle: raft::handle_t
- * @param[in] X: pointer to input array to be infered
- * @param[in] n: n_samples of input array to be infered
- * @param[in] d: n_features of input array to be infered
+ * @param[in] X: pointer to input array to be inferred
+ * @param[in] n: n_samples of input array to be inferred
+ * @param[in] d: n_features of input array to be inferred
  * @param[in] orig_X: pointer to original training array
  * @param[in] orig_n: number of rows in original training array
  * @param[in] embedding: pointer to embedding created during training
@@ -168,10 +168,10 @@ void transform(const raft::handle_t& handle,
  * Sparse transform
  *
  * @param[in] handle: raft::handle_t
- * @param[in] indptr: pointer to index pointer array of input array to be infered
- * @param[in] indices: pointer to index array of input array to be infered
- * @param[in] data: pointer to data array of input array to be infered
- * @param[in] nnz: number of stored values of input array to be infered
+ * @param[in] indptr: pointer to index pointer array of input array to be inferred
+ * @param[in] indices: pointer to index array of input array to be inferred
+ * @param[in] data: pointer to data array of input array to be inferred
+ * @param[in] nnz: number of stored values of input array to be inferred
  * @param[in] n: n_samples of input array
  * @param[in] d: n_features of input array
  * @param[in] orig_x_indptr: pointer to index pointer array of original training array
diff --git a/cpp/include/cuml/metrics/metrics.hpp b/cpp/include/cuml/metrics/metrics.hpp
index cd4c0bc58f..beef06c89a 100644
--- a/cpp/include/cuml/metrics/metrics.hpp
+++ b/cpp/include/cuml/metrics/metrics.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -177,7 +177,7 @@ double adjusted_rand_index(const raft::handle_t& handle,
  *
  * The KL divergence tells us how well the probability distribution Q
  * approximates the probability distribution P
- * It is often also used as a 'distance metric' between two probablity ditributions (not symmetric)
+ * It is often also used as a 'distance metric' between two probability distributions (not symmetric)
  *
  * @param handle: raft::handle_t
  * @param y: Array of probabilities corresponding to distribution P
@@ -192,7 +192,7 @@ double kl_divergence(const raft::handle_t& handle, const double* y, const double
  *
  * The KL divergence tells us how well the probability distribution Q
  * approximates the probability distribution P
- * It is often also used as a 'distance metric' between two probablity ditributions (not symmetric)
+ * It is often also used as a 'distance metric' between two probability distributions (not symmetric)
  *
  * @param handle: raft::handle_t
  * @param y: Array of probabilities corresponding to distribution P
diff --git a/cpp/include/cuml/neighbors/knn.hpp b/cpp/include/cuml/neighbors/knn.hpp
index 86d1bb2b30..095d201d3b 100644
--- a/cpp/include/cuml/neighbors/knn.hpp
+++ b/cpp/include/cuml/neighbors/knn.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -139,7 +139,7 @@ void knn_classify(raft::handle_t& handle,
 /**
  * @brief Flat C++ API function to perform a knn regression using
  * a given a vector of label arrays. This supports multilabel
- * regression by clasifying on multiple label arrays. Note that
+ * regression by classifying on multiple label arrays. Note that
  * each label is classified independently, as is done in scikit-learn.
  *
  * @param[in] handle RAFT handle
diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp
index 30ca1b3ab2..b6ccdb21c8 100644
--- a/cpp/include/cuml/tree/decisiontree.hpp
+++ b/cpp/include/cuml/tree/decisiontree.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -56,7 +56,7 @@ struct DecisionTreeParams {
    */
   CRITERION split_criterion;
   /**
-   * Minimum impurity decrease required for spliting a node. If the impurity decrease is below this
+   * Minimum impurity decrease required for splitting a node. If the impurity decrease is below this
    * value, node is leafed out. Default is 0.0
    */
   float min_impurity_decrease = 0.0f;
diff --git a/cpp/include/cuml/tsa/arima_common.h b/cpp/include/cuml/tsa/arima_common.h
index ff60bc0cd5..597d05aa57 100644
--- a/cpp/include/cuml/tsa/arima_common.h
+++ b/cpp/include/cuml/tsa/arima_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -315,7 +315,7 @@ struct ARIMAMemory {
 
     if (r <= 5) {
       // Note: temp mem for the direct Lyapunov solver grows very quickly!
-      // This solver is used iff the condition above is satisifed
+      // This solver is used iff the condition above is satisfied
       append_buffer<assign>(I_m_AxA_dense, r * r * r * r * batch_size);
       append_buffer<assign>(I_m_AxA_batches, batch_size);
       append_buffer<assign>(I_m_AxA_inv_dense, r * r * r * r * batch_size);
diff --git a/cpp/scripts/gitutils.py b/cpp/scripts/gitutils.py
index 56d8d5d6ac..ccb70b358e 100644
--- a/cpp/scripts/gitutils.py
+++ b/cpp/scripts/gitutils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -85,7 +85,7 @@ def repo_version_major_minor():
 def determine_merge_commit(current_branch="HEAD"):
     """
     When running outside of CI, this will estimate the target merge commit hash
-    of `current_branch` by finding a common ancester with the remote branch
+    of `current_branch` by finding a common ancestor with the remote branch
     'branch-{major}.{minor}' where {major} and {minor} are determined from the
     repo version.
 
@@ -211,8 +211,8 @@ def modifiedFiles(pathFilter=None):
     If inside a CI-env (ie. TARGET_BRANCH and COMMIT_HASH are defined, and
     current branch is "current-pr-branch"), then lists out all files modified
     between these 2 branches. Locally, TARGET_BRANCH will try to be determined
-    from the current repo version and finding a coresponding branch named
-    'branch-{major}.{minor}'. If this fails, this functino will list out all
+    from the current repo version and finding a corresponding branch named
+    'branch-{major}.{minor}'. If this fails, this function will list out all
     the uncommitted files in the current branch.
 
     Such utility function is helpful while putting checker scripts as part of
diff --git a/cpp/src/arima/batched_arima.cu b/cpp/src/arima/batched_arima.cu
index a989bc5812..43bc81221b 100644
--- a/cpp/src/arima/batched_arima.cu
+++ b/cpp/src/arima/batched_arima.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -469,7 +469,7 @@ void batched_loglike(raft::handle_t& handle,
   }
 
   if (host_loglike) {
-    /* Tranfer log-likelihood device -> host */
+    /* Transfer log-likelihood device -> host */
     raft::update_host(loglike, d_loglike, batch_size, stream);
   }
 }
@@ -931,7 +931,7 @@ void _start_params(raft::handle_t& handle,
                         order.k,
                         params.mu);
 
-  // Estimate a seasonal ARMA fit independantly
+  // Estimate a seasonal ARMA fit independently
   if (order.P + order.Q)
     _arma_least_squares(handle,
                         params.sar,
diff --git a/cpp/src/common/cumlHandle.hpp b/cpp/src/common/cumlHandle.hpp
index 3d7fa3b031..63aa1ce0dd 100644
--- a/cpp/src/common/cumlHandle.hpp
+++ b/cpp/src/common/cumlHandle.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class HandleMap {
    *
    * @return std::pair with handle and error code. If error code is not CUML_SUCCESS
    *                   the handle is INVALID_HANDLE. Error code CUML_INAVLID_HANDLE
-   *                   is returned if the provided `handle` is invald.
+   *                   is returned if the provided `handle` is invalid.
    */
   std::pair<raft::handle_t*, cumlError_t> lookupHandlePointer(cumlHandle_t handle) const;
 
@@ -51,7 +51,7 @@ class HandleMap {
    *
    * @return cumlError_t CUML_SUCCESS or CUML_INVALID_HANDLE.
    *                   Error code CUML_INAVLID_HANDLE is returned if the provided
-   *                   `handle` is invald.
+   *                   `handle` is invalid.
    */
   cumlError_t removeAndDestroyHandle(cumlHandle_t handle);
 
diff --git a/cpp/src/dbscan/vertexdeg/pack.h b/cpp/src/dbscan/vertexdeg/pack.h
index 732780ce30..1f3c551402 100644
--- a/cpp/src/dbscan/vertexdeg/pack.h
+++ b/cpp/src/dbscan/vertexdeg/pack.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ struct Pack {
   /**
    * @brief reset the output array before calling the actual kernel
    * @param stream cuda stream where to perform this operation
-   * @param vdlen lenght of the vertex degree array
+   * @param vdlen length of the vertex degree array
    */
   void resetArray(cudaStream_t stream, Index_ vdlen)
   {
diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh
index b7045fbcf6..fef69b12f7 100644
--- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -296,7 +296,7 @@ struct Builder {
    * @brief assign workspace to the current state
    *
    * @param[in] d_wspace device buffer allocated by the user for the workspace.
-   *                     Its size should be atleast workspaceSize()
+   *                     Its size should be at least workspaceSize()
    * @param[in] h_wspace pinned host buffer needed to store the learned nodes
    */
   void assignWorkspace(char* d_wspace, char* h_wspace)
diff --git a/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels.cuh b/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels.cuh
index 6909355b9f..7daf5341b7 100644
--- a/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -139,7 +139,7 @@ struct CustomDifference {
  * (=blockIdx.x), threadIdx.x). Method used is a random, parallel, sampling with replacement of
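- * excess of 'k' samples (hence the name) and then eliminating the dupicates by ordering them. The
+ * excess of 'k' samples (hence the name) and then eliminating the duplicates by ordering them. The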
  * excess number of samples (=`n_parallel_samples`) is calculated such that after ordering there is
- * atleast 'k' uniques.
+ * at least 'k' uniques.
  */
 template <typename IdxT, int MAX_SAMPLES_PER_THREAD, int BLOCK_THREADS = 128>
 __global__ void excess_sample_with_replacement_kernel(
@@ -185,7 +185,7 @@ __global__ void excess_sample_with_replacement_kernel(
       // mask of the previous iteration, if exists, is re-used here
       // so previously generated unique random numbers are used.
       // newly generated random numbers may or may not duplicate the previously generated ones
-      // but this ensures some forward progress in order to generate atleast 'k' unique random
+      // but this ensures some forward progress in order to generate at least 'k' unique random
       // samples.
       if (mask[thread_local_sample_idx] == 0 and cta_sample_idx < n_parallel_samples)
         raft::random::custom_next(
@@ -194,7 +194,7 @@ __global__ void excess_sample_with_replacement_kernel(
                0)  // indices that exceed `n_parallel_samples` will not generate
         items[thread_local_sample_idx] = n - 1;
       else
-        continue;  // this case is for samples whose mask == 1 (saving previous iteraion's random
+        continue;  // this case is for samples whose mask == 1 (saving previous iteration's random
                    // number generated)
     }
 
diff --git a/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels_impl.cuh b/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels_impl.cuh
index a42e53c8d2..6e6e526c78 100644
--- a/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels_impl.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/kernels/builder_kernels_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ static constexpr int TPB_DEFAULT = 128;
  * @return the position of the left child node in the nodes list. However, this
  *         value is valid only for threadIdx.x == 0.
  * @note this should be called by only one block from all participating blocks
- *       'smem' should be atleast of size `sizeof(IdxT) * TPB * 2`
+ *       'smem' should be at least of size `sizeof(IdxT) * TPB * 2`
  */
 template <typename DataT, typename LabelT, typename IdxT, int TPB>
 DI void partitionSamples(const Dataset<DataT, LabelT, IdxT>& dataset,
diff --git a/cpp/src/decisiontree/decisiontree.cuh b/cpp/src/decisiontree/decisiontree.cuh
index cd425c6760..eac66f1e16 100644
--- a/cpp/src/decisiontree/decisiontree.cuh
+++ b/cpp/src/decisiontree/decisiontree.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -155,7 +155,7 @@ tl::Tree<T, T> build_treelite_tree(const DT::TreeMetaDataNode<T, L>& rf_tree,
                                    unsigned int num_class)
 {
   // First index refers to the cuml node id
-  // Seccond refers to the tl node id
+  // Second refers to the tl node id
   using kv = std::pair<std::size_t, std::size_t>;
   std::vector<kv> cur_level_queue;
   std::vector<kv> next_level_queue;
diff --git a/cpp/src/explainer/kernel_shap.cu b/cpp/src/explainer/kernel_shap.cu
index d9ad002e4e..4bc968f2e7 100644
--- a/cpp/src/explainer/kernel_shap.cu
+++ b/cpp/src/explainer/kernel_shap.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -125,7 +125,7 @@ __global__ void sampled_rows_kernel(IdxT* nsamples,
     int rand_idx = (int)(curand_uniform(&state) * ncols);
 
     // Since X is initialized to 0, we quickly check for collisions (if k_blk << ncols the
-    // likelyhood of collisions is low)
+    // likelihood of collisions is low)
     while (atomicExch(&(X[2 * blockIdx.x * ncols + rand_idx]), 1) == 1) {
       rand_idx = (int)(curand_uniform(&state) * ncols);
     }
diff --git a/cpp/src/explainer/permutation_shap.cu b/cpp/src/explainer/permutation_shap.cu
index 8cb69b06a0..46d78d99b2 100644
--- a/cpp/src/explainer/permutation_shap.cu
+++ b/cpp/src/explainer/permutation_shap.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -53,7 +53,7 @@ __global__ void _fused_tile_scatter_pe(DataT* dataset,
       start = ((tid % ncols) + 1) * nrows_background;
 
       // each entry of the dataset will be input the same number of times
-      // to the matrix, controled by the sc_size parameter
+      // to the matrix, controlled by the sc_size parameter
       end = start + sc_size * nrows_background;
 
       // now we just need to check if this thread is between start and end
diff --git a/cpp/src/explainer/tree_shap.cu b/cpp/src/explainer/tree_shap.cu
index df49232633..0dce3675af 100644
--- a/cpp/src/explainer/tree_shap.cu
+++ b/cpp/src/explainer/tree_shap.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ class BitField {
     return oss.str();
   }
 
-  static_assert(!std::is_signed<T>::value, "Must use unsiged type as underlying storage.");
+  static_assert(!std::is_signed<T>::value, "Must use unsigned type as underlying storage.");
 };
 
 using CatBitFieldStorageT = std::uint32_t;
@@ -629,7 +629,7 @@ void visit_path_segments_in_model(const tl::ModelImpl<ThresholdType, LeafType>&
 
 // Traverse a path from the root node to a leaf node and return the list of the path segments
 // Note: the path segments will have missing values in path_idx, group_id and v (leaf value).
-//       The callser is responsible for filling in these fields.
+//       The caller is responsible for filling in these fields.
 template <typename ThresholdType, typename LeafType>
 std::vector<gpu_treeshap::PathElement<SplitCondition<ThresholdType>>> traverse_towards_leaf_node(
   const tl::Tree<ThresholdType, LeafType>& tree,
diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu
index 04acd71003..73da47cfc1 100644
--- a/cpp/src/fil/infer.cu
+++ b/cpp/src/fil/infer.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -571,7 +571,7 @@ template <int NITEMS, typename real_t>
 struct tree_aggregator_t<NITEMS, real_t, VECTOR_LEAF> {
   // per_class_margin is a row-major matrix
   // of size num_threads_per_class * num_classes
-  // used to acccumulate class values
+  // used to accumulate class values
   vec<NITEMS, real_t>* per_class_margin;
   vec<NITEMS, int>* vector_leaf_indices;
   int* thread_num_rows;
diff --git a/cpp/src/fil/internal.cuh b/cpp/src/fil/internal.cuh
index eb0566e5a3..4f18d0c072 100644
--- a/cpp/src/fil/internal.cuh
+++ b/cpp/src/fil/internal.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -423,7 +423,7 @@ struct categorical_sets {
   }
 };
 
-// lets any tree determine a child index for a node in a generic fasion
-// used in fil_test.cu fot its child_index() in CPU predicting
+// lets any tree determine a child index for a node in a generic fashion
+// used in fil_test.cu for its child_index() in CPU predicting
 struct tree_base {
   categorical_sets cat_sets;
@@ -554,7 +554,7 @@ struct cat_sets_device_owner {
  *    must be sparse_node16, sparse_node8 or dense_node
  *  @param h cuML handle used by this function
  *  @param pf pointer to where to store the newly created forest
- *  @param trees for sparse forests, indices of tree roots in the nodes arrray, of length
+ *  @param trees for sparse forests, indices of tree roots in the nodes array, of length
  params->ntrees; ignored for dense forests
  *  @param nodes nodes for the forest, of length params->num_nodes for sparse
       or (2**(params->depth + 1) - 1) * params->ntrees for dense forests
diff --git a/cpp/src/fil/treelite_import.cu b/cpp/src/fil/treelite_import.cu
index f01b9cf2f7..533a3be4ba 100644
--- a/cpp/src/fil/treelite_import.cu
+++ b/cpp/src/fil/treelite_import.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -229,7 +229,7 @@ void adjust_threshold(real_t* pthreshold, bool* swap_child_nodes, tl::Operator c
 {
   // in treelite (take left node if val [op] threshold),
   // the meaning of the condition is reversed compared to FIL;
-  // thus, "<" in treelite corresonds to comparison ">=" used by FIL
+  // thus, "<" in treelite corresponds to comparison ">=" used by FIL
   // https://github.com/dmlc/treelite/blob/master/include/treelite/tree.h#L243
   if (isnan(*pthreshold)) {
     *swap_child_nodes = !*swap_child_nodes;
@@ -482,7 +482,7 @@ void tl2fil_common(forest_params_t* params,
                    const tl::ModelImpl<T, L>& model,
                    const treelite_params_t* tl_params)
 {
-  // fill in forest-indendent params
+  // fill in forest-independent params
   params->algo      = tl_params->algo;
   params->threshold = tl_params->threshold;
 
diff --git a/cpp/src/genetic/genetic.cu b/cpp/src/genetic/genetic.cu
index a276724b3a..3cc6680ce5 100644
--- a/cpp/src/genetic/genetic.cu
+++ b/cpp/src/genetic/genetic.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -370,7 +370,7 @@ void symFit(const raft::handle_t& handle,
 {
   cudaStream_t stream = handle.get_stream();
 
-  // Update arity map in params - Need to do this only here, as all operations will call Fit atleast
+  // Update arity map in params - Need to do this only here, as all operations will call Fit at least
   // once
   for (auto f : params.function_set) {
     int ar = 1;
diff --git a/cpp/src/glm/qn/glm_base.cuh b/cpp/src/glm/qn/glm_base.cuh
index 2f669ba59f..cd9820c61b 100644
--- a/cpp/src/glm/qn/glm_base.cuh
+++ b/cpp/src/glm/qn/glm_base.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -141,7 +141,7 @@ struct GLMBase : GLMDims {
   {
     // Base impl assumes simple case C = 1
     // TODO would be nice to have a kernel that fuses these two steps
-    // This would be easy, if mapThenSumReduce allowed outputing the result of
+    // This would be easy, if mapThenSumReduce allowed outputting the result of
     // map (supporting inplace)
     auto lz_copy  = static_cast<Loss*>(this)->lz;
     auto dlz_copy = static_cast<Loss*>(this)->dlz;
diff --git a/cpp/src/glm/qn/qn.cuh b/cpp/src/glm/qn/qn.cuh
index 6fa67b653f..266d55ece5 100644
--- a/cpp/src/glm/qn/qn.cuh
+++ b/cpp/src/glm/qn/qn.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@ int qn_fit(const raft::handle_t& handle,
   LBFGSParam<T> opt_param(pams);
   SimpleVec<T> w0(w0_data, loss.n_param);
 
-  // Scale the regularization strenght with the number of samples.
+  // Scale the regularization strength with the number of samples.
   T l1 = pams.penalty_l1;
   T l2 = pams.penalty_l2;
   if (pams.penalty_normalized) {
diff --git a/cpp/src/glm/qn/qn_solvers.cuh b/cpp/src/glm/qn/qn_solvers.cuh
index d52bdffeeb..54474e7a00 100644
--- a/cpp/src/glm/qn/qn_solvers.cuh
+++ b/cpp/src/glm/qn/qn_solvers.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -50,7 +50,7 @@
 namespace ML {
 namespace GLM {
 
-// TODO better way to deal with alignment? Smaller aligne possible?
+// TODO better way to deal with alignment? Smaller alignment possible?
 constexpr size_t qn_align = 256;
 
 template <typename T>
@@ -126,7 +126,7 @@ inline bool update_and_check(const char* solver,
     stop    = true;
   }
 
-  // if lineseach wasn't successful, undo the update.
+  // if linesearch wasn't successful, undo the update.
   if (!isLsSuccess || !isLsValid) {
     fx = fxp;
     x.copy_async(xp, stream);
@@ -203,7 +203,7 @@ inline OPT_RETCODE min_lbfgs(const LBFGSParam<T>& param,
   OPT_RETCODE retcode;
   LINE_SEARCH_RETCODE lsret;
   for (; *k <= param.max_iterations; (*k)++) {
-    // Save the curent x and gradient
+    // Save the current x and gradient
     xp.copy_async(x, stream);
     gradp.copy_async(grad, stream);
     fxp = fx;
@@ -354,7 +354,7 @@ inline OPT_RETCODE min_owlqn(const LBFGSParam<T>& param,
   OPT_RETCODE retcode;
   LINE_SEARCH_RETCODE lsret;
   for ((*k) = 1; (*k) <= param.max_iterations; (*k)++) {
-    // Save the curent x and gradient
+    // Save the current x and gradient
     xp.copy_async(x, stream);
     gradp.copy_async(grad, stream);
     fxp = fx;
@@ -437,9 +437,9 @@ inline int qn_minimize(const raft::handle_t& handle,
   } else {
     // There might not be a better way to deal with dispatching
     // for the l1 case:
-    // The algorithm explicitely expects a differentiable
+    // The algorithm explicitly expects a differentiable
     // function f(x). It takes care of adding and
-    // handling the term l1norm(x) * l1_pen explicitely, i.e.
+    // handling the term l1norm(x) * l1_pen explicitly, i.e.
     // it needs to evaluate f(x) and its gradient separately
 
     rmm::device_uvector<T> tmp(owlqn_workspace_size(opt_param, x.len), stream);
diff --git a/cpp/src/hdbscan/detail/extract.cuh b/cpp/src/hdbscan/detail/extract.cuh
index fcd14d8a4a..d33e22107d 100644
--- a/cpp/src/hdbscan/detail/extract.cuh
+++ b/cpp/src/hdbscan/detail/extract.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -251,7 +251,7 @@ void _compute_inverse_label_map(const raft::handle_t& handle,
  * @param[out] inverse_label_map array mapping final label ids to condensed label ids, used for
  * prediction APIs (size n_clusters)
  * @param[in] allow_single_cluster allows a single cluster to be returned (rather than just noise)
- * @param[in] max_cluster_size maximium number of points that can be considered in a cluster before
+ * @param[in] max_cluster_size maximum number of points that can be considered in a cluster before
  * it is split into multiple sub-clusters.
  * @param[in] cluster_selection_epsilon a distance threshold. clusters below this value will be
  * merged.
diff --git a/cpp/src/hdbscan/detail/select.cuh b/cpp/src/hdbscan/detail/select.cuh
index 9253fb5961..e8bf64852c 100644
--- a/cpp/src/hdbscan/detail/select.cuh
+++ b/cpp/src/hdbscan/detail/select.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -302,7 +302,7 @@ void leaf(const raft::handle_t& handle,
  * @param[in] n_clusters number of clusters in cluster tree
  * @param[in] cluster_selection_epsilon distance threshold
  * @param[in] allow_single_cluster allows a single cluster with noisy datasets
- * @param[in] n_selected_clusters numnber of cluster selections in is_cluster
+ * @param[in] n_selected_clusters number of cluster selections in is_cluster
  */
 template <typename value_idx, typename value_t, int tpb = 256>
 void cluster_epsilon_search(const raft::handle_t& handle,
@@ -421,7 +421,7 @@ void select_clusters(const raft::handle_t& handle,
   auto epsilon_search = true;
 
   if (cluster_selection_method == Common::CLUSTER_SELECTION_METHOD::LEAF) {
-    // TODO: reenable to match reference implementation
+    // TODO: re-enable to match reference implementation
     // It's a confirmed bug https://github.com/scikit-learn-contrib/hdbscan/issues/476
 
     // if no cluster leaves were found, declare root as cluster
diff --git a/cpp/src/hdbscan/detail/stabilities.cuh b/cpp/src/hdbscan/detail/stabilities.cuh
index 7be40bfbcb..734814725d 100644
--- a/cpp/src/hdbscan/detail/stabilities.cuh
+++ b/cpp/src/hdbscan/detail/stabilities.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -108,7 +108,7 @@ void compute_stabilities(const raft::handle_t& handle,
     stream,
     cub::DeviceSegmentedReduce::Min<const value_t*, value_t*, const value_idx*, const value_idx*>);
   // finally, we find minimum between initialized births where parent=child
-  // and births of parents for their childrens
+  // and births of parents for their children
   auto births_zip =
     thrust::make_zip_iterator(thrust::make_tuple(births.data(), births_parent_min.data()));
   auto min_op = [] __device__(const thrust::tuple<value_t, value_t>& birth_pair) {
diff --git a/cpp/src/holtwinters/internal/hw_decompose.cuh b/cpp/src/holtwinters/internal/hw_decompose.cuh
index 2b289c0339..8c1efdd7b7 100644
--- a/cpp/src/holtwinters/internal/hw_decompose.cuh
+++ b/cpp/src/holtwinters/internal/hw_decompose.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -208,7 +208,7 @@ void batched_ls(const raft::handle_t& handle,
                                                                  dev_info_d.data(),
                                                                  stream));
 
-  // Single thread kenrel to inverse R
+  // Single-thread kernel to invert R
   RinvKernel<Dtype><<<1, 1, 0, stream>>>(A_d.data(), Rinv_d.data(), trend_len);
 
   // R1QT = inv(R)*transpose(Q)
diff --git a/cpp/src/kmeans/kmeans_mg_impl.cuh b/cpp/src/kmeans/kmeans_mg_impl.cuh
index cb7ead8bf7..f1a0470652 100644
--- a/cpp/src/kmeans/kmeans_mg_impl.cuh
+++ b/cpp/src/kmeans/kmeans_mg_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -702,7 +702,7 @@ void fit(const raft::handle_t& handle,
              "An error occurred in the distributed operation. This can result "
              "from a failed rank");
       ASSERT(curClusteringCost != (DataT)0.0,
-             "Too few points and centriods being found is getting 0 cost from "
+             "Too few points and centroids being found is getting 0 cost from "
              "centers\n");
 
       if (n_iter[0] > 0) {
@@ -755,7 +755,7 @@ void fit(const raft::handle_t& handle,
   // underlying expandable storage that holds centroids data
   auto centroidsRawData = raft::make_device_matrix<DataT, IndexT>(handle, n_clusters, n_features);
 
-  // Device-accessible allocation of expandable storage used as temorary buffers
+  // Device-accessible allocation of expandable storage used as temporary buffers
   rmm::device_uvector<char> workspace(0, stream);
 
   // check if weights sum up to n_samples
@@ -774,7 +774,7 @@ void fit(const raft::handle_t& handle,
   } else if (params.init == raft::cluster::kmeans::KMeansParams::InitMethod::Array) {
     CUML_LOG_KMEANS(handle,
                     "KMeans.fit: initialize cluster centers from the ndarray array input "
-                    "passed to init arguement.\n");
+                    "passed to init argument.\n");
 
     ASSERT(centroids != nullptr,
            "centroids array is null (require a valid array of centroids for "
diff --git a/cpp/src/knn/knn_opg_common.cuh b/cpp/src/knn/knn_opg_common.cuh
index 1dc97e8269..16442a6637 100644
--- a/cpp/src/knn/knn_opg_common.cuh
+++ b/cpp/src/knn/knn_opg_common.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -229,7 +229,7 @@ struct opg_knn_work {
 
 /*!
  Main function, computes distributed KNN operation
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] handle RAFT handle
  */
 template <typename in_t, typename ind_t, typename dist_t, typename out_t>
@@ -368,8 +368,8 @@ void opg_knn(opg_knn_param<in_t, ind_t, dist_t, out_t>& params, raft::handle_t&
 };
 
 /*!
- Broadcast query batch accross all the workers
- @param[in] params Parameters for distrbuted KNN operation
+ Broadcast query batch across all the workers
+ @param[in] params Parameters for distributed KNN operation
  @param[in] handle RAFT handle
  @param[in] part_rank Rank of currently processed query batch
  @param[in] broadcast Pointer to broadcast
@@ -413,7 +413,7 @@ void broadcast_query(opg_knn_work<in_t, ind_t, dist_t, out_t>& work,
 
 /*!
  Perform a local KNN search for a given query batch
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[in] query Pointer to query
@@ -495,7 +495,7 @@ __global__ void copy_label_outputs_from_index_parts_kernel(out_t* out,
 
 /*!
  Get the right labels for indices obtained after a KNN merge
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[in] batch_size Batch size
@@ -546,7 +546,7 @@ void copy_label_outputs_from_index_parts(opg_knn_param<in_t, ind_t, dist_t, out_
  Exchange results of local KNN search and operation for a given query batch
  All non-root index ranks send the results for the current
  query batch to the root rank for the batch.
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[in] part_rank Rank of currently processed query batch
@@ -682,7 +682,7 @@ void exchange_results(opg_knn_param<in_t, ind_t, dist_t, out_t>& params,
 
 /*!
  Reduce all local results to a global result for a given query batch
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[in] part_idx Partition index of query batch
@@ -824,7 +824,7 @@ __global__ void merge_labels_kernel(out_t* outputs,
 
 /*!
  Get the right labels for indices obtained after local KNN searches
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[out] output KNN outputs output array
@@ -884,7 +884,7 @@ void merge_labels(opg_knn_param_t& params,
 
 /*!
  Perform final classification, regression or class-proba operation for a given query batch
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[out] outputs KNN outputs output array
@@ -917,7 +917,7 @@ void perform_local_operation(opg_knn_param<in_t, ind_t, dist_t, out_t>& params,
 
 /*!
  Perform final classification, regression or class-proba operation for a given query batch
- @param[in] params Parameters for distrbuted KNN operation
+ @param[in] params Parameters for distributed KNN operation
  @param[in] work Current work for distributed KNN
  @param[in] handle RAFT handle
  @param[out] outputs KNN outputs output array
diff --git a/cpp/src/solver/lars_impl.cuh b/cpp/src/solver/lars_impl.cuh
index b1f87e761d..0d282a8009 100644
--- a/cpp/src/solver/lars_impl.cuh
+++ b/cpp/src/solver/lars_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -179,14 +179,14 @@ void swapFeatures(cublasHandle_t handle,
  * We have an active set with n_active elements, and an inactive set with
  * n_valid_cols - n_active elements. The matrix X [n_samples, n_features] is
  * partitioned in a way that the first n_active columns store the active set.
- * Similarily the vectors correlation and indices are partitioned in a way
+ * Similarly the vectors correlation and indices are partitioned in a way
  * that the first n_active elements belong to the active set:
  * - active set:  X[:,:n_active], correlation[:n_active], indices[:n_active]
  * - inactive set: X[:,n_active:], correlation[n_active:], indices[n_active:].
  *
  * This function moves the feature column X[:,idx] into the active set by
  * replacing the first inactive element with idx. The indices and correlation
- * vectors are modified accordinly. The sign array is updated with the sign
+ * vectors are modified accordingly. The sign array is updated with the sign
  * of correlation[n_active].
  *
  * @param handle cuBLAS handle
@@ -196,7 +196,7 @@ void swapFeatures(cublasHandle_t handle,
  * @param X device array of feature vectors in column major format, size
  *     [n_cols * ld_X]
  * @param n_rows number of training vectors
- * @param n_cols number of valid features colums (ignoring those features which
+ * @param n_cols number of valid features columns (ignoring those features which
  *    are detected to be collinear with the active set)
  * @param ld_X leading dimension of X
  * @param cor device array of correlations, size [n_cols]
@@ -573,7 +573,7 @@ LarsFitStatus calcEquiangularVec(const raft::handle_t& handle,
  *    size [n_active * ld_G]
  * @param ld_G leading dimension of G (ld_G >= n_cols)
  * @param X device array of training vectors in column major format,
- *     size [n_rows * n_cols]. Only used if the gram matrix is not avaiable.
+ *     size [n_rows * n_cols]. Only used if the gram matrix is not available.
  * @param ld_X leading dimension of X (ld_X >= n_rows)
  * @param u device pointer to equiangular vector size [n_rows]. Only used if the
  *     Gram matrix G is not available.
@@ -771,7 +771,7 @@ void larsInit(const raft::handle_t& handle,
  * @param ws device pointer to the ws vector, size [n_cols]
  * @param cor device pointer to the correlations, size [n_cols]
  * @param a_vec device pointer to a = X.T[:,n_A:] * u, size [n_cols]
- * @param beta pointer to regression coefficents, size [max_iter]
+ * @param beta pointer to regression coefficients, size [max_iter]
  * @param coef_path device pointer to all the coefficients along the
  *    regularization path, size [(max_iter + 1) * max_iter]
  * @param stream CUDA stream
diff --git a/cpp/src/solver/sgd.cuh b/cpp/src/solver/sgd.cuh
index 7128c696cc..ccadaae47f 100644
--- a/cpp/src/solver/sgd.cuh
+++ b/cpp/src/solver/sgd.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ using namespace MLCommon;
  * @param lr_type
  *        type of the learning rate function (i.e. OPTIMAL, CONSTANT, INVSCALING, ADAPTIVE)
  * @param eta0
- *        learning rate for contant lr_type. It's used to calculate learning rate function for other
+ *        learning rate for constant lr_type. It's used to calculate learning rate function for other
  * types of lr_type
  * @param power_t
  *        power value in the INVSCALING lr_type
diff --git a/cpp/src/svm/kernelcache.cuh b/cpp/src/svm/kernelcache.cuh
index d02a580515..0ed1a0ff9d 100644
--- a/cpp/src/svm/kernelcache.cuh
+++ b/cpp/src/svm/kernelcache.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -206,7 +206,7 @@ class KernelCache {
       int n_cached;
       cache.GetCacheIdxPartitioned(
         unique_idx.data(), n_unique, ws_cache_idx.data(), &n_cached, stream);
-      // collect allready cached values
+      // collect already cached values
       cache.GetVecs(ws_cache_idx.data(), n_cached, tile.data(), stream);
       int non_cached = n_unique - n_cached;
       if (non_cached > 0) {
diff --git a/cpp/src/svm/linear.cu b/cpp/src/svm/linear.cu
index a6bb7d2fab..57515a06bf 100644
--- a/cpp/src/svm/linear.cu
+++ b/cpp/src/svm/linear.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -144,7 +144,7 @@ __global__ void predictProba(T* out, const T* z, const int nRows, const int nCla
   const T* rowIn = z + i * (Binary ? 1 : nClasses);
   T* rowOut      = out + i * nClasses;
 
-  // the largest 'z' in the row (to substract it from z for numeric stability).
+  // the largest 'z' in the row (to subtract it from z for numeric stability).
   T t      = std::numeric_limits<T>::lowest();
   T maxVal = t;
   int j    = threadIdx.x;
diff --git a/cpp/src/svm/results.cuh b/cpp/src/svm/results.cuh
index e8927e3280..c03ccef9f5 100644
--- a/cpp/src/svm/results.cuh
+++ b/cpp/src/svm/results.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -179,7 +179,7 @@ class Results {
   /** Return non zero dual coefficients.
    *
    * @param [in] val_tmp device pointer with dual coefficients
-   * @param [out] dual_coefs device pointer of non-zero dual coefficiens,
+   * @param [out] dual_coefs device pointer of non-zero dual coefficients,
    *   unallocated on entry, on exit size [n_support]
    * @param [out] n_support number of support vectors
    */
diff --git a/cpp/src/svm/smoblocksolve.cuh b/cpp/src/svm/smoblocksolve.cuh
index 0ae00e8840..9c681cd914 100644
--- a/cpp/src/svm/smoblocksolve.cuh
+++ b/cpp/src/svm/smoblocksolve.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -123,7 +123,7 @@ namespace SVM {
  * @tparam math_t floating point data type
  * @tparam WSIZE working set size (max 1024)
  * @param [in] y_array target labels size [n_train]
- * @param [in] n_train number of trainig vectors
+ * @param [in] n_train number of training vectors
  * @param [inout] alpha dual coefficients, size [n_train]
  * @param [in] n_ws number of elements in the working set
  * @param [out] delta_alpha change in the dual coeff of vectors in the working
@@ -136,7 +136,7 @@ namespace SVM {
  *   size [n_train]
  * @param [in] eps tolerance, iterations will stop if the duality gap is smaller
  *  than this value (or if the gap is smaller than 0.1 times the initial gap)
- * @param [out] return_buff, two valies are returned: duality gap and the number
+ * @param [out] return_buff, two values are returned: duality gap and the number
  *   of iterations
  * @param [in] max_iter maximum number of iterations
  * @param [in] svmType type of the SVM problem to solve
@@ -187,7 +187,7 @@ __global__ __launch_bounds__(WSIZE) void SmoBlockSolve(math_t* y_array,
   int64_t n_rows = (svmType == EPSILON_SVR) ? n_train / 2 : n_train;
 
   // Consult KernelCache::GetTile for the layout of the kernel matrix
-  // kernel matrix row and colums indices for workspace vector ws_idx[tid]
+  // kernel matrix row and column indices for workspace vector ws_idx[tid]
   // k_row_idx \in [0..n_rows-1]
   int64_t k_row_idx = (svmType == EPSILON_SVR && idx >= n_rows) ? idx - n_rows : idx;
   // k_col_idx \in [0..n_unique-1]
@@ -262,7 +262,7 @@ __global__ __launch_bounds__(WSIZE) void SmoBlockSolve(math_t* y_array,
     // Knowing that q > 0 (since f_l > f_u and \eta_ul > 0), and 0 <= a_u <= C,
     // the constraints are simplified as
     // y == 1:  q <= C-a_u, and  y == -1: q <= a_u
-    // Similarily we can say for a'_l:
+    // Similarly we can say for a'_l:
     // y == 1:  q <= a_l, and y ==- 1: q <= C - a_l
     // We clip q accordingly before we do the update of a.
     if (threadIdx.x == u) tmp_u = y > 0 ? C - a : a;
diff --git a/cpp/src/svm/smosolver.cuh b/cpp/src/svm/smosolver.cuh
index c56f1254e3..adf5cd7dd6 100644
--- a/cpp/src/svm/smosolver.cuh
+++ b/cpp/src/svm/smosolver.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -231,7 +231,7 @@ class SmoSolver {
                                                      1,
                                                      stream));
     if (svmType == EPSILON_SVR) {
-      // SVR has doubled the number of trainig vectors and we need to update
+      // SVR has doubled the number of training vectors and we need to update
       // alpha for both batches individually
       // #TODO: Call from public API when ready
       RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemv(handle.get_cublas_handle(),
diff --git a/cpp/src/tsa/auto_arima.cuh b/cpp/src/tsa/auto_arima.cuh
index 7bfb56c88b..53ce9478f8 100644
--- a/cpp/src/tsa/auto_arima.cuh
+++ b/cpp/src/tsa/auto_arima.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -209,7 +209,7 @@ inline void divide_by_min_build_index(const DataT* d_matrix,
   auto counting = thrust::make_counting_iterator(0);
 
   // In the first pass, compute d_batch and initialize the matrix that will
-  // be used to compute d_size and d_index (1 for the first occurence of the
+  // be used to compute d_size and d_index (1 for the first occurrence of the
   // minimum of each row, else 0)
   rmm::device_uvector<int> cumul(batch_size * n_sub, stream);
   int* d_cumul = cumul.data();
diff --git a/cpp/src/tsne/barnes_hut_kernels.cuh b/cpp/src/tsne/barnes_hut_kernels.cuh
index 3742a36892..d8f4b9992f 100644
--- a/cpp/src/tsne/barnes_hut_kernels.cuh
+++ b/cpp/src/tsne/barnes_hut_kernels.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ namespace TSNE {
 namespace BH {
 
 /**
- * Intializes the states of objects. This speeds the overall kernel up.
+ * Initializes the states of objects. This speeds the overall kernel up.
  */
 template <typename value_idx, typename value_t>
 __global__ void InitializationKernel(/*int *restrict errd, */
@@ -85,7 +85,7 @@ __global__ void Find_Normalization(value_t* restrict Z_norm, const value_idx N)
  * Figures the bounding boxes for every point in the embedding.
  */
 template <typename value_idx, typename value_t>
-__global__ __launch_bounds__(THREADS1) void BoundingBoxKernel(value_idx* restrict startd,
+__global__ __launch_bounds__(THREADS1) void BoundingBoxKernel(value_idx* restrict started,
                                                               value_idx* restrict childd,
                                                               value_t* restrict massd,
                                                               value_t* restrict posxd,
@@ -163,7 +163,7 @@ __global__ __launch_bounds__(THREADS1) void BoundingBoxKernel(value_idx* restric
     atomicExch(radiusd, fmaxf(maxx - minx, maxy - miny) * 0.5f + 1e-5f);
 
     massd[NNODES]  = -1.0f;
-    startd[NNODES] = 0;
+    started[NNODES] = 0;
     posxd[NNODES]  = (minx + maxx) * 0.5f;
     posyd[NNODES]  = (miny + maxy) * 0.5f;
 
@@ -333,7 +333,7 @@ __global__ __launch_bounds__(THREADS2) void TreeBuildingKernel(/* int *restrict
  * Clean more state vectors.
  */
 template <typename value_idx, typename value_t>
-__global__ __launch_bounds__(1024, 1) void ClearKernel2(value_idx* restrict startd,
+__global__ __launch_bounds__(1024, 1) void ClearKernel2(value_idx* restrict started,
                                                         value_t* restrict massd,
                                                         const value_idx NNODES,
                                                         const value_idx* restrict bottomd)
@@ -347,7 +347,7 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel2(value_idx* restrict star
 #pragma unroll
   for (; k < NNODES; k += inc) {
     massd[k]  = -1.0f;
-    startd[k] = -1;
+    started[k] = -1;
   }
 }
 
@@ -493,7 +493,7 @@ __global__ __launch_bounds__(THREADS3,
 template <typename value_idx>
 __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restrict sortd,
                                                                 const value_idx* restrict countd,
-                                                                volatile value_idx* restrict startd,
+                                                                volatile value_idx* restrict started,
                                                                 value_idx* restrict childd,
                                                                 const value_idx NNODES,
                                                                 const value_idx N,
@@ -511,7 +511,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restr
     if (++limiter > NNODES) break;
 
     // Not a child so skip
-    if ((start = startd[k]) < 0) continue;
+    if ((start = started[k]) < 0) continue;
 
     int j = 0;
     for (int i = 0; i < 4; i++) {
@@ -524,7 +524,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restr
         }
         if (ch >= N) {
           // child is a cell
-          startd[ch] = start;
+          started[ch] = start;
           start += countd[ch];  // add #bodies in subtree
         } else if (start <= NNODES and start >= 0) {
           // child is a body
diff --git a/cpp/src/tsne/barnes_hut_tsne.cuh b/cpp/src/tsne/barnes_hut_tsne.cuh
index c274aab4e7..782cd627fb 100644
--- a/cpp/src/tsne/barnes_hut_tsne.cuh
+++ b/cpp/src/tsne/barnes_hut_tsne.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -56,7 +56,7 @@ value_t Barnes_Hut(value_t* VAL,
 
   value_t kl_div = 0;
 
-  // Get device properites
+  // Get device properties
   //---------------------------------------------------
   const int blocks = raft::getMultiProcessorCount();
 
diff --git a/cpp/src/tsne/cannylab/bh.cu b/cpp/src/tsne/cannylab/bh.cu
index d280ae6f76..4fd2625fc6 100644
--- a/cpp/src/tsne/cannylab/bh.cu
+++ b/cpp/src/tsne/cannylab/bh.cu
@@ -984,4 +984,4 @@ int main(int argc, char* argv[])
   cudaFree(minl);
 
   return 0;
-}
\ No newline at end of file
+}
diff --git a/cpp/src/tsne/exact_kernels.cuh b/cpp/src/tsne/exact_kernels.cuh
index 8a10d39ce7..e7a92ac2ec 100644
--- a/cpp/src/tsne/exact_kernels.cuh
+++ b/cpp/src/tsne/exact_kernels.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -90,7 +90,7 @@ __global__ void sigmas_kernel(const value_t* restrict distances,
 }
 
 /****************************************/
-/* Finds the best Gaussian bandwith for
+/* Finds the best Gaussian bandwidth for
     each row in the dataset             */
 template <typename value_idx, typename value_t>
 __global__ void sigmas_kernel_2d(const value_t* restrict distances,
@@ -400,8 +400,8 @@ value_t repulsive_forces(const value_t* restrict Y,
 }
 
 /****************************************/
-/* Applys or integrates all forces. Uses
-    more gains and contrains the output
+/* Applies or integrates all forces. Uses
+    more gains and constrains the output
     for output stability                */
 template <typename value_idx, typename value_t>
 __global__ void apply_kernel(value_t* restrict Y,
diff --git a/cpp/src/tsne/fft_tsne.cuh b/cpp/src/tsne/fft_tsne.cuh
index 2ca7506b01..bde81a7c8c 100644
--- a/cpp/src/tsne/fft_tsne.cuh
+++ b/cpp/src/tsne/fft_tsne.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -169,7 +169,7 @@ value_t FFT_TSNE(value_t* VAL,
   auto stream        = handle.get_stream();
   auto thrust_policy = handle.get_thrust_policy();
 
-  // Get device properites
+  // Get device properties
   //---------------------------------------------------
   const int mp_count          = raft::getMultiProcessorCount();
   const int dev_major_version = MLCommon::getDeviceCapability().first;
@@ -580,7 +580,7 @@ value_t FFT_TSNE(value_t* VAL,
                         attractive_forces_device.size();
 
     if (grad_norm <= params.min_grad_norm) {
-      CUML_LOG_DEBUG("Breaking early as `min_grad_norm` was satisifed, after %d iterations", iter);
+      CUML_LOG_DEBUG("Breaking early as `min_grad_norm` was satisfied, after %d iterations", iter);
       break;
     }
   }
diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.cuh b/cpp/src/umap/fuzzy_simpl_set/naive.cuh
index bd14876e37..f674b0ba0f 100644
--- a/cpp/src/umap/fuzzy_simpl_set/naive.cuh
+++ b/cpp/src/umap/fuzzy_simpl_set/naive.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -68,7 +68,7 @@ static const float MIN_K_DIST_SCALE   = 1e-3;
  *
  * @param local_connectivity: The local connectivity required -- i.e. the number of nearest
  *                            neighbors that should be assumed to be connected at a local
- *                            level. The higher this value the more connecte the manifold
+ *                            level. The higher this value the more connected the manifold
  *                            becomes locally. In practice, this should not be more than the
  *                            local intrinsic dimension of the manifold.
  *
diff --git a/cpp/src/umap/simpl_set_embed/algo.cuh b/cpp/src/umap/simpl_set_embed/algo.cuh
index ad941fe659..4c153e6da9 100644
--- a/cpp/src/umap/simpl_set_embed/algo.cuh
+++ b/cpp/src/umap/simpl_set_embed/algo.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -173,7 +173,7 @@ T create_gradient_rounding_factor(
   const int* head, int nnz, int n_samples, T alpha, rmm::cuda_stream_view stream)
 {
   rmm::device_uvector<T> buffer(n_samples, stream);
-  // calcuate the maximum number of edges conected to 1 vertex.
+  // calculate the maximum number of edges connected to 1 vertex.
   thrust::reduce_by_key(rmm::exec_policy(stream),
                         head,
                         head + nnz,
diff --git a/cpp/src_prims/linalg/batched/gemv.cuh b/cpp/src_prims/linalg/batched/gemv.cuh
index 57985e499a..7412ce4cac 100644
--- a/cpp/src_prims/linalg/batched/gemv.cuh
+++ b/cpp/src_prims/linalg/batched/gemv.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,7 +32,7 @@ namespace Batched {
  * @tparam VecLen number of elements
  * @param x x vector
  * @param y y vector
- * @param smem dynamic shared memory needed for reduction. It must be atleast of
+ * @param smem dynamic shared memory needed for reduction. It must be at least of
  *             size: `sizeof(DataT) * nWarps`.
  * @param broadcast only thread 0 will contain the final dot product if false,
  *                  else every thread will contain this value
diff --git a/cpp/src_prims/linalg/batched/matrix.cuh b/cpp/src_prims/linalg/batched/matrix.cuh
index f5c5267438..51cc4dec0b 100644
--- a/cpp/src_prims/linalg/batched/matrix.cuh
+++ b/cpp/src_prims/linalg/batched/matrix.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -221,7 +221,7 @@ class Matrix {
     initialize(setZero);
   }
 
-  //! Destructor: nothing to destroy explicitely
+  //! Destructor: nothing to destroy explicitly
   ~Matrix() {}
 
   //! Copy constructor
@@ -784,7 +784,7 @@ Matrix<T> operator-(const Matrix<T>& A, const Matrix<T>& B)
 }
 
 /**
- * @brief Unary substraction
+ * @brief Unary subtraction
  *
  * @param[in]  A  Batched matrix A
  * @return -A
diff --git a/cpp/src_prims/sparse/batched/csr.cuh b/cpp/src_prims/sparse/batched/csr.cuh
index 752ff2f344..91b02287f7 100644
--- a/cpp/src_prims/sparse/batched/csr.cuh
+++ b/cpp/src_prims/sparse/batched/csr.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -210,7 +210,7 @@ class CSR {
   {
   }
 
-  //! Destructor: nothing to destroy explicitely
+  //! Destructor: nothing to destroy explicitly
   ~CSR() {}
 
   //! Copy constructor
diff --git a/cpp/test/prims/batched/matrix.cu b/cpp/test/prims/batched/matrix.cu
index 250efe5157..fcd1d2dfc0 100644
--- a/cpp/test/prims/batched/matrix.cu
+++ b/cpp/test/prims/batched/matrix.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@ enum MatrixOperation {
   AZT_op,         // Matrix-vector product (with GEMM)
   ZA_op,          // Vector-matrix product (with GEMM)
   ApB_op,         // Addition
-  AmB_op,         // Substraction
+  AmB_op,         // Subtraction
   AkB_op,         // Kronecker product
   AsolveZ_op,     // Linear equation solver Ax=b
   LaggedZ_op,     // Lag matrix
diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu
index c133808754..9cf3accd9c 100644
--- a/cpp/test/sg/fil_test.cu
+++ b/cpp/test/sg/fil_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -204,7 +204,7 @@ void adjust_threshold_to_treelite(
 {
   // in treelite (take left node if val [op] threshold),
   // the meaning of the condition is reversed compared to FIL;
-  // thus, "<" in treelite corresonds to comparison ">=" used by FIL
+  // thus, "<" in treelite corresponds to comparison ">=" used by FIL
   // https://github.com/dmlc/treelite/blob/master/include/treelite/tree.h#L243
   // TODO(levsnv): remove workaround once confirmed to work with empty category lists in Treelite
   if (isnan(*pthreshold)) {
diff --git a/cpp/test/sg/lars_test.cu b/cpp/test/sg/lars_test.cu
index aec1b8d215..898a56476c 100644
--- a/cpp/test/sg/lars_test.cu
+++ b/cpp/test/sg/lars_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -140,7 +140,7 @@ class LarsTest : public ::testing::Test {
     // reference solution.
     raft::copy(GU, G, n_cols * n_cols, stream);
     if (!copy_G) {
-      // zero the new colum of G
+      // zero the new column of G
       RAFT_CUDA_TRY(
         cudaMemsetAsync(GU + (n_active - 1) * n_cols, 0, n_cols * sizeof(math_t), stream));
     }
diff --git a/cpp/test/sg/linear_svm_test.cu b/cpp/test/sg/linear_svm_test.cu
index 742e68211f..7ba6ea6ff0 100644
--- a/cpp/test/sg/linear_svm_test.cu
+++ b/cpp/test/sg/linear_svm_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -469,8 +469,8 @@ struct TestRegTargets {
     mp.loss          = std::get<0>(ps);
     mp.penalty       = std::get<1>(ps);
     mp.fit_intercept = std::get<2>(ps);
-    // The regularization parameter strongly affects the model perfomance in some cases,
-    // a larger-than-default value of C seems to always yeild better scores on this generated
+    // The regularization parameter strongly affects the model performance in some cases,
+    // a larger-than-default value of C seems to always yield better scores on this generated
     // dataset.
     mp.C       = 100.0;
     mp.epsilon = std::get<5>(ps);
diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index 1f88b1e877..c264108df6 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -692,7 +692,7 @@ class RFQuantileVariableBinsTest : public ::testing::TestWithParam<QuantileTestP
     raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
     thrust::device_vector<T> data(params.n_rows);
 
-    // n_uniques gauranteed to be non-zero and smaller than `max_n_bins`
+    // n_uniques guaranteed to be non-zero and smaller than `max_n_bins`
     int n_uniques;
     while ((n_uniques = rand() % params.max_n_bins) == 0) {}
 
diff --git a/cpp/test/sg/shap_kernel.cu b/cpp/test/sg/shap_kernel.cu
index f71960731b..518c95d919 100644
--- a/cpp/test/sg/shap_kernel.cu
+++ b/cpp/test/sg/shap_kernel.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -74,7 +74,7 @@ class MakeKSHAPDatasetTest : public ::testing::TestWithParam<MakeKSHAPDatasetInp
 
     // Initialize arrays:
 
-    // Aassign a sentinel value to the observation to check easily later
+    // Assign a sentinel value to the observation to check easily later
     T sent_value = nrows_X * params.nrows_background * params.ncols * 100;
     for (i = 0; i < params.ncols; i++) {
       o_ptr[i] = sent_value;
diff --git a/cpp/test/sg/svc_test.cu b/cpp/test/sg/svc_test.cu
index 1d3e48daeb..a7dee1f46c 100644
--- a/cpp/test/sg/svc_test.cu
+++ b/cpp/test/sg/svc_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1180,7 +1180,7 @@ TYPED_TEST(SmoSolverTest, MemoryLeak)
     {blobInput{1, 0.001, KernelParams{POLYNOMIAL, 400, 5, 10}, 1000, 1000}, ThrowException::Yes}};
   // For the second set of input parameters  training will fail, some kernel
   // function values would be 1e400 or larger, which does not fit fp64.
-  // This will lead to NaN diff in SmoSolver, which whill throw an exception
+  // This will lead to NaN diff in SmoSolver, which will throw an exception
   // to stop fitting.
   size_t free1, total, free2;
   RAFT_CUDA_TRY(cudaMemGetInfo(&free1, &total));
@@ -1222,10 +1222,10 @@ TYPED_TEST(SmoSolverTest, DISABLED_MillionRows)
 {
   auto stream = this->handle.get_stream();
   if (sizeof(TypeParam) == 8) {
-    GTEST_SKIP();  // Skip the test for double imput
+    GTEST_SKIP();  // Skip the test for double input
   } else {
     // Stress test the kernel matrix calculation by calculating a kernel tile
-    // with more the 2.8B elemnts. This would fail with int32 adressing. The test
+    // with more than 2.8B elements. This would fail with int32 addressing. The test
     // is currently disabled because the memory usage might be prohibitive on CI
     // The test will be enabled once https://github.com/rapidsai/cuml/pull/2449
     // is merged, that PR would reduce the kernel tile memory size.
diff --git a/cpp/test/sg/tsne_test.cu b/cpp/test/sg/tsne_test.cu
index 14964a5955..5a027f7ecd 100644
--- a/cpp/test/sg/tsne_test.cu
+++ b/cpp/test/sg/tsne_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -162,7 +162,7 @@ class TSNETest : public ::testing::TestWithParam<TSNEInput> {
                       false);
     handle.sync_stream(stream);
 
-    // Compute theorical KL div
+    // Compute theoretical KL div
     results.kl_div_ref =
       get_kl_div(model_params, runner.COO_Matrix, pw_emb_dists.data(), n, stream);
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5a5598d639..a2bfff9ade 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -7,7 +7,7 @@ and we provide practitioners with the easy fit-predict-transform paradigm
 without ever having to program on a GPU.
 
 As data gets larger, algorithms running on a CPU becomes slow and cumbersome.
-RAPIDS provides users a streamlined approach where data is intially loaded
+RAPIDS provides users a streamlined approach where data is initially loaded
 in the GPU, and compute tasks can be performed on it directly.
 
 cuML is fully open source, and the RAPIDS team welcomes new and seasoned
diff --git a/notebooks/random_forest_mnmg_demo.ipynb b/notebooks/random_forest_mnmg_demo.ipynb
index 06e9f23823..c41ab91c65 100755
--- a/notebooks/random_forest_mnmg_demo.ipynb
+++ b/notebooks/random_forest_mnmg_demo.ipynb
@@ -168,7 +168,7 @@
    "source": [
     "%%time\n",
     "\n",
-    "# Use all avilable CPU cores\n",
+    "# Use all available CPU cores\n",
     "skl_model = sklRF(max_depth=max_depth, n_estimators=n_trees, n_jobs=-1)\n",
     "skl_model.fit(X_train.get(), y_train.get())"
    ]
diff --git a/notebooks/target_encoder_walkthrough.ipynb b/notebooks/target_encoder_walkthrough.ipynb
index 48bc83a8a7..a08a0eb1e1 100644
--- a/notebooks/target_encoder_walkthrough.ipynb
+++ b/notebooks/target_encoder_walkthrough.ipynb
@@ -79,7 +79,7 @@
    "source": [
     "<a id=\"criteo\"></a>\n",
     "### Criteo data\n",
-    "The [criteo 1-TB benchmark](https://github.com/rambler-digital-solutions/criteo-1tb-benchmark) is a well-known dataset for click thourgh rate modeling. We only use three categorical features to make it a simple dataset for the problem."
+    "The [criteo 1-TB benchmark](https://github.com/rambler-digital-solutions/criteo-1tb-benchmark) is a well-known dataset for click-through rate modeling. We only use three categorical features to make it a simple dataset for the problem."
    ]
   },
   {
@@ -178,7 +178,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Label encoding transforms string columns to integer columns. However, the mapping from a string to an integer is arbitary, which makes the encoded features less informative. For example, the first three rows of `cat_2` are `9218`, `5875` and `5199`. Although `5875` is closer to `5199` than `9218`, there is absolutely no guarantee that the string of `5875` is more similar to string of `5199` than string of `9218`. In other words, a tree classifier has make many splits to learn the pattern buried within such encoded features.   "
+    "Label encoding transforms string columns to integer columns. However, the mapping from a string to an integer is arbitrary, which makes the encoded features less informative. For example, the first three rows of `cat_2` are `9218`, `5875` and `5199`. Although `5875` is closer to `5199` than `9218`, there is absolutely no guarantee that the string of `5875` is more similar to string of `5199` than string of `9218`. In other words, a tree classifier has to make many splits to learn the pattern buried within such encoded features.   "
    ]
   },
   {
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000..7461b1ef18
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,9 @@
+[tool.codespell]
+# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
+# this is only to allow you to run codespell interactively
+skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild,CHANGELOG.md,_stop_words.py"
+# ignore short words, and typename parameters like OffsetT
+ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
+ignore-words-list = "inout,numer,startd,couldn"
+builtin = "clear"
+quiet-level = 3
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index ed8b71bb4e..3cb1cd9104 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -50,7 +50,7 @@ message(VERBOSE "CUML_PY: Searching for existing CUML C++ installations before d
 message(VERBOSE "CUML_PY: Disabling all mnmg components and comms libraries: ${SINGLEGPU}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Experimental: Choose which algorithms are built into libcuml++.so. Can specify individual algorithms or groups in a semicolon-separated list.")
-message(VERBOSE "CUML_PY: Building cuML with algoriths: '${CUML_ALGORITHMS}'.")
+message(VERBOSE "CUML_PY: Building cuML with algorithms: '${CUML_ALGORITHMS}'.")
 
 set(CUML_CPP_TARGET "cuml++")
 set(CUML_CPP_SRC "../cpp")
diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_data.py b/python/cuml/_thirdparty/sklearn/preprocessing/_data.py
index ac4dd23b4e..f1e9eac615 100644
--- a/python/cuml/_thirdparty/sklearn/preprocessing/_data.py
+++ b/python/cuml/_thirdparty/sklearn/preprocessing/_data.py
@@ -302,7 +302,7 @@ def _reset(self):
         __init__ parameters are not touched.
         """
 
-        # Checking one attribute is enough, becase they are all set together
+        # Checking one attribute is enough, because they are all set together
         # in partial_fit
         if hasattr(self, 'scale_'):
             del self.scale_
@@ -629,7 +629,7 @@ def _reset(self):
         __init__ parameters are not touched.
         """
 
-        # Checking one attribute is enough, becase they are all set together
+        # Checking one attribute is enough, because they are all set together
         # in partial_fit
         if hasattr(self, 'scale_'):
             del self.scale_
@@ -934,7 +934,7 @@ def _reset(self):
         __init__ parameters are not touched.
         """
 
-        # Checking one attribute is enough, becase they are all set together
+        # Checking one attribute is enough, because they are all set together
         # in partial_fit
         if hasattr(self, 'scale_'):
             del self.scale_
diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py b/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
index 3ab9f0ddce..7bada72d4b 100644
--- a/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
+++ b/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
@@ -582,7 +582,7 @@ def _get_missing_features_info(self, X):
             imputer_mask = sparse_constructor(
                 (mask, X.indices.copy(), X.indptr.copy()),
                 shape=X.shape, dtype=np.float32)
-            # temporarly switch to using float32 as
+            # temporarily switch to using float32 as
             # cupy cannot operate with bool as of now
 
             if self.features == 'missing-only':
diff --git a/python/cuml/benchmark/automated/utils/utils.py b/python/cuml/benchmark/automated/utils/utils.py
index 4bf6c718bb..5e6315f8e5 100644
--- a/python/cuml/benchmark/automated/utils/utils.py
+++ b/python/cuml/benchmark/automated/utils/utils.py
@@ -22,7 +22,7 @@
         "falling back to pytest_benchmark fixtures.\n"
     )
 
-    # if rapids_pytest_benchmark is not available, just perfrom time-only
+    # if rapids_pytest_benchmark is not available, just perform time-only
     # benchmarking and replace the util functions with nops
     import pytest_benchmark
 
@@ -266,8 +266,8 @@ def _benchmark_algo(
         Either 'training' or 'inference', describe the algorithm/model
         step to be benchmarked
     dataset :
-        Tuple with the data and a dictionnary that describes how it was built.
-        The dictionnary can be later used during the NVTX benchmark.
+        Tuple with the data and a dictionary that describes how it was built.
+        The dictionary can be later used during the NVTX benchmark.
     setup_kwargs :
         Algorithm/model setup kwargs
     training_kwargs :
diff --git a/python/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cluster/hdbscan/hdbscan.pyx
index 4bfecb3136..5c40295080 100644
--- a/python/cuml/cluster/hdbscan/hdbscan.pyx
+++ b/python/cuml/cluster/hdbscan/hdbscan.pyx
@@ -423,7 +423,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         utilizing plotting tools. This requires the `hdbscan` CPU
         Python package to be installed.
 
-    gen_single_linkage_tree_ : bool, optinal (default=False)
+    gen_single_linkage_tree_ : bool, optional (default=False)
         Whether to populate the `single_linkage_tree_` member for
         utilizing plotting tools. This requires the `hdbscan` CPU
         Python package t be installed.
@@ -435,7 +435,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         (`cuml.global_settings.output_type`) will be used. See
         :ref:`output-data-type-configuration` for more info.
 
-    prediction_data : bool, optinal (default=False)
+    prediction_data : bool, optional (default=False)
         Whether to generate extra cached data for predicting labels or
         membership vectors few new unseen points later. If you wish to
         persist the clustering object for later re-use you probably want
@@ -457,7 +457,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         A score of how persistent each cluster is. A score of 1.0 represents
         a perfectly stable cluster that persists over all distance scales,
         while a score of 0.0 represents a perfectly ephemeral cluster. These
-        scores can be guage the relative coherence of the clusters output
+        scores can help gauge the relative coherence of the clusters output
         by the algorithm.
 
     condensed_tree_ : CondensedTree object
diff --git a/python/cuml/dask/common/dask_df_utils.py b/python/cuml/dask/common/dask_df_utils.py
index eb3cf45db9..d608232ffa 100644
--- a/python/cuml/dask/common/dask_df_utils.py
+++ b/python/cuml/dask/common/dask_df_utils.py
@@ -67,7 +67,7 @@ def to_pandas(df):
 
     meta = c.submit(get_meta, dfs[0])
 
-    # Using new variabe for local result to stop race-condition in scheduler
+    # Using new variable for local result to stop race-condition in scheduler
     # Ref: https://github.com/dask/dask/issues/6027
     meta_local = meta.result()
 
diff --git a/python/cuml/dask/common/input_utils.py b/python/cuml/dask/common/input_utils.py
index 9176c86613..688de03219 100644
--- a/python/cuml/dask/common/input_utils.py
+++ b/python/cuml/dask/common/input_utils.py
@@ -86,7 +86,7 @@ def __init__(
     def get_client(cls, client=None):
         return default_client() if client is None else client
 
-    """ Class methods for initalization """
+    """ Class methods for initialization """
 
     @classmethod
     def create(cls, data, client=None):
diff --git a/python/cuml/dask/common/utils.py b/python/cuml/dask/common/utils.py
index eec417bdf7..27a10cc14f 100644
--- a/python/cuml/dask/common/utils.py
+++ b/python/cuml/dask/common/utils.py
@@ -178,7 +178,7 @@ class MultiHolderLock:
     A per-process synchronization lock allowing multiple concurrent holders
     at any one time. This is used in situations where resources might be
     limited and it's important that the number of concurrent users of
-    the resources are constained.
+    the resources are constrained.
 
     This lock is serializable, but relies on a Python threading.Lock
     underneath to properly synchronize internal state across threads.
diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py
index d2b4c967b9..27dd579c14 100644
--- a/python/cuml/dask/datasets/regression.py
+++ b/python/cuml/dask/datasets/regression.py
@@ -230,7 +230,7 @@ def _dask_make_low_rank_covariance(
     eigen-vector and the squared, low-rank singular values.
     With a memory usage of only O(n_features ^ 2) in this case, we pass
     this covariance matrix to workers to generate each part of X
-    embarassingly parallel from a multi-variate normal with mean 0
+    embarrassingly parallel from a multi-variate normal with mean 0
     and generated covariance.
     """
     local_rs = cp.random.RandomState(seed=seed)
@@ -441,8 +441,8 @@ def make_regression(
         transpose is performed on each part. This may cause memory to spike \
         (other parameters make order `F` by construction)
      2. When `n_targets > 1` and `order = 'F'` as above, we have to \
-        explicity transpose the `y` array. If `coef = True`, then we also \
-        explicity transpose the `ground_truth` array
+        explicitly transpose the `y` array. If `coef = True`, then we also \
+        explicitly transpose the `ground_truth` array
      3. When `shuffle = True` and `order = F`, there are memory spikes to \
         shuffle the `F` order arrays
 
diff --git a/python/cuml/dask/ensemble/__init__.py b/python/cuml/dask/ensemble/__init__.py
index 5bfb8de61a..fd3ebd95a8 100755
--- a/python/cuml/dask/ensemble/__init__.py
+++ b/python/cuml/dask/ensemble/__init__.py
@@ -23,5 +23,5 @@
     from cuml.dask.ensemble.randomforestregressor import RandomForestRegressor
 else:
     warnings.warn(
-        "Dask not found. All Dask-based multi-GPU operation is disabed."
+        "Dask not found. All Dask-based multi-GPU operation is disabled."
     )
diff --git a/python/cuml/dask/extended/linear_model/__init__.py b/python/cuml/dask/extended/linear_model/__init__.py
index 55001cfe19..8f8cba28a1 100644
--- a/python/cuml/dask/extended/linear_model/__init__.py
+++ b/python/cuml/dask/extended/linear_model/__init__.py
@@ -23,5 +23,5 @@
     )
 else:
     warnings.warn(
-        "Dask-glm not found. Multi-GPU logistic regression is disabed."
+        "Dask-glm not found. Multi-GPU logistic regression is disabled."
     )
diff --git a/python/cuml/dask/linear_model/__init__.py b/python/cuml/dask/linear_model/__init__.py
index 5eb0bd2dc9..4f8594a665 100644
--- a/python/cuml/dask/linear_model/__init__.py
+++ b/python/cuml/dask/linear_model/__init__.py
@@ -24,5 +24,5 @@
     from cuml.dask.linear_model.elastic_net import ElasticNet
 else:
     warnings.warn(
-        "Dask not found. All Dask-based multi-GPU operation is disabed."
+        "Dask not found. All Dask-based multi-GPU operation is disabled."
     )
diff --git a/python/cuml/dask/linear_model/linear_regression.py b/python/cuml/dask/linear_model/linear_regression.py
index ad07745166..0d50c2ec24 100644
--- a/python/cuml/dask/linear_model/linear_regression.py
+++ b/python/cuml/dask/linear_model/linear_regression.py
@@ -47,7 +47,7 @@ class LinearRegression(
         SVD is slower, but guaranteed to be stable.
     fit_intercept : boolean (default = True)
         LinearRegression adds an additional term c to correct for the global
-        mean of y, modeling the reponse as "x * beta + c".
+        mean of y, modeling the response as "x * beta + c".
         If False, the model expects that you have centered the data.
     normalize : boolean (default = False)
         If True, the predictors in X will be normalized by dividing by its
diff --git a/python/cuml/dask/manifold/__init__.py b/python/cuml/dask/manifold/__init__.py
index 46924a75ca..d795183cce 100644
--- a/python/cuml/dask/manifold/__init__.py
+++ b/python/cuml/dask/manifold/__init__.py
@@ -22,7 +22,7 @@
     from cuml.dask.manifold.umap import UMAP
 else:
     warnings.warn(
-        "Dask not found. All Dask-based multi-GPU operation is disabed."
+        "Dask not found. All Dask-based multi-GPU operation is disabled."
     )
 
 __all__ = ["UMAP"]
diff --git a/python/cuml/dask/metrics/__init__.py b/python/cuml/dask/metrics/__init__.py
index d881208bee..09bbb44932 100644
--- a/python/cuml/dask/metrics/__init__.py
+++ b/python/cuml/dask/metrics/__init__.py
@@ -21,5 +21,5 @@
     from cuml.dask.metrics.confusion_matrix import confusion_matrix
 else:
     warnings.warn(
-        "Dask not found. All Dask-based multi-GPU operation is disabed."
+        "Dask not found. All Dask-based multi-GPU operation is disabled."
     )
diff --git a/python/cuml/dask/neighbors/__init__.py b/python/cuml/dask/neighbors/__init__.py
index b378576beb..8ccc2e40fd 100644
--- a/python/cuml/dask/neighbors/__init__.py
+++ b/python/cuml/dask/neighbors/__init__.py
@@ -22,5 +22,5 @@
     from cuml.dask.neighbors.kneighbors_regressor import KNeighborsRegressor
 else:
     warnings.warn(
-        "Dask not found. All Dask-based multi-GPU operation is disabed."
+        "Dask not found. All Dask-based multi-GPU operation is disabled."
     )
diff --git a/python/cuml/dask/solvers/__init__.py b/python/cuml/dask/solvers/__init__.py
index 909935423e..6f7c45e18f 100644
--- a/python/cuml/dask/solvers/__init__.py
+++ b/python/cuml/dask/solvers/__init__.py
@@ -20,5 +20,5 @@
     from cuml.dask.solvers.cd import CD  # NOQA
 else:
     warnings.warn(
-        "Dask not found. All Dask-based multi-GPU operation is disabed."
+        "Dask not found. All Dask-based multi-GPU operation is disabled."
     )
diff --git a/python/cuml/decomposition/pca.pyx b/python/cuml/decomposition/pca.pyx
index 64bfb8df3f..e65afd2f56 100644
--- a/python/cuml/decomposition/pca.pyx
+++ b/python/cuml/decomposition/pca.pyx
@@ -421,7 +421,7 @@ class PCA(UniversalBase,
         if self.n_components is None:
             logger.warn(
                 'Warning(`fit`): As of v0.16, PCA invoked without an'
-                ' n_components argument defauts to using'
+                ' n_components argument defaults to using'
                 ' min(n_samples, n_features) rather than 1'
             )
             n_rows = X.shape[0]
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index 608b25b439..11275109e7 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -1,6 +1,6 @@
 
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -208,7 +208,7 @@ class RandomForestClassifier(BaseRandomForestModel,
            and ``max(2, ceil(min_samples_split * n_rows))`` is the minimum
            number of samples for each split.
     min_impurity_decrease : float (default = 0.0)
-        Minimum decrease in impurity requried for
+        Minimum decrease in impurity required for
         node to be spilt.
     max_batch_size : int (default = 4096)
         Maximum number of nodes that can be processed in a given batch.
@@ -629,7 +629,7 @@ class RandomForestClassifier(BaseRandomForestModel,
                       convert_dtype=True,
                       fil_sparse_format='auto') -> CumlArray:
         """
-        Predicts class probabilites for X. This function uses the GPU
+        Predicts class probabilities for X. This function uses the GPU
         implementation of predict.
 
         Parameters
diff --git a/python/cuml/experimental/hyperopt_utils/plotting_utils.py b/python/cuml/experimental/hyperopt_utils/plotting_utils.py
index 107446761b..25a155975b 100644
--- a/python/cuml/experimental/hyperopt_utils/plotting_utils.py
+++ b/python/cuml/experimental/hyperopt_utils/plotting_utils.py
@@ -42,7 +42,7 @@ def plot_heatmap(df, col1, col2):
 
 def plot_search_results(res):
     """
-    Plots by fixing all paramters except one parameter to its best value using
+    Plots by fixing all parameters except one parameter to its best value using
     matplotlib.
 
     Accepts results from grid or random search from dask-ml.
@@ -64,7 +64,7 @@ def plot_search_results(res):
     try:
         # Grid Search
         params = res.param_grid
-        # Ploting results
+        # Plotting results
         fig, ax = plt.subplots(
             1, len(params), sharex="none", sharey="all", figsize=(20, 5)
         )
@@ -84,11 +84,11 @@ def plot_search_results(res):
             )
             ax[i].set_xlabel(p.upper())
     except Exception as e:
-        # Randomized Seach
+        # Randomized Search
         print("Cannot generate plots because of ", type(e), "trying again...")
         try:
             params = res.param_distributions
-            # Ploting results
+            # Plotting results
             fig, ax = plt.subplots(
                 1, len(params), sharex="none", sharey="all", figsize=(20, 5)
             )
diff --git a/python/cuml/experimental/linear_model/lars.pyx b/python/cuml/experimental/linear_model/lars.pyx
index d609ac6b51..a4dd301ec2 100644
--- a/python/cuml/experimental/linear_model/lars.pyx
+++ b/python/cuml/experimental/linear_model/lars.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -222,7 +222,7 @@ class Lars(Base, RegressorMixin):
                 Gram = cp.dot(X.T, X)
             except MemoryError as err:
                 if self.precompute:
-                    logger.debug("Not enought memory to store the Gram matrix."
+                    logger.debug("Not enough memory to store the Gram matrix."
                                  " Proceeding without it.")
         return Gram
 
diff --git a/python/cuml/explainer/kernel_shap.pyx b/python/cuml/explainer/kernel_shap.pyx
index fb3b49c216..2e23eaf153 100644
--- a/python/cuml/explainer/kernel_shap.pyx
+++ b/python/cuml/explainer/kernel_shap.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -146,7 +146,7 @@ class KernelExplainer(SHAPBase):
     dtype : np.float32 or np.float64 (default = None)
         Parameter to specify the precision of data to generate to call the
         model. If not specified, the explainer will try to get the dtype
-        of the model, if it cannot be queried, then it will defaul to
+        of the model, if it cannot be queried, then it will default to
         np.float32.
     output_type : 'cupy' or 'numpy' (default = 'numpy')
         Parameter to specify the type of data to output.
@@ -404,7 +404,7 @@ class KernelExplainer(SHAPBase):
                 axis=1
             )
 
-            # we neeed to do l1 regularization if user left it as auto and we
+            # we need to do l1 regularization if user left it as auto and we
             # evaluated less than 20% of the space, or if the user set it
             # and we did not evaluate all the space (i.e. nsamples_random == 0)
             nonzero_inds = None
diff --git a/python/cuml/explainer/permutation_shap.pyx b/python/cuml/explainer/permutation_shap.pyx
index 63506c4c82..4bb3f6ae27 100644
--- a/python/cuml/explainer/permutation_shap.pyx
+++ b/python/cuml/explainer/permutation_shap.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -153,7 +153,7 @@ class PermutationExplainer(SHAPBase):
     dtype : np.float32 or np.float64 (default = None)
         Parameter to specify the precision of data to generate to call the
         model. If not specified, the explainer will try to get the dtype
-        of the model, if it cannot be queried, then it will defaul to
+        of the model, if it cannot be queried, then it will default to
         np.float32.
     output_type : 'cupy' or 'numpy' (default = 'numpy')
         Parameter to specify the type of data to output.
diff --git a/python/cuml/feature_extraction/_tfidf.py b/python/cuml/feature_extraction/_tfidf.py
index ce9a209ba0..18b358a461 100644
--- a/python/cuml/feature_extraction/_tfidf.py
+++ b/python/cuml/feature_extraction/_tfidf.py
@@ -273,7 +273,7 @@ def _check_is_idf_fitted(self):
         if not hasattr(self, "idf_"):
             msg = (
                 "This TfidfTransformer instance is not fitted or the "
-                "value of use_idf is not consistant between "
+                "value of use_idf is not consistent between "
                 ".fit() and .transform()."
             )
             raise NotFittedError(msg)
diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx
index 75fa59c97b..24d677236c 100644
--- a/python/cuml/fil/fil.pyx
+++ b/python/cuml/fil/fil.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -230,7 +230,7 @@ cdef extern from "cuml/fil/fil.h" namespace "ML::fil":
         # limit number of CUDA blocks launched per GPU SM (or unlimited if 0)
         int blocks_per_sm
         # multiple (neighboring) threads infer on the same tree within a block
-        # this improves memory bandwith near tree root (but uses more shared
+        # this improves memory bandwidth near tree root (but uses more shared
         # memory)
         int threads_per_tree
         # n_items is how many input samples (items) any thread processes.
@@ -605,12 +605,12 @@ class ForestInference(Base,
            algo='AUTO'
 
     blocks_per_sm : integer (default=0)
-        (experimental) Indicates how the number of thread blocks to lauch
+        (experimental) Indicates how the number of thread blocks to launch
         for the inference kernel is determined.
 
         - ``0`` (default): Launches the number of blocks proportional to
           the number of data rows
-        - ``>= 1``: Attempts to lauch blocks_per_sm blocks per SM. This
+        - ``>= 1``: Attempts to launch blocks_per_sm blocks per SM. This
           will fail if blocks_per_sm blocks result in more threads than the
           maximum supported number of threads per GPU. Even if successful,
           it is not guaranteed that blocks_per_sm blocks will run on an SM
@@ -793,12 +793,12 @@ class ForestInference(Base,
                algo='AUTO'
 
         blocks_per_sm : integer (default=0)
-            (experimental) Indicates how the number of thread blocks to lauch
+            (experimental) Indicates how the number of thread blocks to launch
             for the inference kernel is determined.
 
             - ``0`` (default): Launches the number of blocks proportional to
               the number of data rows
-            - ``>= 1``: Attempts to lauch blocks_per_sm blocks per SM. This
+            - ``>= 1``: Attempts to launch blocks_per_sm blocks per SM. This
               will fail if blocks_per_sm blocks result in more threads than the
               maximum supported number of threads per GPU. Even if successful,
               it is not guaranteed that blocks_per_sm blocks will run on an SM
@@ -896,12 +896,12 @@ class ForestInference(Base,
                algo='AUTO'
 
         blocks_per_sm : integer (default=0)
-            (experimental) Indicates how the number of thread blocks to lauch
+            (experimental) Indicates how the number of thread blocks to launch
             for the inference kernel is determined.
 
             - ``0`` (default): Launches the number of blocks proportional to
               the number of data rows
-            - ``>= 1``: Attempts to lauch blocks_per_sm blocks per SM. This
+            - ``>= 1``: Attempts to launch blocks_per_sm blocks per SM. This
               will fail if blocks_per_sm blocks result in more threads than the
               maximum supported number of threads per GPU. Even if successful,
               it is not guaranteed that blocks_per_sm blocks will run on an SM
diff --git a/python/cuml/internals/base.pyx b/python/cuml/internals/base.pyx
index 6533b46a1e..38a749dd82 100644
--- a/python/cuml/internals/base.pyx
+++ b/python/cuml/internals/base.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -421,7 +421,7 @@ class Base(TagsMixin,
             self.n_features_in_ = X.shape[1]
 
     def _more_tags(self):
-        # 'preserves_dtype' tag's Scikit definition currently only appies to
+        # 'preserves_dtype' tag's Scikit definition currently only applies to
         # transformers and whether the transform method conserves the dtype
         # (in that case returns an empty list, otherwise the dtype it
         # casts to).
diff --git a/python/cuml/internals/base_return_types.py b/python/cuml/internals/base_return_types.py
index ad58632d37..b5d952ba20 100644
--- a/python/cuml/internals/base_return_types.py
+++ b/python/cuml/internals/base_return_types.py
@@ -55,7 +55,7 @@ def _get_base_return_type(class_name, attr):
         if attr.__annotations__["return"] == class_name:
             return "base"
     except Exception:
-        assert False, "Shouldnt get here"
+        assert False, "Shouldn't get here"
         return None
 
     return None
diff --git a/python/cuml/internals/memory_utils.py b/python/cuml/internals/memory_utils.py
index 921c17dac6..be21b52998 100644
--- a/python/cuml/internals/memory_utils.py
+++ b/python/cuml/internals/memory_utils.py
@@ -223,8 +223,8 @@ def _get_size_from_shape(shape, dtype):
 def set_global_output_type(output_type):
     """
     Method to set cuML's single GPU estimators global output type.
-    It will be used by all estimators unless overriden in their initialization
-    with their own output_type parameter. Can also be overriden by the context
+    It will be used by all estimators unless overridden in their initialization
+    with their own output_type parameter. Can also be overridden by the context
     manager method :func:`using_output_type`.
 
     Parameters
diff --git a/python/cuml/linear_model/linear_regression.pyx b/python/cuml/linear_model/linear_regression.pyx
index 950ac556cd..9e04e40b57 100644
--- a/python/cuml/linear_model/linear_regression.pyx
+++ b/python/cuml/linear_model/linear_regression.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ def fit_multi_target(X, y, fit_intercept=True, sample_weight=None):
     y_arr = y.to_output('array')
 
     if fit_intercept:
-        # Add column containg ones to fit intercept.
+        # Add column containing ones to fit intercept.
         nrow, ncol = X.shape
         X_wide = X.mem_type.xpy.empty_like(
             X_arr, shape=(nrow, ncol + 1)
diff --git a/python/cuml/metrics/accuracy.pyx b/python/cuml/metrics/accuracy.pyx
index 9d1ea47b96..c09f9f7820 100644
--- a/python/cuml/metrics/accuracy.pyx
+++ b/python/cuml/metrics/accuracy.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ cdef extern from "cuml/metrics/metrics.hpp" namespace "ML::Metrics":
 @cuml.internals.api_return_any()
 def accuracy_score(ground_truth, predictions, handle=None, convert_dtype=True):
     """
-    Calcuates the accuracy score of a classification model.
+    Calculates the accuracy score of a classification model.
 
         Parameters
         ----------
diff --git a/python/cuml/metrics/kl_divergence.pyx b/python/cuml/metrics/kl_divergence.pyx
index f9d7ba80e3..3dbaed3d6a 100644
--- a/python/cuml/metrics/kl_divergence.pyx
+++ b/python/cuml/metrics/kl_divergence.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -45,8 +45,8 @@ def kl_divergence(P, Q, handle=None, convert_dtype=True):
     Calculates the "Kullback-Leibler" Divergence
     The KL divergence tells us how well the probability distribution Q
     approximates the probability distribution P
-    It is often also used as a 'distance metric' between two probablity
-    ditributions (not symmetric)
+    It is often also used as a 'distance metric' between two probability
+    distributions (not symmetric)
 
     Parameters
     ----------
diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx
index d5a63dc188..8b35b1b7a8 100644
--- a/python/cuml/metrics/pairwise_distances.pyx
+++ b/python/cuml/metrics/pairwise_distances.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -219,7 +219,7 @@ def nan_euclidean_distances(
     X_m[missing_X] = 0
     Y_m[missing_Y] = 0
 
-    # Adjust distances for sqaured
+    # Adjust distances for squared
     if X_m.shape == Y_m.shape:
         if (X_m == Y_m).all():
             distances = cp.asarray(pairwise_distances(
@@ -490,7 +490,7 @@ def sparse_pairwise_distances(X, Y=None, metric="euclidean", handle=None,
         will increase memory used for the method.
 
     metric_arg : float, optional (default = 2)
-        Additionnal metric-specific argument.
+        Additional metric-specific argument.
         For Minkowski it's the p-norm to apply.
 
     Returns
diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py
index 097b49d410..bf3029ea4f 100644
--- a/python/cuml/naive_bayes/naive_bayes.py
+++ b/python/cuml/naive_bayes/naive_bayes.py
@@ -375,7 +375,7 @@ def fit(self, X, y, sample_weight=None) -> "GaussianNB":
             n_features is the number of features.
         y : array-like shape (n_samples) Target values.
         sample_weight : array-like of shape (n_samples)
-            Weights applied to individial samples (1. for unweighted).
+            Weights applied to individual samples (1. for unweighted).
             Currently sample weight is ignored.
         """
         return self._partial_fit(
@@ -895,7 +895,7 @@ def fit(self, X, y, sample_weight=None) -> "_BaseDiscreteNB":
             n_features is the number of features.
         y : array-like shape (n_samples) Target values.
         sample_weight : array-like of shape (n_samples)
-            Weights applied to individial samples (1. for unweighted).
+            Weights applied to individual samples (1. for unweighted).
             Currently sample weight is ignored.
         """
         self.fit_called_ = False
@@ -1932,7 +1932,7 @@ def _joint_log_likelihood(self, X):
             col_indices = X.col
 
             # Adjust with the non-zeros data by adding jll_data (non-zeros)
-            # and substracting jll_zeros which are the zeros
+            # and subtracting jll_zeros which are the zeros
             # that were first computed
             for i in range(self.n_classes_):
                 jll_data = self.smoothed_cat_count[
diff --git a/python/cuml/neighbors/kernel_density.py b/python/cuml/neighbors/kernel_density.py
index e7d6fdc8ab..eb11de24e9 100644
--- a/python/cuml/neighbors/kernel_density.py
+++ b/python/cuml/neighbors/kernel_density.py
@@ -324,7 +324,7 @@ def score_samples(self, X):
         # Note that sklearns user guide is wrong
         # It says the (unnormalised) probability output for
         #  the kernel density is sum(K(x,h)).
-        # In fact what they implment is (1/n)*sum(K(x,h))
+        # In fact what they implement is (1/n)*sum(K(x,h))
         # Here we divide by n in normal probability space
         # Which becomes -log(n) in log probability space
         sum_weights = (
@@ -419,7 +419,7 @@ def sample(self, n_samples=1, random_state=None):
             X = rng.normal(size=(n_samples, dim))
             s_sq = cp.einsum("ij,ij->i", X, X).get()
 
-            # do this on the CPU becaause we don't have
+            # do this on the CPU because we don't have
             # a gammainc function  readily available
             correction = cp.array(
                 gammainc(0.5 * dim, 0.5 * s_sq) ** (1.0 / dim)
diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx
index 441d43a2aa..4369b5d475 100644
--- a/python/cuml/neighbors/nearest_neighbors.pyx
+++ b/python/cuml/neighbors/nearest_neighbors.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -226,7 +226,7 @@ class NearestNeighbors(UniversalBase,
             - nprobe: (int) at query time, number of cells used for search
             - M: (int) number of subquantizers
             - n_bits: (int) bits allocated per subquantizer
-            - usePrecomputedTables : (bool) wether to use precomputed tables
+            - usePrecomputedTables : (bool) whether to use precomputed tables
 
     metric_expanded : bool
         Can increase performance in Minkowski-based (Lp) metrics (for p > 1)
@@ -396,7 +396,7 @@ class NearestNeighbors(UniversalBase,
                           "(see cuML issue #4020)")
 
             if not is_dense(X):
-                raise ValueError("Approximate Nearest Neigbors methods "
+                raise ValueError("Approximate Nearest Neighbors methods "
                                  "require dense data")
 
             additional_info = {'n_samples': self.n_samples_fit_,
diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py
index 7b295d394b..3b4a93ab7e 100644
--- a/python/cuml/preprocessing/TargetEncoder.py
+++ b/python/cuml/preprocessing/TargetEncoder.py
@@ -39,7 +39,7 @@ def func(ds):
 class TargetEncoder:
     """
     A cudf based implementation of target encoding [1]_, which converts
-    one or mulitple categorical variables, 'Xs', with the average of
+    one or multiple categorical variables, 'Xs', with the average of
     corresponding values of the target variable, 'Y'. The input data is
     grouped by the columns `Xs` and the aggregated mean value of `Y` of
     each group is calculated to replace each value of `Xs`. Several
@@ -106,7 +106,7 @@ def __init__(
             raise ValueError(f"smooth {smooth} is not zero or positive")
         if n_folds < 0 or not isinstance(n_folds, int):
             raise ValueError(
-                "n_folds {} is not a postive integer".format(n_folds)
+                "n_folds {} is not a positive integer".format(n_folds)
             )
         if output_type not in {"cupy", "numpy", "auto"}:
             msg = (
diff --git a/python/cuml/preprocessing/text/stem/porter_stemmer.py b/python/cuml/preprocessing/text/stem/porter_stemmer.py
index 7a376a9a9f..382582a78d 100644
--- a/python/cuml/preprocessing/text/stem/porter_stemmer.py
+++ b/python/cuml/preprocessing/text/stem/porter_stemmer.py
@@ -150,7 +150,7 @@ def _step1a(self, word_str_ser, can_replace_mask=None):
             SSES -> SS                         caresses  ->  caress
             IES  -> I                          ponies    ->  poni
                                                ties      ->  ti
-                                               (### this is for orignal impl)
+                                               (### this is for original impl)
             SS   -> SS                         caress    ->  caress
             S    ->                            cats      ->  cat
         """
@@ -500,7 +500,7 @@ def _step3(self, word_str_ser, can_replace_mask=None):
         Step 3
 
             (m>0) ICATE ->  IC              triplicate     ->  triplic
-            (m>0) ATIVE ->                  formative      ->  form
+            (m>0) ATIVE ->                  formative      ->  form
             (m>0) ALIZE ->  AL              formalize      ->  formal
             (m>0) ICITI ->  IC              electriciti    ->  electric
             (m>0) ICAL  ->  IC              electrical     ->  electric
@@ -515,7 +515,7 @@ def _step3(self, word_str_ser, can_replace_mask=None):
             word_str_ser,
             [
                 ("icate", "ic", has_positive_measure),
-                ("ative", "", has_positive_measure),
+                ("ative", "", has_positive_measure),
                 ("alize", "al", has_positive_measure),
                 ("iciti", "ic", has_positive_measure),
                 ("ical", "ic", has_positive_measure),
@@ -543,8 +543,8 @@ def _step4(self, word_str_ser, can_replace_mask=None):
             (m>1) ENT   ->                  dependent      ->  depend
             (m>1 and (*S or *T)) ION ->     adoption       ->  adopt
             (m>1) OU    ->                  homologou      ->  homolog
-            (m>1) ISM   ->                  communism      ->  commun
-            (m>1) ATE   ->                  activate       ->  activ
+            (m>1) ISM   ->                  communism      ->  commun
+            (m>1) ATE   ->                  activate       ->  activ
             (m>1) ITI   ->                  angulariti     ->  angular
             (m>1) OUS   ->                  homologous     ->  homolog
             (m>1) IVE   ->                  effective      ->  effect
@@ -665,7 +665,7 @@ def _step5b(self, word_str_ser, can_replace_mask=None):
         Step 5b
 
             (m > 1 and *d and *L) -> single letter
-                                    controll       ->  control
+                                    controll       ->  control
                                     roll           ->  roll
         """
 
@@ -799,7 +799,7 @@ def apply_rule_list(word_str_ser, rules, condition_flag):
 
 def build_can_replace_mask(len_mask, mask):
     """
-    Creates a cudf series represeting can_replace_mask of length=len_mask
+    Creates a cudf series representing can_replace_mask of length=len_mask
     if mask is None else returns mask
     """
     if mask is None:
diff --git a/python/cuml/solvers/qn.pyx b/python/cuml/solvers/qn.pyx
index 8b334fdc29..6585719f9d 100644
--- a/python/cuml/solvers/qn.pyx
+++ b/python/cuml/solvers/qn.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -347,7 +347,7 @@ class QN(Base,
         Note, this parameter corresponds to `ftol` in
         `scipy.optimize.minimize(method='L-BFGS-B')
         <https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html>`_,
-        which is set by default to a miniscule `2.2e-9` and is not exposed in
+        which is set by default to a minuscule `2.2e-9` and is not exposed in
         `sklearn.LogisticRegression()
         <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`_.
         This condition is meant to protect the solver against doing vanishingly
diff --git a/python/cuml/svm/svc.pyx b/python/cuml/svm/svc.pyx
index c998321dac..f49ab20793 100644
--- a/python/cuml/svm/svc.pyx
+++ b/python/cuml/svm/svc.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -74,7 +74,7 @@ cdef extern from "cuml/svm/svm_parameter.h" namespace "ML::SVM":
         NU_SVR
 
     cdef struct SvmParameter:
-        # parameters for trainig
+        # parameters for training
         double C
         double cache_size
         int max_iter
@@ -157,7 +157,7 @@ class SVC(SVMBase,
         - 'scale': gamma will be se to ``1 / (n_features * X.var())``
 
     coef0 : float (default = 0.0)
-        Independent term in kernel function, only signifficant for poly and
+        Independent term in kernel function, only significant for poly and
         sigmoid
     tol : float (default = 1e-3)
         Tolerance for stopping criterion.
@@ -166,7 +166,7 @@ class SVC(SVMBase,
         the training time, at the cost of higher memory footprint. After
         training the kernel cache is deallocated.
         During prediction, we also need a temporary space to store kernel
-        matrix elements (this can be signifficant if n_support is large).
+        matrix elements (this can be significant if n_support is large).
         The cache_size variable sets an upper limit to the prediction
         buffer as well.
     class_weight : dict or string (default=None)
@@ -195,7 +195,7 @@ class SVC(SVMBase,
         Enable or disable probability estimates.
     random_state: int (default = None)
         Seed for random number generator (used only when probability = True).
-        Currently this argument is not used and a waring will be printed if the
+        Currently this argument is not used and a warning will be printed if the
         user provides it.
     verbose : int or boolean, default=False
         Sets logging level. It must be one of `cuml.common.logger.level_*`.
diff --git a/python/cuml/svm/svm_base.pyx b/python/cuml/svm/svm_base.pyx
index dc3564deb0..2a34554217 100644
--- a/python/cuml/svm/svm_base.pyx
+++ b/python/cuml/svm/svm_base.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -62,7 +62,7 @@ cdef extern from "cuml/svm/svm_parameter.h" namespace "ML::SVM":
         NU_SVR
 
     cdef struct SvmParameter:
-        # parameters for trainig
+        # parameters for training
         double C
         double cache_size
         int max_iter
@@ -130,7 +130,7 @@ class SVMBase(Base,
         - 'scale': gamma will be se to ``1 / (n_features * X.var())``
 
     coef0 : float (default = 0.0)
-        Independent term in kernel function, only signifficant for poly and
+        Independent term in kernel function, only significant for poly and
         sigmoid
     tol : float (default = 1e-3)
         Tolerance for stopping criterion.
@@ -139,7 +139,7 @@ class SVMBase(Base,
         the training time, at the cost of higher memory footprint. After
         training the kernel cache is deallocated.
         During prediction, we also need a temporary space to store kernel
-        matrix elements (this can be signifficant if n_support is large).
+        matrix elements (this can be significant if n_support is large).
         The cache_size variable sets an upper limit to the prediction
         buffer as well.
     max_iter : int (default = 100*n_samples)
@@ -169,7 +169,7 @@ class SVMBase(Base,
         future to represent number support vectors for each class (like
         in Sklearn, see Issue #956)
     support_ : int, shape = [n_support]
-        Device array of suppurt vector indices
+        Device array of support vector indices
     support_vectors_ : float, shape [n_support, n_cols]
         Device array of support vectors
     dual_coef_ : float, shape = [1, n_support]
@@ -285,7 +285,7 @@ class SVMBase(Base,
         """
         Get KernelType from the kernel string.
 
-        Paramaters
+        Parameters
         ----------
         kernel: string, ('linear', 'poly', 'rbf', or 'sigmoid')
         """
@@ -536,7 +536,7 @@ class SVMBase(Base,
             ndarray, cuda array interface compliant array like CuPy
 
         predict_class : boolean
-            Switch whether to retun class label (true), or decision function
+            Switch whether to return class label (true), or decision function
             value (false).
 
         Returns
diff --git a/python/cuml/svm/svr.pyx b/python/cuml/svm/svr.pyx
index 02af929807..4d89d95755 100644
--- a/python/cuml/svm/svr.pyx
+++ b/python/cuml/svm/svr.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ cdef extern from "cuml/svm/svm_parameter.h" namespace "ML::SVM":
         C_SVC, NU_SVC, EPSILON_SVR, NU_SVR
 
     cdef struct SvmParameter:
-        # parameters for trainig
+        # parameters for training
         double C
         double cache_size
         int max_iter
@@ -131,7 +131,7 @@ class SVR(SVMBase, RegressorMixin):
         - 'scale': gamma will be se to ``1 / (n_features * X.var())``
 
     coef0 : float (default = 0.0)
-        Independent term in kernel function, only signifficant for poly and
+        Independent term in kernel function, only significant for poly and
         sigmoid
     tol : float (default = 1e-3)
         Tolerance for stopping criterion.
@@ -144,7 +144,7 @@ class SVR(SVMBase, RegressorMixin):
         the training time, at the cost of higher memory footprint. After
         training the kernel cache is deallocated.
         During prediction, we also need a temporary space to store kernel
-        matrix elements (this can be signifficant if n_support is large).
+        matrix elements (this can be significant if n_support is large).
         The cache_size variable sets an upper limit to the prediction
         buffer as well.
     max_iter : int (default = -1)
@@ -171,7 +171,7 @@ class SVR(SVMBase, RegressorMixin):
         future to represent number support vectors for each class (like
         in Sklearn, see Issue #956)
     support_ : int, shape = [n_support]
-        Device array of suppurt vector indices
+        Device array of support vector indices
     support_vectors_ : float, shape [n_support, n_cols]
         Device array of support vectors
     dual_coef_ : float, shape = [1, n_support]
diff --git a/python/cuml/testing/plugins/quick_run_plugin.py b/python/cuml/testing/plugins/quick_run_plugin.py
index 52e10936c6..cfc7a6eb13 100644
--- a/python/cuml/testing/plugins/quick_run_plugin.py
+++ b/python/cuml/testing/plugins/quick_run_plugin.py
@@ -70,7 +70,7 @@ def get_leaf(node_list: list) -> list:
             for n in node_list:
                 name = getattr(n, "originalname", n.name)
 
-                # Add the interior node if it doesnt exist. Must be a function
+                # Add the interior node if it doesn't exist. Must be a function
                 # to be a leaf
                 if name not in curr_node:
                     if isinstance(n, _pytest.python.Function):
diff --git a/python/cuml/testing/utils.py b/python/cuml/testing/utils.py
index 3b64bd0483..41de2e1f37 100644
--- a/python/cuml/testing/utils.py
+++ b/python/cuml/testing/utils.py
@@ -681,7 +681,7 @@ def compare_svm(
     assert accuracy1 >= accuracy2 - accuracy_tol
 
     if b_tol is None:
-        b_tol = 100 * svm1.tol  # Using deafult tol=1e-3 leads to b_tol=0.1
+        b_tol = 100 * svm1.tol  # Using default tol=1e-3 leads to b_tol=0.1
 
     if accuracy2 < 0.5:
         # Increase error margin for classifiers that are not accurate.
@@ -732,7 +732,7 @@ def compare_svm(
             df2 = svm2.decision_function(X)
             # For classification, the class is determined by
             # sign(decision function). We should not expect tight match for
-            # the actual value of the function, therfore we set large tolerance
+            # the actual value of the function, therefore we set large tolerance
             assert svm_array_equal(
                 df1, df2, tol=1e-1, relative_diff=True, report_summary=True
             )
@@ -742,7 +742,7 @@ def compare_svm(
                 accuracy2,
             )
 
-    # Compare support_ (dataset indicies of points that form the support
+    # Compare support_ (dataset indices of points that form the support
     # vectors) and ensure that some overlap (~1/8) between two exists
     support1 = set(svm1.support_)
     support2 = set(svm2.support_)
diff --git a/python/cuml/tests/stemmer_tests/test_stemmer.py b/python/cuml/tests/stemmer_tests/test_stemmer.py
index b2d5c3f704..8742994500 100644
--- a/python/cuml/tests/stemmer_tests/test_stemmer.py
+++ b/python/cuml/tests/stemmer_tests/test_stemmer.py
@@ -33,7 +33,7 @@ def get_words():
     word_ls = []
     for item in treebank.fileids():
         for (word, tag) in treebank.tagged_words(item):
-            # assuming the words are allready lowered
+            # assuming the words are already lowered
             word = word.lower()
             word_ls.append(word)
 
diff --git a/python/cuml/tests/stemmer_tests/test_steps.py b/python/cuml/tests/stemmer_tests/test_steps.py
index 19b136db02..37a1f689e3 100644
--- a/python/cuml/tests/stemmer_tests/test_steps.py
+++ b/python/cuml/tests/stemmer_tests/test_steps.py
@@ -237,8 +237,8 @@ def test_step4():
         "depend",
         "adopt",
         "homolog",
-        "commun",
-        "activ",
+        "commun",
+        "activ",
         "angular",
         "homolog",
         "effect",
@@ -274,7 +274,7 @@ def test_step5a():
 
 
 def test_step5b():
-    word_str_ser_ls = ["controll", "roll"]
+    word_str_ser_ls = ["controll", "roll"]
     word_str_ser = cudf.Series(word_str_ser_ls)
     expect = ["control", "roll"]
 
@@ -283,7 +283,7 @@ def test_step5b():
     assert list(got.to_pandas().values) == expect
 
     # mask test
-    expect = ["controll", "roll"]
+    expect = ["controll", "roll"]
     mask = cudf.Series([False, True])
     got = st._step5b(word_str_ser, mask)
     assert list(got.to_pandas().values) == expect
diff --git a/python/cuml/tests/test_api.py b/python/cuml/tests/test_api.py
index 916ff22785..8d13499171 100644
--- a/python/cuml/tests/test_api.py
+++ b/python/cuml/tests/test_api.py
@@ -166,7 +166,7 @@ def test_dynamic_tags_and_composition():
     dynamic_tags = dummy_class_with_tags()._get_tags()
     print(dummy_class_with_tags.__mro__)
 
-    # In python, the MRO is so that the uppermost inheritted class
+    # In python, the MRO is so that the uppermost inherited class
     # being closest to the final class, so in our dummy_class_with_tags
     # the F Major input mixin should the C mixin
     assert static_tags["preferred_input_order"] == "F"
diff --git a/python/cuml/tests/test_base.py b/python/cuml/tests/test_base.py
index f2f7d9e516..dc70c8cf22 100644
--- a/python/cuml/tests/test_base.py
+++ b/python/cuml/tests/test_base.py
@@ -137,7 +137,7 @@ def get_param_doc(param_doc_obj, name: str):
         # Ensure the default values are the same
         assert param.default == klass_param.default
 
-        # Make sure we arent accidentally a *args or **kwargs
+        # Make sure we aren't accidentally a *args or **kwargs
         assert (
             klass_param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
             or klass_param.kind == inspect.Parameter.KEYWORD_ONLY
diff --git a/python/cuml/tests/test_linear_svm.py b/python/cuml/tests/test_linear_svm.py
index a1f4b40183..d0e82b7ae9 100644
--- a/python/cuml/tests/test_linear_svm.py
+++ b/python/cuml/tests/test_linear_svm.py
@@ -41,15 +41,15 @@
 
 def good_enough(myscore: float, refscore: float, training_size: int):
     myerr = 1.0 - myscore
-    referr = 1.0 - refscore
+    referr = 1.0 - refscore
     # Extra discount for uncertainty based on the training data.
     # Totally empirical; for <10 samples, the error is allowed
     # to be ~50%, which is a total randomness. But this is ok,
     # since we don't expect the model to be trained from this few
     # samples.
     c = (10000 + training_size) / (100 + 5 * training_size)
-    thresh_rel = referr * (1 + ERROR_TOLERANCE_REL * c)
-    thresh_abs = referr + ERROR_TOLERANCE_ABS * c
+    thresh_rel = referr * (1 + ERROR_TOLERANCE_REL * c)
+    thresh_abs = referr + ERROR_TOLERANCE_ABS * c
     good_rel = myerr <= thresh_rel
     good_abs = myerr <= thresh_abs
     assert good_rel or good_abs, (
diff --git a/python/cuml/tests/test_metrics.py b/python/cuml/tests/test_metrics.py
index eb0ee21219..5e8d8bb7c4 100644
--- a/python/cuml/tests/test_metrics.py
+++ b/python/cuml/tests/test_metrics.py
@@ -1047,7 +1047,7 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
     S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
 
-    # Compare single and double inputs to eachother
+    # Compare single and double inputs to each other
     S = pairwise_distances(X, metric=metric)
     S2 = pairwise_distances(X, Y, metric=metric)
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
diff --git a/python/cuml/tests/test_random_forest.py b/python/cuml/tests/test_random_forest.py
index 5cc5bfdd99..699d83923a 100644
--- a/python/cuml/tests/test_random_forest.py
+++ b/python/cuml/tests/test_random_forest.py
@@ -1191,7 +1191,7 @@ def test_rf_host_memory_leak(large_clf, estimator_type):
         gc.collect()
         final_mem = process.memory_info().rss
 
-    # Some tiny allocations may occur, but we shuld not leak
+    # Some tiny allocations may occur, but we should not leak
     # without bounds, which previously happened
     assert (final_mem - initial_baseline_mem) < 2e6
 
diff --git a/python/cuml/tests/test_svm.py b/python/cuml/tests/test_svm.py
index 978b4ab0a0..a0f82f5dc5 100644
--- a/python/cuml/tests/test_svm.py
+++ b/python/cuml/tests/test_svm.py
@@ -464,7 +464,7 @@ def test_svm_memleak(
     tmp.fit(X_train, y_train)
     ms = get_memsize(tmp)
     print(
-        "Memory consumtion of SVC object is {} MiB".format(
+        "Memory consumption of SVC object is {} MiB".format(
             ms / (1024 * 1024.0)
         )
     )
diff --git a/python/cuml/tests/test_text_feature_extraction.py b/python/cuml/tests/test_text_feature_extraction.py
index 0e74887fc7..f19e7fc55a 100644
--- a/python/cuml/tests/test_text_feature_extraction.py
+++ b/python/cuml/tests/test_text_feature_extraction.py
@@ -483,7 +483,7 @@ def test_vectorizer_empty_token_case():
     """
     We ignore empty tokens right now but sklearn treats them as a character
     we might want to look into this more but
-    this should not be a concern for most piplines
+    this should not be a concern for most pipelines
     """
     corpus = [
         "a b ",
diff --git a/python/cuml/tests/test_train_test_split.py b/python/cuml/tests/test_train_test_split.py
index 9b64160d7a..b6dd4d7847 100644
--- a/python/cuml/tests/test_train_test_split.py
+++ b/python/cuml/tests/test_train_test_split.py
@@ -413,7 +413,7 @@ def test_stratified_binary_classification():
         ]
     )
 
-    # Needs to fail when we have just 1 occurence of a label
+    # Needs to fail when we have just 1 occurrence of a label
     y = cp.array([0, 0, 0, 0, 1])
     with pytest.raises(ValueError):
         train_test_split(X, y, train_size=0.75, stratify=y, shuffle=True)
diff --git a/python/cuml/thirdparty_adapters/adapters.py b/python/cuml/thirdparty_adapters/adapters.py
index 1140ecf79b..8963a7a497 100644
--- a/python/cuml/thirdparty_adapters/adapters.py
+++ b/python/cuml/thirdparty_adapters/adapters.py
@@ -365,7 +365,7 @@ def _masked_column_median(arr, masked_value):
     n_elems = arr.shape[0] - count_missing_values
 
     # If no elements remain after removing missing value, median for
-    # that colum is nan
+    # that column is nan
     nan_cols = cp.logical_or(nan_cols, n_elems <= 0)
 
     col_index = cp.arange(arr_sorted.shape[1])
diff --git a/python/cuml/tsa/batched_lbfgs.py b/python/cuml/tsa/batched_lbfgs.py
index 38c7f66cd4..9a7544e4b6 100644
--- a/python/cuml/tsa/batched_lbfgs.py
+++ b/python/cuml/tsa/batched_lbfgs.py
@@ -188,7 +188,7 @@ def fprime_f(x):
                 task_str = task[ib].tobytes()
                 task_str_strip = task[ib].tobytes().strip(b"\x00").strip()
                 if task_str.startswith(b"FG"):
-                    # needs function evalation
+                    # needs function evaluation
                     f[ib] = fk[ib]
                     g[ib] = gk[ib * n : (ib + 1) * n]
                 elif task_str.startswith(b"NEW_X"):
diff --git a/wiki/cpp/DEVELOPER_GUIDE.md b/wiki/cpp/DEVELOPER_GUIDE.md
index d30910478f..13fe035b80 100644
--- a/wiki/cpp/DEVELOPER_GUIDE.md
+++ b/wiki/cpp/DEVELOPER_GUIDE.md
@@ -318,7 +318,7 @@ void foo(const raft::handle_t& h, ..., cudaStream_t stream )
     ...
 }
 ```
-If thrust 1.9.4 or later is avaiable for use in cuML a similar allocator can be provided for `thrust::device_vector`.
+If thrust 1.9.4 or later is available for use in cuML a similar allocator can be provided for `thrust::device_vector`.
 
 ### <a name="allocationsthrust"></a>Using Thrust
 To ensure that thrust algorithms allocate temporary memory via the provided device memory allocator, use the `ML::thrustAllocatorAdapter` available in `src/common/allocatorAdapter.hpp` with the `thrust::cuda::par` execution policy:
diff --git a/wiki/mnmg/Using_Infiniband_for_MNMG.md b/wiki/mnmg/Using_Infiniband_for_MNMG.md
index 11d6c924e6..4dcad64005 100644
--- a/wiki/mnmg/Using_Infiniband_for_MNMG.md
+++ b/wiki/mnmg/Using_Infiniband_for_MNMG.md
@@ -19,7 +19,7 @@ Install autogen if it's not already installed:
 sudo apt-get install autogen autoconf libtool
 ```
 
-Optionaly install `gdrcopy` for faster GPU-Network card data transfer: 
+Optionally install `gdrcopy` for faster GPU-Network card data transfer: 
 
 From the [ucx wiki](https://github.com/openucx/ucx/wiki/NVIDIA-GPU-Support), `gdrcopy` can be installed, and might be necessary, to enable faster GPU-Network card data transfer.
 
diff --git a/wiki/python/ESTIMATOR_GUIDE.md b/wiki/python/ESTIMATOR_GUIDE.md
index a3832f8def..0324001af2 100644
--- a/wiki/python/ESTIMATOR_GUIDE.md
+++ b/wiki/python/ESTIMATOR_GUIDE.md
@@ -86,7 +86,7 @@ At a high level, all cuML Estimators must:
          ]
    ```
 
-7. Implement the appropriate tags method if any of the [default tags](#estimator-tags-and-cuml-specific-tags) need to be overriden for the new estimator.
+7. Implement the appropriate tags method if any of the [default tags](#estimator-tags-and-cuml-specific-tags) need to be overridden for the new estimator.
 There are some convenience [Mixins](../../python/common/mixins.py), that the estimator can inherit, can be used for indicating the preferred order (column or row major) as well as for sparse input capability.
 
 If other tags are needed, they are static (i.e. don't change depending on the instantiated estimator), and more than one estimator will use them, then implement a new [Mixin](../../python/common/mixins.py), if the tag will be used by a single class then implement the `_more_static_tags` method:
@@ -272,12 +272,12 @@ def get_param_names(self):
 
 Scikit-learn introduced estimator tags in version 0.21, which are used to programmatically inspect the capabilities of estimators. These capabilities include items like sparse matrix support and the need for positive inputs, among other things. cuML estimators support _all_ of the tags defined by the Scikit-learn estimator [developer guide](https://scikit-learn.org/stable/developers/index.html), and will add support for any tag added there.
 
-Additionaly, some tags specific to cuML have been added. These tags may or may not be specific to GPU data types and can even apply outside of automated testing, such as allowing for the optimization of data generation. This can be useful for pipelines and HPO, among other things. These are:
+Additionally, some tags specific to cuML have been added. These tags may or may not be specific to GPU data types and can even apply outside of automated testing, such as allowing for the optimization of data generation. This can be useful for pipelines and HPO, among other things. These are:
 
 - `X_types_gpu` (default=['2darray'])
    Analogous to `X_types`, indicates what types of GPU objects an estimator can take. `2darray` includes GPU ndarray objects (like CuPy and Numba) and cuDF objects, since they are all processed the same by `input_utils`. `sparse` includes `CuPy` sparse arrays.
  - `preferred_input_order` (default=None)
-   One of ['F', 'C', None]. Whether an estimator "prefers" data in column-major ('F') or row-major ('C') contiguous memory layout. If different methods prefer different layouts or neither format is benefitial, then it is defined to `None` unless there is a good reason to chose either `F` or `C`. For example, all of `fit`, `predict`, etc. in an estimator use `F` but only `score` uses`C`.
+   One of ['F', 'C', None]. Whether an estimator "prefers" data in column-major ('F') or row-major ('C') contiguous memory layout. If different methods prefer different layouts or neither format is beneficial, then it is defined to `None` unless there is a good reason to choose either `F` or `C`. For example, all of `fit`, `predict`, etc. in an estimator use `F` but only `score` uses `C`.
 - `dynamic_tags` (default=False)
    Most estimators only need to define the tags statically, which facilitates the usage of tags in general. But some estimators might need to modify the values of a tag based on runtime attributes, so this tag reflects whether an estimator needs to do that. This tag value is automatically set by the `Base` estimator class if an Estimator has defined the `_more_tags` instance method.
 
@@ -304,7 +304,7 @@ Any array-like attribute stored in an estimator needs to be convertible to the u
 
 The `CumlArrayDescriptor` behaves different when accessed internally (from within one of `cuml`'s functions) vs. externally (for user code outside the cuml module). Internally, it behaves exactly like a normal attribute and will return the previous value set. Externally, the array will get converted to the user's desired output type lazily and repeated conversion will be cached.
 
-Performing the arrray conversion lazily (i.e. converting the input array to the desired output type, only when the attribute it read from for the first time) can greatly help reduce memory consumption, but can have unintended impacts the developers should be aware of. For example, benchmarking should take into account the lazy evaluation and ensure the array conversion is included in any profiling.
+Performing the array conversion lazily (i.e. converting the input array to the desired output type, only when the attribute is read from for the first time) can greatly help reduce memory consumption, but can have unintended impacts the developers should be aware of. For example, benchmarking should take into account the lazy evaluation and ensure the array conversion is included in any profiling.
 
 #### Defining Array-Like Attributes
 
@@ -474,7 +474,7 @@ def predict(self, X) -> CumlArray:
 
 #### Option 2: Manual Estimator Method Decoration
 
-While the automatic converions from type annotations works for many estimator functions, sometimes its necessary to explicitly decorate an estimator method. This allows developers greater flexibility over the input argument, output type and output dtype.
+While the automatic conversions from type annotations work for many estimator functions, sometimes it's necessary to explicitly decorate an estimator method. This allows developers greater flexibility over the input argument, output type and output dtype.
 
 Which decorator to use for an estimator function is determined by 2 factors:
 

From f9ad96d65057d2a9cebc0b36d3a486aa903b262c Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 9 Mar 2023 12:01:21 -0800
Subject: [PATCH 02/13] Rename `started` to `startd` in t-SNE Barnes-Hut kernels

---
 cpp/src/tsne/barnes_hut_kernels.cuh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/src/tsne/barnes_hut_kernels.cuh b/cpp/src/tsne/barnes_hut_kernels.cuh
index d8f4b9992f..5059c1a8f1 100644
--- a/cpp/src/tsne/barnes_hut_kernels.cuh
+++ b/cpp/src/tsne/barnes_hut_kernels.cuh
@@ -85,7 +85,7 @@ __global__ void Find_Normalization(value_t* restrict Z_norm, const value_idx N)
  * Figures the bounding boxes for every point in the embedding.
  */
 template <typename value_idx, typename value_t>
-__global__ __launch_bounds__(THREADS1) void BoundingBoxKernel(value_idx* restrict started,
+__global__ __launch_bounds__(THREADS1) void BoundingBoxKernel(value_idx* restrict startd,
                                                               value_idx* restrict childd,
                                                               value_t* restrict massd,
                                                               value_t* restrict posxd,
@@ -163,7 +163,7 @@ __global__ __launch_bounds__(THREADS1) void BoundingBoxKernel(value_idx* restric
     atomicExch(radiusd, fmaxf(maxx - minx, maxy - miny) * 0.5f + 1e-5f);
 
     massd[NNODES]  = -1.0f;
-    started[NNODES] = 0;
+    startd[NNODES] = 0;
     posxd[NNODES]  = (minx + maxx) * 0.5f;
     posyd[NNODES]  = (miny + maxy) * 0.5f;
 
@@ -333,7 +333,7 @@ __global__ __launch_bounds__(THREADS2) void TreeBuildingKernel(/* int *restrict
  * Clean more state vectors.
  */
 template <typename value_idx, typename value_t>
-__global__ __launch_bounds__(1024, 1) void ClearKernel2(value_idx* restrict started,
+__global__ __launch_bounds__(1024, 1) void ClearKernel2(value_idx* restrict startd,
                                                         value_t* restrict massd,
                                                         const value_idx NNODES,
                                                         const value_idx* restrict bottomd)
@@ -347,7 +347,7 @@ __global__ __launch_bounds__(1024, 1) void ClearKernel2(value_idx* restrict star
 #pragma unroll
   for (; k < NNODES; k += inc) {
     massd[k]  = -1.0f;
-    started[k] = -1;
+    startd[k] = -1;
   }
 }
 
@@ -493,7 +493,7 @@ __global__ __launch_bounds__(THREADS3,
 template <typename value_idx>
 __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restrict sortd,
                                                                 const value_idx* restrict countd,
-                                                                volatile value_idx* restrict started,
+                                                                volatile value_idx* restrict startd,
                                                                 value_idx* restrict childd,
                                                                 const value_idx NNODES,
                                                                 const value_idx N,
@@ -511,7 +511,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restr
     if (++limiter > NNODES) break;
 
     // Not a child so skip
-    if ((start = started[k]) < 0) continue;
+    if ((start = startd[k]) < 0) continue;
 
     int j = 0;
     for (int i = 0; i < 4; i++) {
@@ -524,7 +524,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restr
         }
         if (ch >= N) {
           // child is a cell
-          started[ch] = start;
+          startd[ch] = start;
           start += countd[ch];  // add #bodies in subtree
         } else if (start <= NNODES and start >= 0) {
           // child is a body

From 886a0db430fa8d4854bd54d9b9fd679fd02e81b3 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 9 Mar 2023 12:12:14 -0800
Subject: [PATCH 03/13] More fixes

---
 .pre-commit-config.yaml                                |  2 +-
 pyproject.toml                                         |  2 +-
 .../_thirdparty/sklearn/preprocessing/_imputation.py   |  2 +-
 python/cuml/preprocessing/text/stem/porter_stemmer.py  | 10 +++++-----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1a4b2c87af..127c271689 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
           - id: codespell
             additional_dependencies: [tomli]
             args: ["--toml", "pyproject.toml"]
-            exclude: (?x)^(^CHANGELOG.md$)
+            exclude: (?x)^(.*stemmer.*|^CHANGELOG.md$)
     - repo: local
       hooks:
           - id: no-deprecationwarning
diff --git a/pyproject.toml b/pyproject.toml
index 7461b1ef18..10eac3a74a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.codespell]
 # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
 # this is only to allow you to run codespell interactively
-skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild,CHANGELOG.md,_stop_words.py"
+skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild,CHANGELOG.md,_stop_words.py,,*stemmer.*"
 # ignore short words, and typename parameters like OffsetT
 ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
 ignore-words-list = "inout,numer,startd,couldn"
diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py b/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
index 7bada72d4b..3fe160beac 100644
--- a/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
+++ b/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
@@ -582,7 +582,7 @@ def _get_missing_features_info(self, X):
             imputer_mask = sparse_constructor(
                 (mask, X.indices.copy(), X.indptr.copy()),
                 shape=X.shape, dtype=np.float32)
-            # temporary switch to using float32 as
+            # temporarily switch to using float32 as
             # cupy cannot operate with bool as of now
 
             if self.features == 'missing-only':
diff --git a/python/cuml/preprocessing/text/stem/porter_stemmer.py b/python/cuml/preprocessing/text/stem/porter_stemmer.py
index 382582a78d..b49ad4f04b 100644
--- a/python/cuml/preprocessing/text/stem/porter_stemmer.py
+++ b/python/cuml/preprocessing/text/stem/porter_stemmer.py
@@ -500,7 +500,7 @@ def _step3(self, word_str_ser, can_replace_mask=None):
         Step 3
 
             (m>0) ICATE ->  IC              triplicate     ->  triplic
-            (m>0) ACTIVE ->                  formative      ->  form
+            (m>0) ATIVE ->                  formative      ->  form
             (m>0) ALIZE ->  AL              formalize      ->  formal
             (m>0) ICITI ->  IC              electriciti    ->  electric
             (m>0) ICAL  ->  IC              electrical     ->  electric
@@ -515,7 +515,7 @@ def _step3(self, word_str_ser, can_replace_mask=None):
             word_str_ser,
             [
                 ("icate", "ic", has_positive_measure),
-                ("active", "", has_positive_measure),
+                ("ative", "", has_positive_measure),
                 ("alize", "al", has_positive_measure),
                 ("iciti", "ic", has_positive_measure),
                 ("ical", "ic", has_positive_measure),
@@ -543,8 +543,8 @@ def _step4(self, word_str_ser, can_replace_mask=None):
             (m>1) ENT   ->                  dependent      ->  depend
             (m>1 and (*S or *T)) ION ->     adoption       ->  adopt
             (m>1) OU    ->                  homologou      ->  homolog
-            (m>1) ISM   ->                  communism      ->  common
-            (m>1) ATE   ->                  activate       ->  active
+            (m>1) ISM   ->                  communism      ->  commun
+            (m>1) ATE   ->                  activate       ->  activ
             (m>1) ITI   ->                  angulariti     ->  angular
             (m>1) OUS   ->                  homologous     ->  homolog
             (m>1) IVE   ->                  effective      ->  effect
@@ -665,7 +665,7 @@ def _step5b(self, word_str_ser, can_replace_mask=None):
         Step 5b
 
             (m > 1 and *d and *L) -> single letter
-                                    control       ->  control
+                                    controll       ->  control
                                     roll           ->  roll
         """
 

From 9e545ec634fbf2400d96fa69b720b26469e356b0 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 9 Mar 2023 12:13:04 -0800
Subject: [PATCH 04/13] more stemmer fixes

---
 python/cuml/tests/stemmer_tests/test_steps.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cuml/tests/stemmer_tests/test_steps.py b/python/cuml/tests/stemmer_tests/test_steps.py
index 37a1f689e3..19b136db02 100644
--- a/python/cuml/tests/stemmer_tests/test_steps.py
+++ b/python/cuml/tests/stemmer_tests/test_steps.py
@@ -237,8 +237,8 @@ def test_step4():
         "depend",
         "adopt",
         "homolog",
-        "common",
-        "active",
+        "commun",
+        "activ",
         "angular",
         "homolog",
         "effect",
@@ -274,7 +274,7 @@ def test_step5a():
 
 
 def test_step5b():
-    word_str_ser_ls = ["control", "roll"]
+    word_str_ser_ls = ["controll", "roll"]
     word_str_ser = cudf.Series(word_str_ser_ls)
     expect = ["control", "roll"]
 
@@ -283,7 +283,7 @@ def test_step5b():
     assert list(got.to_pandas().values) == expect
 
     # mask test
-    expect = ["control", "roll"]
+    expect = ["controll", "roll"]
     mask = cudf.Series([False, True])
     got = st._step5b(word_str_ser, mask)
     assert list(got.to_pandas().values) == expect

From 316e583270c7602299d8193a56b8ff0df3f60378 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 9 Mar 2023 12:15:09 -0800
Subject: [PATCH 05/13] ignore stop words

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 127c271689..8654298d17 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
           - id: codespell
             additional_dependencies: [tomli]
             args: ["--toml", "pyproject.toml"]
-            exclude: (?x)^(.*stemmer.*|^CHANGELOG.md$)
+            exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
     - repo: local
       hooks:
           - id: no-deprecationwarning

From b572859eee65736d448d5982731aec774bd3cf1e Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 9 Mar 2023 19:24:50 -0800
Subject: [PATCH 06/13] Empty commit


From de2822fc23f907fcaaca8c9b2fac72026ba12edb Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Fri, 10 Mar 2023 09:16:17 -0800
Subject: [PATCH 07/13] Empty commit


From f0a35053045afff8ed7874f0e0ebde9d54b287cf Mon Sep 17 00:00:00 2001
From: Ben Frederickson <github@benfrederickson.com>
Date: Mon, 13 Mar 2023 10:26:58 -0700
Subject: [PATCH 08/13] Apply suggestions from code review

Co-authored-by: Carl Simon Adorf <sadorf@nvidia.com>
---
 python/cuml/cluster/hdbscan/hdbscan.pyx         | 4 ++--
 python/cuml/ensemble/randomforestclassifier.pyx | 2 +-
 python/cuml/explainer/kernel_shap.pyx           | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cluster/hdbscan/hdbscan.pyx
index 5c40295080..ba52652ef4 100644
--- a/python/cuml/cluster/hdbscan/hdbscan.pyx
+++ b/python/cuml/cluster/hdbscan/hdbscan.pyx
@@ -457,8 +457,8 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         A score of how persistent each cluster is. A score of 1.0 represents
         a perfectly stable cluster that persists over all distance scales,
         while a score of 0.0 represents a perfectly ephemeral cluster. These
-        scores can be gauge the relative coherence of the clusters output
-        by the algorithm.
+        scores can be used to gauge the relative coherence of the
+        clusters output by the algorithm.
 
     condensed_tree_ : CondensedTree object
         The condensed tree produced by HDBSCAN. The object has methods
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index 11275109e7..1a08489c0e 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -209,7 +209,7 @@ class RandomForestClassifier(BaseRandomForestModel,
            number of samples for each split.
     min_impurity_decrease : float (default = 0.0)
         Minimum decrease in impurity required for
-        node to be spilt.
+        node to be split.
     max_batch_size : int (default = 4096)
         Maximum number of nodes that can be processed in a given batch.
     random_state : int (default = None)
diff --git a/python/cuml/explainer/kernel_shap.pyx b/python/cuml/explainer/kernel_shap.pyx
index 2e23eaf153..b11cb1cbc8 100644
--- a/python/cuml/explainer/kernel_shap.pyx
+++ b/python/cuml/explainer/kernel_shap.pyx
@@ -404,7 +404,7 @@ class KernelExplainer(SHAPBase):
                 axis=1
             )
 
-            # we needed to do l1 regularization if user left it as auto and we
+            # we need to do l1 regularization if user left it as auto and we
             # evaluated less than 20% of the space, or if the user set it
             # and we did not evaluate all the space (i.e. nsamples_random == 0)
             nonzero_inds = None

From 5739cf18c2ffb86f89773c9e47c7aa6ed1a14bfb Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 13 Mar 2023 10:28:37 -0700
Subject: [PATCH 09/13] Revert refer back to referr

---
 pyproject.toml                       | 2 +-
 python/cuml/tests/test_linear_svm.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 10eac3a74a..bbefc424c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,6 +4,6 @@
 skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild,CHANGELOG.md,_stop_words.py,,*stemmer.*"
 # ignore short words, and typename parameters like OffsetT
 ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
-ignore-words-list = "inout,numer,startd,couldn"
+ignore-words-list = "inout,numer,startd,couldn,referr"
 builtin = "clear"
 quiet-level = 3
diff --git a/python/cuml/tests/test_linear_svm.py b/python/cuml/tests/test_linear_svm.py
index d0e82b7ae9..a1f4b40183 100644
--- a/python/cuml/tests/test_linear_svm.py
+++ b/python/cuml/tests/test_linear_svm.py
@@ -41,15 +41,15 @@
 
 def good_enough(myscore: float, refscore: float, training_size: int):
     myerr = 1.0 - myscore
-    refer = 1.0 - refscore
+    referr = 1.0 - refscore
     # Extra discount for uncertainty based on the training data.
     # Totally empirical; for <10 samples, the error is allowed
     # to be ~50%, which is a total randomness. But this is ok,
     # since we don't expect the model to be trained from this few
     # samples.
     c = (10000 + training_size) / (100 + 5 * training_size)
-    thresh_rel = refer * (1 + ERROR_TOLERANCE_REL * c)
-    thresh_abs = refer + ERROR_TOLERANCE_ABS * c
+    thresh_rel = referr * (1 + ERROR_TOLERANCE_REL * c)
+    thresh_abs = referr + ERROR_TOLERANCE_ABS * c
     good_rel = myerr <= thresh_rel
     good_abs = myerr <= thresh_abs
     assert good_rel or good_abs, (

From 70682aca9bf2284b3f7f0a7cbcaa2f17179bbdf1 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 13 Mar 2023 10:29:40 -0700
Subject: [PATCH 10/13] codereview suggestion

---
 cpp/src/tsne/cannylab/bh.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/tsne/cannylab/bh.cu b/cpp/src/tsne/cannylab/bh.cu
index 4fd2625fc6..d280ae6f76 100644
--- a/cpp/src/tsne/cannylab/bh.cu
+++ b/cpp/src/tsne/cannylab/bh.cu
@@ -984,4 +984,4 @@ int main(int argc, char* argv[])
   cudaFree(minl);
 
   return 0;
-}
+}
\ No newline at end of file

From 5641c35feda0a75ae7b2d181d7df3e84adb95b5a Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 13 Mar 2023 10:33:56 -0700
Subject: [PATCH 11/13] Add docs to the contributing guide

and comments to the codespell config describing what the options do
---
 CONTRIBUTING.md | 6 ++++++
 pyproject.toml  | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8f59c15780..7c20a3e9e8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -88,6 +88,11 @@ To skip the checks temporarily, use `git commit --no-verify` or its short form
 _Note_: If the auto-formatters' changes affect each other, you may need to go
 through multiple iterations of `git commit` and `git add -u`.
 
+cuML also uses [codespell](https://github.com/codespell-project/codespell) to find spelling
+mistakes, and this check is run as part of the pre-commit hook. To apply the suggested spelling
+fixes, you can run  `codespell -i 3 -w .` from the command-line in the cuML root directory.
+This will bring up an interactive prompt to select which spelling fixes to apply.
+
 ### Summary of pre-commit hooks
 
 The pre-commit hooks configured for this repository consist of a number of
@@ -102,6 +107,7 @@ please see the `.pre-commit-config.yaml` file.
 - _`#include` syntax checker_: Ensures consistent syntax for C++ `#include` statements.
 - _Copyright header checker and auto-formatter_: Ensures the copyright headers
   of files are up-to-date and in the correct format.
+- _`codespell`_: Checks for spelling mistakes.
 
 ### Managing PR labels
 
diff --git a/pyproject.toml b/pyproject.toml
index bbefc424c1..c68ea7ba4d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,5 +5,7 @@ skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild,CHANG
 # ignore short words, and typename parameters like OffsetT
 ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
 ignore-words-list = "inout,numer,startd,couldn,referr"
+# use the 'clear' dictionary for unambiguous spelling mistakes
 builtin = "clear"
+# disable warnings about binary files and wrong encoding
 quiet-level = 3

From 8aefb40a1881a65790e215b57eb6ab431334fbf3 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 13 Mar 2023 15:19:52 -0700
Subject: [PATCH 12/13] Add some docs on how to exclude codespell suggestions

---
 CONTRIBUTING.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7c20a3e9e8..b3818444f9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -93,6 +93,11 @@ mistakes, and this check is run as part of the pre-commit hook. To apply the sug
 fixes, you can run  `codespell -i 3 -w .` from the command-line in the cuML root directory.
 This will bring up an interactive prompt to select which spelling fixes to apply.
 
+If you want to ignore errors highlighted by codespell, you can:
+ * Add the word to the `ignore-words-list` in `pyproject.toml`, to exclude it for all of cuML
+ * Exclude the entire file from spellchecking, by adding it to the `exclude` regex in `.pre-commit-config.yaml`
+ * Ignore only specific lines as shown in https://github.com/codespell-project/codespell/issues/1212#issuecomment-654191881
+
 ### Summary of pre-commit hooks
 
 The pre-commit hooks configured for this repository consist of a number of

From 352106fe4c5cd29d841ec66cd848206777e60d18 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 13 Mar 2023 21:13:06 -0700
Subject: [PATCH 13/13] retrigger ci