diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index abd70a8eec..5156a91ef6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
     hooks:
       - id: cython-lint
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v16.0.1
+    rev: v16.0.6
     hooks:
       - id: clang-format
         types_or: [c, c++, cuda]
diff --git a/BUILD.md b/BUILD.md
index acb1552139..e2d54310af 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -11,7 +11,7 @@ To install cuML from source, ensure the following dependencies are met:
 5. Cython (>= 0.29)
 6. gcc (>= 9.0)
 7. BLAS - Any BLAS compatible with cmake's [FindBLAS](https://cmake.org/cmake/help/v3.14/module/FindBLAS.html). Note that the blas has to be installed to the same folder system as cmake, for example if using conda installed cmake, the blas implementation should also be installed in the conda environment.
-8. clang-format (= 16.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=16` and `clang-tools=16` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
+8. clang-format (= 16.0.6) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=16` and `clang-tools=16` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
 9. NCCL (>=2.4)
 10. UCX [optional] (>= 1.7) - enables point-to-point messaging in the cuML standard communicator. This is necessary for many multi-node multi-GPU cuML algorithms to function.
diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
index d833ec0f2d..472bf1ce6b 100644
--- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -8,8 +8,8 @@ channels:
 - nvidia
 dependencies:
 - c-compiler
-- clang-tools==15.0.7
-- clang==15.0.7
+- clang-tools==16.0.6
+- clang==16.0.6
 - cmake>=3.26.4
 - cuda-version=11.8
 - cudatoolkit
diff --git a/cpp/README.md b/cpp/README.md
index 293aecbe14..fc85d23288 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -18,7 +18,7 @@ The `test` directory has subdirectories that reflect this distinction between th
 1. cmake (>= 3.26.4)
 2. CUDA (>= 11.0)
 3. gcc (>=9.3.0)
-4. clang-format (= 16.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=16` and `clang-tools=16` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
+4. clang-format (= 16.0.6) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=16` and `clang-tools=16` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager.
 
 ### Building cuML:
diff --git a/cpp/bench/sg/rf_classifier.cu b/cpp/bench/sg/rf_classifier.cu
index 9f3a9b1a7d..7bbbcc49d7 100644
--- a/cpp/bench/sg/rf_classifier.cu
+++ b/cpp/bench/sg/rf_classifier.cu
@@ -85,11 +85,11 @@ std::vector<Params> getInputs()
   std::vector<Params> out;
   Params p;
   p.data.rowMajor = false;
-  p.blobs         = {10.0,        // cluster_std
-                     false,       // shuffle
-                     -10.0,       // center_box_min
-                     10.0,        // center_box_max
-                     2152953ULL}; // seed
+  p.blobs         = {10.0,         // cluster_std
+                     false,        // shuffle
+                     -10.0,        // center_box_min
+                     10.0,         // center_box_max
+                     2152953ULL};  // seed
   p.rf = set_rf_params(10,        /*max_depth */
                        (1 << 20), /* max_leaves */
diff --git a/cpp/bench/sg/rf_regressor.cu b/cpp/bench/sg/rf_regressor.cu
index 20592b914a..bc259f391e 100644
--- a/cpp/bench/sg/rf_regressor.cu
+++ b/cpp/bench/sg/rf_regressor.cu
@@ -87,7 +87,7 @@ std::vector<RegParams> getInputs()
   p.regression = {.shuffle        = true, // Better to shuffle when n_informative < ncols
                   .effective_rank = -1,   // dataset generation will be faster
                   .bias           = 4.5,
-                  .tail_strength = 0.5, // unused when effective_rank = -1
+                  .tail_strength  = 0.5,  // unused when effective_rank = -1
                   .noise          = 1.0,
                   .seed           = 12345ULL};
diff --git a/cpp/bench/sg/svr.cu b/cpp/bench/sg/svr.cu
index cb589a007b..41ecd7ca9f 100644
--- a/cpp/bench/sg/svr.cu
+++ b/cpp/bench/sg/svr.cu
@@ -91,9 +91,9 @@ std::vector<SvrParams<D>> getInputs()
   p.regression.shuffle        = true; // better to shuffle when n_informative < ncols
   p.regression.seed           = 1378ULL;
-  p.regression.effective_rank = -1; // dataset generation will be faster
+  p.regression.effective_rank = -1;   // dataset generation will be faster
   p.regression.bias           = 0;
-  p.regression.tail_strength = 0.5; // unused when effective_rank = -1
+  p.regression.tail_strength  = 0.5;  // unused when effective_rank = -1
   p.regression.noise          = 1;
 
   // SvmParameter{C, cache_size, max_iter, nochange_steps, tol, verbosity,
diff --git a/cpp/include/cuml/experimental/fil/detail/node.hpp b/cpp/include/cuml/experimental/fil/detail/node.hpp
index 72ded859df..d3024593c9 100644
--- a/cpp/include/cuml/experimental/fil/detail/node.hpp
+++ b/cpp/include/cuml/experimental/fil/detail/node.hpp
@@ -171,7 +171,7 @@ struct alignas(
     if constexpr (layout == tree_layout::depth_first) {
       return offset_type{1} + condition * (aligned_data.inner_data.distant_offset - offset_type{1});
     } else if constexpr (layout == tree_layout::breadth_first) {
-      return condition* offset_type{1} + (aligned_data.inner_data.distant_offset - offset_type{1});
+      return condition * offset_type{1} + (aligned_data.inner_data.distant_offset - offset_type{1});
     } else {
       static_assert(layout == tree_layout::depth_first);
     }
diff --git a/cpp/include/cuml/genetic/node.h b/cpp/include/cuml/genetic/node.h
index c9b0629005..5e579984e9 100644
--- a/cpp/include/cuml/genetic/node.h
+++ b/cpp/include/cuml/genetic/node.h
@@ -86,7 +86,7 @@ struct node {
     tanh,
     unary_end     = tanh,  // keep this to be the last unary function in the list
     functions_end = unary_end,
-  }; // enum type
+  };  // enum type
 
   /**
    * @brief Default constructor for node
diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py
index 67189573f9..678534b899 100755
--- a/cpp/scripts/run-clang-tidy.py
+++ b/cpp/scripts/run-clang-tidy.py
@@ -25,7 +25,7 @@
 
 import tomli
 
-EXPECTED_VERSION = "15.0.7"
+EXPECTED_VERSION = "16.0.6"
 VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)")
 GPU_ARCH_REGEX = re.compile(r"sm_(\d+)")
 SPACES = re.compile(r"\s+")
diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh
index 453110c099..fef69b12f7 100644
--- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh
@@ -285,7 +285,7 @@ struct Builder {
     d_wsize += calculateAlignedBytes(sizeof(IdxT) * max_batch * dataset.n_sampled_cols);  // colids
 
     // all nodes in the tree
-    h_wsize += // h_workload_info
+    h_wsize +=  // h_workload_info
       calculateAlignedBytes(sizeof(WorkloadInfo<IdxT>) * max_blocks_dimx);
     h_wsize += calculateAlignedBytes(sizeof(SplitT) * max_batch);  // splits
diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu
index 6d7c611d6f..e0a1bb778d 100644
--- a/cpp/src/fil/fil.cu
+++ b/cpp/src/fil/fil.cu
@@ -17,10 +17,10 @@
 /** @file fil.cu fil.cu implements the forest data types (dense and sparse), including their
 creation and prediction (the main inference kernel is defined in infer.cu). */
 
-#include "common.cuh"   // for predict_params, storage, storage
-#include "internal.cuh" // for cat_sets_device_owner, categorical_sets, output_t,
+#include "common.cuh"    // for predict_params, storage, storage
+#include "internal.cuh"  // for cat_sets_device_owner, categorical_sets, output_t,
 
-#include <cuml/fil/fil.h> // for algo_t,
+#include <cuml/fil/fil.h>  // for algo_t,
 
 #include <raft/core/error.hpp>  // for ASSERT
 #include <raft/core/handle.hpp> // for handle_t
@@ -28,9 +28,9 @@ creation and prediction (the main inference kernel is defined in infer.cu). */
 #include <rmm/device_uvector.hpp> // for device_uvector
 #include <thrust/host_vector.h>   // for host_vector
 
-#include <cmath>   // for expf
-#include <cstddef> // for size_t
-#include <cstdint> // for uint8_t
+#include <cmath>    // for expf
+#include <cstddef>  // for size_t
+#include <cstdint>  // for uint8_t
 
 namespace ML {
 namespace fil {
diff --git a/cpp/src/fil/treelite_import.cu b/cpp/src/fil/treelite_import.cu
index 80d7f2c0f1..905e282e84 100644
--- a/cpp/src/fil/treelite_import.cu
+++ b/cpp/src/fil/treelite_import.cu
@@ -22,7 +22,7 @@
 #include <cuml/common/logger.hpp> // for CUML_LOG_WARN
 #include <cuml/fil/fil.h> // for algo_t, from_treelite, storage_type_repr, storage_type_t, treelite_params_t
 
-#include <cuml/fil/fnv_hash.h> // for fowler_noll_vo_fingerprint64_32
+#include <cuml/fil/fnv_hash.h>  // for fowler_noll_vo_fingerprint64_32
 
 #include <raft/core/error.hpp>  // for ASSERT
 #include <raft/core/handle.hpp> // for handle_t
@@ -32,18 +32,18 @@
 #include <treelite/c_api.h> // for ModelHandle
 #include <treelite/tree.h>  // for Tree, Model, ModelImpl, ModelParam
 
-#include <omp.h> // for omp
-
-#include <algorithm>   // for std::max
-#include <bitset>      // for std::bitset
-#include <cmath>       // for NAN
-#include <cstddef>     // for std::size_t
-#include <cstdint>     // for uint8_t
-#include <iosfwd>      // for ios, stringstream
-#include <limits>      // for std::numeric_limits
-#include <stack>       // for std::stack
-#include <string>      // for std::string
-#include <type_traits> // for std::is_same
+#include <omp.h>  // for omp
+
+#include <algorithm>    // for std::max
+#include <bitset>       // for std::bitset
+#include <cmath>        // for NAN
+#include <cstddef>      // for std::size_t
+#include <cstdint>      // for uint8_t
+#include <iosfwd>       // for ios, stringstream
+#include <limits>       // for std::numeric_limits
+#include <stack>        // for std::stack
+#include <string>       // for std::string
+#include <type_traits>  // for std::is_same
 
 namespace ML {
 namespace fil {
diff --git a/cpp/src/genetic/fitness.cuh b/cpp/src/genetic/fitness.cuh
index 82d13203f3..78593ce956 100644
--- a/cpp/src/genetic/fitness.cuh
+++ b/cpp/src/genetic/fitness.cuh
@@ -63,18 +63,18 @@ void weightedPearson(const raft::handle_t& h,
   rmm::device_uvector<math_t> y_tmp(n_samples, stream);
   rmm::device_uvector<math_t> x_tmp(n_samples * n_progs, stream);
-  rmm::device_scalar<math_t> y_mu(stream);           // output mean
-  rmm::device_uvector<math_t> x_mu(n_progs, stream); // predicted output mean
+  rmm::device_scalar<math_t> y_mu(stream);            // output mean
+  rmm::device_uvector<math_t> x_mu(n_progs, stream);  // predicted output mean
   rmm::device_uvector<math_t> y_diff(n_samples, stream);  // normalized output
   rmm::device_uvector<math_t> x_diff(n_samples * n_progs,
-                                     stream); // normalized predicted output
-  rmm::device_uvector<math_t> y_std(1, stream); // output stddev
+                                     stream);  // normalized predicted output
+  rmm::device_uvector<math_t> y_std(1, stream);  // output stddev
   rmm::device_uvector<math_t> x_std(n_progs,
-                                    stream); // predicted output stddev
+                                    stream);  // predicted output stddev
 
-  rmm::device_scalar<math_t> dWS(stream); // sample weight sum
+  rmm::device_scalar<math_t> dWS(stream);  // sample weight sum
   math_t N = (math_t)n_samples;
 
   // Sum of weights
diff --git a/cpp/src/glm/qn/glm_base.cuh b/cpp/src/glm/qn/glm_base.cuh
index a63f4ce0a2..d1e6ef37c6 100644
--- a/cpp/src/glm/qn/glm_base.cuh
+++ b/cpp/src/glm/qn/glm_base.cuh
@@ -192,7 +192,7 @@ struct GLMBase : GLMDims {
                cudaStream_t stream,
                bool initGradZero = true)
   {
-    Loss* loss = static_cast<Loss*>(this); // static polymorphism
+    Loss* loss = static_cast<Loss*>(this);  // static polymorphism
     linearFwd(handle, Zb, Xb, W);                 // linear part: forward pass
     loss->getLossAndDZ(loss_val, Zb, yb, stream); // loss specific part
diff --git a/cpp/src/glm/qn/qn_util.cuh b/cpp/src/glm/qn/qn_util.cuh
index 2af4522915..f6e7ca9950 100644
--- a/cpp/src/glm/qn/qn_util.cuh
+++ b/cpp/src/glm/qn/qn_util.cuh
@@ -53,19 +53,19 @@ enum OPT_RETCODE {
 template <typename T>
 class LBFGSParam {
  public:
-  int m;      // lbfgs memory limit
-  T epsilon;  // controls convergence
-  int past;   // lookback for function value based convergence test
-  T delta;    // controls fun val based conv test
+  int m;       // lbfgs memory limit
+  T epsilon;   // controls convergence
+  int past;    // lookback for function value based convergence test
+  T delta;     // controls fun val based conv test
   int max_iterations;
   int linesearch;  // see enum above
   int max_linesearch;
-  T min_step; // min. allowed step length
-  T max_step; // max. allowed step length
-  T ftol;     // line search tolerance
-  T wolfe;    // wolfe parameter
-  T ls_dec;   // line search decrease factor
-  T ls_inc;   // line search increase factor
+  T min_step;  // min. allowed step length
+  T max_step;  // max. allowed step length
+  T ftol;      // line search tolerance
+  T wolfe;     // wolfe parameter
+  T ls_dec;    // line search decrease factor
+  T ls_inc;    // line search increase factor
 
  public:
   LBFGSParam()
diff --git a/cpp/src/knn/knn_opg_common.cuh b/cpp/src/knn/knn_opg_common.cuh
index 86bb5564a1..a6bbfc54d7 100644
--- a/cpp/src/knn/knn_opg_common.cuh
+++ b/cpp/src/knn/knn_opg_common.cuh
@@ -91,9 +91,9 @@ struct opg_knn_param {
   size_t batch_size = 0; /**< Batch size */
   bool verbose;          /**< verbose */
 
-  std::size_t n_outputs = 0;              /**< Number of outputs per query (cl&re) */
-  std::vector<std::vector<out_t>>* y;     /**< Labels input array (cl&re) */
-  std::vector<Matrix::Data<out_t>*>* out; /**< KNN outputs output array (cl&re) */
+  std::size_t n_outputs = 0;               /**< Number of outputs per query (cl&re) */
+  std::vector<std::vector<out_t>>* y;      /**< Labels input array (cl&re) */
+  std::vector<Matrix::Data<out_t>*>* out;  /**< KNN outputs output array (cl&re) */
   std::vector<int>* n_unique = nullptr;    /**< Number of unique labels (classification) */
   std::vector<out_t*>* uniq_labels = nullptr; /**< Unique labels (classification) */
diff --git a/cpp/src/svm/results.cuh b/cpp/src/svm/results.cuh
index b8f0dcdfb1..b08fdeeae7 100644
--- a/cpp/src/svm/results.cuh
+++ b/cpp/src/svm/results.cuh
@@ -306,7 +306,7 @@ class Results {
   SvmType svmType;  //!< SVM problem type: SVC or SVR
   int n_train;      //!< number of training vectors (including duplicates for SVR)
 
-  const int TPB = 256; // threads per block
+  const int TPB = 256;  // threads per block
   // Temporary variables used by cub in GetResults
   rmm::device_scalar<int> d_num_selected;
   rmm::device_scalar<math_t> d_val_reduced;
diff --git a/cpp/src/svm/smosolver.cuh b/cpp/src/svm/smosolver.cuh
index 43c482e76d..964935c69b 100644
--- a/cpp/src/svm/smosolver.cuh
+++ b/cpp/src/svm/smosolver.cuh
@@ -471,7 +471,7 @@ class SmoSolver {
   rmm::device_uvector<math_t> f;        //!< optimality indicator vector
   rmm::device_uvector<math_t> y_label;  //!< extra label for regression
 
-  rmm::device_uvector<math_t> C_vec; //!< penalty parameter vector
+  rmm::device_uvector<math_t> C_vec;  //!< penalty parameter vector
 
   // Buffers for the working set [n_ws]
   //! change in alpha parameter during a blocksolve step
@@ -490,7 +490,7 @@ class SmoSolver {
   raft::distance::kernels::KernelType kernel_type;
   float cache_size;  //!< size of kernel cache in MiB
-  SvmType svmType; ///!< Type of the SVM problem to solve
+  SvmType svmType;  ///!< Type of the SVM problem to solve
 
   // Variables to track convergence of training
   math_t diff_prev;
diff --git a/cpp/src/tsne/barnes_hut_kernels.cuh b/cpp/src/tsne/barnes_hut_kernels.cuh
index 67e3e46a7e..5059c1a8f1 100644
--- a/cpp/src/tsne/barnes_hut_kernels.cuh
+++ b/cpp/src/tsne/barnes_hut_kernels.cuh
@@ -542,7 +542,7 @@ __global__ __launch_bounds__(THREADS4, FACTOR4) void SortKernel(value_idx* restr
  */
 template <typename value_idx>
 __global__ __launch_bounds__(
-  THREADS5, 1) void RepulsionKernel( /* int *restrict errd, */
+  THREADS5, 1) void RepulsionKernel(/* int *restrict errd, */
   const float theta,
   const float epssqd,  // correction for zero distance
   const value_idx* restrict sortd,
diff --git a/cpp/src/tsne/cannylab/bh.cu b/cpp/src/tsne/cannylab/bh.cu
index 59ac35ae33..a7b3136a6d 100644
--- a/cpp/src/tsne/cannylab/bh.cu
+++ b/cpp/src/tsne/cannylab/bh.cu
@@ -47,7 +47,7 @@ Emerald Edition, pp. 75-92. January 2011.
 // threads per block
 #define THREADS1 1024 /* must be a power of 2 */
 #define THREADS2 1024
-#define THREADS3 768 /* shared-memory limited on some devices */
+#define THREADS3 768  /* shared-memory limited on some devices */
 #define THREADS4 1024
 #define THREADS5 1024
 #define THREADS6 1024
@@ -562,7 +562,7 @@ __global__ __launch_bounds__(THREADS4,
         }
       }
     }
-    k -= dec; // move on to next cell
+    k -= dec;  // move on to next cell
   }
   __syncthreads();  // optional barrier for performance
 }
diff --git a/cpp/src/tsne/exact_kernels.cuh b/cpp/src/tsne/exact_kernels.cuh
index 7ccb6e279d..e7a92ac2ec 100644
--- a/cpp/src/tsne/exact_kernels.cuh
+++ b/cpp/src/tsne/exact_kernels.cuh
@@ -290,8 +290,8 @@ __global__ void repulsive_kernel(const value_t* restrict Y,
                                  value_t* restrict Z_sum2,
                                  const value_idx n,
                                  const value_idx dim,
-                                 const value_t df_power, // -(df + 1)/2)
-                                 const value_t recp_df)  // 1 / df
+                                 const value_t df_power,  // -(df + 1)/2)
+                                 const value_t recp_df)   // 1 / df
 {
   const auto j = (blockIdx.x * blockDim.x) + threadIdx.x;  // for every item in row
   const auto i = (blockIdx.y * blockDim.y) + threadIdx.y;  // for every row
diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.cuh b/cpp/src/umap/fuzzy_simpl_set/naive.cuh
index 0ac61f05b2..f674b0ba0f 100644
--- a/cpp/src/umap/fuzzy_simpl_set/naive.cuh
+++ b/cpp/src/umap/fuzzy_simpl_set/naive.cuh
@@ -194,7 +194,7 @@ __global__ void compute_membership_strength_kernel(
   const value_idx* knn_indices,
   const float* knn_dists,  // nn outputs
   const value_t* sigmas,
-  const value_t* rhos, // continuous dists to nearest neighbors
+  const value_t* rhos,  // continuous dists to nearest neighbors
   value_t* vals,
   int* rows,
   int* cols,  // result coo
diff --git a/cpp/src_prims/timeSeries/arima_helpers.cuh b/cpp/src_prims/timeSeries/arima_helpers.cuh
index f11909f1ff..fff189878c 100644
--- a/cpp/src_prims/timeSeries/arima_helpers.cuh
+++ b/cpp/src_prims/timeSeries/arima_helpers.cuh
@@ -158,7 +158,7 @@ __global__ void _undiff_kernel(DataT* d_fc,
   for (int i = 0; i < num_steps; i++) {
     if (!double_diff) {  // One simple or seasonal difference
       b_fc[i] += _select_read(b_in, n_in, b_fc, i - s0);
-    } else { // Two differences (simple, seasonal or both)
+    } else {  // Two differences (simple, seasonal or both)
       DataT fc_acc = -_select_read(b_in, n_in, b_fc, i - s0 - s1);
       fc_acc += _select_read(b_in, n_in, b_fc, i - s0);
       fc_acc += _select_read(b_in, n_in, b_fc, i - s1);
diff --git a/cpp/test/prims/batched/csr.cu b/cpp/test/prims/batched/csr.cu
index 1ca7748a72..f092b7644e 100644
--- a/cpp/test/prims/batched/csr.cu
+++ b/cpp/test/prims/batched/csr.cu
@@ -38,7 +38,7 @@ template <typename T>
 struct CSRInputs {
   CSROperation operation;
   int batch_size;
-  int m; // Dimensions of A
+  int m;    // Dimensions of A
   int n;
   int nnz;  // Number of non-zero elements in A
   int p;    // Dimensions of B or x
diff --git a/cpp/test/sg/fil_child_index_test.cu b/cpp/test/sg/fil_child_index_test.cu
index b9b740f29f..55b6a19f4a 100644
--- a/cpp/test/sg/fil_child_index_test.cu
+++ b/cpp/test/sg/fil_child_index_test.cu
@@ -206,7 +206,7 @@ std::vector<ChildIndexTestParams> params = {
   CHILD_INDEX_TEST_PARAMS(node = NODE(def_left = true), input = QNAN, correct = 1),  // !def_left
   CHILD_INDEX_TEST_PARAMS(node = NODE(thresh = QNAN), input = QNAN, correct = 2),    // !def_left
   CHILD_INDEX_TEST_PARAMS(
-    node = NODE(def_left = true, thresh = QNAN), input = QNAN, correct = 1), // !def_left
+    node = NODE(def_left = true, thresh = QNAN), input = QNAN, correct = 1),  // !def_left
   CHILD_INDEX_TEST_PARAMS(node = NODE(thresh = QNAN), input = 0.0, correct = 1),  // val !>= thresh
   CHILD_INDEX_TEST_PARAMS(
     node = NODE(thresh = 0.0), parent_node_idx = 1, input = -INF, correct = 3),
@@ -224,7 +224,7 @@ std::vector<ChildIndexTestParams> params = {
     node = NODE(thresh = 0.0), parent_node_idx = 4, input = -INF, correct = 9),
   CHILD_INDEX_TEST_PARAMS(
     node = NODE(thresh = 0.0), parent_node_idx = 4, input = 0.0, correct = 10),
-  CHILD_INDEX_TEST_PARAMS(parent_node_idx = 4, input = QNAN, correct = 10), // !def_left
+  CHILD_INDEX_TEST_PARAMS(parent_node_idx = 4, input = QNAN, correct = 10),  // !def_left
   CHILD_INDEX_TEST_PARAMS(
     node = NODE(def_left = true), input = QNAN, parent_node_idx = 4, correct = 9),  // !def_left
   // cannot match ( < 0 and realistic fid_num_cats)
diff --git a/dependencies.yaml b/dependencies.yaml
index e1cb756197..ef3716f817 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -95,8 +95,8 @@ dependencies:
     common:
       - output_types: [conda, requirements]
         packages:
-          - clang==15.0.7
-          - clang-tools==15.0.7
+          - clang==16.0.6
+          - clang-tools==16.0.6
           - ninja
           - tomli
   common_build:
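
For reference, the version pins above are enforced at run time: `cpp/scripts/run-clang-tidy.py` parses the `LLVM version` line of `clang-tidy --version` output and compares it against `EXPECTED_VERSION`, so a mismatched local install fails fast instead of producing formatting noise. A minimal sketch of that gate, reusing the `EXPECTED_VERSION` and `VERSION_REGEX` values from the hunk above; the `check_version` helper name and error messages are illustrative, not the script's actual code:

```python
# Sketch only: mirrors the EXPECTED_VERSION/VERSION_REGEX pattern from the
# run-clang-tidy.py hunk above. check_version() and its messages are invented
# here for illustration; they are not the script's actual helper.
import re
import subprocess

EXPECTED_VERSION = "16.0.6"
VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)")


def check_version(exe: str = "clang-tidy") -> None:
    """Raise unless `exe --version` reports the pinned LLVM version."""
    output = subprocess.run(
        [exe, "--version"], capture_output=True, text=True, check=True
    ).stdout
    match = VERSION_REGEX.search(output)
    if match is None:
        raise RuntimeError(f"could not parse LLVM version from `{exe} --version`")
    if match.group(1) != EXPECTED_VERSION:
        raise RuntimeError(
            f"{exe} reports LLVM {match.group(1)}; {EXPECTED_VERSION} is required"
        )


if __name__ == "__main__":
    check_version()
```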