Warnings are errors #4075

Merged 29 commits on Aug 10, 2021
Changes from 7 commits
Commits (29)
07b557f
Enable warnings as errors and fix all current warnings
harrism Jul 21, 2021
74fadac
Merge branch 'branch-21.08' into fix-cuml-warnings-as-errors
harrism Jul 21, 2021
694a038
Merge branch 'branch-21.10' into fix-cuml-warnings-as-errors
harrism Jul 21, 2021
1d73c48
Use is_nan
harrism Jul 21, 2021
6ae8fae
std::size_t
harrism Jul 22, 2021
0ba156f
Eliminate #pragma unroll warnings
harrism Jul 22, 2021
173311c
Don't ignore unknown pragma warnings anymore.
harrism Jul 22, 2021
eb3373d
Respond to review feedback, std::size_t, include cstddef
harrism Jul 27, 2021
d48cf6c
Add `detail::bit_cast`
harrism Jul 27, 2021
bf8bf32
Merge branch 'branch-21.10' into fix-cuml-warnings-as-errors
harrism Jul 28, 2021
fea66f6
Fix new warning in rf_test.cu
harrism Jul 28, 2021
07f1efe
Merge branch 'branch-21.10' into fix-cuml-warnings-as-errors
harrism Aug 3, 2021
5f065fd
Fix warning in runner.cuh
harrism Aug 3, 2021
80c572a
Enable CUDA device code warnings as errors.
harrism Aug 3, 2021
06b44b1
Fix warning in contingencyMatrix.cuh
harrism Aug 3, 2021
60f04e6
Use size_t in batched matrix class.
harrism Aug 3, 2021
0d6c8e8
More auto
harrism Aug 3, 2021
fed871f
Next warnings fix...
harrism Aug 4, 2021
7a83c7d
NVCC Warnings as errors only for 11.2+
harrism Aug 4, 2021
4d82043
Another fix
harrism Aug 4, 2021
3a6a24f
More fixes in csr.cuh
harrism Aug 4, 2021
e1aee79
Next attempt
harrism Aug 4, 2021
1506dce
Temporarily disable -Werror
harrism Aug 4, 2021
d3ac9b9
Fix uninitialized variables.
harrism Aug 5, 2021
df2c6e2
Fix more warnings
harrism Aug 5, 2021
32939d2
Re-enable -Werror
harrism Aug 5, 2021
71bb6d5
Update cpp/cmake/modules/ConfigureCUDA.cmake
harrism Aug 7, 2021
b9bfa80
Remove unused parameter name.
harrism Aug 9, 2021
da342bf
Merge branch 'fix-cuml-warnings-as-errors' of github.com:harrism/cuml…
harrism Aug 9, 2021
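Commit d48cf6c adds a `detail::bit_cast` helper, the usual replacement for pointer-based type punning, which trips strict-aliasing warnings under -Wall. That commit's diff is not among the files shown below, so the following is only a minimal sketch of the common memcpy-based pattern (a pre-C++20 stand-in for std::bit_cast), not the PR's actual code:

#include <cstring>
#include <type_traits>

namespace detail {

// Reinterpret the bytes of `from` as a value of type To. The compiler
// optimizes the memcpy into a register move, and unlike reinterpret_cast
// punning this is well-defined for trivially copyable types.
template <typename To, typename From>
To bit_cast(const From& from) noexcept
{
  static_assert(sizeof(To) == sizeof(From), "sizes must match");
  static_assert(std::is_trivially_copyable<From>::value, "From must be trivially copyable");
  static_assert(std::is_trivially_copyable<To>::value, "To must be trivially copyable");
  To to;
  std::memcpy(&to, &from, sizeof(To));
  return to;
}

}  // namespace detail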
4 changes: 2 additions & 2 deletions cpp/cmake/modules/ConfigureCUDA.cmake
@@ -25,8 +25,8 @@ endif()
list(APPEND CUML_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)

# set warnings as errors
# list(APPEND CUML_CUDA_FLAGS -Werror=cross-execution-space-call)
# list(APPEND CUML_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
list(APPEND CUML_CUDA_FLAGS -Werror=cross-execution-space-call)
list(APPEND CUML_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)

if(DISABLE_DEPRECATION_WARNING)
list(APPEND CUML_CXX_FLAGS -Wno-deprecated-declarations)
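Commit 7a83c7d ("NVCC Warnings as errors only for 11.2+") implies a version guard elsewhere in this file, since nvcc only accepts -Werror=all-warnings starting with CUDA 11.2. A sketch of how such a guard is typically written (the exact flags in the merged branch may differ):

# Promote all device-code warnings to errors only where nvcc supports it.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
  list(APPEND CUML_CUDA_FLAGS -Werror=all-warnings)
endif()
# Host-side warnings are forwarded through -Xcompiler on all versions.
list(APPEND CUML_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)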
5 changes: 5 additions & 0 deletions cpp/include/cuml/ensemble/randomforest.hpp
@@ -19,8 +19,13 @@
#include <cuml/common/logger.hpp>
#include <cuml/ensemble/treelite_defs.hpp>
#include <cuml/tree/decisiontree.hpp>

#include <map>

namespace raft {
class handle_t; // forward decl
}

namespace ML {

enum RF_type {
7 changes: 3 additions & 4 deletions cpp/include/cuml/tree/decisiontree.hpp
@@ -15,13 +15,12 @@
*/

#pragma once
#include <vector>

#include "algo_helper.h"
#include "flatnode.h"

namespace raft {
class handle_t;
}
#include <string>
#include <vector>

namespace ML {

7 changes: 4 additions & 3 deletions cpp/src/common/tensor.hpp
@@ -19,6 +19,7 @@
#include <raft/cudart_utils.h>
#include <raft/mr/device/allocator.hpp>
#include <raft/mr/host/allocator.hpp>

#include <vector>

namespace ML {
@@ -171,17 +172,17 @@ class Tensor {
std::shared_ptr<raft::mr::host::allocator> _hAllocator;

/// Raw pointer to where the tensor data begins
DataPtrT _data;
DataPtrT _data{};

/// Array of strides (in sizeof(T) terms) per each dimension
IndexT _stride[Dim];

/// Size per each dimension
IndexT _size[Dim];

AllocState _state;
AllocState _state{};

cudaStream_t _stream;
cudaStream_t _stream{};
};

}; // end namespace ML
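The `{}` initializers above are default member initializers: each member is value-initialized (null for pointers, zero for scalars) in any constructor that does not set it explicitly, which is what silences uninitialized-member warnings for conditionally-assigned fields. A minimal illustration, not the Tensor class itself:

struct Example {
  int* data{};   // value-initialized to nullptr
  int state{};   // value-initialized to 0
  // int* raw;   // indeterminate until assigned; a warning waiting to happen
};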
14 changes: 7 additions & 7 deletions cpp/src/dbscan/adjgraph/algo.cuh
@@ -16,24 +16,24 @@

#pragma once

#include <thrust/device_ptr.h>
#include <thrust/scan.h>
#include <common/allocatorAdapter.hpp>
#include <raft/cuda_utils.cuh>
#include "../common.cuh"
#include "pack.h"

#include <common/allocatorAdapter.hpp>

#include <raft/cuda_utils.cuh>
#include <raft/sparse/convert/csr.cuh>

#include <thrust/device_ptr.h>
#include <thrust/scan.h>

using namespace thrust;

namespace ML {
namespace Dbscan {
namespace AdjGraph {
namespace Algo {

using namespace MLCommon;

static const int TPB_X = 256;

/**
@@ -61,4 +61,4 @@ void launcher(const raft::handle_t& handle,
} // namespace Algo
} // namespace AdjGraph
} // namespace Dbscan
} // namespace ML
} // namespace ML
22 changes: 12 additions & 10 deletions cpp/src/dbscan/dbscan.cuh
@@ -16,11 +16,13 @@

#pragma once

#include "runner.cuh"

#include <common/nvtx.hpp>

#include <cuml/cluster/dbscan.hpp>
#include <cuml/common/device_buffer.hpp>
#include <cuml/common/logger.hpp>
#include "runner.cuh"

#include <algorithm>

@@ -65,7 +67,7 @@ size_t compute_batch_size(size_t& estimated_memory,

// To avoid overflow, we need: batch_size <= MAX_LABEL / n_rows (floor div)
Index_ MAX_LABEL = std::numeric_limits<Index_>::max();
if (batch_size > MAX_LABEL / n_rows) {
if (batch_size > static_cast<std::size_t>(MAX_LABEL / n_rows)) {
Index_ new_batch_size = MAX_LABEL / n_rows;
CUML_LOG_WARN(
"Batch size limited by the chosen integer type (%d bytes). %d -> %d. "
@@ -77,7 +79,8 @@ size_t compute_batch_size(size_t& estimated_memory,
}

// Warn when a smaller index type could be used
if (sizeof(Index_) > sizeof(int) && batch_size < std::numeric_limits<int>::max() / n_rows) {
if ((sizeof(Index_) > sizeof(int)) &&
(batch_size < std::numeric_limits<int>::max() / static_cast<std::size_t>(n_rows))) {
CUML_LOG_WARN(
"You are using an index type of size (%d bytes) but a smaller index "
"type (%d bytes) would be sufficient. Using the smaller integer type "
@@ -110,8 +113,11 @@ void dbscanFitImpl(const raft::handle_t& handle,
int algo_adj = 1;
int algo_ccl = 2;

int my_rank, n_rank;
Index_ start_row, n_owned_rows;
int my_rank{0};
int n_rank{1};
Index_ start_row{0};
Index_ n_owned_rows{n_rows};

if (opg) {
const auto& comm = handle.get_comms();
my_rank = comm.get_rank();
@@ -122,10 +128,6 @@ void dbscanFitImpl(const raft::handle_t& handle,
n_owned_rows = max(Index_(0), end_row - start_row);
// Note: it is possible for a node to have no work in theory. It won't
// happen in practice (because n_rows is much greater than n_rank)
} else {
my_rank = 0;
n_rank = 1;
n_owned_rows = n_rows;
}

CUML_LOG_DEBUG("#%d owns %ld rows", (int)my_rank, (unsigned long)n_owned_rows);
@@ -200,4 +202,4 @@ void dbscanFitImpl(const raft::handle_t& handle,
}

} // namespace Dbscan
} // namespace ML
} // namespace ML
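The two compute_batch_size hunks fix -Wsign-compare: batch_size is a std::size_t while MAX_LABEL / n_rows is a signed Index_, and comparing the two directly warns under -Wall -Werror. The quotient is known to be non-negative, so widening it to std::size_t is safe. A self-contained sketch of the pattern (illustrative, not the cuML source):

#include <cstddef>
#include <cstdint>
#include <limits>

template <typename Index_>
std::size_t clamp_batch_size(std::size_t batch_size, Index_ n_rows)
{
  const Index_ MAX_LABEL = std::numeric_limits<Index_>::max();
  // Example: with Index_ = int32_t and n_rows = 100000, MAX_LABEL / n_rows
  // is 21474; a larger batch_size could overflow the label computation.
  if (batch_size > static_cast<std::size_t>(MAX_LABEL / n_rows)) {
    batch_size = static_cast<std::size_t>(MAX_LABEL / n_rows);
  }
  return batch_size;
}

The dbscanFitImpl hunk applies the same brace-initialization idea as tensor.hpp: giving my_rank, n_rank, start_row, and n_owned_rows their single-process defaults at declaration removes the uninitialized-variable warning and makes the else branch redundant.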
1 change: 1 addition & 0 deletions cpp/src/dbscan/mergelabels/runner.cuh
@@ -18,6 +18,7 @@

#include <label/merge_labels.cuh>

#include <raft/handle.hpp>
namespace ML {
namespace Dbscan {
namespace MergeLabels {
4 changes: 4 additions & 0 deletions cpp/src/dbscan/mergelabels/tree_reduction.cuh
@@ -18,6 +18,10 @@

#include "runner.cuh"

#include <common/nvtx.hpp>

#include <cuml/common/logger.hpp>

namespace ML {
namespace Dbscan {
namespace MergeLabels {
30 changes: 16 additions & 14 deletions cpp/src/dbscan/runner.cuh
@@ -16,22 +16,25 @@

#pragma once

#include <raft/cudart_utils.h>
#include <common/nvtx.hpp>
#include <cuml/common/device_buffer.hpp>
#include <label/classlabels.cuh>
#include <raft/cuda_utils.cuh>
#include <raft/mr/device/allocator.hpp>
#include <raft/sparse/csr.cuh>
#include "adjgraph/runner.cuh"
#include "corepoints/compute.cuh"
#include "corepoints/exchange.cuh"
#include "mergelabels/runner.cuh"
#include "mergelabels/tree_reduction.cuh"
#include "vertexdeg/runner.cuh"

#include <cuml/common/device_buffer.hpp>
#include <cuml/common/logger.hpp>

#include <common/nvtx.hpp>

#include <label/classlabels.cuh>

#include <raft/cudart_utils.h>
#include <raft/cuda_utils.cuh>
#include <raft/mr/device/allocator.hpp>
#include <raft/sparse/csr.cuh>

namespace ML {
namespace Dbscan {

@@ -302,13 +305,12 @@ size_t run(const raft::handle_t& handle,

// Perform stream reduction on the core points. The core_pts acts as the stencil and we use
// thrust::counting_iterator to return the index
auto core_point_count =
thrust::copy_if(thrust_exec_policy,
index_iterator,
index_iterator + N,
dev_core_pts,
dev_core_indices,
[=] __device__(const bool is_core_point) { return is_core_point; });
thrust::copy_if(thrust_exec_policy,
index_iterator,
index_iterator + N,
dev_core_pts,
dev_core_indices,
[=] __device__(const bool is_core_point) { return is_core_point; });

ML::POP_RANGE();
}
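The copy_if change above fixes an unused-variable warning: thrust::copy_if returns an iterator one past the last element written, and binding it to core_point_count without ever reading it is exactly what -Wunused-variable flags. When the count is wanted, the returned iterator can be consumed instead, as in this sketch (assumed names, not the cuML source):

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>

struct is_set {
  __host__ __device__ bool operator()(bool flag) const { return flag; }
};

// Copy the indices of set stencil entries and report how many were kept.
int gather_core_indices(const thrust::device_vector<int>& indices,
                        const thrust::device_vector<bool>& stencil,
                        thrust::device_vector<int>& out)
{
  auto out_end = thrust::copy_if(thrust::device,
                                 indices.begin(),
                                 indices.end(),
                                 stencil.begin(),
                                 out.begin(),
                                 is_set());
  return static_cast<int>(out_end - out.begin());  // result actually used
}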
29 changes: 17 additions & 12 deletions cpp/src/decisiontree/decisiontree.cuh
@@ -15,32 +15,37 @@
*/

#pragma once
#include <common/Timer.h>

#include "batched-levelalgo/builder.cuh"
#include "quantile/quantile.h"
#include "treelite_util.h"

#include <cuml/tree/algo_helper.h>
#include <cuml/tree/flatnode.h>
#include <cuml/common/logger.hpp>
#include <cuml/tree/decisiontree.hpp>

#include <common/Timer.h>
#include <common/iota.cuh>
#include <common/nvtx.hpp>

#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <raft/mr/device/allocator.hpp>
#include <raft/mr/host/allocator.hpp>

#include <treelite/c_api.h>
#include <treelite/tree.h>

#include <algorithm>
#include <climits>
#include <common/iota.cuh>
#include <cuml/common/logger.hpp>
#include <cuml/tree/decisiontree.hpp>
#include <iomanip>
#include <locale>
#include <map>
#include <numeric>
#include <raft/handle.hpp>
#include <raft/mr/device/allocator.hpp>
#include <raft/mr/host/allocator.hpp>
#include <random>
#include <type_traits>
#include <vector>
#include "batched-levelalgo/builder.cuh"
#include "quantile/quantile.h"
#include "treelite_util.h"

#include <common/nvtx.hpp>

/** check for treelite runtime API errors and assert accordingly */
#define TREELITE_CHECK(call) \
22 changes: 11 additions & 11 deletions cpp/src/fil/fil.cu
@@ -266,7 +266,7 @@ struct forest {
// for GROVE_PER_CLASS, averaging happens in infer_k
ot = output_t(ot & ~output_t::AVG);
params.num_outputs = params.num_classes;
do_transform = ot != output_t::RAW && ot != output_t::SOFTMAX || global_bias != 0.0f;
do_transform = (ot != output_t::RAW && ot != output_t::SOFTMAX) || global_bias != 0.0f;
break;
case leaf_algo_t::CATEGORICAL_LEAF:
params.num_outputs = params.num_classes;
@@ -276,7 +276,7 @@ struct forest {
// for VECTOR_LEAF, averaging happens in infer_k
ot = output_t(ot & ~output_t::AVG);
params.num_outputs = params.num_classes;
do_transform = ot != output_t::RAW && ot != output_t::SOFTMAX || global_bias != 0.0f;
do_transform = (ot != output_t::RAW && ot != output_t::SOFTMAX) || global_bias != 0.0f;
break;
default: ASSERT(false, "internal error: invalid leaf_algo_");
}
@@ -633,12 +633,12 @@ void tl2fil_leaf_payload(fil_node_t* fil_node,
auto vec = tl_tree.LeafVector(tl_node_id);
switch (forest_params.leaf_algo) {
case leaf_algo_t::CATEGORICAL_LEAF:
ASSERT(vec.size() == forest_params.num_classes,
ASSERT(vec.size() == static_cast<std::size_t>(forest_params.num_classes),
"inconsistent number of classes in treelite leaves");
fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size());
break;
case leaf_algo_t::VECTOR_LEAF: {
ASSERT(vec.size() == forest_params.num_classes,
ASSERT(vec.size() == static_cast<std::size_t>(forest_params.num_classes),
"inconsistent number of classes in treelite leaves");
fil_node->val.idx = *leaf_counter;
for (int k = 0; k < forest_params.num_classes; k++) {
@@ -769,14 +769,14 @@ inline void tree_depth_hist(const tl::Tree<T, L>& tree, std::vector<level_entry>
stack.pop();

while (!tree.IsLeaf(node_id)) {
if (depth >= hist.size()) hist.resize(depth + 1, {0, 0});
if (static_cast<std::size_t>(depth) >= hist.size()) hist.resize(depth + 1, {0, 0});
hist[depth].n_branch_nodes++;
stack.push({tree.LeftChild(node_id), depth + 1});
node_id = tree.RightChild(node_id);
depth++;
}

if (depth >= hist.size()) hist.resize(depth + 1, {0, 0});
if (static_cast<std::size_t>(depth) >= hist.size()) hist.resize(depth + 1, {0, 0});
hist[depth].n_leaves++;
}
}
@@ -794,7 +794,7 @@ std::stringstream depth_hist_and_max(const tl::ModelImpl<T, L>& model)
ios default_state(nullptr);
default_state.copyfmt(forest_shape);
forest_shape << "Depth histogram:" << endl << "depth branches leaves nodes" << endl;
for (int level = 0; level < hist.size(); ++level) {
for (std::size_t level = 0; level < hist.size(); ++level) {
level_entry e = hist[level];
forest_shape << setw(5) << level << setw(9) << e.n_branch_nodes << setw(7) << e.n_leaves
<< setw(8) << e.n_branch_nodes + e.n_leaves << endl;
@@ -928,7 +928,7 @@ void tl2fil_dense(std::vector<dense_node>* pnodes,
vector_leaf->resize(max_leaves_per_tree * params->num_trees * params->num_classes);
}
pnodes->resize(num_nodes, dense_node());
for (int i = 0; i < model.trees.size(); ++i) {
for (std::size_t i = 0; i < model.trees.size(); ++i) {
size_t leaf_counter = max_leaves_per_tree * i;
tree2fil_dense(pnodes,
i * tree_num_nodes(params->depth),
@@ -976,10 +976,10 @@ struct tl2fil_sparse_check_t<sparse_node8> {

// check the number of tree nodes
const std::vector<tl::Tree<threshold_t, leaf_t>>& trees = model.trees;
for (int i = 0; i < trees.size(); ++i) {
for (std::size_t i = 0; i < trees.size(); ++i) {
int num_nodes = trees[i].num_nodes;
ASSERT(num_nodes <= MAX_TREE_NODES,
"tree %d has %d nodes, "
"tree %lu has %d nodes, "
"but only %d supported for 8-byte sparse nodes",
i,
num_nodes,
@@ -1019,7 +1019,7 @@ void tl2fil_sparse(std::vector<int>* ptrees,

// convert the nodes
#pragma omp parallel for
for (int i = 0; i < num_trees; ++i) {
for (std::size_t i = 0; i < num_trees; ++i) {
// Max number of leaves processed so far
size_t leaf_counter = ((*ptrees)[i] + i) / 2;
tree2fil_sparse(*pnodes, (*ptrees)[i], model.trees[i], *params, vector_leaf, &leaf_counter);
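The fil.cu hunks cover three recurring warning classes. The do_transform lines fix -Wparentheses: `a && b || c` already parses as `(a && b) || c`, but GCC warns because the precedence is easy to misread, so parentheses are added without changing behavior. The loop and ASSERT changes fix -Wsign-compare by using std::size_t counters (or casts) when comparing against container sizes, with the format string updated to "%lu" to match; note that an unsigned loop variable under #pragma omp parallel for relies on OpenMP 3.0+ support. A minimal illustration of the precedence fix (assumed values, not the FIL source):

#include <cstdio>

int main()
{
  bool raw = false, softmax = false;
  float global_bias = 1.0f;
  // Same meaning with or without the parentheses; the explicit form
  // compiles cleanly under -Wall -Werror.
  bool do_transform = (!raw && !softmax) || global_bias != 0.0f;
  std::printf("do_transform = %d\n", static_cast<int>(do_transform));
  return 0;
}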