rapidsai · rapids-bot · Jul 5, 2023 · May 24, 2023 · May 24, 2023 · May 24, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -18,6 +18,10 @@ repos:
             types_or: [python, cython]
             exclude: thirdparty
             additional_dependencies: [flake8-force]
+    - repo: https://github.com/MarcoGorelli/cython-lint
+      rev: v0.15.0
+      hooks:
+          - id: cython-lint
     - repo: https://github.com/pre-commit/mirrors-clang-format
       rev: v16.0.1
       hooks:

diff --git a/pyproject.toml b/pyproject.toml
@@ -9,3 +9,7 @@ ignore-words-list = "inout,numer,startd,couldn,referr"
 builtin = "clear"
 # disable warnings about binary files and wrong encoding
 quiet-level = 3
+
+[tool.cython-lint]
+max-line-length = 999
+ignore = ['E501']
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -167,4 +167,4 @@ cdef extern from "cuml/cluster/kmeans.hpp" namespace "ML::kmeans":
                         const double *X,
                         int64_t n_samples,
                         int64_t n_features,
-                        double *X_new) except +
+                        double *X_new) except +
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,15 +16,13 @@
 
 # distutils: language = c++
 
-import ctypes
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
 from cuml.internals.safe_imports import gpu_only_import
 cp = gpu_only_import('cupy')
 
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t, int64_t
-from libc.stdlib cimport calloc, malloc, free
 
 from cuml.internals.array import CumlArray
 from cuml.internals.base import Base
@@ -37,8 +35,6 @@ from cuml.internals.mixins import ClusterMixin
 from cuml.internals.mixins import CMajorInputTagMixin
 from cuml.metrics.distance_type cimport DistanceType
 
-from collections import defaultdict
-
 cdef extern from "cuml/cluster/dbscan.hpp" \
         namespace "ML::Dbscan":
 

@@ -40,7 +40,6 @@ from cuml.internals.api_decorators import device_interop_preparation
 from cuml.internals.api_decorators import enable_device_interop
 from cuml.internals.mixins import ClusterMixin
 from cuml.internals.mixins import CMajorInputTagMixin
-from cuml.internals import logger
 from cuml.internals.import_utils import has_hdbscan
 
 import cuml
@@ -257,7 +256,7 @@ def condense_hierarchy(dendrogram,
         new CondensedHierarchy[int, float](
             handle_[0], <size_t>n_leaves)
 
-    children, n_rows, _, _ = \
+    children, _, _, _ = \
         input_to_cuml_array(dendrogram[:, 0:2].astype('int32'), order='C',
                             check_dtype=[np.int32],
                             convert_to_dtype=(np.int32))
@@ -457,7 +456,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         A score of how persistent each cluster is. A score of 1.0 represents
         a perfectly stable cluster that persists over all distance scales,
         while a score of 0.0 represents a perfectly ephemeral cluster. These
-        scores can be used to gauge the relative coherence of the 
+        scores can be used to gauge the relative coherence of the
         clusters output by the algorithm.
 
     condensed_tree_ : CondensedTree object
@@ -1026,7 +1025,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
 
         cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()
 
-        self.X_m, self.n_rows, self.n_cols, dtype = \
+        self.X_m, self.n_rows, self.n_cols, _ = \
             input_to_cuml_array(self._cpu_model._raw_data, order='C',
                                 check_dtype=[np.float32],
                                 convert_to_dtype=(np.float32

@@ -25,20 +25,13 @@ from cuml.internals.safe_imports import gpu_only_import
 cp = gpu_only_import('cupy')
 
 from cuml.internals.array import CumlArray
-from cuml.internals.base import Base
-from cuml.common.doc_utils import generate_docstring
 from pylibraft.common.handle cimport handle_t
 
-from pylibraft.common.handle import Handle
 from cuml.common import (
     input_to_cuml_array,
     input_to_host_array
 )
-from cuml.common.array_descriptor import CumlArrayDescriptor
-from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.device_type import DeviceType
-from cuml.internals.mixins import ClusterMixin
-from cuml.internals.mixins import CMajorInputTagMixin
 from cuml.internals import logger
 from cuml.internals.import_utils import has_hdbscan
 
@@ -96,7 +89,7 @@ cdef extern from "cuml/cluster/hdbscan.hpp" namespace "ML":
         DistanceType metric,
         float* membership_vec,
         size_t batch_size)
-    
+
     void compute_membership_vector(
         const handle_t& handle,
         CondensedHierarchy[int, float] &condensed_tree,
@@ -107,7 +100,7 @@ cdef extern from "cuml/cluster/hdbscan.hpp" namespace "ML":
         int min_samples,
         DistanceType metric,
         float* membership_vec,
-        size_t batch_size);
+        size_t batch_size)
 
     void out_of_sample_predict(const handle_t &handle,
                                CondensedHierarchy[int, float] &condensed_tree,
@@ -249,7 +242,7 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty
         The new data points to predict cluster labels for. They should
         have the same dimensionality as the original dataset over which
         clusterer was fit.
-    
+
     batch_size : int, optional, default=min(4096, n_points_to_predict)
         Lowers memory requirement by computing distance-based membership
         in smaller batches of points in the prediction data. A batch size
@@ -306,7 +299,7 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty
                             convert_to_dtype=(np.float32
                                               if convert_dtype
                                               else None))
-    
+
     if clusterer.n_clusters_ == 0:
         return np.zeros(n_prediction_points, dtype=np.float32)
 
@@ -318,7 +311,7 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty
 
     cdef uintptr_t prediction_ptr = points_to_predict_m.ptr
     cdef uintptr_t input_ptr = clusterer.X_m.ptr
-    
+
     membership_vec = CumlArray.empty(
         (n_prediction_points * clusterer.n_clusters_,),
         dtype="float32")

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,24 +16,21 @@
 
 # distutils: language = c++
 
-import ctypes
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
 from cuml.internals.safe_imports import gpu_only_import
 rmm = gpu_only_import('rmm')
-import warnings
 import typing
 
 from cython.operator cimport dereference as deref
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t, int64_t
-from libc.stdlib cimport calloc, malloc, free
+from libc.stdlib cimport calloc, free
 
 from cuml.cluster.cpp.kmeans cimport fit_predict as cpp_fit_predict
 from cuml.cluster.cpp.kmeans cimport predict as cpp_predict
 from cuml.cluster.cpp.kmeans cimport transform as cpp_transform
 from cuml.cluster.cpp.kmeans cimport KMeansParams
-from cuml.cluster.cpp.kmeans cimport InitMethod
 
 from cuml.internals.array import CumlArray
 from cuml.common.array_descriptor import CumlArrayDescriptor
@@ -246,7 +243,7 @@ class KMeans(Base,
         else:
             self.init = 'preset'
             self._params_init = Array
-            self.cluster_centers_, n_rows, self.n_cols, self.dtype = \
+            self.cluster_centers_, _n_rows, self.n_cols, self.dtype = \
                 input_to_cuml_array(init, order='C',
                                     check_dtype=[np.float32, np.float64])
 
@@ -417,7 +414,7 @@ class KMeans(Base,
         Sum of squared distances of samples to their closest cluster center.
         """
 
-        X_m, n_rows, n_cols, dtype = \
+        X_m, n_rows, n_cols, _ = \
             input_to_cuml_array(X, order='C', check_dtype=self.dtype,
                                 convert_to_dtype=(self.dtype if convert_dtype
                                                   else None),
@@ -452,8 +449,6 @@ class KMeans(Base,
         cdef KMeansParams* params = \
             <KMeansParams*><size_t>self._get_kmeans_params()
 
-        cur_int_dtype = labels_.dtype
-
         if self.dtype == np.float32:
             if int_dtype == np.int32:
                 cpp_predict(
@@ -548,7 +543,7 @@ class KMeans(Base,
 
         """
 
-        X_m, n_rows, n_cols, dtype = \
+        X_m, n_rows, _n_cols, _dtype = \
             input_to_cuml_array(X, order='C', check_dtype=self.dtype,
                                 convert_to_dtype=(self.dtype if convert_dtype
                                                   else None),

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,21 +16,17 @@
 
 # distutils: language = c++
 
-import ctypes
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
-import warnings
 
 from cuml.internals.safe_imports import gpu_only_import
 rmm = gpu_only_import('rmm')
 
 from cython.operator cimport dereference as deref
-from libcpp cimport bool
 from libc.stdint cimport uintptr_t, int64_t
-from libc.stdlib cimport calloc, malloc, free
+from libc.stdlib cimport free
 
 from cuml.internals.array import CumlArray
-from cuml.internals.base import Base
 from pylibraft.common.handle cimport handle_t
 from cuml.common import input_to_cuml_array
 
@@ -79,10 +75,10 @@ cdef extern from "cuml/cluster/kmeans_mg.hpp" \
                   const double *sample_weight,
                   double *centroids,
                   double &inertia,
-                  int64_t &n_iter) except +                  
+                  int64_t &n_iter) except +
 
-class KMeansMG(KMeans):
 
+class KMeansMG(KMeans):
     """
     A Multi-Node Multi-GPU implementation of KMeans
 
@@ -141,16 +137,10 @@ class KMeansMG(KMeans):
 
         cdef uintptr_t cluster_centers_ptr = self.cluster_centers_.ptr
 
-
         int_dtype = np.int32 if np.int64(n_rows) * np.int64(n_cols) < 2**31-1 else np.int64
 
         print(str(n_rows * n_cols))
 
-        labels_ = CumlArray.zeros(shape=n_rows, dtype=int_dtype,
-                                  index=X_m.index)
-
-        cdef uintptr_t labels_ptr = labels_.ptr
-
         cdef float inertiaf = 0
         cdef double inertiad = 0
 
@@ -224,9 +214,9 @@ class KMeansMG(KMeans):
 
         self.handle.sync()
 
-        self.labels_, _, _, _ =  input_to_cuml_array(self.predict(X,
-                                                     sample_weight=sample_weight), order='C',
-                                                     convert_to_dtype=self.dtype)
+        self.labels_, _, _, _ = input_to_cuml_array(self.predict(X,
+                                                    sample_weight=sample_weight), order='C',
+                                                    convert_to_dtype=self.dtype)
 
         del(X_m)
         free(params)

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ np = cpu_only_import('numpy')
 
 from cuml.common.opg_data_utils_mg cimport *
 from libc.stdlib cimport malloc, free
-from libc.stdint cimport uintptr_t, uint32_t, uint64_t
+from libc.stdint cimport uintptr_t
 from cuml.common import input_to_cuml_array
 from cython.operator cimport dereference as deref
 from cuml.internals.array import CumlArray
@@ -213,7 +213,7 @@ def _build_part_inputs(cuda_arr_ifaces,
 
     cuml_arr_ifaces = []
     for arr in cuda_arr_ifaces:
-        X_m, n_rows, n_cols, dtype = \
+        X_m, _, _, _ = \
             input_to_cuml_array(arr, order="F",
                                 convert_to_dtype=(np.float32
                                                   if convert_dtype

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,8 +16,6 @@
 
 # distutils: language = c++
 
-import warnings
-
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
 

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,24 +16,16 @@
 # distutils: language = c++
 
 
-import ctypes
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
 
 from cuml.internals.safe_imports import gpu_only_import
 rmm = gpu_only_import('rmm')
 
-from libc.stdlib cimport malloc, free
+from libc.stdint cimport uintptr_t
 
-from libcpp cimport bool
-from libc.stdint cimport uintptr_t, uint32_t, uint64_t
-from cython.operator cimport dereference as deref
-
-from cuml.internals.array import CumlArray
 import cuml.common.opg_data_utils_mg as opg
 import cuml.internals
-from cuml.internals.base import Base
-from pylibraft.common.handle cimport handle_t
 from cuml.decomposition.utils cimport *
 from cuml.decomposition.utils_mg cimport *
 from cuml.common import input_to_cuml_array
@@ -97,7 +89,6 @@ class BaseDecompositionMG(object):
                                                              rank_to_sizes,
                                                              rank)
 
-        cdef uintptr_t trans_data
         cdef uintptr_t trans_part_desc
         if _transform:
             trans_arys = opg.build_pred_or_trans_arys(X_arys, "F", self.dtype)