Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cython-lint configuration. #5439

Merged
merged 15 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ repos:
types_or: [python, cython]
exclude: thirdparty
additional_dependencies: [flake8-force]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.15.0
hooks:
- id: cython-lint
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.1
hooks:
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ ignore-words-list = "inout,numer,startd,couldn,referr"
builtin = "clear"
# disable warnings about binary files and wrong encoding
quiet-level = 3

[tool.cython-lint]
max-line-length = 999
bdice marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to re-enable E501 with a reasonable line length limit (following our flake8/black conventions) in a follow-up PR. Wrapping lines takes a little more manual effort than I wanted to put in for this first pass.

ignore = ['E501']
4 changes: 2 additions & 2 deletions python/cuml/cluster/cpp/kmeans.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -167,4 +167,4 @@ cdef extern from "cuml/cluster/kmeans.hpp" namespace "ML::kmeans":
const double *X,
int64_t n_samples,
int64_t n_features,
double *X_new) except +
double *X_new) except +
6 changes: 1 addition & 5 deletions python/cuml/cluster/dbscan.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,15 +16,13 @@

# distutils: language = c++

import ctypes
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
from cuml.internals.safe_imports import gpu_only_import
cp = gpu_only_import('cupy')

from libcpp cimport bool
from libc.stdint cimport uintptr_t, int64_t
from libc.stdlib cimport calloc, malloc, free

from cuml.internals.array import CumlArray
from cuml.internals.base import Base
Expand All @@ -37,8 +35,6 @@ from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.metrics.distance_type cimport DistanceType

from collections import defaultdict

cdef extern from "cuml/cluster/dbscan.hpp" \
namespace "ML::Dbscan":

Expand Down
7 changes: 3 additions & 4 deletions python/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals import logger
from cuml.internals.import_utils import has_hdbscan

import cuml
Expand Down Expand Up @@ -257,7 +256,7 @@ def condense_hierarchy(dendrogram,
new CondensedHierarchy[int, float](
handle_[0], <size_t>n_leaves)

children, n_rows, _, _ = \
children, _, _, _ = \
input_to_cuml_array(dendrogram[:, 0:2].astype('int32'), order='C',
check_dtype=[np.int32],
convert_to_dtype=(np.int32))
Expand Down Expand Up @@ -457,7 +456,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
A score of how persistent each cluster is. A score of 1.0 represents
a perfectly stable cluster that persists over all distance scales,
while a score of 0.0 represents a perfectly ephemeral cluster. These
scores can be used to gauge the relative coherence of the
scores can be used to gauge the relative coherence of the
clusters output by the algorithm.

condensed_tree_ : CondensedTree object
Expand Down Expand Up @@ -1026,7 +1025,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):

cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()

self.X_m, self.n_rows, self.n_cols, dtype = \
self.X_m, self.n_rows, self.n_cols, _ = \
input_to_cuml_array(self._cpu_model._raw_data, order='C',
check_dtype=[np.float32],
convert_to_dtype=(np.float32
Expand Down
17 changes: 5 additions & 12 deletions python/cuml/cluster/hdbscan/prediction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,13 @@ from cuml.internals.safe_imports import gpu_only_import
cp = gpu_only_import('cupy')

from cuml.internals.array import CumlArray
from cuml.internals.base import Base
from cuml.common.doc_utils import generate_docstring
from pylibraft.common.handle cimport handle_t

from pylibraft.common.handle import Handle
from cuml.common import (
input_to_cuml_array,
input_to_host_array
)
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.internals.available_devices import is_cuda_available
from cuml.internals.device_type import DeviceType
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals import logger
from cuml.internals.import_utils import has_hdbscan

Expand Down Expand Up @@ -96,7 +89,7 @@ cdef extern from "cuml/cluster/hdbscan.hpp" namespace "ML":
DistanceType metric,
float* membership_vec,
size_t batch_size)

void compute_membership_vector(
const handle_t& handle,
CondensedHierarchy[int, float] &condensed_tree,
Expand All @@ -107,7 +100,7 @@ cdef extern from "cuml/cluster/hdbscan.hpp" namespace "ML":
int min_samples,
DistanceType metric,
float* membership_vec,
size_t batch_size);
size_t batch_size)

void out_of_sample_predict(const handle_t &handle,
CondensedHierarchy[int, float] &condensed_tree,
Expand Down Expand Up @@ -249,7 +242,7 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty
The new data points to predict cluster labels for. They should
have the same dimensionality as the original dataset over which
clusterer was fit.

batch_size : int, optional, default=min(4096, n_points_to_predict)
Lowers memory requirement by computing distance-based membership
in smaller batches of points in the prediction data. A batch size
Expand Down Expand Up @@ -306,7 +299,7 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty
convert_to_dtype=(np.float32
if convert_dtype
else None))

if clusterer.n_clusters_ == 0:
return np.zeros(n_prediction_points, dtype=np.float32)

Expand All @@ -318,7 +311,7 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty

cdef uintptr_t prediction_ptr = points_to_predict_m.ptr
cdef uintptr_t input_ptr = clusterer.X_m.ptr

membership_vec = CumlArray.empty(
(n_prediction_points * clusterer.n_clusters_,),
dtype="float32")
Expand Down
15 changes: 5 additions & 10 deletions python/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,24 +16,21 @@

# distutils: language = c++

import ctypes
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
from cuml.internals.safe_imports import gpu_only_import
rmm = gpu_only_import('rmm')
import warnings
import typing

from cython.operator cimport dereference as deref
from libcpp cimport bool
from libc.stdint cimport uintptr_t, int64_t
from libc.stdlib cimport calloc, malloc, free
from libc.stdlib cimport calloc, free

from cuml.cluster.cpp.kmeans cimport fit_predict as cpp_fit_predict
from cuml.cluster.cpp.kmeans cimport predict as cpp_predict
from cuml.cluster.cpp.kmeans cimport transform as cpp_transform
from cuml.cluster.cpp.kmeans cimport KMeansParams
from cuml.cluster.cpp.kmeans cimport InitMethod

from cuml.internals.array import CumlArray
from cuml.common.array_descriptor import CumlArrayDescriptor
Expand Down Expand Up @@ -246,7 +243,7 @@ class KMeans(Base,
else:
self.init = 'preset'
self._params_init = Array
self.cluster_centers_, n_rows, self.n_cols, self.dtype = \
self.cluster_centers_, _n_rows, self.n_cols, self.dtype = \
input_to_cuml_array(init, order='C',
check_dtype=[np.float32, np.float64])

Expand Down Expand Up @@ -417,7 +414,7 @@ class KMeans(Base,
Sum of squared distances of samples to their closest cluster center.
"""

X_m, n_rows, n_cols, dtype = \
X_m, n_rows, n_cols, _ = \
input_to_cuml_array(X, order='C', check_dtype=self.dtype,
convert_to_dtype=(self.dtype if convert_dtype
else None),
Expand Down Expand Up @@ -452,8 +449,6 @@ class KMeans(Base,
cdef KMeansParams* params = \
<KMeansParams*><size_t>self._get_kmeans_params()

cur_int_dtype = labels_.dtype

if self.dtype == np.float32:
if int_dtype == np.int32:
cpp_predict(
Expand Down Expand Up @@ -548,7 +543,7 @@ class KMeans(Base,

"""

X_m, n_rows, n_cols, dtype = \
X_m, n_rows, _n_cols, _dtype = \
input_to_cuml_array(X, order='C', check_dtype=self.dtype,
convert_to_dtype=(self.dtype if convert_dtype
else None),
Expand Down
24 changes: 7 additions & 17 deletions python/cuml/cluster/kmeans_mg.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,21 +16,17 @@

# distutils: language = c++

import ctypes
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
import warnings

from cuml.internals.safe_imports import gpu_only_import
rmm = gpu_only_import('rmm')

from cython.operator cimport dereference as deref
from libcpp cimport bool
from libc.stdint cimport uintptr_t, int64_t
from libc.stdlib cimport calloc, malloc, free
from libc.stdlib cimport free

from cuml.internals.array import CumlArray
from cuml.internals.base import Base
from pylibraft.common.handle cimport handle_t
from cuml.common import input_to_cuml_array

Expand Down Expand Up @@ -79,10 +75,10 @@ cdef extern from "cuml/cluster/kmeans_mg.hpp" \
const double *sample_weight,
double *centroids,
double &inertia,
int64_t &n_iter) except +
int64_t &n_iter) except +

class KMeansMG(KMeans):

class KMeansMG(KMeans):
"""
A Multi-Node Multi-GPU implementation of KMeans

Expand Down Expand Up @@ -141,16 +137,10 @@ class KMeansMG(KMeans):

cdef uintptr_t cluster_centers_ptr = self.cluster_centers_.ptr


int_dtype = np.int32 if np.int64(n_rows) * np.int64(n_cols) < 2**31-1 else np.int64

print(str(n_rows * n_cols))

labels_ = CumlArray.zeros(shape=n_rows, dtype=int_dtype,
index=X_m.index)

cdef uintptr_t labels_ptr = labels_.ptr

cdef float inertiaf = 0
cdef double inertiad = 0

Expand Down Expand Up @@ -224,9 +214,9 @@ class KMeansMG(KMeans):

self.handle.sync()

self.labels_, _, _, _ = input_to_cuml_array(self.predict(X,
sample_weight=sample_weight), order='C',
convert_to_dtype=self.dtype)
self.labels_, _, _, _ = input_to_cuml_array(self.predict(X,
sample_weight=sample_weight), order='C',
convert_to_dtype=self.dtype)

del(X_m)
free(params)
Expand Down
6 changes: 3 additions & 3 deletions python/cuml/common/opg_data_utils_mg.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -19,7 +19,7 @@ np = cpu_only_import('numpy')

from cuml.common.opg_data_utils_mg cimport *
from libc.stdlib cimport malloc, free
from libc.stdint cimport uintptr_t, uint32_t, uint64_t
from libc.stdint cimport uintptr_t
from cuml.common import input_to_cuml_array
from cython.operator cimport dereference as deref
from cuml.internals.array import CumlArray
Expand Down Expand Up @@ -213,7 +213,7 @@ def _build_part_inputs(cuda_arr_ifaces,

cuml_arr_ifaces = []
for arr in cuda_arr_ifaces:
X_m, n_rows, n_cols, dtype = \
X_m, _, _, _ = \
input_to_cuml_array(arr, order="F",
convert_to_dtype=(np.float32
if convert_dtype
Expand Down
4 changes: 1 addition & 3 deletions python/cuml/datasets/arima.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,8 +16,6 @@

# distutils: language = c++

import warnings

from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')

Expand Down
13 changes: 2 additions & 11 deletions python/cuml/decomposition/base_mg.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,24 +16,16 @@
# distutils: language = c++


import ctypes
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')

from cuml.internals.safe_imports import gpu_only_import
rmm = gpu_only_import('rmm')

from libc.stdlib cimport malloc, free
from libc.stdint cimport uintptr_t

from libcpp cimport bool
from libc.stdint cimport uintptr_t, uint32_t, uint64_t
from cython.operator cimport dereference as deref

from cuml.internals.array import CumlArray
import cuml.common.opg_data_utils_mg as opg
import cuml.internals
from cuml.internals.base import Base
from pylibraft.common.handle cimport handle_t
from cuml.decomposition.utils cimport *
from cuml.decomposition.utils_mg cimport *
from cuml.common import input_to_cuml_array
Expand Down Expand Up @@ -97,7 +89,6 @@ class BaseDecompositionMG(object):
rank_to_sizes,
rank)

cdef uintptr_t trans_data
cdef uintptr_t trans_part_desc
if _transform:
trans_arys = opg.build_pred_or_trans_arys(X_arys, "F", self.dtype)
Expand Down
Loading