Skip to content

Commit

Permalink
HDBSCAN CPU/GPU Interop (#5137)
Browse files Browse the repository at this point in the history
Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Victor Lafargue (https://github.com/viclafargue)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #5137
  • Loading branch information
divyegala authored Jan 19, 2023
1 parent 43f5ef3 commit bd899e1
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 12 deletions.
128 changes: 120 additions & 8 deletions python/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,14 +28,16 @@ cp = gpu_only_import('cupy')
from warnings import warn

from cuml.internals.array import CumlArray
from cuml.internals.base import Base
from cuml.internals.base import UniversalBase
from cuml.common.doc_utils import generate_docstring
from pylibraft.common.handle cimport handle_t
from rmm._lib.device_uvector cimport device_uvector

from pylibraft.common.handle import Handle
from cuml.common import input_to_cuml_array
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals import logger
Expand Down Expand Up @@ -321,7 +323,7 @@ def delete_hdbscan_output(obj):
del obj.hdbscan_output_


class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):

"""
HDBSCAN Clustering
Expand Down Expand Up @@ -470,7 +472,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
Even then in some optimized cases a tree may not be generated.
"""
sk_import_path_ = 'hdbscan'
_cpu_estimator_import_path = 'hdbscan.HDBSCAN'

labels_ = CumlArrayDescriptor()
probabilities_ = CumlArrayDescriptor()
Expand All @@ -487,6 +489,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
mst_dst_ = CumlArrayDescriptor()
mst_weights_ = CumlArrayDescriptor()

@device_interop_preparation
def __init__(self, *,
min_cluster_size=5,
min_samples=None,
Expand Down Expand Up @@ -591,9 +594,6 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):

@property
def prediction_data_(self):
if not self.fit_called_:
raise ValueError(
'The model is not trained yet (call fit() first).')

if not self.prediction_data:
raise ValueError(
Expand Down Expand Up @@ -648,6 +648,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
MinimumSpanningTree(raw_tree, X.to_output("numpy"))
return self.minimum_spanning_tree_

@enable_device_interop
def generate_prediction_data(self):
"""
Create data that caches intermediate results used for predicting
Expand Down Expand Up @@ -701,6 +702,13 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
<PredictionData*> <uintptr_t> self.prediction_data_ptr
free(prediction_data_ptr)
del self.prediction_data_ptr
# this is only constructed when trying to gpu predict
# with a cpu model
if hasattr(self, "inverse_label_map_ptr"):
inverse_label_map_ptr = \
<device_uvector[int]*> <uintptr_t> self.inverse_label_map_ptr
free(inverse_label_map_ptr)
del self.inverse_label_map_ptr

def _construct_output_attributes(self):

Expand Down Expand Up @@ -746,6 +754,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
self.inverse_label_map = CumlArray.empty((0,), dtype="int32")

@generate_docstring()
@enable_device_interop
def fit(self, X, y=None, convert_dtype=True) -> "HDBSCAN":
"""
Fit HDBSCAN model from features.
Expand Down Expand Up @@ -864,6 +873,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
'type': 'dense',
'description': 'Cluster indexes',
'shape': '(n_samples, 1)'})
@enable_device_interop
def fit_predict(self, X, y=None) -> CumlArray:
"""
Fit the HDBSCAN model from features and return
Expand Down Expand Up @@ -997,6 +1007,108 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):

self.__dict__.update(state)

def _prep_cpu_to_gpu_prediction(self, convert_dtype=True):
    """
    This is an internal function, to be called when HDBSCAN
    is trained on CPU but GPU inference is desired.

    Rebuilds on the GPU the state that a GPU ``fit`` would have
    produced: the raw training data, the condensed hierarchy, the
    core distances, the inverse label map, and the prediction data.
    The work is done at most once per model (guarded by
    ``self._cpu_to_gpu_interop_prepped``).

    Parameters
    ----------
    convert_dtype : bool (default = True)
        When True, the CPU model's raw training data is converted to
        float32 while being copied to the device; when False, a
        non-float32 input will fail the ``check_dtype`` check.

    Raises
    ------
    ValueError
        If the model was fit without ``prediction_data=True`` and
        ``generate_prediction_data()`` has not been called.
    """
    if not self.prediction_data:
        raise ValueError("PredictionData not generated. "
                         "Please call clusterer.fit again with "
                         "prediction_data=True or call "
                         "clusterer.generate_prediction_data()")

    # Idempotence guard: everything below only needs to run once.
    if self._cpu_to_gpu_interop_prepped:
        return

    cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()

    # Copy the CPU model's raw training data to the device as a
    # C-contiguous float32 array.
    self.X_m, self.n_rows, self.n_cols, dtype = \
        input_to_cuml_array(self._cpu_model._raw_data, order='C',
                            check_dtype=[np.float32],
                            convert_to_dtype=(np.float32
                                              if convert_dtype
                                              else None))

    # Transfer the four parallel columns of the CPU condensed tree
    # (parent, child, lambda_val, child_size) into device arrays.
    # n_edges is taken from the first column's length.
    self.condensed_parent_, n_edges, _, _ = \
        input_to_cuml_array(self.condensed_tree_.to_numpy()['parent'],
                            order='C',
                            convert_to_dtype=np.int32)

    self.condensed_child_, _, _, _ = \
        input_to_cuml_array(self.condensed_tree_.to_numpy()['child'],
                            order='C',
                            convert_to_dtype=np.int32)

    self.condensed_lambdas_, _, _, _ = \
        input_to_cuml_array(self.condensed_tree_.to_numpy()['lambda_val'],
                            order='C',
                            convert_to_dtype=np.float32)

    self.condensed_sizes_, _, _, _ = \
        input_to_cuml_array(self.condensed_tree_.to_numpy()['child_size'],
                            order='C',
                            convert_to_dtype=np.int32)

    cdef uintptr_t parent_ptr = self.condensed_parent_.ptr
    cdef uintptr_t child_ptr = self.condensed_child_.ptr
    cdef uintptr_t lambdas_ptr = self.condensed_lambdas_.ptr
    cdef uintptr_t sizes_ptr = self.condensed_sizes_.ptr

    # Wrap the device arrays in a heap-allocated C++ CondensedHierarchy.
    # The pointer is stashed on the instance as an integer; presumably
    # it is freed by the destructor path elsewhere in this file —
    # TODO(review): confirm the lifetime/cleanup of condensed_tree_ptr.
    cdef CondensedHierarchy[int, float] *condensed_tree = \
        new CondensedHierarchy[int, float](
            handle_[0], <size_t>self.n_rows, <int>n_edges,
            <int*> parent_ptr, <int*> child_ptr,
            <float*> lambdas_ptr, <int*> sizes_ptr)
    self.condensed_tree_ptr = <size_t> condensed_tree

    # Core distances are recomputed on the GPU from the raw data rather
    # than transferred from the CPU model.
    self.core_dists = CumlArray.empty(self.n_rows, dtype="float32")
    metric = _metrics_mapping[self.metric]

    cdef uintptr_t X_ptr = self.X_m.ptr
    cdef uintptr_t core_dists_ptr = self.core_dists.ptr

    compute_core_dists(handle_[0],
                       <float*> X_ptr,
                       <float*> core_dists_ptr,
                       <size_t> self.n_rows,
                       <size_t> self.n_cols,
                       <DistanceType> metric,
                       <int> self.min_samples)

    # Heap-allocated device buffer for the inverse label map; its raw
    # pointer is stored on the instance (see the cleanup path that
    # handles inverse_label_map_ptr elsewhere in this class).
    cdef device_uvector[int] *inverse_label_map = \
        new device_uvector[int](0, handle_[0].get_stream())

    # NOTE(review): no else/fallback branch — assumes
    # cluster_selection_method was validated to 'eom' or 'leaf' earlier
    # (e.g. in __init__); otherwise the enum below is left unset.
    cdef CLUSTER_SELECTION_METHOD cluster_selection_method
    if self.cluster_selection_method == 'eom':
        cluster_selection_method = CLUSTER_SELECTION_METHOD.EOM
    elif self.cluster_selection_method == 'leaf':
        cluster_selection_method = CLUSTER_SELECTION_METHOD.LEAF

    compute_inverse_label_map(handle_[0],
                              deref(condensed_tree),
                              <size_t> self.n_rows,
                              <CLUSTER_SELECTION_METHOD>
                              cluster_selection_method,
                              deref(inverse_label_map),
                              <bool> self.allow_single_cluster,
                              <int> self.max_cluster_size,
                              <float> self.cluster_selection_epsilon)

    # Expose the device_uvector's memory as a CumlArray; `self` is
    # passed as the final argument (presumably the owner keeping the
    # buffer alive — confirm against _cuml_array_from_ptr).
    self.n_clusters_ = <int> inverse_label_map[0].size()
    self.inverse_label_map_ptr = <size_t> inverse_label_map[0].data()
    self.inverse_label_map = \
        _cuml_array_from_ptr(self.inverse_label_map_ptr,
                             self.n_clusters_ * sizeof(int),
                             (self.n_clusters_, ), "int32", self)

    # Mark the model as fit so GPU prediction entry points accept it,
    # then build the GPU-side PredictionData caches.
    self.fit_called_ = True
    self.generate_prediction_data()

    # Block until all enqueued GPU work has completed before declaring
    # the interop state ready.
    self.handle.sync()

    self._cpu_to_gpu_interop_prepped = True

def get_param_names(self):
return super().get_param_names() + [
"metric",
Expand All @@ -1013,7 +1125,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
"prediction_data"
]

def get_attributes_names(self):
def get_attr_names(self):
attr_names = ['labels_', 'probabilities_', 'cluster_persistence_',
'condensed_tree_', 'single_linkage_tree_',
'outlier_scores_']
Expand Down
64 changes: 62 additions & 2 deletions python/cuml/cluster/hdbscan/prediction.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,12 +30,18 @@ from cuml.common.doc_utils import generate_docstring
from pylibraft.common.handle cimport handle_t

from pylibraft.common.handle import Handle
from cuml.common import input_to_cuml_array
from cuml.common import (
input_to_cuml_array,
input_to_host_array
)
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.internals.available_devices import is_cuda_available
from cuml.internals.device_type import DeviceType
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals import logger
from cuml.internals.import_utils import has_hdbscan_plots
from cuml.internals.import_utils import has_hdbscan_prediction

import cuml
from cuml.metrics.distance_type cimport DistanceType
Expand Down Expand Up @@ -134,6 +140,34 @@ def all_points_membership_vectors(clusterer):
cluster ``j`` is in ``membership_vectors[i, j]``.
"""

device_type = cuml.global_settings.device_type

# cpu infer, cpu/gpu train
if device_type == DeviceType.host:
assert has_hdbscan_prediction()
from hdbscan.prediction import all_points_membership_vectors \
as cpu_all_points_membership_vectors

# trained on gpu
if not hasattr(clusterer, "_cpu_model"):
# the reference HDBSCAN implementations uses @property
# for attributes without setters available for them,
# so they can't be transferred from the GPU model
# to the CPU model
raise ValueError("Inferring on CPU is not supported yet when the "
"model has been trained on GPU")

# this took a long debugging session to figure out, but
# this method on cpu does not work without this copy for some reason
clusterer._cpu_model.prediction_data_.raw_data = \
clusterer._cpu_model.prediction_data_.raw_data.copy()
return cpu_all_points_membership_vectors(clusterer._cpu_model)

elif device_type == DeviceType.device:
# trained on cpu
if hasattr(clusterer, "_cpu_model"):
clusterer._prep_cpu_to_gpu_prediction()

if not clusterer.fit_called_:
raise ValueError("The clusterer is not fit on data. "
"Please call clusterer.fit first")
Expand Down Expand Up @@ -209,6 +243,32 @@ def approximate_predict(clusterer, points_to_predict, convert_dtype=True):
The soft cluster scores for each of the ``points_to_predict``
"""

device_type = cuml.global_settings.device_type

# cpu infer, cpu/gpu train
if device_type == DeviceType.host:
assert has_hdbscan_prediction()
from hdbscan.prediction import approximate_predict \
as cpu_approximate_predict

# trained on gpu
if not hasattr(clusterer, "_cpu_model"):
# the reference HDBSCAN implementations uses @property
# for attributes without setters available for them,
# so they can't be transferred from the GPU model
# to the CPU model
raise ValueError("Inferring on CPU is not supported yet when the "
"model has been trained on GPU")

host_points_to_predict = input_to_host_array(points_to_predict).array
return cpu_approximate_predict(clusterer._cpu_model,
host_points_to_predict)

elif device_type == DeviceType.device:
# trained on cpu
if hasattr(clusterer, "_cpu_model"):
clusterer._prep_cpu_to_gpu_prediction()

if not clusterer.fit_called_:
raise ValueError("The clusterer is not fit on data. "
"Please call clusterer.fit first")
Expand Down
Loading

0 comments on commit bd899e1

Please sign in to comment.