Enhance cuML benchmark utility and refactor hdbscan import utilities #5242

Merged: 11 commits, Mar 6, 2023
python/cuml/benchmark/algorithms.py: 38 changes (35 additions & 3 deletions)
@@ -59,7 +59,7 @@
decomposition,
linear_model,
) # noqa: F401
-from cuml.internals.import_utils import has_umap
+from cuml.internals.import_utils import has_hdbscan, has_umap
from cuml.internals.safe_imports import cpu_only_import

np = cpu_only_import("numpy")
@@ -69,6 +69,10 @@
import umap


+if has_hdbscan():
+    import hdbscan


class AlgorithmPair:
"""
Wraps a cuML algorithm and (optionally) a cpu-based algorithm
@@ -272,6 +276,14 @@ def all_algorithms():
name="DBSCAN",
accepts_labels=False,
),
+AlgorithmPair(
+hdbscan.HDBSCAN if has_hdbscan() else None,
+cuml.cluster.HDBSCAN,
+shared_args={},
+cpu_args={},
+name="HDBSCAN",
+accepts_labels=False,
+),
AlgorithmPair(
sklearn.linear_model.LinearRegression,
cuml.linear_model.LinearRegression,
Expand Down Expand Up @@ -315,7 +327,8 @@ def all_algorithms():
AlgorithmPair(
sklearn.ensemble.RandomForestClassifier,
cuml.ensemble.RandomForestClassifier,
shared_args={"max_features": 1.0, "n_estimators": 10},
shared_args={"max_features": "sqrt", "n_estimators": 50},
cpu_args={"n_jobs": -1},
name="RandomForestClassifier",
accepts_labels=True,
cpu_data_prep_hook=_labels_to_int_hook,
@@ -325,7 +338,8 @@
AlgorithmPair(
sklearn.ensemble.RandomForestRegressor,
cuml.ensemble.RandomForestRegressor,
shared_args={"max_features": 1.0, "n_estimators": 10},
shared_args={"max_features": 1.0, "n_estimators": 50},
cpu_args={"n_jobs": -1},
name="RandomForestRegressor",
accepts_labels=True,
accuracy_function=metrics.r2_score,
@@ -382,6 +396,24 @@ def all_algorithms():
accepts_labels=True,
accuracy_function=cuml.metrics.r2_score,
),
+AlgorithmPair(
+sklearn.svm.LinearSVC,
+cuml.svm.LinearSVC,
+shared_args={},
+cuml_args={},
+name="LinearSVC",
+accepts_labels=True,
+accuracy_function=cuml.metrics.accuracy_score,
+),
+AlgorithmPair(
+sklearn.svm.LinearSVR,
+cuml.svm.LinearSVR,
+shared_args={},
+cuml_args={},
+name="LinearSVR",
+accepts_labels=True,
+accuracy_function=cuml.metrics.accuracy_score,
+),
AlgorithmPair(
sklearn.neighbors.KNeighborsClassifier,
cuml.neighbors.KNeighborsClassifier,
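
The new HDBSCAN pair follows the same optional-dependency pattern already used for UMAP: the CPU library is imported only when has_hdbscan() reports it is installed, and None is registered as the CPU implementation otherwise so the GPU side can still be benchmarked. A minimal standalone sketch of that pattern (this is an illustration, not the benchmark runner itself; it assumes the usual scikit-learn-style fit_predict interface on both estimators):

import numpy as np
import cuml

# Optional CPU reference implementation: fall back to None when absent,
# mirroring "hdbscan.HDBSCAN if has_hdbscan() else None" in the diff above.
try:
    import hdbscan
except ImportError:
    hdbscan = None

X = np.random.RandomState(0).random((2000, 16)).astype(np.float32)

# GPU path is always available once cuML is installed.
gpu_labels = cuml.cluster.HDBSCAN().fit_predict(X)

# CPU path only runs when the optional dependency is present.
if hdbscan is not None:
    cpu_labels = hdbscan.HDBSCAN().fit_predict(X)
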
python/cuml/cluster/hdbscan/hdbscan.pyx: 11 changes (5 additions & 6 deletions)
@@ -41,8 +41,7 @@ from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals import logger
-from cuml.internals.import_utils import has_hdbscan_plots
-from cuml.internals.import_utils import has_hdbscan_prediction
+from cuml.internals.import_utils import has_hdbscan

import cuml
from cuml.metrics.distance_type cimport DistanceType
@@ -210,7 +209,7 @@ def _build_condensed_tree_plot_host(
raw_tree['lambda_val'] = lambdas
raw_tree['child_size'] = sizes

-if has_hdbscan_plots():
+if has_hdbscan(raise_if_unavailable=True):
from hdbscan.plots import CondensedTree
return CondensedTree(raw_tree,
cluster_selection_method,
@@ -586,7 +585,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):

raw_tree = raw_tree.astype(np.float64)

-if has_hdbscan_plots():
+if has_hdbscan(raise_if_unavailable=True):
from hdbscan.plots import SingleLinkageTree
self.single_linkage_tree_obj = SingleLinkageTree(raw_tree)

@@ -605,7 +604,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
'model.generate_prediction_data()')

if self.prediction_data_obj is None:
-if has_hdbscan_prediction():
+if has_hdbscan(raise_if_unavailable=True):
from sklearn.neighbors import KDTree, BallTree
from hdbscan.prediction import PredictionData

@@ -646,7 +645,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):

raw_tree = raw_tree.astype(np.float64)

-if has_hdbscan_plots():
+if has_hdbscan(raise_if_unavailable=True):
from hdbscan.plots import MinimumSpanningTree
self.minimum_spanning_tree_ = \
MinimumSpanningTree(raw_tree, X.to_output("numpy"))
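
Each of the call sites above now uses the single has_hdbscan(raise_if_unavailable=True) check before importing the specific hdbscan submodule it needs. A condensed sketch of that guard (the wrapper function is hypothetical; only the check-then-import shape comes from the diff):

from cuml.internals.import_utils import has_hdbscan


def build_single_linkage_tree_obj(raw_tree):
    # Raises a descriptive ImportError when hdbscan is not installed,
    # so the submodule import below can be assumed to succeed.
    if has_hdbscan(raise_if_unavailable=True):
        from hdbscan.plots import SingleLinkageTree
        return SingleLinkageTree(raw_tree)
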
python/cuml/cluster/hdbscan/prediction.pyx: 7 changes (3 additions & 4 deletions)
@@ -40,8 +40,7 @@ from cuml.internals.device_type import DeviceType
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals import logger
-from cuml.internals.import_utils import has_hdbscan_plots
-from cuml.internals.import_utils import has_hdbscan_prediction
+from cuml.internals.import_utils import has_hdbscan

import cuml
from cuml.metrics.distance_type cimport DistanceType
@@ -144,7 +143,7 @@ def all_points_membership_vectors(clusterer):

# cpu infer, cpu/gpu train
if device_type == DeviceType.host:
-assert has_hdbscan_prediction()
+assert has_hdbscan(raise_if_unavailable=True)
from hdbscan.prediction import all_points_membership_vectors \
as cpu_all_points_membership_vectors

@@ -247,7 +246,7 @@ def approximate_predict(clusterer, points_to_predict, convert_dtype=True):

# cpu infer, cpu/gpu train
if device_type == DeviceType.host:
-assert has_hdbscan_prediction()
+assert has_hdbscan(raise_if_unavailable=True)
from hdbscan.prediction import approximate_predict \
as cpu_approximate_predict

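
Both prediction entry points share the same host-side fallback: when inference is requested on the CPU, assert that hdbscan is importable and delegate to its reference implementation. A condensed sketch of that branch (the wrapper function name is hypothetical; the import and the assert come from the diff):

from cuml.internals.import_utils import has_hdbscan


def host_approximate_predict(clusterer, points_to_predict):
    # has_hdbscan(raise_if_unavailable=True) raises a descriptive ImportError
    # when the package is missing, so the assert never fails silently.
    assert has_hdbscan(raise_if_unavailable=True)
    from hdbscan.prediction import approximate_predict as cpu_approximate_predict
    return cpu_approximate_predict(clusterer, points_to_predict)
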
python/cuml/internals/import_utils.py: 22 changes (5 additions & 17 deletions)
@@ -162,30 +162,18 @@ def has_sklearn():
        return False


-def has_hdbscan_plots(raise_if_unavailable=True):
+def has_hdbscan(raise_if_unavailable=False):
    try:
-        from hdbscan.plots import SingleLinkageTree  # NOQA
+        import hdbscan  # NOQA

        return True
    except ImportError:
-        if raise_if_unavailable:
-            raise ImportError("hdbscan must be installed to use plots.")
-        else:
+        if not raise_if_unavailable:
            return False
-
-
-def has_hdbscan_prediction(raise_if_unavailable=True):
-    try:
-        from hdbscan.prediction import PredictionData  # NOQA
-
-        return True
-    except ImportError:
-        if raise_if_unavailable:
+        else:
            raise ImportError(
-                "hdbscan.prediction must be installed " "to use prediction."
+                "hdbscan is not available. Please install hdbscan."
            )
-        else:
-            return False


def has_shap(min_version="0.37"):
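
Read end to end, the hunk above collapses has_hdbscan_plots and has_hdbscan_prediction into one helper that checks for the top-level package instead of individual submodules. For reference, the resulting function is approximately:

def has_hdbscan(raise_if_unavailable=False):
    # Single helper replacing has_hdbscan_plots / has_hdbscan_prediction.
    # With raise_if_unavailable=True, a descriptive ImportError is raised
    # instead of returning False, which is how the .pyx call sites use it.
    try:
        import hdbscan  # NOQA

        return True
    except ImportError:
        if not raise_if_unavailable:
            return False
        else:
            raise ImportError(
                "hdbscan is not available. Please install hdbscan."
            )
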