rapidsai · rapids-bot · Nov 29, 2023 · Nov 14, 2023 · Nov 28, 2023 · Nov 28, 2023
@@ -101,7 +101,7 @@ inline void qn_fit_x_mg(const raft::handle_t& handle,
 
   switch (pams.loss) {
     case QN_LOSS_LOGISTIC: {
-      ASSERT(C == 2, "qn_mg.cuh: logistic loss invalid C");
+      ASSERT(C > 0, "qn_mg.cuh: logistic loss invalid C");
       ML::GLM::detail::LogisticLoss<T> loss(handle, D, pams.fit_intercept);
       ML::GLM::opg::qn_fit_mg<T, decltype(loss)>(
         handle, pams, loss, X, y, Z, w0_data, f, num_iters, n_samples, rank, n_ranks);

@@ -195,6 +195,13 @@ def _func_fit(f, data, n_rows, n_cols, partsToSizes, rank):
         for p in partsToSizes:
             aggregated_partsToSizes[p[0]][1] += p[1]
 
-        return f.fit(
+        ret_status = f.fit(
             [(inp_X, inp_y)], n_rows, n_cols, aggregated_partsToSizes, rank
         )
+
+        if len(f.classes_) == 1:
+            raise ValueError(
+                f"This solver needs samples of at least 2 classes in the data, but the data contains only one class: {f.classes_[0]}"
+            )
+
+        return ret_status
@@ -63,7 +63,6 @@ class MGFitMixin(object):
                 check_dtype = self.dtype
 
             if sparse_input:
-
                 X_m = SparseCumlArray(input_data[i][0], convert_index=np.int32)
                 _, self.n_cols = X_m.shape
             else:

@@ -170,7 +170,7 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
                              "with softmax (multinomial).")
 
         if solves_classification and not solves_multiclass:
-            self._num_classes_dim = self._num_classes - 1
+            self._num_classes_dim = 1
         else:
             self._num_classes_dim = self._num_classes
 
@@ -185,7 +185,6 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
 
     def fit(self, input_data, n_rows, n_cols, parts_rank_size, rank, convert_dtype=False):
 
-        self.rank = rank
         assert len(input_data) == 1, f"Currently support only one (X, y) pair in the list. Received {len(input_data)} pairs."
         self.is_col_major = False
         order = 'F' if self.is_col_major else 'C'
@@ -207,11 +206,12 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
         self._num_classes = len(self.classes_)
         self.loss = "sigmoid" if self._num_classes <= 2 else "softmax"
         self.prepare_for_fit(self._num_classes)
+
         cdef uintptr_t mat_coef_ptr = self.coef_.ptr
 
         cdef qn_params qnpams = self.solver_model.qnparams.params
 
-        sparse_input = True if isinstance(X, list) else False
+        sparse_input = isinstance(X, list)
 
         if self.dtype == np.float32:
             if sparse_input is False:

@@ -339,12 +339,12 @@ def imp():
         datatype, nrows, ncols, n_info, n_classes=n_classes
     )
 
-    if convert_to_sparse is False:
-        # X_dask and y_dask are dask cudf
-        X_dask, y_dask = _prep_training_data(client, X, y, n_parts)
-    else:
+    if convert_to_sparse:
         # X_dask and y_dask are dask array
         X_dask, y_dask = _prep_training_data_sparse(client, X, y, n_parts)
+    else:
+        # X_dask and y_dask are dask cudf
+        X_dask, y_dask = _prep_training_data(client, X, y, n_parts)
 
     lr = cumlLBFGS_dask(
         solver="qn",
@@ -557,30 +557,48 @@ def test_elasticnet(
         ("elasticnet", 2.0, 0.2),
     ],
 )
-@pytest.mark.parametrize("datatype", [np.float32])
+@pytest.mark.parametrize("datatype", [np.float32, np.float64])
 @pytest.mark.parametrize("delayed", [True])
 @pytest.mark.parametrize("n_classes", [2, 8])
 def test_sparse_from_dense(
     fit_intercept, regularization, datatype, delayed, n_classes, client
 ):
-    penalty = regularization[0]
-    C = regularization[1]
-    l1_ratio = regularization[2]
+    penalty, C, l1_ratio = regularization
 
-    test_lbfgs(
-        nrows=1e5,
-        ncols=20,
-        n_parts=2,
-        fit_intercept=fit_intercept,
-        datatype=datatype,
-        delayed=delayed,
-        client=client,
-        penalty=penalty,
-        n_classes=n_classes,
-        C=C,
-        l1_ratio=l1_ratio,
-        convert_to_sparse=True,
-    )
+    if datatype == np.float32:
+        test_lbfgs(
+            nrows=1e5,
+            ncols=20,
+            n_parts=2,
+            fit_intercept=fit_intercept,
+            datatype=datatype,
+            delayed=delayed,
+            client=client,
+            penalty=penalty,
+            n_classes=n_classes,
+            C=C,
+            l1_ratio=l1_ratio,
+            convert_to_sparse=True,
+        )
+    else:
+        with pytest.raises(
+            RuntimeError,
+            match="dtypes other than float32 are currently not supported yet. See issue: https://github.com/rapidsai/cuml/issues/5589",
+        ):
+            test_lbfgs(
+                nrows=1e5,
+                ncols=20,
+                n_parts=2,
+                fit_intercept=fit_intercept,
+                datatype=datatype,
+                delayed=delayed,
+                client=client,
+                penalty=penalty,
+                n_classes=n_classes,
+                C=C,
+                l1_ratio=l1_ratio,
+                convert_to_sparse=True,
+            )
 
 
 @pytest.mark.parametrize("dtype", [np.float32])
@@ -621,3 +639,27 @@ def test_sparse_nlp20news(dtype, nlp_20news, client):
     cpu_preds = cpu.predict(X_test)
     cpu_score = accuracy_score(y_test, cpu_preds.tolist())
     assert cuml_score >= cpu_score or np.abs(cuml_score - cpu_score) < 1e-3
+
+
+@pytest.mark.parametrize("fit_intercept", [False, True])
+def test_exception_one_label(fit_intercept, client):
+    n_parts = 2
+    datatype = "float32"
+
+    X = np.array([(1, 2), (1, 3), (2, 1), (3, 1)], datatype)
+    y = np.array([1.0, 1.0, 1.0, 1.0], datatype)
+    X_df, y_df = _prep_training_data(client, X, y, n_parts)
+
+    err_msg = "This solver needs samples of at least 2 classes in the data, but the data contains only one class: 1.0"
+
+    from cuml.dask.linear_model import LogisticRegression as cumlLBFGS_dask
+
+    mg = cumlLBFGS_dask(fit_intercept=fit_intercept, verbose=6)
+    with pytest.raises(RuntimeError, match=err_msg):
+        mg.fit(X_df, y_df)
+
+    from sklearn.linear_model import LogisticRegression
+
+    lr = LogisticRegression(fit_intercept=fit_intercept)
+    with pytest.raises(ValueError, match=err_msg):
+        lr.fit(X, y)