NVIDIA-Genomics-Research · cjnolet · Jan 24, 2023 · Nov 17, 2022 · Dec 19, 2022 · Jan 24, 2023
diff --git a/notebooks/rapids_scanpy_funcs.py b/notebooks/rapids_scanpy_funcs.py
@@ -188,13 +188,33 @@ def regress_out(normalized, n_counts, percent_mito, verbose=False):
 
     if n_counts.shape[0] < 100000 and cp.sparse.issparse(normalized):
         normalized = normalized.todense()
-
-    for i in range(normalized.shape[1]):
-        if verbose and i % 500 == 0:
-            print("Regressed %s out of %s" %(i, normalized.shape[1]))
+
+    # cuML gained support for multi-target regression in version 22.12. This
+    # removes the need for a Python for loop and speeds up the code
+    # significantly. When 'normalized' has not been converted to dense, the
+    # multi-target regression is not used to prevent running out of memory.
+    cuml_supports_multi_target = LinearRegression._get_tags()['multioutput']
+    is_dense = not cp.sparse.issparse(normalized)
+
+    if cuml_supports_multi_target and is_dense:
         X = regressors
-        y = normalized[:,i]
-        outputs[:, i] = _regress_out_chunk(X, y)
+        y = normalized
+
+        # Use SVD algorithm as this is the only algorithm supported in the
+        # multi-target regression. In addition, it is more numerically stable
+        # than the default 'eig' algorithm.
+        lr = LinearRegression(fit_intercept=False, output_type="cupy", algorithm='svd')
+        lr.fit(X, y, convert_dtype=True)
+        # Instead of "return y - lr.predict(X), we write to outputs to maintain
+        # "F" ordering like in the else branch.
+        outputs[:] = y - lr.predict(X)
+    else:
+        for i in range(normalized.shape[1]):
+            if verbose and i % 500 == 0:
+                print("Regressed %s out of %s" %(i, normalized.shape[1]))
+            X = regressors
+            y = normalized[:,i]
+            outputs[:, i] = _regress_out_chunk(X, y)
 
     return outputs