diff --git a/notebooks/rapids_scanpy_funcs.py b/notebooks/rapids_scanpy_funcs.py
index f992528..128dec2 100644
--- a/notebooks/rapids_scanpy_funcs.py
+++ b/notebooks/rapids_scanpy_funcs.py
@@ -188,13 +188,30 @@ def regress_out(normalized, n_counts, percent_mito, verbose=False):
     
     if n_counts.shape[0] < 100000 and cp.sparse.issparse(normalized):
         normalized = normalized.todense()
-    
-    for i in range(normalized.shape[1]):
-        if verbose and i % 500 == 0:
-            print("Regressed %s out of %s" %(i, normalized.shape[1]))
+
+    # cuML gained support for multi-target regression in version 22.12. This
+    # removes the need for a Python for loop and speeds up the code
+    # significantly. When 'normalized' has not been converted to dense, the
+    # multi-target regression is not used to prevent running out of memory.
+    cuml_supports_multi_target = LinearRegression._get_tags()['multioutput']
+    is_dense = not cp.sparse.issparse(normalized)
+
+    if cuml_supports_multi_target and is_dense:
         X = regressors
-        y = normalized[:,i]
-        outputs[:, i] = _regress_out_chunk(X, y)
+        y = normalized
+
+        lr = LinearRegression(fit_intercept=False, output_type="cupy")
+        lr.fit(X, y, convert_dtype=True)
+        # Instead of "return y - lr.predict(X), we write to outputs to maintain
+        # "F" ordering like in the else branch.
+        outputs[:] = y - lr.predict(X)
+    else:
+        for i in range(normalized.shape[1]):
+            if verbose and i % 500 == 0:
+                print("Regressed %s out of %s" %(i, normalized.shape[1]))
+            X = regressors
+            y = normalized[:,i]
+            outputs[:, i] = _regress_out_chunk(X, y)
     
     return outputs