rapidsai · rapids-bot · Mar 24, 2021 · Mar 18, 2021 · Mar 18, 2021 · Mar 23, 2021
@@ -39,7 +39,7 @@ else(DEFINED ENV{RAFT_PATH})
 
   ExternalProject_Add(raft
     GIT_REPOSITORY    https://github.com/rapidsai/raft.git
-    GIT_TAG           6455e05b3889db2b495cf3189b33c2b07bfbebf2
+    GIT_TAG           fc46618d76d70710b07d445e79d3e07dea6cad2f
     PREFIX            ${RAFT_DIR}
     CONFIGURE_COMMAND ""
     BUILD_COMMAND     ""

@@ -14,14 +14,16 @@
 # limitations under the License.
 #
 
-from sklearn.pipeline import Pipeline
+from sklearn.pipeline import Pipeline, make_pipeline
 
-
-Pipeline.__doc__ = """
+disclaimer = """
 This code is developed and maintained by scikit-learn and imported
 by cuML to maintain the familiar sklearn namespace structure.
 cuML includes tests to ensure full compatibility of these wrappers
 with CUDA-based data and cuML estimators, but all of the underlying code
-is due to the scikit-learn developers.\n\n""" + Pipeline.__doc__
+is due to the scikit-learn developers.\n\n"""
+
+Pipeline.__doc__ = disclaimer + (Pipeline.__doc__ or '')  # None under -OO
+make_pipeline.__doc__ = disclaimer + (make_pipeline.__doc__ or '')
 
-__all__ = ['Pipeline']
+__all__ = ['Pipeline', 'make_pipeline']
@@ -19,7 +19,6 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.datasets import fetch_california_housing
 from sklearn.feature_extraction.text import CountVectorizer
-import zlib
 
 
 def pytest_configure(config):
@@ -32,7 +31,7 @@ def nlp_20news():
         twenty_train = fetch_20newsgroups(subset='train',
                                           shuffle=True,
                                           random_state=42)
-    except (IOError, zlib.error):
+    except Exception:  # any fetch/decode failure; Ctrl-C still propagates
         pytest.xfail(reason="Error fetching 20 newsgroup dataset")
 
     count_vect = CountVectorizer()

@@ -18,7 +18,7 @@
 import cuml
 import cupy
 
-from cuml.pipeline import Pipeline
+from cuml.pipeline import Pipeline, make_pipeline
 from cuml.model_selection import GridSearchCV
 
 from cuml.test.utils import ClassEnumerator
@@ -73,30 +73,46 @@ def classification_dataset(request):
                                        'MBSGDRegressor',
                                        'RandomForestRegressor',
                                        'KNeighborsRegressor'])
-def test_pipeline_with_regression(regression_dataset, model_key):
+@pytest.mark.parametrize('instantiation', ['Pipeline', 'make_pipeline'])
+def test_pipeline_with_regression(regression_dataset, model_key,
+                                  instantiation):
     X_train, X_test, y_train, y_test = regression_dataset
     model_const = models[model_key]
     if model_key == 'RandomForestRegressor':
         model = model_const(n_bins=2)
     else:
         model = model_const()
-    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+
+    if instantiation == 'Pipeline':
+        pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+    elif instantiation == 'make_pipeline':
+        pipe = make_pipeline(StandardScaler(), model)
     pipe.fit(X_train, y_train)
     prediction = pipe.predict(X_test)
     assert isinstance(prediction, cupy.ndarray)
+    _ = pipe.score(X_test, y_test)
 
 
 @pytest.mark.parametrize('model_key', ['MBSGDClassifier',
                                        'RandomForestClassifier',
                                        'KNeighborsClassifier'])
-def test_pipeline_with_classification(classification_dataset, model_key):
+@pytest.mark.parametrize('instantiation', ['Pipeline', 'make_pipeline'])
+def test_pipeline_with_classification(classification_dataset, model_key,
+                                      instantiation):
     X_train, X_test, y_train, y_test = classification_dataset
     model_const = models[model_key]
     if model_key == 'RandomForestClassifier':
         model = model_const(n_bins=2)
     else:
         model = model_const()
-    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+    if instantiation == 'Pipeline':
+        pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+    elif instantiation == 'make_pipeline':
+        pipe = make_pipeline(StandardScaler(), model)
     pipe.fit(X_train, y_train)
     prediction = pipe.predict(X_test)
     assert isinstance(prediction, cupy.ndarray)
+    if model_key == 'RandomForestClassifier':
+        pytest.skip("RandomForestClassifier is not yet supported "
+                    "by the Pipeline utility")
+    _ = pipe.score(X_test, y_test)