From 4851eb1b0439480da2da7ee89e20a57482c5f255 Mon Sep 17 00:00:00 2001
From: viclafargue <viclafargue@nvidia.com>
Date: Thu, 18 Mar 2021 18:03:01 +0000
Subject: [PATCH 1/4] Adding make_pipeline + test score with pipeline

---
 python/cuml/pipeline/__init__.py         | 12 ++++++-----
 python/cuml/test/test_meta_estimators.py | 26 +++++++++++++++++++-----
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/python/cuml/pipeline/__init__.py b/python/cuml/pipeline/__init__.py
index 4bed8639b6..1a7c1dd2f5 100644
--- a/python/cuml/pipeline/__init__.py
+++ b/python/cuml/pipeline/__init__.py
@@ -14,14 +14,16 @@
 # limitations under the License.
 #
 
-from sklearn.pipeline import Pipeline
+from sklearn.pipeline import Pipeline, make_pipeline
 
-
-Pipeline.__doc__ = """
+disclaimer = """
 This code is developed and maintained by scikit-learn and imported
 by cuML to maintain the familiar sklearn namespace structure.
 cuML includes tests to ensure full compatibility of these wrappers
 with CUDA-based data and cuML estimators, but all of the underlying code
-is due to the scikit-learn developers.\n\n""" + Pipeline.__doc__
+is due to the scikit-learn developers.\n\n"""
+
+Pipeline.__doc__ = disclaimer + (Pipeline.__doc__ or "")  # None under -OO
+make_pipeline.__doc__ = disclaimer + (make_pipeline.__doc__ or "")
 
-__all__ = ['Pipeline']
+__all__ = ['Pipeline', 'make_pipeline']
diff --git a/python/cuml/test/test_meta_estimators.py b/python/cuml/test/test_meta_estimators.py
index 07503e5878..74add81658 100644
--- a/python/cuml/test/test_meta_estimators.py
+++ b/python/cuml/test/test_meta_estimators.py
@@ -18,7 +18,7 @@
 import cuml
 import cupy
 
-from cuml.pipeline import Pipeline
+from cuml.pipeline import Pipeline, make_pipeline
 from cuml.model_selection import GridSearchCV
 
 from cuml.test.utils import ClassEnumerator
@@ -73,30 +73,46 @@ def classification_dataset(request):
                                        'MBSGDRegressor',
                                        'RandomForestRegressor',
                                        'KNeighborsRegressor'])
-def test_pipeline_with_regression(regression_dataset, model_key):
+@pytest.mark.parametrize('instantiation', ['Pipeline', 'make_pipeline'])
+def test_pipeline_with_regression(regression_dataset, model_key,
+                                  instantiation):
     X_train, X_test, y_train, y_test = regression_dataset
     model_const = models[model_key]
     if model_key == 'RandomForestRegressor':
         model = model_const(n_bins=2)
     else:
         model = model_const()
-    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+
+    if instantiation == 'Pipeline':
+        pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+    elif instantiation == 'make_pipeline':
+        pipe = make_pipeline(StandardScaler(), model)
     pipe.fit(X_train, y_train)
     prediction = pipe.predict(X_test)
     assert isinstance(prediction, cupy.ndarray)
+    _ = pipe.score(X_test, y_test)  # smoke check: result is not asserted
 
 
 @pytest.mark.parametrize('model_key', ['MBSGDClassifier',
                                        'RandomForestClassifier',
                                        'KNeighborsClassifier'])
-def test_pipeline_with_classification(classification_dataset, model_key):
+@pytest.mark.parametrize('instantiation', ['Pipeline', 'make_pipeline'])
+def test_pipeline_with_classification(classification_dataset, model_key,
+                                      instantiation):
     X_train, X_test, y_train, y_test = classification_dataset
     model_const = models[model_key]
     if model_key == 'RandomForestClassifier':
         model = model_const(n_bins=2)
     else:
         model = model_const()
-    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+    if instantiation == 'Pipeline':
+        pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])
+    elif instantiation == 'make_pipeline':
+        pipe = make_pipeline(StandardScaler(), model)
     pipe.fit(X_train, y_train)
     prediction = pipe.predict(X_test)
     assert isinstance(prediction, cupy.ndarray)
+    if model_key == 'RandomForestClassifier':
+        pytest.skip("RandomForestClassifier is not yet supported:"
+                    "by the Pipeline utility")
+    _ = pipe.score(X_test, y_test)

From eb2cc3b053cabff9bd6a06e054622dd8b73d5e69 Mon Sep 17 00:00:00 2001
From: viclafargue <viclafargue@nvidia.com>
Date: Thu, 18 Mar 2021 18:06:45 +0000
Subject: [PATCH 2/4] Fix typo

---
 python/cuml/test/test_meta_estimators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cuml/test/test_meta_estimators.py b/python/cuml/test/test_meta_estimators.py
index 74add81658..c430e742c3 100644
--- a/python/cuml/test/test_meta_estimators.py
+++ b/python/cuml/test/test_meta_estimators.py
@@ -113,6 +113,6 @@ def test_pipeline_with_classification(classification_dataset, model_key,
     prediction = pipe.predict(X_test)
     assert isinstance(prediction, cupy.ndarray)
     if model_key == 'RandomForestClassifier':
-        pytest.skip("RandomForestClassifier is not yet supported:"
+        pytest.skip("RandomForestClassifier is not yet supported "
                     "by the Pipeline utility")
     _ = pipe.score(X_test, y_test)

From 6ee545ce4633a299886a1bab99f492436d4ef973 Mon Sep 17 00:00:00 2001
From: viclafargue <viclafargue@nvidia.com>
Date: Tue, 23 Mar 2021 13:36:29 +0000
Subject: [PATCH 3/4] RAFT downgrade

---
 cpp/cmake/Dependencies.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake
index f0b7a35690..1b59e52e22 100644
--- a/cpp/cmake/Dependencies.cmake
+++ b/cpp/cmake/Dependencies.cmake
@@ -39,7 +39,7 @@ else(DEFINED ENV{RAFT_PATH})
 
   ExternalProject_Add(raft
     GIT_REPOSITORY    https://github.com/rapidsai/raft.git
-    GIT_TAG           6455e05b3889db2b495cf3189b33c2b07bfbebf2
+    GIT_TAG           fc46618d76d70710b07d445e79d3e07dea6cad2f
     PREFIX            ${RAFT_DIR}
     CONFIGURE_COMMAND ""
     BUILD_COMMAND     ""

From 81512ed8c99d41adc79840b1d89ebfac1aa4096c Mon Sep 17 00:00:00 2001
From: viclafargue <viclafargue@nvidia.com>
Date: Wed, 24 Mar 2021 09:48:21 +0000
Subject: [PATCH 4/4] Catch all fetch_20newsgroups exceptions

---
 python/cuml/test/conftest.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cuml/test/conftest.py b/python/cuml/test/conftest.py
index 07c9a42de3..c4a49e4019 100644
--- a/python/cuml/test/conftest.py
+++ b/python/cuml/test/conftest.py
@@ -19,7 +19,6 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.datasets import fetch_california_housing
 from sklearn.feature_extraction.text import CountVectorizer
-import zlib
 
 
 def pytest_configure(config):
@@ -32,7 +31,7 @@ def nlp_20news():
         twenty_train = fetch_20newsgroups(subset='train',
                                           shuffle=True,
                                           random_state=42)
-    except (IOError, zlib.error):
+    except Exception:  # any fetch failure -> xfail, but let Ctrl-C through
         pytest.xfail(reason="Error fetching 20 newsgroup dataset")
 
     count_vect = CountVectorizer()