From 4851eb1b0439480da2da7ee89e20a57482c5f255 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 18 Mar 2021 18:03:01 +0000 Subject: [PATCH 1/4] Adding make_pipeline + test score with pipeline --- python/cuml/pipeline/__init__.py | 12 ++++++----- python/cuml/test/test_meta_estimators.py | 26 +++++++++++++++++++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/python/cuml/pipeline/__init__.py b/python/cuml/pipeline/__init__.py index 4bed8639b6..1a7c1dd2f5 100644 --- a/python/cuml/pipeline/__init__.py +++ b/python/cuml/pipeline/__init__.py @@ -14,14 +14,16 @@ # limitations under the License. # -from sklearn.pipeline import Pipeline +from sklearn.pipeline import Pipeline, make_pipeline - -Pipeline.__doc__ = """ +disclaimer = """ This code is developed and maintained by scikit-learn and imported by cuML to maintain the familiar sklearn namespace structure. cuML includes tests to ensure full compatibility of these wrappers with CUDA-based data and cuML estimators, but all of the underlying code -is due to the scikit-learn developers.\n\n""" + Pipeline.__doc__ +is due to the scikit-learn developers.\n\n""" + +Pipeline.__doc__ = disclaimer + Pipeline.__doc__ +make_pipeline.__doc__ = disclaimer + make_pipeline.__doc__ -__all__ = ['Pipeline'] +__all__ = ['Pipeline', 'make_pipeline'] diff --git a/python/cuml/test/test_meta_estimators.py b/python/cuml/test/test_meta_estimators.py index 07503e5878..74add81658 100644 --- a/python/cuml/test/test_meta_estimators.py +++ b/python/cuml/test/test_meta_estimators.py @@ -18,7 +18,7 @@ import cuml import cupy -from cuml.pipeline import Pipeline +from cuml.pipeline import Pipeline, make_pipeline from cuml.model_selection import GridSearchCV from cuml.test.utils import ClassEnumerator @@ -73,30 +73,46 @@ def classification_dataset(request): 'MBSGDRegressor', 'RandomForestRegressor', 'KNeighborsRegressor']) -def test_pipeline_with_regression(regression_dataset, model_key): +@pytest.mark.parametrize('instantiation', ['Pipeline', 'make_pipeline']) +def test_pipeline_with_regression(regression_dataset, model_key, + instantiation): X_train, X_test, y_train, y_test = regression_dataset model_const = models[model_key] if model_key == 'RandomForestRegressor': model = model_const(n_bins=2) else: model = model_const() - pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)]) + + if instantiation == 'Pipeline': + pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)]) + elif instantiation == 'make_pipeline': + pipe = make_pipeline(StandardScaler(), model) pipe.fit(X_train, y_train) prediction = pipe.predict(X_test) assert isinstance(prediction, cupy.ndarray) + _ = pipe.score(X_test, y_test) @pytest.mark.parametrize('model_key', ['MBSGDClassifier', 'RandomForestClassifier', 'KNeighborsClassifier']) -def test_pipeline_with_classification(classification_dataset, model_key): +@pytest.mark.parametrize('instantiation', ['Pipeline', 'make_pipeline']) +def test_pipeline_with_classification(classification_dataset, model_key, + instantiation): X_train, X_test, y_train, y_test = classification_dataset model_const = models[model_key] if model_key == 'RandomForestClassifier': model = model_const(n_bins=2) else: model = model_const() - pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)]) + if instantiation == 'Pipeline': + pipe = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)]) + elif instantiation == 'make_pipeline': + pipe = make_pipeline(StandardScaler(), model) pipe.fit(X_train, y_train) prediction = pipe.predict(X_test) assert isinstance(prediction, cupy.ndarray) + if model_key == 'RandomForestClassifier': + pytest.skip("RandomForestClassifier is not yet supported:" + "by the Pipeline utility") + _ = pipe.score(X_test, y_test) From eb2cc3b053cabff9bd6a06e054622dd8b73d5e69 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 18 Mar 2021 18:06:45 +0000 Subject: [PATCH 2/4] Fix typo --- python/cuml/test/test_meta_estimators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/test/test_meta_estimators.py b/python/cuml/test/test_meta_estimators.py index 74add81658..c430e742c3 100644 --- a/python/cuml/test/test_meta_estimators.py +++ b/python/cuml/test/test_meta_estimators.py @@ -113,6 +113,6 @@ def test_pipeline_with_classification(classification_dataset, model_key, prediction = pipe.predict(X_test) assert isinstance(prediction, cupy.ndarray) if model_key == 'RandomForestClassifier': - pytest.skip("RandomForestClassifier is not yet supported:" + pytest.skip("RandomForestClassifier is not yet supported" "by the Pipeline utility") _ = pipe.score(X_test, y_test) From 6ee545ce4633a299886a1bab99f492436d4ef973 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Tue, 23 Mar 2021 13:36:29 +0000 Subject: [PATCH 3/4] RAFT downgrade --- cpp/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake index f0b7a35690..1b59e52e22 100644 --- a/cpp/cmake/Dependencies.cmake +++ b/cpp/cmake/Dependencies.cmake @@ -39,7 +39,7 @@ else(DEFINED ENV{RAFT_PATH}) ExternalProject_Add(raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 6455e05b3889db2b495cf3189b33c2b07bfbebf2 + GIT_TAG fc46618d76d70710b07d445e79d3e07dea6cad2f PREFIX ${RAFT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" From 81512ed8c99d41adc79840b1d89ebfac1aa4096c Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 24 Mar 2021 09:48:21 +0000 Subject: [PATCH 4/4] Catch all fetch_20newsgroups exceptions --- python/cuml/test/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cuml/test/conftest.py b/python/cuml/test/conftest.py index 07c9a42de3..c4a49e4019 100644 --- a/python/cuml/test/conftest.py +++ b/python/cuml/test/conftest.py @@ -19,7 +19,6 @@ from sklearn.datasets import fetch_20newsgroups from sklearn.datasets import fetch_california_housing from sklearn.feature_extraction.text import CountVectorizer -import zlib def pytest_configure(config): @@ -32,7 +31,7 @@ def nlp_20news(): twenty_train = fetch_20newsgroups(subset='train', shuffle=True, random_state=42) - except (IOError, zlib.error): + except: # noqa E722 pytest.xfail(reason="Error fetching 20 newsgroup dataset") count_vect = CountVectorizer()