From 0ee9ac0084730591435c86feb691b73c98981514 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 4 Dec 2024 13:31:43 -0800 Subject: [PATCH 01/25] Replaces uses of cudf._lib.Column.from_unique_ptr with pylibcudf.Column.from_libcudf --- python/cudf/cudf/_lib/strings_udf.pyx | 8 ++++---- python/cudf/cudf/_lib/utils.pyx | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/_lib/strings_udf.pyx b/python/cudf/cudf/_lib/strings_udf.pyx index dd2fafbe07f..83f0cb850a5 100644 --- a/python/cudf/cudf/_lib/strings_udf.pyx +++ b/python/cudf/cudf/_lib/strings_udf.pyx @@ -1,7 +1,6 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. from libc.stdint cimport uint8_t, uint16_t, uintptr_t - from pylibcudf.libcudf.strings_udf cimport ( get_character_cases_table as cpp_get_character_cases_table, get_character_flags_table as cpp_get_character_flags_table, @@ -27,6 +26,7 @@ from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.device_buffer cimport DeviceBuffer from cudf._lib.column cimport Column +from pylibcudf cimport Column as plc_Column def get_cuda_build_version(): @@ -52,9 +52,9 @@ def column_from_udf_string_array(DeviceBuffer d_buffer): c_result = move(cpp_column_from_udf_string_array(data, size)) cpp_free_udf_string_array(data, size) - result = Column.from_unique_ptr(move(c_result)) - - return result + return Column.from_pylibcudf( + plc_Column.from_libcudf(move(c_result)) + ) def get_character_flags_table_ptr(): diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 6b3f10e1806..ff032656f80 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -16,7 +16,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column - +from pylibcudf cimport Column as plc_Column try: import ujson as json except ImportError: @@ -223,10 +223,11 @@ cdef columns_from_unique_ptr( cdef size_t i - columns = [Column.from_unique_ptr(move(dereference(it+i))) - for i in range(c_columns.size())] - - return columns + return [ + Column.from_pylibcudf( + plc_Column.from_libcudf(move(dereference(it+i))) + ) for i in range(c_columns.size()) + ] cpdef columns_from_pylibcudf_table(tbl): From 76315b0a83fc31210085aadd3c81b61586c86be0 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 4 Dec 2024 15:15:54 -0800 Subject: [PATCH 02/25] migrate changes from Propagate failures in pandas integration tests --- .../third-party-integration/test.sh | 16 +++++++++++----- .../tests/test_stumpy_distributed.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh index f8ddbaba0f3..30e3ffc9a43 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/test.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh @@ -26,6 +26,8 @@ main() { LIBS=${LIBS#[} LIBS=${LIBS%]} + ANY_FAILURES=0 + for lib in ${LIBS//,/ }; do lib=$(echo "$lib" | tr -d '""') echo "Running tests for library $lib" @@ -56,10 +58,6 @@ main() { rapids-logger "Check GPU usage" nvidia-smi - EXITCODE=0 - trap "EXITCODE=1" ERR - set +e - rapids-logger "pytest ${lib}" NUM_PROCESSES=8 @@ -72,12 +70,20 @@ main() { fi done + EXITCODE=0 + trap "EXITCODE=1" ERR + set +e + TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib} + set -e rapids-logger "Test script exiting with value: ${EXITCODE}" + if [[ ${EXITCODE} != 0 ]]; then + ANY_FAILURES=1 + fi done - exit ${EXITCODE} + exit ${ANY_FAILURES} } main "$@" diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py index 0777d982ac2..f275659288e 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py @@ -41,7 +41,7 @@ def test_multidimensional_distributed_timeseries(dask_client): rng = np.random.default_rng(seed=42) # Each row represents data from a different dimension while each column represents # data from the same dimension - your_time_series = rng.random(3, 1000) + your_time_series = rng.random((3, 1000)) # Approximately, how many data points might be found in a pattern window_size = 50 From 719da3af09cf345526c68b5bde4528027ec2801c Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 4 Dec 2024 15:20:27 -0800 Subject: [PATCH 03/25] clean up --- python/cudf/cudf/_lib/strings_udf.pyx | 7 +++---- python/cudf/cudf/_lib/utils.pyx | 11 +++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/_lib/strings_udf.pyx b/python/cudf/cudf/_lib/strings_udf.pyx index 83f0cb850a5..b77ca0c1ce5 100644 --- a/python/cudf/cudf/_lib/strings_udf.pyx +++ b/python/cudf/cudf/_lib/strings_udf.pyx @@ -26,7 +26,6 @@ from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.device_buffer cimport DeviceBuffer from cudf._lib.column cimport Column -from pylibcudf cimport Column as plc_Column def get_cuda_build_version(): @@ -52,9 +51,9 @@ def column_from_udf_string_array(DeviceBuffer d_buffer): c_result = move(cpp_column_from_udf_string_array(data, size)) cpp_free_udf_string_array(data, size) - return Column.from_pylibcudf( - plc_Column.from_libcudf(move(c_result)) - ) + result = Column.from_unique_ptr(move(c_result)) + + return result def get_character_flags_table_ptr(): diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index ff032656f80..6b3f10e1806 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -16,7 +16,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column -from pylibcudf cimport Column as plc_Column + try: import ujson as json except ImportError: @@ -223,11 +223,10 @@ cdef columns_from_unique_ptr( cdef size_t i - return [ - Column.from_pylibcudf( - plc_Column.from_libcudf(move(dereference(it+i))) - ) for i in range(c_columns.size()) - ] + columns = [Column.from_unique_ptr(move(dereference(it+i))) + for i in range(c_columns.size())] + + return columns cpdef columns_from_pylibcudf_table(tbl): From 29798e96b008457070ac5ee24c3c20545e450df5 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:48:51 -0500 Subject: [PATCH 04/25] remove deleted line --- python/cudf/cudf/_lib/strings_udf.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/_lib/strings_udf.pyx b/python/cudf/cudf/_lib/strings_udf.pyx index b77ca0c1ce5..dd2fafbe07f 100644 --- a/python/cudf/cudf/_lib/strings_udf.pyx +++ b/python/cudf/cudf/_lib/strings_udf.pyx @@ -1,6 +1,7 @@ # Copyright (c) 2022-2024, NVIDIA CORPORATION. from libc.stdint cimport uint8_t, uint16_t, uintptr_t + from pylibcudf.libcudf.strings_udf cimport ( get_character_cases_table as cpp_get_character_cases_table, get_character_flags_table as cpp_get_character_flags_table, From 5b870f49ab1d458d3699963951182c468e00fd8f Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 6 Dec 2024 04:45:29 -0800 Subject: [PATCH 05/25] add to ci job --- .github/workflows/pr.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 7c0bd6d52e2..fe309852a45 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -40,6 +40,7 @@ jobs: - pandas-tests - pandas-tests-diff - telemetry-setup + - third-party-integration-tests-cudf-pandas secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() @@ -325,6 +326,17 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" + third-party-integration-tests-cudf-pandas: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: | + ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml telemetry-summarize: runs-on: ubuntu-latest From 30a9391a6c18da5a5c39b992dc12ee83a715888e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 6 Dec 2024 12:20:44 -0800 Subject: [PATCH 06/25] xfail failing tests --- .../third_party_integration_tests/tests/test_holoviews.py | 3 +++ .../third_party_integration_tests/tests/test_matplotlib.py | 3 +++ .../third_party_integration_tests/tests/test_numpy.py | 3 +++ .../third_party_integration_tests/tests/test_pytorch.py | 3 +++ .../third_party_integration_tests/tests/test_seaborn.py | 3 +++ .../third_party_integration_tests/tests/test_tensorflow.py | 1 + .../third_party_integration_tests/tests/test_xgboost.py | 3 +++ 7 files changed, 19 insertions(+) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index bef02c86355..7ac26deb673 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -75,5 +75,8 @@ def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) +@pytest.mark.xfail( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_holoviews_hexbin(df): return get_plot_info(hv.HexTiles(df, kdims=["x", "y"], vdims="y")) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py index 1909392b9f7..6dc23017b38 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -33,6 +33,9 @@ def assert_plots_equal(expect, got): pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) +@pytest.mark.xfail( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_line(): df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) (data,) = plt.plot(df["x"], df["y"], marker="o", linestyle="-") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py index 472f1889354..3f332e6fd60 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py @@ -37,6 +37,9 @@ def test_numpy_dot(df): return np.dot(df, df.T) +@pytest.mark.xfail( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_numpy_fft(sr): fft = np.fft.fft(sr) return fft diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py index ad287471aa0..57655547161 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py @@ -116,6 +116,9 @@ def test_torch_train(data): return model(test_x1, test_x2) +@pytest.mark.xfail( + reason="AssertionError: The values for attribute 'device' do not match: cpu != cuda:0." +) def test_torch_tensor_ctor(): s = pd.Series(range(5)) return torch.tensor(s.values) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py index 021c5bac9b7..b1a5bc6c3c4 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py @@ -54,6 +54,9 @@ def test_scatter(df): return ax +@pytest.mark.xfail( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_lineplot_with_sns_data(): df = sns.load_dataset("flights") ax = sns.lineplot(data=df, x="month", y="passengers") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py index ba1f518cbfd..b4fad3024e7 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py @@ -271,6 +271,7 @@ def call(self, values): return tf.concat(values, axis=-1) +@pytest.mark.xfail(reason="ValueError: Invalid dtype: object") def test_full_example_train_with_df(df, target): # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example # Inputs are directly passed as dictionary of series diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 70f1e6a4250..fb8de793290 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -113,6 +113,9 @@ def test_with_external_memory( return predt +@pytest.mark.xfail( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_predict(device: str) -> np.ndarray: reg = xgb.XGBRegressor(n_estimators=2, device=device) From ce69e269726a7e13fbc3d656ffdd8573c1755a4e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 6 Dec 2024 19:31:33 -0800 Subject: [PATCH 07/25] remove xdist worksteal strategy --- .../third-party-integration/run-library-tests.sh | 4 ---- .../third_party_integration_tests/tests/test_holoviews.py | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh index d44d25d658c..761adaead28 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -23,8 +23,6 @@ runtest() { -v \ --continue-on-collection-errors \ --cache-clear \ - --numprocesses=${NUM_PROCESSES} \ - --dist=worksteal \ ${TEST_DIR}/test_${lib}*.py } @@ -42,8 +40,6 @@ main() { -v \ --continue-on-collection-errors \ --cache-clear \ - --numprocesses=${NUM_PROCESSES} \ - --dist=worksteal \ ${TEST_DIR}/test_${lib}*.py } diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index 7ac26deb673..4310f233fd0 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -71,6 +71,9 @@ def test_holoviews_heatmap(df): ) +@pytest.mark.xfail( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) From 4c327b25eaa6a47a09c4bc4f3932b6179deef929 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:03:42 -0500 Subject: [PATCH 08/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py --- .../third_party_integration_tests/tests/test_xgboost.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index fb8de793290..70f1e6a4250 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -113,9 +113,6 @@ def test_with_external_memory( return predt -@pytest.mark.xfail( - reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" -) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_predict(device: str) -> np.ndarray: reg = xgb.XGBRegressor(n_estimators=2, device=device) From 581f938fb26dcbc5b4361eceee63ac70641b49f9 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:03:48 -0500 Subject: [PATCH 09/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py --- .../third_party_integration_tests/tests/test_tensorflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py index b4fad3024e7..ba1f518cbfd 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py @@ -271,7 +271,6 @@ def call(self, values): return tf.concat(values, axis=-1) -@pytest.mark.xfail(reason="ValueError: Invalid dtype: object") def test_full_example_train_with_df(df, target): # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example # Inputs are directly passed as dictionary of series From 688a5618d416f40119f01c0768e0a73450d45574 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:03:55 -0500 Subject: [PATCH 10/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py --- .../third_party_integration_tests/tests/test_seaborn.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py index b1a5bc6c3c4..021c5bac9b7 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py @@ -54,9 +54,6 @@ def test_scatter(df): return ax -@pytest.mark.xfail( - reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" -) def test_lineplot_with_sns_data(): df = sns.load_dataset("flights") ax = sns.lineplot(data=df, x="month", y="passengers") From a47fb4fd8257d27b898b4389a0e89b23e0959737 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:03:59 -0500 Subject: [PATCH 11/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py --- .../third_party_integration_tests/tests/test_pytorch.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py index 57655547161..ad287471aa0 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py @@ -116,9 +116,6 @@ def test_torch_train(data): return model(test_x1, test_x2) -@pytest.mark.xfail( - reason="AssertionError: The values for attribute 'device' do not match: cpu != cuda:0." -) def test_torch_tensor_ctor(): s = pd.Series(range(5)) return torch.tensor(s.values) From dcd54e475f09dcfa9fe9be823581c819e70eeca0 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:04:04 -0500 Subject: [PATCH 12/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py --- .../third_party_integration_tests/tests/test_numpy.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py index 3f332e6fd60..472f1889354 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py @@ -37,9 +37,6 @@ def test_numpy_dot(df): return np.dot(df, df.T) -@pytest.mark.xfail( - reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" -) def test_numpy_fft(sr): fft = np.fft.fft(sr) return fft From 8090316324f1d028da443cc56150ffe1cf1442af Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:04:09 -0500 Subject: [PATCH 13/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py --- .../third_party_integration_tests/tests/test_matplotlib.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py index 6dc23017b38..1909392b9f7 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -33,9 +33,6 @@ def assert_plots_equal(expect, got): pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) -@pytest.mark.xfail( - reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" -) def test_line(): df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) (data,) = plt.plot(df["x"], df["y"], marker="o", linestyle="-") From 22cc7089241867ed57a412aab5bd7e3b4393318e Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:04:23 -0500 Subject: [PATCH 14/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py --- .../third_party_integration_tests/tests/test_holoviews.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index 4310f233fd0..4ba906aeeba 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -78,8 +78,5 @@ def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) -@pytest.mark.xfail( - reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" -) def test_holoviews_hexbin(df): return get_plot_info(hv.HexTiles(df, kdims=["x", "y"], vdims="y")) From 725bcf321cdc2806d519d6d9699c2a835807b0c0 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:04:29 -0500 Subject: [PATCH 15/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py --- .../third_party_integration_tests/tests/test_holoviews.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index 4ba906aeeba..bef02c86355 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -71,9 +71,6 @@ def test_holoviews_heatmap(df): ) -@pytest.mark.xfail( - reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" -) def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) From 5106ab5953ed479d5dd50d47ddface7f8b4fa8d2 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 9 Dec 2024 07:33:23 -0800 Subject: [PATCH 16/25] fix a bug and xfail a test --- .../third_party_integration_tests/tests/test_tensorflow.py | 1 + .../third_party_integration_tests/tests/test_xgboost.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py index ba1f518cbfd..b4fad3024e7 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py @@ -271,6 +271,7 @@ def call(self, values): return tf.concat(values, axis=-1) +@pytest.mark.xfail(reason="ValueError: Invalid dtype: object") def test_full_example_train_with_df(df, target): # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example # Inputs are directly passed as dictionary of series diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 70f1e6a4250..4f2556f12a8 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -123,7 +123,7 @@ def test_predict(device: str) -> np.ndarray: predt0 = reg.predict(X_df) - predt1 = booster.inplace_predict(X_df) + predt1 = booster.inplace_predict(X_df).get() np.testing.assert_allclose(predt0, predt1) predt2 = booster.predict(xgb.DMatrix(X_df)) From 75641d51fa79f883705a60d2f76ea02b0f0f3f66 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:30:07 -0500 Subject: [PATCH 17/25] remove get call --- .../third_party_integration_tests/tests/test_xgboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 4f2556f12a8..70f1e6a4250 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -123,7 +123,7 @@ def test_predict(device: str) -> np.ndarray: predt0 = reg.predict(X_df) - predt1 = booster.inplace_predict(X_df).get() + predt1 = booster.inplace_predict(X_df) np.testing.assert_allclose(predt0, predt1) predt2 = booster.predict(xgb.DMatrix(X_df)) From 5b11e6671b02dbda18d282b3a686b3fb14c30887 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 11 Dec 2024 07:55:04 -0800 Subject: [PATCH 18/25] skip tests --- .../third-party-integration/run-library-tests.sh | 6 ++++++ .../third_party_integration_tests/tests/test_holoviews.py | 3 +++ .../third_party_integration_tests/tests/test_matplotlib.py | 6 ++++++ .../third_party_integration_tests/tests/test_numpy.py | 3 +++ .../third_party_integration_tests/tests/test_pytorch.py | 3 +++ .../third_party_integration_tests/tests/test_seaborn.py | 3 +++ .../third_party_integration_tests/tests/test_xgboost.py | 3 +++ 7 files changed, 27 insertions(+) diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh index 761adaead28..7f2ab32412c 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -20,9 +20,12 @@ runtest() { pytest \ $plugin \ + --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ + --numprocesses=${NUM_PROCESSES} \ + --dist=worksteal \ ${TEST_DIR}/test_${lib}*.py } @@ -37,9 +40,12 @@ main() { pytest \ --compare \ -p cudf.pandas \ + --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ + --numprocesses=${NUM_PROCESSES} \ + --dist=worksteal \ ${TEST_DIR}/test_${lib}*.py } diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index bef02c86355..8be48953974 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -71,6 +71,9 @@ def test_holoviews_heatmap(df): ) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py index 1909392b9f7..c91808021e8 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -33,6 +33,9 @@ def assert_plots_equal(expect, got): pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_line(): df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) (data,) = plt.plot(df["x"], df["y"], marker="o", linestyle="-") @@ -40,6 +43,9 @@ def test_line(): return plt.gca() +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_bar(): data = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) ax = data.plot(kind="bar") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py index 472f1889354..4d35d9e8946 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py @@ -37,6 +37,9 @@ def test_numpy_dot(df): return np.dot(df, df.T) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_numpy_fft(sr): fft = np.fft.fft(sr) return fft diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py index ad287471aa0..7cea635afc4 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py @@ -116,6 +116,9 @@ def test_torch_train(data): return model(test_x1, test_x2) +@pytest.mark.skip( + reason="AssertionError: The values for attribute 'device' do not match: cpu != cuda:0." +) def test_torch_tensor_ctor(): s = pd.Series(range(5)) return torch.tensor(s.values) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py index 021c5bac9b7..f6a8a96ae3c 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py @@ -54,6 +54,9 @@ def test_scatter(df): return ax +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_lineplot_with_sns_data(): df = sns.load_dataset("flights") ax = sns.lineplot(data=df, x="month", y="passengers") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 70f1e6a4250..0fd632507a6 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -113,6 +113,9 @@ def test_with_external_memory( return predt +@pytest.mark.skip( + reason="TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly." +) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_predict(device: str) -> np.ndarray: reg = xgb.XGBRegressor(n_estimators=2, device=device) From 0b4f5fa4a210c2905146ece641f7ed00454a3a1c Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 11 Dec 2024 12:07:45 -0800 Subject: [PATCH 19/25] import or skip catboost --- .../third-party-integration/run-library-tests.sh | 2 -- .../third_party_integration_tests/tests/test_catboost.py | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh index 7f2ab32412c..d44d25d658c 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -20,7 +20,6 @@ runtest() { pytest \ $plugin \ - --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ @@ -40,7 +39,6 @@ main() { pytest \ --compare \ -p cudf.pandas \ - --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py index 04cc69231fe..a1c2b33b344 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py @@ -3,7 +3,13 @@ import numpy as np import pandas as pd import pytest -from catboost import CatBoostClassifier, CatBoostRegressor, Pool + +try: + from catboost import CatBoostClassifier, CatBoostRegressor, Pool +except Exception: + pytest.skip( + "ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject" + ) from sklearn.datasets import make_classification, make_regression rng = np.random.default_rng(seed=42) From 4a2f78af57d5c6ba8b6611d7b169900c393b3886 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 11 Dec 2024 18:04:17 -0500 Subject: [PATCH 20/25] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py --- .../third_party_integration_tests/tests/test_catboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py index a1c2b33b344..ca472041458 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py @@ -8,7 +8,7 @@ from catboost import CatBoostClassifier, CatBoostRegressor, Pool except Exception: pytest.skip( - "ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject" + reason="ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject", allow_module_level=True ) from sklearn.datasets import make_classification, make_regression From 3d70643414d10e881ec71e1427fceb2ef37dc6d4 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 11 Dec 2024 19:08:58 -0500 Subject: [PATCH 21/25] clean up --- .../third_party_integration_tests/tests/test_catboost.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py index ca472041458..6f9db8194c0 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py @@ -8,7 +8,8 @@ from catboost import CatBoostClassifier, CatBoostRegressor, Pool except Exception: pytest.skip( - reason="ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject", allow_module_level=True + reason="ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject", + allow_module_level=True, ) from sklearn.datasets import make_classification, make_regression From ead0792494f88081a5010655702f64087b63b637 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 11 Dec 2024 22:06:20 -0500 Subject: [PATCH 22/25] ignore catboost tests --- .../third-party-integration/run-library-tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh index d44d25d658c..7f2ab32412c 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -20,6 +20,7 @@ runtest() { pytest \ $plugin \ + --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ @@ -39,6 +40,7 @@ main() { pytest \ --compare \ -p cudf.pandas \ + --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ From 93d15a90e5cb178d0be5c8e43dd9259f2b667d5b Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 12 Dec 2024 06:04:00 -0800 Subject: [PATCH 23/25] remove catboost tests --- .../run-library-tests.sh | 2 - .../dependencies.yaml | 15 -- .../tests/test_catboost.py | 136 ------------------ 3 files changed, 153 deletions(-) delete mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh index 7f2ab32412c..d44d25d658c 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -20,7 +20,6 @@ runtest() { pytest \ $plugin \ - --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ @@ -40,7 +39,6 @@ main() { pytest \ --compare \ -p cudf.pandas \ - --ignore=${TEST_DIR}/test_catboost.py \ -v \ --continue-on-collection-errors \ --cache-clear \ diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index e726b7fdca1..3891110e9d3 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -76,13 +76,6 @@ files: - py_version - test_base - test_xgboost - test_catboost: - output: none - includes: - - cuda_version - - py_version - - test_base - - test_catboost test_cuml: output: none includes: @@ -251,14 +244,6 @@ dependencies: - pip - pip: - xgboost>=2.0.1 - test_catboost: - common: - - output_types: conda - packages: - - numpy - - scipy - - scikit-learn - - catboost test_cuml: common: - output_types: conda diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py deleted file mode 100644 index 6f9db8194c0..00000000000 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -import numpy as np -import pandas as pd -import pytest - -try: - from catboost import CatBoostClassifier, CatBoostRegressor, Pool -except Exception: - pytest.skip( - reason="ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject", - allow_module_level=True, - ) -from sklearn.datasets import make_classification, make_regression - -rng = np.random.default_rng(seed=42) - - -def assert_catboost_equal(expect, got, rtol=1e-7, atol=0.0): - if isinstance(expect, (tuple, list)): - assert len(expect) == len(got) - for e, g in zip(expect, got): - assert_catboost_equal(e, g, rtol, atol) - elif isinstance(expect, np.ndarray): - np.testing.assert_allclose(expect, got, rtol=rtol, atol=atol) - elif isinstance(expect, pd.DataFrame): - pd.testing.assert_frame_equal(expect, got) - elif isinstance(expect, pd.Series): - pd.testing.assert_series_equal(expect, got) - else: - assert expect == got - - -pytestmark = pytest.mark.assert_eq(fn=assert_catboost_equal) - - -@pytest.fixture -def regression_data(): - X, y = make_regression(n_samples=100, n_features=10, random_state=42) - return pd.DataFrame(X), pd.Series(y) - - -@pytest.fixture -def classification_data(): - X, y = make_classification( - n_samples=100, n_features=10, n_classes=2, random_state=42 - ) - return pd.DataFrame(X), pd.Series(y) - - -def test_catboost_regressor_with_dataframe(regression_data): - X, y = regression_data - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -def test_catboost_regressor_with_numpy(regression_data): - X, y = regression_data - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X.values, y.values) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_classifier_with_dataframe(classification_data): - X, y = classification_data - model = CatBoostClassifier(iterations=10, verbose=0) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -def test_catboost_classifier_with_numpy(classification_data): - X, y = classification_data - model = CatBoostClassifier(iterations=10, verbose=0) - model.fit(X.values, y.values) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_with_pool_and_dataframe(regression_data): - X, y = regression_data - train_pool = Pool(X, y) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(train_pool) - predictions = model.predict(X) - return predictions - - -def test_catboost_with_pool_and_numpy(regression_data): - X, y = regression_data - train_pool = Pool(X.values, y.values) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(train_pool) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_with_categorical_features(): - data = { - "numerical_feature": rng.standard_normal(100), - "categorical_feature": rng.choice(["A", "B", "C"], size=100), - "target": rng.integers(0, 2, size=100), - } - df = pd.DataFrame(data) - X = df[["numerical_feature", "categorical_feature"]] - y = df["target"] - cat_features = ["categorical_feature"] - model = CatBoostClassifier( - iterations=10, verbose=0, cat_features=cat_features - ) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -@pytest.mark.parametrize( - "X, y", - [ - ( - pd.DataFrame(rng.standard_normal((100, 5))), - pd.Series(rng.standard_normal(100)), - ), - (rng.standard_normal((100, 5)), rng.standard_normal(100)), - ], -) -def test_catboost_train_test_split(X, y): - from sklearn.model_selection import train_test_split - - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X_train, y_train) - predictions = model.predict(X_test) - return len(X_train), len(X_test), len(y_train), len(y_test), predictions From f2e7eb0f66e96eb259d11ee8c0c3e8893fc2f41f Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 12 Dec 2024 16:32:58 -0500 Subject: [PATCH 24/25] Update .github/workflows/pr.yaml --- .github/workflows/pr.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fe309852a45..737a8248bf8 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -40,7 +40,6 @@ jobs: - pandas-tests - pandas-tests-diff - telemetry-setup - - third-party-integration-tests-cudf-pandas secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() From 80ea00e9ce8f204f037caa88804c4028b536d5c5 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 12 Dec 2024 16:33:03 -0500 Subject: [PATCH 25/25] Update .github/workflows/pr.yaml --- .github/workflows/pr.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 737a8248bf8..7c0bd6d52e2 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -325,17 +325,6 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" - third-party-integration-tests-cudf-pandas: - needs: wheel-build-cudf - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 - with: - build_type: pull-request - node_type: "gpu-v100-latest-1" - arch: "amd64" - container_image: "rapidsai/ci-conda:latest" - run_script: | - ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml telemetry-summarize: runs-on: ubuntu-latest