From 1a21a3e6e58297ce3636730df0217464230170ce Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 17 Dec 2024 12:18:16 -0800 Subject: [PATCH 1/6] Fix failing Xgboost test in the cudf.pandas third-party integration tests --- .github/workflows/pr.yaml | 14 +++++++++++++- .../tests/test_xgboost.py | 10 +++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 49ca5ca0fb9..8f2b6a5a90b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -39,6 +39,7 @@ jobs: - unit-tests-cudf-pandas - pandas-tests - pandas-tests-diff + - third-party-integration-tests-cudf-pandas - telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 @@ -326,7 +327,18 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" - + # TODO: Remove this CI job after https://github.com/rapidsai/cudf/issues/17490 is resolved + third-party-integration-tests-cudf-pandas: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: | + ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml telemetry-summarize: runs-on: ubuntu-latest needs: pr-builder diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 0fd632507a6..9dfc09da863 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -113,9 +113,6 @@ def test_with_external_memory( return predt -@pytest.mark.skip( - reason="TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly." -) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_predict(device: str) -> np.ndarray: reg = xgb.XGBRegressor(n_estimators=2, device=device) @@ -127,6 +124,13 @@ def test_predict(device: str) -> np.ndarray: predt0 = reg.predict(X_df) predt1 = booster.inplace_predict(X_df) + # After https://github.com/dmlc/xgboost/pull/11014, .inplace_predict() + # returns a real cupy array when called on a cudf.pandas proxy dataframe. + # So we need to ensure we have a valid numpy array. + # TODO: We should remove the call to .get() when .inplace_predict() + # returns a cudf.pandas proxy numpy array + if not isinstance(predt1, np.ndarray): + predt1 = predt1.get() np.testing.assert_allclose(predt0, predt1) predt2 = booster.predict(xgb.DMatrix(X_df)) From 11d37f729a59ec6a06d30c66fb3d1f1fa26375c2 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 17 Dec 2024 19:46:25 -0500 Subject: [PATCH 2/6] Update .github/workflows/pr.yaml Co-authored-by: Bradley Dice --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 8f2b6a5a90b..1bbbfb94f82 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -331,7 +331,7 @@ jobs: third-party-integration-tests-cudf-pandas: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" From a36826242b457b8045d78dfcf6ca7c09896719a1 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:43:06 -0500 Subject: [PATCH 3/6] Update python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py --- .../third_party_integration_tests/tests/test_xgboost.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 9dfc09da863..ba98273404d 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -127,8 +127,6 @@ def test_predict(device: str) -> np.ndarray: # After https://github.com/dmlc/xgboost/pull/11014, .inplace_predict() # returns a real cupy array when called on a cudf.pandas proxy dataframe. # So we need to ensure we have a valid numpy array. - # TODO: We should remove the call to .get() when .inplace_predict() - # returns a cudf.pandas proxy numpy array if not isinstance(predt1, np.ndarray): predt1 = predt1.get() np.testing.assert_allclose(predt0, predt1) From ea12b3148a970d32fbc5e02694e5faf6bde17efb Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:50:47 -0500 Subject: [PATCH 4/6] Update .github/workflows/pr.yaml --- .github/workflows/pr.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1bbbfb94f82..c4dd485cf45 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -39,7 +39,6 @@ jobs: - unit-tests-cudf-pandas - pandas-tests - pandas-tests-diff - - third-party-integration-tests-cudf-pandas - telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 From f29856929560a3444e4fbc66cb4b096351270cfa Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:50:51 -0500 Subject: [PATCH 5/6] Update .github/workflows/pr.yaml --- .github/workflows/pr.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c4dd485cf45..cbce8615e53 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -326,18 +326,6 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" - # TODO: Remove this CI job after https://github.com/rapidsai/cudf/issues/17490 is resolved - third-party-integration-tests-cudf-pandas: - needs: wheel-build-cudf - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 - with: - build_type: pull-request - node_type: "gpu-v100-latest-1" - arch: "amd64" - container_image: "rapidsai/ci-conda:latest" - run_script: | - ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml telemetry-summarize: runs-on: ubuntu-latest needs: pr-builder From 616b3cf8e2fe32887aab50583b81090a72c8c649 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:52:45 -0500 Subject: [PATCH 6/6] Update pr.yaml --- .github/workflows/pr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index cbce8615e53..49ca5ca0fb9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -326,6 +326,7 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" + telemetry-summarize: runs-on: ubuntu-latest needs: pr-builder