Skip to content

Commit

Permalink
Fix benchmark image. (#14376)
Browse files Browse the repository at this point in the history
Fixes benchmark image and benchmark diff.

I will revert the CI/build changes in a follow-up PR.

Authors:
   - Bradley Dice (https://github.com/bdice)

Approvers:
   - Ray Douglass (https://github.com/raydouglass)
  • Loading branch information
bdice authored Nov 8, 2023
1 parent b64e862 commit e63055a
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 140 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
skip_upload_pkgs: libcudf-example
docs-build:
if: github.ref_type == 'branch'
needs: python-build
#needs: python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand Down
240 changes: 120 additions & 120 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,85 +12,85 @@ concurrency:
jobs:
pr-builder:
needs:
- checks
- conda-cpp-build
- conda-cpp-tests
- conda-python-build
- conda-python-cudf-tests
- conda-python-other-tests
- conda-java-tests
- conda-notebook-tests
#- checks
#- conda-cpp-build
#- conda-cpp-tests
#- conda-python-build
#- conda-python-cudf-tests
#- conda-python-other-tests
#- conda-java-tests
#- conda-notebook-tests
- docs-build
- wheel-build-cudf
- wheel-tests-cudf
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- unit-tests-cudf-pandas
- pandas-tests
#- wheel-build-cudf
#- wheel-tests-cudf
#- wheel-build-dask-cudf
#- wheel-tests-dask-cudf
#- unit-tests-cudf-pandas
#- pandas-tests
#- pandas-tests-diff
#- pandas-tests-diff-comment
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
conda-python-cudf-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
test_script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
test_script: "ci/test_python_other.sh"
conda-java-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
# checks:
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# enable_check_generated_files: false
# conda-cpp-build:
# needs: checks
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# conda-cpp-tests:
# needs: conda-cpp-build
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# conda-python-build:
# needs: conda-cpp-build
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# conda-python-cudf-tests:
# needs: conda-python-build
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# test_script: "ci/test_python_cudf.sh"
# conda-python-other-tests:
# # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
# needs: conda-python-build
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# test_script: "ci/test_python_other.sh"
# conda-java-tests:
# needs: conda-cpp-build
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# node_type: "gpu-v100-latest-1"
# arch: "amd64"
# container_image: "rapidsai/ci-conda:latest"
# run_script: "ci/test_java.sh"
# conda-notebook-tests:
# needs: conda-python-build
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# node_type: "gpu-v100-latest-1"
# arch: "amd64"
# container_image: "rapidsai/ci-conda:latest"
# run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
#needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -99,53 +99,53 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
script: "ci/build_wheel_cudf.sh"
wheel-tests-cudf:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
wheel-build-dask-cudf:
needs: wheel-tests-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
build_type: pull-request
script: "ci/build_wheel_dask_cudf.sh"
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
build_type: pull-request
script: ci/test_wheel_dask_cudf.sh
unit-tests-cudf-pandas:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
pandas-tests:
# run the Pandas unit tests using PR branch
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
build_type: pull-request
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# wheel-build-cudf:
# needs: checks
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# script: "ci/build_wheel_cudf.sh"
# wheel-tests-cudf:
# needs: wheel-build-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# build_type: pull-request
# script: ci/test_wheel_cudf.sh
# wheel-build-dask-cudf:
# needs: wheel-tests-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
# build_type: pull-request
# script: "ci/build_wheel_dask_cudf.sh"
# wheel-tests-dask-cudf:
# needs: wheel-build-dask-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
# build_type: pull-request
# script: ci/test_wheel_dask_cudf.sh
# unit-tests-cudf-pandas:
# needs: wheel-build-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
# build_type: pull-request
# script: ci/cudf_pandas_scripts/run_tests.sh
# pandas-tests:
# # run the Pandas unit tests using PR branch
# needs: wheel-build-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
# build_type: pull-request
# script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
#pandas-tests-diff:
# # diff the results of running the Pandas unit tests and publish a job summary
# needs: [pandas-tests-main, pandas-tests-pr]
Expand Down
16 changes: 9 additions & 7 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@ conda activate docs

rapids-print-env

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

#rapids-logger "Downloading artifacts from previous jobs"
#CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
#PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
#
#rapids-mamba-retry install \
# --channel "${CPP_CHANNEL}" \
# --channel "${PYTHON_CHANNEL}" \
# libcudf cudf dask-cudf
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcudf cudf dask-cudf
libcudf=23.10 cudf=23.10 dask-cudf=23.10

export RAPIDS_VERSION_NUMBER="23.10"
export RAPIDS_DOCS_DIR="$(mktemp -d)"
Expand Down
Binary file modified docs/cudf/source/_static/duckdb-benchmark-groupby-join.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
55 changes: 43 additions & 12 deletions docs/cudf/source/cudf_pandas/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ We reproduced the [Database-like ops benchmark](https://duckdblabs.github.io/db-
including a solution using `cudf.pandas`. Here are the results:

<figure>
<img src="../_static/duckdb-benchmark-groupby-join.png"
class="align-center" width="750"
alt="_static/duckdb-benchmark-groupby-join.png" />

![duckdb-benchmark-groupby-join](../_static/duckdb-benchmark-groupby-join.png)

<figcaption style="text-align: center;">Results of the <a
href="https://duckdblabs.github.io/db-benchmark/">Database-like ops
benchmark</a> including <span
Expand Down Expand Up @@ -46,7 +46,7 @@ source pandas/py-pandas/bin/activate
pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 # or cudf-cu11
```

5. Modify pandas join/group code to use `cudf.pandas`:
5. Modify the pandas join/groupby code to use `cudf.pandas` and to be compatible with pandas 1.5 APIs:

```bash
diff --git a/pandas/groupby-pandas.py b/pandas/groupby-pandas.py
Expand All @@ -59,15 +59,46 @@ index 58eeb26..2ddb209 100755

print("# groupby-pandas.py", flush=True)

diff --git a/pandas/join-pandas.py b/pandas/join-pandas.py
index f39beb0..655dd82 100755
--- a/pandas/join-pandas.py
+++ b/pandas/join-pandas.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env -S python3 -m cudf.pandas
diff --git a/pandas/join-pandas.py b/pandas/join-pandas.py
index f39beb0..a9ad651 100755
--- a/pandas/join-pandas.py
+++ b/pandas/join-pandas.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env -S python3 -m cudf.pandas

print("# join-pandas.py", flush=True)
print("# join-pandas.py", flush=True)

@@ -26,7 +26,7 @@ if len(src_jn_y) != 3:

print("loading datasets " + data_name + ", " + y_data_name[0] + ", " + y_data_name[1] + ", " + y_data_name[2], flush=True)

-x = pd.read_csv(src_jn_x, engine='pyarrow', dtype_backend='pyarrow')
+x = pd.read_csv(src_jn_x, engine='pyarrow')

# x['id1'] = x['id1'].astype('Int32')
# x['id2'] = x['id2'].astype('Int32')
@@ -35,17 +35,17 @@ x['id4'] = x['id4'].astype('category') # remove after datatable#1691
x['id5'] = x['id5'].astype('category')
x['id6'] = x['id6'].astype('category')

-small = pd.read_csv(src_jn_y[0], engine='pyarrow', dtype_backend='pyarrow')
+small = pd.read_csv(src_jn_y[0], engine='pyarrow')
# small['id1'] = small['id1'].astype('Int32')
small['id4'] = small['id4'].astype('category')
# small['v2'] = small['v2'].astype('float64')
-medium = pd.read_csv(src_jn_y[1], engine='pyarrow', dtype_backend='pyarrow')
+medium = pd.read_csv(src_jn_y[1], engine='pyarrow')
# medium['id1'] = medium['id1'].astype('Int32')
# medium['id2'] = medium['id2'].astype('Int32')
medium['id4'] = medium['id4'].astype('category')
medium['id5'] = medium['id5'].astype('category')
# medium['v2'] = medium['v2'].astype('float64')
-big = pd.read_csv(src_jn_y[2], engine='pyarrow', dtype_backend='pyarrow')
+big = pd.read_csv(src_jn_y[2], engine='pyarrow')
# big['id1'] = big['id1'].astype('Int32')
# big['id2'] = big['id2'].astype('Int32')
# big['id3'] = big['id3'].astype('Int32')
```
6. Run the modified pandas benchmarks:
Expand Down

0 comments on commit e63055a

Please sign in to comment.