diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index f66f91bbb7..43f6d2bb5b 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -13,7 +13,14 @@ jobs:
   pr-builder:
     needs:
       - checks
-      - changed_files
+      - conda-cpp-build
+      - conda-cpp-tests
+      - conda-python-build
+      - conda-python-cudf-tests
+      - conda-python-other-tests
+      - conda-java-tests
+      - conda-notebook-tests
+      - docs-build
       - wheel-build-cudf
       - wheel-build-cudf-main
       - wheel-tests-cudf
@@ -31,50 +38,88 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.10
     with:
       enable_check_generated_files: false
-  changed_files:
-    runs-on: ubuntu-latest
-    name: Test changed files
-    outputs:
-      cudf_files_changed: ${{ steps.changed_files.outputs.cudf_files_changed }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Get changed files in cudf
-        id: changed_files
-        run: |
-          git fetch origin branch-23.10-xdf
-          diff=$(git diff --name-only origin/branch-23.10-xdf..HEAD python/cudf)
-          if [ -n "$diff" ]; then
-            has_changes='true'
-          else
-            has_changes='false'
-          fi
-          echo "Found changed: ${has_changes}"
-          echo "cudf_files_changed=${has_changes}" >> "$GITHUB_OUTPUT"
-  wheel-build-cudf:
+  conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.10
     with:
-      matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
       build_type: pull-request
-      script: "ci/build_wheel_cudf.sh"
+  conda-cpp-tests:
+    needs: conda-cpp-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.10
+    with:
+      build_type: pull-request
+  conda-python-build:
+    needs: conda-cpp-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.10
+    with:
+      build_type: pull-request
+  conda-python-cudf-tests:
+    needs: conda-python-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
+    with:
+      build_type: pull-request
+      test_script: "ci/test_python_cudf.sh"
+  conda-python-other-tests:
+    # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
+    needs: conda-python-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.10
+    with:
+      build_type: pull-request
+      test_script: "ci/test_python_other.sh"
+  conda-java-tests:
+    needs: conda-cpp-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:latest"
+      run_script: "ci/test_java.sh"
+  conda-notebook-tests:
+    needs: conda-python-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:latest"
+      run_script: "ci/test_notebooks.sh"
+  docs-build:
+    needs: conda-python-build
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.10
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:latest"
+      run_script: "ci/build_docs.sh"
   wheel-build-cudf-main:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10
     with:
-      matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
       build_type: pull-request
       script: "ci/build_wheel_cudf.sh main"
+  wheel-build-cudf:
+    needs: checks
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.10
+    with:
+      build_type: pull-request
+      script: "ci/build_wheel_cudf.sh"
   wheel-tests-cudf:
-    if: needs.changed_files.outputs.cudf_files_changed == 'true'
-    needs: [wheel-build-cudf, changed_files]
+    needs: wheel-build-cudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.10
     with:
-      matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
       build_type: pull-request
       script: ci/test_wheel_cudf.sh
   wheel-build-dask-cudf:
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 7c8bca562e..27a3a84e3f 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -9,7 +9,6 @@ channels:
 - nvidia
 dependencies:
 - aiobotocore>=2.2.0
-- aws-sdk-cpp<1.11
 - benchmark==1.8.0
 - boto3>=1.21.21
 - botocore>=1.24.21
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 78c7c6ba49..eb229f15af 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -9,7 +9,6 @@ channels:
 - nvidia
 dependencies:
 - aiobotocore>=2.2.0
-- aws-sdk-cpp<1.11
 - benchmark==1.8.0
 - boto3>=1.21.21
 - botocore>=1.24.21
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 54b687faa6..d3e15f70cc 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -103,6 +103,7 @@ requirements:
     - nvtx >=0.2.1
     - packaging
     - cachetools
+    - rich
 
 test:
   requires:
diff --git a/conda/recipes/cudf_kafka/build.sh b/conda/recipes/cudf_kafka/build.sh
index 5d8720f1c9..f4bb6e1bc9 100644
--- a/conda/recipes/cudf_kafka/build.sh
+++ b/conda/recipes/cudf_kafka/build.sh
@@ -1,4 +1,16 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 # This assumes the script is executed from the root of the repo directory
+# Need to set CUDA_HOME inside conda environments because the hacked together
+# setup.py for cudf-kafka searches that way.
+# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
+# cudf_kafka to use scikit-build
+CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*}
+if [[ ${CUDA_MAJOR} == "12" ]]; then
+    target_name="x86_64-linux"
+    if [[ ! $(arch) == "x86_64" ]]; then
+        target_name="sbsa-linux"
+    fi
+    export CUDA_HOME="${PREFIX}/targets/${target_name}/"
+fi
 ./build.sh -v cudf_kafka
diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
index ec0cc40251..a79c23b7d9 100644
--- a/conda/recipes/cudf_kafka/meta.yaml
+++ b/conda/recipes/cudf_kafka/meta.yaml
@@ -33,6 +33,9 @@ build:
     - SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64]
     - SCCACHE_S3_USE_SSL
     - SCCACHE_S3_NO_CREDENTIALS
+    # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
+    # cudf_kafka to use scikit-build
+    - RAPIDS_CUDA_VERSION
 
 requirements:
   build:
@@ -41,6 +44,11 @@ requirements:
     - {{ compiler('cxx') }}
     - ninja
     - sysroot_{{ target_platform }} {{ sysroot_version }}
+    # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
+    # cudf_kafka to use scikit-build
+    {% if cuda_major == "12" %}
+    - cuda-gdb
+    {% endif %}
   host:
     - python
     - cython >=3.0.0
@@ -48,6 +56,9 @@ requirements:
     - cudf ={{ version }}
     - libcudf_kafka ={{ version }}
     - setuptools
+    {% if cuda_major == "12" %}
+    - cuda-cudart-dev
+    {% endif %}
   run:
     - python
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index b1f5b083e0..25b3f19de7 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -22,9 +22,6 @@ gbench_version:
 gtest_version:
   - ">=1.13.0"
 
-aws_sdk_cpp_version:
-  - "<1.11"
-
 libarrow_version:
   - "=12"
 
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 28357f0d96..627065817b 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -74,7 +74,6 @@ requirements:
     - gtest {{ gtest_version }}
     - gmock {{ gtest_version }}
     - zlib {{ zlib_version }}
-    - aws-sdk-cpp {{ aws_sdk_cpp_version }}
 
 outputs:
   - name: libcudf
@@ -108,7 +107,6 @@ outputs:
         - dlpack {{ dlpack_version }}
         - gtest {{ gtest_version }}
         - gmock {{ gtest_version }}
-        - aws-sdk-cpp {{ aws_sdk_cpp_version }}
     test:
       commands:
         - test -f $PREFIX/lib/libcudf.so
diff --git a/dependencies.yaml b/dependencies.yaml
index 92a1871029..9cb3c26a86 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -234,7 +234,6 @@ dependencies:
           - libkvikio==23.10.*
       - output_types: conda
         packages:
-          - aws-sdk-cpp<1.11
           - fmt>=9.1.0,<10
           - &gbench benchmark==1.8.0
           - &gtest gtest>=1.13.0
diff --git a/docs/cudf/source/cudf_pandas/index.rst b/docs/cudf/source/cudf_pandas/index.rst
index 83649020e6..a5edd435f0 100644
--- a/docs/cudf/source/cudf_pandas/index.rst
+++ b/docs/cudf/source/cudf_pandas/index.rst
@@ -31,31 +31,20 @@ Speeding up pandas with cuDF
 ``cudf.pandas`` can be used with Jupyter Notebooks or any Python script just by
 loading a notebook extension or adding one command-line flag:
 
-.. list-table::
-   :widths: 1 1
-   :header-rows: 1
 
-   * - Python Script
-     - Notebook
-
-   * - .. code-block:: python
-
-          import pandas as pd
-          df = pd.read_csv("filepath")
-          df.groupby("col").mean()
-          df.rolling(window=3).sum()
-
-          # python -m cudf.pandas script.py
-
-
-     - .. code-block:: python
-
-          %load_ext cudf.pandas
-
-          import pandas as pd
-          df = pd.read_csv("filepath")
-          df.groupby("col").mean()
-          df.rolling(window=3).sum()
++--------------------------------------+------------------------------------+
+| Python Script                        | Notebook                           |
++======================================+====================================+
+| .. code-block:: python               | .. code-block:: python             |
+|                                      |                                    |
+|    import pandas as pd               |    %load_ext cudf.pandas           |
+|    df = pd.read_csv("filepath")      |                                    |
+|    df.groupby("col").mean()          |    import pandas as pd             |
+|    df.rolling(window=3).sum()        |    df = pd.read_csv("filepath")    |
+|                                      |    df.groupby("col").mean()        |
+|    # python -m cudf.pandas script.py |    df.rolling(window=3).sum()      |
+|                                      |                                    |
++--------------------------------------+------------------------------------+
 
 
 With cuDF's pandas Accelerator Mode, you can take pandas from worst-to-first on
diff --git a/docs/cudf/source/user_guide/pandas-comparison.md b/docs/cudf/source/user_guide/pandas-comparison.md
index 9e821fd883..b8206d889f 100644
--- a/docs/cudf/source/user_guide/pandas-comparison.md
+++ b/docs/cudf/source/user_guide/pandas-comparison.md
@@ -15,7 +15,7 @@ filtering, concatenating, joining, groupby and window operations - among many
 others.
 
 The best way to check if we support a particular Pandas API is to search
-our [API docs](/api_docs/index).
+our [API docs](/user_guide/api_docs/index).
 
 ## Data types
 
@@ -145,7 +145,7 @@ For example, `s.sum()` is not guaranteed to produce identical results to Pandas
 nor produce identical results from run to run, when `s` is a Series of floats.
 If you need to compare floating point results, you should typically do so
 using the functions provided in the
-[`cudf.testing`](/api_docs/general_utilities)
+[`cudf.testing`](/user_guide/api_docs/general_utilities)
 module, which allow you to compare values up to a desired precision.
 
 ## Column names
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index c904d1b021..b4bbd0a8c3 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -138,7 +138,7 @@ def __init__(
         if len(levels) == 0:
             raise ValueError("Must pass non-zero number of levels/codes")
         if not isinstance(codes, cudf.DataFrame) and not isinstance(
-            codes[0], (abc.Sequence, np.ndarray)
+            codes[0], (abc.Sequence, np.ndarray, cp.ndarray)
         ):
             raise TypeError("Codes is not a Sequence of sequences")
 
diff --git a/python/cudf/cudf/tests/test_no_cuinit.py b/python/cudf/cudf/tests/test_no_cuinit.py
index 3731f9b5fd..b142b0dab3 100644
--- a/python/cudf/cudf/tests/test_no_cuinit.py
+++ b/python/cudf/cudf/tests/test_no_cuinit.py
@@ -26,7 +26,7 @@ def cuda_gdb(request):
         return gdb
     else:
         output = subprocess.run(
-            [gdb, "--version"], capture_output=True, text=True
+            [gdb, "--version"], capture_output=True, text=True, cwd="/"
         )
         if output.returncode != 0:
             request.applymarker(
@@ -97,6 +97,7 @@ def test_cudf_create_series_cuinit(cuda_gdb):
         env=env,
         capture_output=True,
         text=True,
+        cwd="/",
     )
 
     cuInit_called = output.stdout.find("in cuInit ()")
diff --git a/python/cudf/cudf/tests/test_numba_import.py b/python/cudf/cudf/tests/test_numba_import.py
index dcde0f68aa..238a32a94f 100644
--- a/python/cudf/cudf/tests/test_numba_import.py
+++ b/python/cudf/cudf/tests/test_numba_import.py
@@ -41,6 +41,8 @@ def test_kernel(x):
 )
 def test_numba_mvc_enabled_cuda_11():
     cp = subprocess.run(
-        [sys.executable, "-c", TEST_NUMBA_MVC_ENABLED], capture_output=True
+        [sys.executable, "-c", TEST_NUMBA_MVC_ENABLED],
+        capture_output=True,
+        cwd="/",
     )
     assert cp.returncode == 0