Merge pull request #16812 from rapidsai/branch-24.08

rapidsai · Sep 17, 2024 · 2c3055d · 2c3055d
2 parents e776742 + 2607537
commit 2c3055d
Show file tree

Hide file tree

Showing 72 changed files with 2,766 additions and 453 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -27,6 +27,7 @@ jobs:
       - wheel-tests-cudf
       - wheel-build-cudf-polars
       - wheel-tests-cudf-polars
+      - cudf-polars-polars-tests
       - wheel-build-dask-cudf
       - wheel-tests-dask-cudf
       - devcontainer
@@ -154,6 +155,17 @@ jobs:
       # This always runs, but only fails if this PR touches code in
       # pylibcudf or cudf_polars
       script: "ci/test_wheel_cudf_polars.sh"
+  cudf-polars-polars-tests:
+    needs: wheel-build-cudf-polars
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
+    with:
+      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
+      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+      build_type: pull-request
+      # This always runs, but only fails if this PR touches code in
+      # pylibcudf or cudf_polars
+      script: "ci/test_cudf_polars_polars_tests.sh"
   wheel-build-dask-cudf:
     needs: wheel-build-cudf
     secrets: inherit

diff --git a/ci/run_cudf_polars_polars_tests.sh b/ci/run_cudf_polars_polars_tests.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+# Support invoking run_cudf_polars_polars_tests.sh outside the script directory
+# Assumption, polars has been cloned in the root of the repo.
+cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../polars/
+
+DESELECTED_TESTS=(
+    "tests/unit/test_polars_import.py::test_polars_import" # relies on a polars built in place
+    "tests/unit/streaming/test_streaming_sort.py::test_streaming_sort[True]" # relies on polars built in debug mode
+    "tests/unit/test_cpu_check.py::test_check_cpu_flags_skipped_no_flags" # Mock library error
+    "tests/docs/test_user_guide.py" # No dot binary in CI image
+)
+# Build an argv array (not a flat string) so IDs like "...[True]" are never word-split or globbed.
+readarray -t DESELECT_ARGS < <(printf -- '--deselect\n%s\n' "${DESELECTED_TESTS[@]}")
+python -m pytest \
+       --import-mode=importlib \
+       --cache-clear \
+       -m "" \
+       -p cudf_polars.testing.plugin \
+       -v \
+       --tb=short \
+       "${DESELECT_ARGS[@]}" \
+       "$@" \
+       py-polars/tests
diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+# We will only fail these tests if the PR touches code in pylibcudf
+# or cudf_polars itself.
+# Note, the three dots mean we are doing diff between the merge-base
+# of upstream and HEAD. So this is asking, "does _this branch_ touch
+# files in cudf_polars/pylibcudf", rather than "are there changes
+# between upstream and this branch which touch cudf_polars/pylibcudf"
+# TODO: is the target branch exposed anywhere in an environment variable?
+if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
+then
+    HAS_CHANGES=1
+    rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
+else
+    HAS_CHANGES=0
+    rapids-logger "PR does not have changes in cudf-polars/pylibcudf, test fails NOT treated as failure"
+fi
+
+rapids-logger "Download wheels"
+
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
+RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
+
+# Download the cudf built in the previous step
+RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
+
+rapids-logger "Install cudf"
+python -m pip install ./local-cudf-dep/cudf*.whl
+
+rapids-logger "Install cudf_polars"
+python -m pip install ./dist/cudf_polars*.whl
+
+# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
+TAG="py-1.7.0"
+rapids-logger "Clone polars to ${TAG}"
+git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1
+
+# Install requirements for running polars tests
+rapids-logger "Install polars test requirements"
+python -m pip install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt
+
+function set_exitcode()
+{
+    EXITCODE=$?
+}
+EXITCODE=0
+trap set_exitcode ERR
+set +e
+
+rapids-logger "Run polars tests"
+./ci/run_cudf_polars_polars_tests.sh
+
+trap ERR
+set -e
+
+if [ "${EXITCODE}" != 0 ]; then
+    rapids-logger "Running polars test suite FAILED: exitcode ${EXITCODE}"
+else
+    rapids-logger "Running polars test suite PASSED"
+fi
+
+if [ "${HAS_CHANGES}" == 1 ]; then
+    exit "${EXITCODE}"
+else
+    exit 0
+fi
diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh
@@ -13,20 +13,29 @@ set -eou pipefail
 if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
 then
     HAS_CHANGES=1
+    rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
 else
     HAS_CHANGES=0
+    rapids-logger "PR does not have changes in cudf-polars/pylibcudf, test fails NOT treated as failure"
 fi
 
+rapids-logger "Download wheels"
+
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
 
 # Download the cudf built in the previous step
 RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
+
+rapids-logger "Install cudf"
 python -m pip install ./local-cudf-dep/cudf*.whl
 
 rapids-logger "Install cudf_polars"
 python -m pip install $(echo ./dist/cudf_polars*.whl)[test]
 
+rapids-logger "Pin to 1.7.0 Temporarily"
+python -m pip install polars==1.7.0
+
 rapids-logger "Run cudf_polars tests"
 
 function set_exitcode()

diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh
@@ -93,7 +93,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
    */
   __device__ inline cudf::size_type operator[](size_type idx) const
   {
-    void const* tp = p_ + (idx * this->width_);
+    void const* tp = p_ + (static_cast<std::ptrdiff_t>(idx) * this->width_);
     return type_dispatcher(this->dtype_, normalize_type{}, tp);
   }
 
@@ -109,7 +109,7 @@ struct input_indexalator : base_normalator<input_indexalator, cudf::size_type> {
   CUDF_HOST_DEVICE input_indexalator(void const* data, data_type dtype, cudf::size_type offset = 0)
     : base_normalator<input_indexalator, cudf::size_type>(dtype), p_{static_cast<char const*>(data)}
   {
-    p_ += offset * this->width_;
+    p_ += static_cast<std::ptrdiff_t>(offset) * this->width_;
   }
 
  protected:
@@ -165,7 +165,7 @@ struct output_indexalator : base_normalator<output_indexalator, cudf::size_type>
   __device__ inline output_indexalator const operator[](size_type idx) const
   {
     output_indexalator tmp{*this};
-    tmp.p_ += (idx * this->width_);
+    tmp.p_ += static_cast<std::ptrdiff_t>(idx) * this->width_;
     return tmp;
   }
 

diff --git a/dependencies.yaml b/dependencies.yaml
@@ -631,7 +631,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - polars>=1.0,<1.3
+          - polars>=1.6
   run_dask_cudf:
     common:
       - output_types: [conda, requirements, pyproject]

diff --git a/docs/cudf/source/_static/Polars_GPU_speedup_80GB.png b/docs/cudf/source/_static/Polars_GPU_speedup_80GB.png
diff --git a/docs/cudf/source/_static/compute_heavy_queries_polars.png b/docs/cudf/source/_static/compute_heavy_queries_polars.png
diff --git a/docs/cudf/source/_static/pds_benchmark_polars.png b/docs/cudf/source/_static/pds_benchmark_polars.png
diff --git a/docs/cudf/source/cudf_polars/index.rst b/docs/cudf/source/cudf_polars/index.rst
@@ -0,0 +1,41 @@
+cuDF-based GPU backend for Polars [Open Beta]
+=============================================
+
+cuDF supports an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API.
+The engine supports most of the core expressions and data types as well as a growing set of more advanced dataframe manipulations
+and data file formats. When using the GPU engine, Polars will convert expressions into an optimized query plan and determine
+whether the plan is supported on the GPU. If it is not, the execution will transparently fall back to the standard Polars engine
+and run on the CPU.
+
+Benchmark
+---------
+We reproduced the `Polars Decision Support (PDS) <https://github.com/pola-rs/polars-benchmark>`__ benchmark to compare the Polars GPU engine with the default CPU settings across several dataset sizes. Here are the results:
+
+.. figure:: ../_static/pds_benchmark_polars.png
+   :width: 600px
+
+
+
+You can see up to 13x speedup using the GPU backend on the compute-heavy PDS queries involving complex aggregation and join operations. Below are the speedups for the top-performing queries:
+
+
+.. figure:: ../_static/compute_heavy_queries_polars.png
+   :width: 1000px
+
+:emphasis:`PDS-H benchmark | GPU: NVIDIA H100 PCIe | CPU: Intel Xeon W9-3495X (Sapphire Rapids) | Storage: Local NVMe`
+
+You can reproduce the results by visiting the `Polars Decision Support (PDS) GitHub repository <https://github.com/pola-rs/polars-benchmark>`__.
+
+Learn More
+----------
+
+The GPU backend for Polars is now available in Open Beta and the engine is undergoing rapid development. To learn more, visit the `GPU Support page <https://docs.pola.rs/user-guide/gpu-support/>`__ on the Polars website.
+
+Launch on Google Colab
+----------------------
+
+.. figure:: ../_static/colab.png
+   :width: 200px
+   :target: https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb
+
+   Take the cuDF backend for Polars for a test-drive in a free GPU-enabled notebook environment using your Google account by `launching on Colab <https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb>`__.
diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst
@@ -29,5 +29,6 @@ other operations.
 
    user_guide/index
    cudf_pandas/index
+   cudf_polars/index
    libcudf_docs/index
    developer_guide/index
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
@@ -7,3 +7,4 @@ strings
     contains
     replace
     slice
+    strip
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/strip.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/strip.rst
@@ -0,0 +1,6 @@
+=====
+strip
+=====
+
+.. automodule:: cudf._lib.pylibcudf.strings.strip
+   :members:
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
@@ -16,6 +16,8 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
 from cudf._lib.pylibcudf.libcudf.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
+import cudf._lib.pylibcudf as plc
+
 
 @acquire_spill_lock()
 def add_months(Column col, Column months):
@@ -37,43 +39,9 @@ def add_months(Column col, Column months):
 
 @acquire_spill_lock()
 def extract_datetime_component(Column col, object field):
-
-    cdef unique_ptr[column] c_result
-    cdef column_view col_view = col.view()
-
-    with nogil:
-        if field == "year":
-            c_result = move(libcudf_datetime.extract_year(col_view))
-        elif field == "month":
-            c_result = move(libcudf_datetime.extract_month(col_view))
-        elif field == "day":
-            c_result = move(libcudf_datetime.extract_day(col_view))
-        elif field == "weekday":
-            c_result = move(libcudf_datetime.extract_weekday(col_view))
-        elif field == "hour":
-            c_result = move(libcudf_datetime.extract_hour(col_view))
-        elif field == "minute":
-            c_result = move(libcudf_datetime.extract_minute(col_view))
-        elif field == "second":
-            c_result = move(libcudf_datetime.extract_second(col_view))
-        elif field == "millisecond":
-            c_result = move(
-                libcudf_datetime.extract_millisecond_fraction(col_view)
-            )
-        elif field == "microsecond":
-            c_result = move(
-                libcudf_datetime.extract_microsecond_fraction(col_view)
-            )
-        elif field == "nanosecond":
-            c_result = move(
-                libcudf_datetime.extract_nanosecond_fraction(col_view)
-            )
-        elif field == "day_of_year":
-            c_result = move(libcudf_datetime.day_of_year(col_view))
-        else:
-            raise ValueError(f"Invalid datetime field: '{field}'")
-
-    result = Column.from_unique_ptr(move(c_result))
+    result = Column.from_pylibcudf(
+        plc.datetime.extract_datetime_component(col.to_pylibcudf(mode="read"), field)
+    )
 
     if field == "weekday":
         # Pandas counts Monday-Sunday as 0-6

diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -15,13 +15,11 @@ from cudf._lib.pylibcudf.libcudf.types cimport size_type
 
 from .gpumemoryview cimport gpumemoryview
 from .scalar cimport Scalar
-from .types cimport DataType, type_id
+from .types cimport DataType, size_of, type_id
 from .utils cimport int_to_bitmask_ptr, int_to_void_ptr
 
 import functools
 
-import numpy as np
-
 
 cdef class Column:
     """A container of nullable device data as a column of elements.
@@ -303,14 +301,15 @@ cdef class Column:
             raise ValueError("mask not yet supported.")
 
         typestr = iface['typestr'][1:]
+        data_type = _datatype_from_dtype_desc(typestr)
+
         if not is_c_contiguous(
             iface['shape'],
             iface['strides'],
-            np.dtype(typestr).itemsize
+            size_of(data_type)
         ):
             raise ValueError("Data must be C-contiguous")
 
-        data_type = _datatype_from_dtype_desc(typestr)
         size = iface['shape'][0]
         return Column(
             data_type,

diff --git a/python/cudf/cudf/_lib/pylibcudf/datetime.pyx b/python/cudf/cudf/_lib/pylibcudf/datetime.pyx
@@ -4,6 +4,16 @@ from libcpp.utility cimport move
 
 from cudf._lib.pylibcudf.libcudf.column.column cimport column
 from cudf._lib.pylibcudf.libcudf.datetime cimport (
+    day_of_year as cpp_day_of_year,
+    extract_day as cpp_extract_day,
+    extract_hour as cpp_extract_hour,
+    extract_microsecond_fraction as cpp_extract_microsecond_fraction,
+    extract_millisecond_fraction as cpp_extract_millisecond_fraction,
+    extract_minute as cpp_extract_minute,
+    extract_month as cpp_extract_month,
+    extract_nanosecond_fraction as cpp_extract_nanosecond_fraction,
+    extract_second as cpp_extract_second,
+    extract_weekday as cpp_extract_weekday,
     extract_year as cpp_extract_year,
 )
 
@@ -31,3 +41,42 @@ cpdef Column extract_year(
     with nogil:
         result = move(cpp_extract_year(values.view()))
     return Column.from_libcudf(move(result))
+
+
+def extract_datetime_component(Column col, str field):
+    """Extract one datetime component of ``col``, selected by ``field``.
+
+    ``field`` must be one of: year, month, day, weekday, hour, minute,
+    second, millisecond, microsecond, nanosecond, day_of_year; any other
+    value raises ``ValueError``. Returns a new Column from the libcudf result.
+    """
+    cdef unique_ptr[column] c_result
+
+    # The libcudf extraction call is issued inside the nogil section.
+    with nogil:
+        if field == "year":
+            c_result = move(cpp_extract_year(col.view()))
+        elif field == "month":
+            c_result = move(cpp_extract_month(col.view()))
+        elif field == "day":
+            c_result = move(cpp_extract_day(col.view()))
+        elif field == "weekday":
+            c_result = move(cpp_extract_weekday(col.view()))
+        elif field == "hour":
+            c_result = move(cpp_extract_hour(col.view()))
+        elif field == "minute":
+            c_result = move(cpp_extract_minute(col.view()))
+        elif field == "second":
+            c_result = move(cpp_extract_second(col.view()))
+        elif field == "millisecond":
+            c_result = move(cpp_extract_millisecond_fraction(col.view()))
+        elif field == "microsecond":
+            c_result = move(cpp_extract_microsecond_fraction(col.view()))
+        elif field == "nanosecond":
+            c_result = move(cpp_extract_nanosecond_fraction(col.view()))
+        elif field == "day_of_year":
+            c_result = move(cpp_day_of_year(col.view()))
+        else:
+            raise ValueError(f"Invalid datetime field: '{field}'")
+
+    return Column.from_libcudf(move(c_result))
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,3 +7,4 @@ strings @@
         contains
         replace
         slice
+        strip