Skip to content

Commit

Permalink
Upgrade wheels to use arrow 13 (#14339)
Browse files Browse the repository at this point in the history
In #14330 we upgraded to Arrow 13. However, we only did so for conda packages. For wheels, we couldn't do the same because pyarrow 13 started supporting two manylinux versions, 2.17 and 2.28. This results in ABI compatibility issues because cudf wheels bundle an identical libarrow and were previously configured to compile with the CXX11 ABI turned off for compatibility with the libarrow in the 2.17 wheels. To address this, this PR modifies the CMake logic to set that flag conditionally, based on the glibc version on the host system.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Robert Maynard (https://github.com/robertmaynard)
  - Ray Douglass (https://github.com/raydouglass)
  - Bradley Dice (https://github.com/bdice)

URL: #14339
  • Loading branch information
vyasr authored Oct 31, 2023
1 parent 2abf9a6 commit b4746d8
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 46 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
build-2_28-wheels: "true"
script: "ci/build_wheel_cudf.sh"
wheel-tests-cudf:
needs: wheel-build-cudf
Expand Down
3 changes: 1 addition & 2 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"

./ci/build_wheel.sh cudf ${package_dir}

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="cudf_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
15 changes: 14 additions & 1 deletion ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,21 @@

set -eou pipefail

# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
# Need to disable pipefail for the head not to fail, see
# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
set +o pipefail
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
set -o pipefail
manylinux_version="2_17"
if [[ ${glibc_minor_version} -ge 28 ]]; then
manylinux_version="2_28"
fi
manylinux="manylinux_${manylinux_version}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]
Expand Down
15 changes: 14 additions & 1 deletion ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,20 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist

# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
# Need to disable pipefail for the head not to fail, see
# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
set +o pipefail
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
set -o pipefail
manylinux_version="2_17"
if [[ ${glibc_minor_version} -ge 28 ]]; then
manylinux_version="2_28"
fi
manylinux="manylinux_${manylinux_version}"

RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl

# Always install latest dask for testing
Expand Down
62 changes: 34 additions & 28 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,35 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
add_library(arrow_shared ALIAS Arrow::Arrow)

# When using the libarrow inside a wheel we must build libcudf with the old ABI because pyarrow's
# `libarrow.so` is compiled for manylinux2014 (centos7 toolchain) which uses the old ABI. Note
# that these flags will often be redundant because we build wheels in manylinux containers that
# actually have the old libc++ anyway, but setting them explicitly ensures correct and consistent
# behavior in all other cases such as aarch builds on newer manylinux or testing builds in newer
# containers. Note that tests will not build successfully without also propagating these options
# to builds of GTest. Similarly, benchmarks will not work without updating GBench (and possibly
# NVBench) builds. We are currently ignoring these limitations since we don't anticipate using
# this feature except for building wheels.
target_compile_options(
Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
"$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>"
# When using the libarrow inside a wheel, whether or not libcudf may be built using the new C++11
# ABI is dependent on whether the libarrow inside the wheel was compiled using that ABI because we
# need the arrow library that we bundle in cudf to be ABI-compatible with the one inside pyarrow.
# We determine what options to use by checking the glibc version on the current system, which is
# also how pip determines which manylinux-versioned pyarrow wheel to install. Note that tests will
# not build successfully without also propagating these options to builds of GTest. Similarly,
# benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently
# ignoring these limitations since we don't anticipate using this feature except for building
# wheels.
EXECUTE_PROCESS(
COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6
OUTPUT_VARIABLE GLIBC_EXECUTABLE
OUTPUT_STRIP_TRAILING_WHITESPACE
)
EXECUTE_PROCESS(
COMMAND ${GLIBC_EXECUTABLE}
OUTPUT_VARIABLE GLIBC_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
if(GLIBC_VERSION_MINOR LESS 28)
target_compile_options(
Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
"$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>"
)
endif()

rapids_export_package(BUILD Arrow cudf-exports)
rapids_export_package(INSTALL Arrow cudf-exports)
Expand Down Expand Up @@ -408,22 +424,12 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
endfunction()

if(NOT DEFINED CUDF_VERSION_Arrow)
# Temporarily use Arrow 12.0.1 in wheels and Arrow 13.0.0 otherwise
if(USE_LIBARROW_FROM_PYARROW)
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
12.0.1
CACHE STRING "The version of Arrow to find (or build)"
)
else()
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
13.0.0
CACHE STRING "The version of Arrow to find (or build)"
)
endif()
set(CUDF_VERSION_Arrow
# This version must be kept in sync with the libarrow version pinned for builds in
# dependencies.yaml.
13.0.0
CACHE STRING "The version of Arrow to find (or build)"
)
endif()

find_and_configure_arrow(
Expand Down
13 changes: 2 additions & 11 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -242,16 +242,11 @@ dependencies:
- cython>=3.0.0
# TODO: Pin to numpy<1.25 until cudf requires pandas 2
- &numpy numpy>=1.21,<1.25
- output_types: [conda]
- output_types: [conda, requirements, pyproject]
packages:
# Hard pin the patch version used during the build. This must be kept
# in sync with the version pinned in get_arrow.cmake.
- pyarrow==13.0.0.*
- output_types: [requirements, pyproject]
packages:
# Hard pin the patch version used during the build. This must be kept
# in sync with the version pinned in get_arrow.cmake.
- pyarrow==12.0.1.*
build_python:
common:
- output_types: [conda, requirements, pyproject]
Expand All @@ -272,14 +267,10 @@ dependencies:
- libarrow==13.*
pyarrow_run:
common:
- output_types: [conda]
- output_types: [conda, requirements, pyproject]
packages:
# Allow runtime version to float up to minor version
- pyarrow==13.*
- output_types: [requirements, pyproject]
packages:
# Allow runtime version to float up to minor version
- pyarrow==12.*
cudatoolkit:
specific:
- output_types: conda
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ requires = [
"ninja",
"numpy>=1.21,<1.25",
"protoc-wheel",
"pyarrow==12.0.1.*",
"pyarrow==13.0.0.*",
"rmm==23.12.*",
"scikit-build>=0.13.1",
"setuptools",
Expand Down Expand Up @@ -38,7 +38,7 @@ dependencies = [
"pandas>=1.3,<1.6.0dev0",
"protobuf>=4.21,<5",
"ptxcompiler",
"pyarrow==12.*",
"pyarrow==13.*",
"rmm==23.12.*",
"typing_extensions>=4.0.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
Expand Down
2 changes: 1 addition & 1 deletion python/cudf_kafka/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
requires = [
"cython>=3.0.0",
"numpy>=1.21,<1.25",
"pyarrow==12.0.1.*",
"pyarrow==13.0.0.*",
"setuptools",
"wheel",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
Expand Down

0 comments on commit b4746d8

Please sign in to comment.