From 7d6c377ccba923ee21922d0cc9d13682cfac3e72 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 27 Oct 2023 05:19:35 -0500 Subject: [PATCH] Upgrade `arrow` to `13` (#14330) This PR upgrades `arrow` to `13`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) - https://github.com/jakirkham Approvers: - Bradley Dice (https://github.com/bdice) - https://github.com/jakirkham - Ray Douglass (https://github.com/raydouglass) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14330 --- .../all_cuda-118_arch-x86_64.yaml | 4 ++-- .../all_cuda-120_arch-x86_64.yaml | 4 ++-- conda/recipes/cudf/meta.yaml | 6 ++--- conda/recipes/libcudf/conda_build_config.yaml | 2 +- conda/recipes/libcudf/meta.yaml | 3 ++- cpp/cmake/thirdparty/get_arrow.cmake | 22 ++++++++++++++----- dependencies.yaml | 21 +++++++++++++----- python/cudf/cudf/tests/test_decimal.py | 11 +++++++++- 8 files changed, 52 insertions(+), 21 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 623f79b7c34..cfcbde71b01 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -39,7 +39,7 @@ dependencies: - hypothesis - identify>=2.5.20 - ipython -- libarrow==12.0.1.* +- libarrow==13.0.0.* - libcufile-dev=1.4.0.31 - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 @@ -68,7 +68,7 @@ dependencies: - pre-commit - protobuf>=4.21,<5 - ptxcompiler -- pyarrow==12.0.1.* +- pyarrow==13.0.0.* - pydata-sphinx-theme!=0.14.2 - pytest - pytest-benchmark diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 770095ad088..db19d658b0d 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -41,7 +41,7 @@ dependencies: - hypothesis - identify>=2.5.20 - ipython -- libarrow==12.0.1.* +- libarrow==13.0.0.* - libcufile-dev - libcurand-dev - libkvikio==23.12.* @@ -66,7 +66,7 @@ dependencies: - pip - pre-commit - protobuf>=4.21,<5 -- pyarrow==12.0.1.* +- pyarrow==13.0.0.* - pydata-sphinx-theme!=0.14.2 - pytest - pytest-benchmark diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 7405ae2dfb5..619df00087c 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -55,13 +55,13 @@ requirements: - cuda-version ={{ cuda_version }} - sysroot_{{ target_platform }} {{ sysroot_version }} host: - - protobuf ==4.21.* + - protobuf ==4.23.* - python - cython >=3.0.0 - scikit-build >=0.13.1 - setuptools - dlpack >=0.5,<0.6.0a0 - - pyarrow ==12.0.1.* + - pyarrow ==13.0.0.* - libcudf ={{ version }} - rmm ={{ minor_version }} {% if cuda_major == "11" %} @@ -82,7 +82,7 @@ requirements: - numba >=0.57,<0.58 # TODO: Pin to numpy<1.25 until cudf requires pandas 2 - numpy >=1.21,<1.25 - - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} + - {{ pin_compatible('pyarrow', max_pin='x') }} - libcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 63688a641de..fe692614b8e 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -23,7 +23,7 @@ gtest_version: - ">=1.13.0" libarrow_version: - - "==12.0.1" + - "==13.0.0" dlpack_version: - ">=0.5,<0.6.0a0" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 627065817ba..ecd777bf91f 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -91,6 +91,8 @@ outputs: requirements: build: - cmake {{ cmake_version }} + host: + - libarrow {{ libarrow_version }} run: {% if cuda_major == "11" %} - cudatoolkit @@ -103,7 +105,6 @@ outputs: - nvcomp {{ nvcomp_version }} - librmm ={{ minor_version }} - libkvikio ={{ minor_version }} - - libarrow {{ libarrow_version }} - dlpack {{ dlpack_version }} - gtest {{ gtest_version }} - gmock {{ gtest_version }} diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 10d3145a36f..3b2cbc57d1c 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -408,12 +408,22 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endfunction() if(NOT DEFINED CUDF_VERSION_Arrow) - set(CUDF_VERSION_Arrow - # This version must be kept in sync with the libarrow version pinned for builds in - # dependencies.yaml. - 12.0.1 - CACHE STRING "The version of Arrow to find (or build)" - ) + # Temporarily use Arrow 12.0.1 in wheels and Arrow 13.0.0 otherwise + if(USE_LIBARROW_FROM_PYARROW) + set(CUDF_VERSION_Arrow + # This version must be kept in sync with the libarrow version pinned for builds in + # dependencies.yaml. + 12.0.1 + CACHE STRING "The version of Arrow to find (or build)" + ) + else() + set(CUDF_VERSION_Arrow + # This version must be kept in sync with the libarrow version pinned for builds in + # dependencies.yaml. + 13.0.0 + CACHE STRING "The version of Arrow to find (or build)" + ) + endif() endif() find_and_configure_arrow( diff --git a/dependencies.yaml b/dependencies.yaml index 59755c31e92..1f2b42c49c4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -224,7 +224,7 @@ dependencies: - &gmock gmock>=1.13.0 # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - - libarrow==12.0.1.* + - libarrow==13.0.0.* - librdkafka>=1.9.0,<1.10.0a0 # Align nvcomp version with rapids-cmake - nvcomp==2.6.1 @@ -240,11 +240,18 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cython>=3.0.0 + # TODO: Pin to numpy<1.25 until cudf requires pandas 2 + - &numpy numpy>=1.21,<1.25 + - output_types: [conda] + packages: + # Hard pin the patch version used during the build. This must be kept + # in sync with the version pinned in get_arrow.cmake. + - pyarrow==13.0.0.* + - output_types: [requirements, pyproject] + packages: # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - pyarrow==12.0.1.* - # TODO: Pin to numpy<1.25 until cudf requires pandas 2 - - &numpy numpy>=1.21,<1.25 build_python: common: - output_types: [conda, requirements, pyproject] @@ -262,10 +269,14 @@ dependencies: - output_types: conda packages: # Allow runtime version to float up to minor version - - libarrow==12.* + - libarrow==13.* pyarrow_run: common: - - output_types: [conda, requirements, pyproject] + - output_types: [conda] + packages: + # Allow runtime version to float up to minor version + - pyarrow==13.* + - output_types: [requirements, pyproject] packages: # Allow runtime version to float up to minor version - pyarrow==12.* diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index e4b2af90448..0745e5aba48 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -6,6 +6,7 @@ import numpy as np import pyarrow as pa import pytest +from packaging import version import cudf from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn @@ -91,7 +92,15 @@ def test_from_arrow_max_precision_decimal32(): "to_dtype", [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)], ) -def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype): +def test_typecast_from_float_to_decimal(request, data, from_dtype, to_dtype): + request.applymarker( + pytest.mark.xfail( + condition=version.parse(pa.__version__) >= version.parse("13.0.0") + and from_dtype == np.dtype("float32") + and to_dtype.precision > 7, + reason="https://github.com/rapidsai/cudf/issues/14169", + ) + ) got = data.astype(from_dtype) pa_arr = got.to_arrow().cast(