From 19dc46fdb068e8613b6e7e85686c622f92277d0f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 9 Feb 2022 15:12:07 -0600 Subject: [PATCH] Upgrade `arrow` & `pyarrow` to `6.0.1` (#9686) Resolves: #9645 This PR upgrades `arrow` & `pyarrow` to `6.0.1` from `5.0.0`. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - https://github.com/nvdbaranec - AJ Schmidt (https://github.com/ajschmidt8) - Robert Maynard (https://github.com/robertmaynard) - Keith Kraus (https://github.com/kkraus14) - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/9686 --- conda/environments/cudf_dev_cuda11.5.yml | 6 +++--- conda/recipes/cudf/meta.yaml | 4 ++-- conda/recipes/libcudf/meta.yaml | 4 ++-- cpp/cmake/thirdparty/get_arrow.cmake | 4 ++-- python/cudf/cudf/core/column/column.py | 18 +++++------------- python/cudf/cudf/core/dataframe.py | 7 +++++++ python/cudf/cudf/core/frame.py | 6 +++++- 7 files changed, 26 insertions(+), 23 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index b9577d937d9..b926a6cdc99 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. name: cudf_dev channels: @@ -17,7 +17,7 @@ dependencies: - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - - pyarrow=5.0.0=*cuda + - pyarrow=6.0.1=*cuda - fastavro>=0.22.9 - python-snappy>=0.6.0 - notebook>=0.5.0 @@ -45,7 +45,7 @@ dependencies: - dask>=2021.11.1,<=2022.01.0 - distributed>=2021.11.1,<=2022.01.0 - streamz - - arrow-cpp=5.0.0 + - arrow-cpp=6.0.1 - dlpack>=0.5,<0.6.0a0 - arrow-cpp-proc * cuda - double-conversion diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index bd1412bc611..0145e2e4d01 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} @@ -31,7 +31,7 @@ requirements: - setuptools - numba >=0.54 - dlpack>=0.5,<0.6.0a0 - - pyarrow 5.0.0 *cuda + - pyarrow 6.0.1 *cuda - libcudf {{ version }} - rmm {{ minor_version }} - cudatoolkit {{ cuda_version }} diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 70c020d4abd..57205f22e57 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} @@ -40,7 +40,7 @@ requirements: host: - librmm {{ minor_version }}.* - cudatoolkit {{ cuda_version }}.* - - arrow-cpp 5.0.0 *cuda + - arrow-cpp 6.0.1 *cuda - arrow-cpp-proc * cuda - dlpack>=0.5,<0.6.0a0 run: diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index ae1448da502..83c5e4c3e8f 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -308,7 +308,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endfunction() -set(CUDF_VERSION_Arrow 5.0.0) +set(CUDF_VERSION_Arrow 6.0.1) find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 2c3951c0e5e..ad95c23e395 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2093,24 +2093,16 @@ def as_column( dtype = "bool" np_type = np.dtype(dtype).type pa_type = np_to_pa_dtype(np.dtype(dtype)) - # TODO: A warning is emitted from pyarrow 5.0.0's function - # pyarrow.lib._sequence_to_array: - # "DeprecationWarning: an integer is required (got type float). - # Implicit conversion to integers using __int__ is deprecated, - # and may be removed in a future version of Python." - # This warning does not appear in pyarrow 6.0.1 and will be - # resolved by https://github.com/rapidsai/cudf/pull/9686/. - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - pa_array = pa.array( + data = as_column( + pa.array( arbitrary, type=pa_type, from_pandas=True if nan_as_null is None else nan_as_null, - ) - data = as_column( - pa_array, dtype=dtype, nan_as_null=nan_as_null, + ), + dtype=dtype, + nan_as_null=nan_as_null, ) except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError): if is_categorical_dtype(dtype): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d03cd6b1124..8a49efabcde 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4605,10 +4605,17 @@ def to_arrow(self, preserve_index=True): a: int64 b: int64 index: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + index: [[1,2,3]] >>> df.to_arrow(preserve_index=False) pyarrow.Table a: int64 b: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] """ data = self.copy(deep=False) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index a9e425f2012..2c5606cb17f 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from __future__ import annotations @@ -1922,6 +1922,10 @@ def to_arrow(self): a: int64 b: int64 index: int64 + ---- + a: [[1,2,3]] + b: [[4,5,6]] + index: [[1,2,3]] """ return pa.Table.from_pydict( {name: col.to_arrow() for name, col in self._data.items()}