Skip to content

Commit

Permalink
Upgrade arrow & pyarrow to 6.0.1 (#9686)
Browse files Browse the repository at this point in the history
Resolves: #9645 
This PR upgrades `arrow` and `pyarrow` from `5.0.0` to `6.0.1`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - https://github.com/nvdbaranec
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Robert Maynard (https://github.com/robertmaynard)
  - Keith Kraus (https://github.com/kkraus14)
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/9686
  • Loading branch information
galipremsagar authored Feb 9, 2022
1 parent 3fe168d commit 19dc46f
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 23 deletions.
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

name: cudf_dev
channels:
Expand All @@ -17,7 +17,7 @@ dependencies:
- numba>=0.54
- numpy
- pandas>=1.0,<1.4.0dev0
- pyarrow=5.0.0=*cuda
- pyarrow=6.0.1=*cuda
- fastavro>=0.22.9
- python-snappy>=0.6.0
- notebook>=0.5.0
Expand Down Expand Up @@ -45,7 +45,7 @@ dependencies:
- dask>=2021.11.1,<=2022.01.0
- distributed>=2021.11.1,<=2022.01.0
- streamz
- arrow-cpp=5.0.0
- arrow-cpp=6.0.1
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -31,7 +31,7 @@ requirements:
- setuptools
- numba >=0.54
- dlpack>=0.5,<0.6.0a0
- pyarrow 5.0.0 *cuda
- pyarrow 6.0.1 *cuda
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -40,7 +40,7 @@ requirements:
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 5.0.0 *cuda
- arrow-cpp 6.0.1 *cuda
- arrow-cpp-proc * cuda
- dlpack>=0.5,<0.6.0a0
run:
Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -308,7 +308,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

endfunction()

set(CUDF_VERSION_Arrow 5.0.0)
set(CUDF_VERSION_Arrow 6.0.1)

find_and_configure_arrow(
${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC}
Expand Down
18 changes: 5 additions & 13 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2093,24 +2093,16 @@ def as_column(
dtype = "bool"
np_type = np.dtype(dtype).type
pa_type = np_to_pa_dtype(np.dtype(dtype))
# TODO: A warning is emitted from pyarrow 5.0.0's function
# pyarrow.lib._sequence_to_array:
# "DeprecationWarning: an integer is required (got type float).
# Implicit conversion to integers using __int__ is deprecated,
# and may be removed in a future version of Python."
# This warning does not appear in pyarrow 6.0.1 and will be
# resolved by https://github.com/rapidsai/cudf/pull/9686/.
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
pa_array = pa.array(
data = as_column(
pa.array(
arbitrary,
type=pa_type,
from_pandas=True
if nan_as_null is None
else nan_as_null,
)
data = as_column(
pa_array, dtype=dtype, nan_as_null=nan_as_null,
),
dtype=dtype,
nan_as_null=nan_as_null,
)
except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
if is_categorical_dtype(dtype):
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4605,10 +4605,17 @@ def to_arrow(self, preserve_index=True):
a: int64
b: int64
index: int64
----
a: [[1,2,3]]
b: [[4,5,6]]
index: [[1,2,3]]
>>> df.to_arrow(preserve_index=False)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2,3]]
b: [[4,5,6]]
"""

data = self.copy(deep=False)
Expand Down
6 changes: 5 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -1922,6 +1922,10 @@ def to_arrow(self):
a: int64
b: int64
index: int64
----
a: [[1,2,3]]
b: [[4,5,6]]
index: [[1,2,3]]
"""
return pa.Table.from_pydict(
{name: col.to_arrow() for name, col in self._data.items()}
Expand Down

0 comments on commit 19dc46f

Please sign in to comment.