Skip to content

Commit

Permalink
Upgrade arrow & pyarrow to 5.0.0 (#8908)
Browse files Browse the repository at this point in the history
This PR upgrades Arrow (both the `arrow-cpp` and `pyarrow` packages) from `4.0.1` to `5.0.0`.

- [x] Upgrade & test arrow 5.0.0.
- [x] Fix pytest failures related to decimal arrays.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Ashwin Srinath (https://github.com/shwina)
  - Mark Sadang (https://github.com/msadang)
  - Dillon Cullinan (https://github.com/dillon-cullinan)

URL: #8908
  • Loading branch information
galipremsagar authored Aug 13, 2021
1 parent 59b84f3 commit 2c5a2ad
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 48 deletions.
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -42,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -42,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ requirements:
- setuptools
- numba >=0.53.1
- dlpack>=0.5,<0.6.0a0
- pyarrow 4.0.1 *cuda
- pyarrow 5.0.0 *cuda
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ requirements:
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 4.0.1 *cuda
- arrow-cpp 5.0.0 *cuda
- arrow-cpp-proc * cuda
- dlpack>=0.5,<0.6.0a0
run:
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/CUDF_GetArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

endfunction()

set(CUDF_VERSION_Arrow 4.0.1)
set(CUDF_VERSION_Arrow 5.0.0)

find_and_configure_arrow(
${CUDF_VERSION_Arrow}
Expand Down
44 changes: 24 additions & 20 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2016,6 +2016,29 @@ def as_column(
memoryview(arbitrary), dtype=dtype, nan_as_null=nan_as_null
)
except TypeError:
if dtype is not None:
# Arrow throws a type error if the input is of
# mixed-precision and cannot fit into the provided
# decimal type properly, see:
# https://github.com/apache/arrow/pull/9948
# Hence we should let the exception propagate to
# the user.
if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
data = pa.array(
arbitrary,
type=pa.decimal128(
precision=dtype.precision, scale=dtype.scale
),
)
return cudf.core.column.Decimal64Column.from_arrow(data)
if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
data = pa.array(
arbitrary,
type=pa.decimal128(
precision=dtype.precision, scale=dtype.scale
),
)
return cudf.core.column.Decimal32Column.from_arrow(data)
pa_type = None
np_type = None
try:
Expand All @@ -2034,26 +2057,7 @@ def as_column(
) and not isinstance(dtype, cudf.IntervalDtype):
data = pa.array(arbitrary, type=dtype.to_arrow())
return as_column(data, nan_as_null=nan_as_null)
if isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
data = pa.array(
arbitrary,
type=pa.decimal128(
precision=dtype.precision, scale=dtype.scale
),
)
return cudf.core.column.Decimal64Column.from_arrow(
data
)
if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
data = pa.array(
arbitrary,
type=pa.decimal128(
precision=dtype.precision, scale=dtype.scale
),
)
return cudf.core.column.Decimal32Column.from_arrow(
data
)

dtype = pd.api.types.pandas_dtype(dtype)
np_type = np.dtype(dtype).type
if np_type == np.bool_:
Expand Down
40 changes: 20 additions & 20 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1758,16 +1758,16 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.add,
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
["3.0", "4.0"],
cudf.Decimal64Dtype(scale=2, precision=3),
["3.0", "4.0"],
cudf.Decimal64Dtype(scale=2, precision=4),
),
(
operator.add,
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["2.25", "1.005"],
cudf.Decimal64Dtype(scale=3, precision=4),
["3.75", "3.005"],
Expand All @@ -1785,7 +1785,7 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.sub,
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=1, precision=2),
["2.25", "1.005"],
cudf.Decimal64Dtype(scale=3, precision=4),
["-0.75", "0.995"],
Expand All @@ -1794,7 +1794,7 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.sub,
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=1, precision=2),
["2.25", "1.005"],
cudf.Decimal64Dtype(scale=3, precision=4),
["-0.75", "0.995"],
Expand All @@ -1812,11 +1812,11 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.mul,
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["1.5", "3.0"],
cudf.Decimal64Dtype(scale=3, precision=4),
["2.25", "6.0"],
cudf.Decimal64Dtype(scale=5, precision=7),
cudf.Decimal64Dtype(scale=5, precision=8),
),
(
operator.mul,
Expand Down Expand Up @@ -1866,16 +1866,16 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.add,
["1.5", None, "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=1, precision=2),
["1.5", None, "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=1, precision=2),
["3.0", None, "4.0"],
cudf.Decimal64Dtype(scale=2, precision=3),
cudf.Decimal64Dtype(scale=1, precision=3),
),
(
operator.add,
["1.5", None],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["2.25", "1.005"],
cudf.Decimal64Dtype(scale=3, precision=4),
["3.75", None],
Expand All @@ -1884,7 +1884,7 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.sub,
["1.5", None],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["2.25", None],
cudf.Decimal64Dtype(scale=3, precision=4),
["-0.75", None],
Expand All @@ -1893,7 +1893,7 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.sub,
["1.5", "2.0"],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["2.25", None],
cudf.Decimal64Dtype(scale=3, precision=4),
["-0.75", None],
Expand All @@ -1902,11 +1902,11 @@ def test_binops_with_NA_consistent(dtype, op):
(
operator.mul,
["1.5", None],
cudf.Decimal64Dtype(scale=2, precision=2),
cudf.Decimal64Dtype(scale=2, precision=3),
["1.5", None],
cudf.Decimal64Dtype(scale=3, precision=4),
["2.25", None],
cudf.Decimal64Dtype(scale=5, precision=7),
cudf.Decimal64Dtype(scale=5, precision=8),
),
(
operator.mul,
Expand Down Expand Up @@ -2432,10 +2432,10 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected):
(
operator.truediv,
["100", "200"],
cudf.Decimal64Dtype(scale=2, precision=4),
cudf.Decimal64Dtype(scale=2, precision=5),
decimal.Decimal(2),
["50", "100"],
cudf.Decimal64Dtype(scale=2, precision=6),
cudf.Decimal64Dtype(scale=2, precision=7),
False,
),
(
Expand All @@ -2459,10 +2459,10 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected):
(
operator.truediv,
["100", "200"],
cudf.Decimal64Dtype(scale=2, precision=3),
cudf.Decimal64Dtype(scale=2, precision=5),
1,
["0", "0"],
cudf.Decimal64Dtype(scale=-2, precision=5),
cudf.Decimal64Dtype(scale=-2, precision=7),
True,
),
(
Expand Down
10 changes: 9 additions & 1 deletion python/cudf/cudf/tests/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
[1],
[-1],
[1, 2, 3, 4],
[42, 1729, 4104],
[42, 17, 41],
[1, 2, None, 4],
[None, None, None],
[],
Expand Down Expand Up @@ -347,3 +347,11 @@ def test_serialize_decimal_columns(data):
df = cudf.DataFrame(data)
recreated = df.__class__.deserialize(*df.serialize())
assert_eq(recreated, df)


def test_decimal_invalid_precision():
    """Check that building a Series with a too-narrow decimal dtype raises.

    Each case pairs input values with the positional arguments handed to
    ``cudf.Decimal64Dtype``; constructing the ``cudf.Series`` is expected
    to raise ``pa.ArrowInvalid`` for every pair.
    """
    # NOTE(review): the positional arguments are presumably
    # (precision, scale) -- confirm against the Decimal64Dtype signature.
    overflowing_cases = (
        ([10, 20, 30], (2, 2)),
        ([Decimal("300")], (2, 1)),
    )

    for values, dtype_args in overflowing_cases:
        # The dtype is built inside the ``raises`` context, so an
        # ArrowInvalid coming from either the dtype or the Series
        # construction satisfies the expectation.
        with pytest.raises(pa.ArrowInvalid):
            cudf.Series(values, dtype=cudf.Decimal64Dtype(*dtype_args))

0 comments on commit 2c5a2ad

Please sign in to comment.