Changes to support Numpy >= 1.24 (#13325)
Closes #13301

Authors:
  - Ashwin Srinath (https://github.com/shwina)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #13325
shwina authored May 25, 2023
1 parent ae375d2 commit c3dd1d6
Showing 18 changed files with 64 additions and 63 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -49,7 +49,7 @@ dependencies:
 - ninja
 - notebook
 - numba>=0.57
-- numpy>=1.21,<1.24
+- numpy>=1.21
 - numpydoc
 - nvcc_linux-64=11.8
 - nvtx>=0.2.1
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
@@ -65,7 +65,7 @@ requirements:
     - pandas >=1.3,<1.6.0dev0
     - cupy >=12.0.0
     - numba >=0.57
-    - numpy >=1.21,<1.24  # Temporarily upper bound numpy to avoid overflow deprecations
+    - numpy >=1.21
     - {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
     - libcudf {{ version }}
     - {{ pin_compatible('rmm', max_pin='x.x') }}
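For context, a minimal sketch (not part of the diff) of the NumPy 1.24 behavior change that the removed "<1.24" upper bound was guarding against: implicit conversion of out-of-bound Python integers is deprecated on 1.24 (and an error on NumPy 2.x), while an explicit astype() cast still wraps.

import warnings

import numpy as np

with warnings.catch_warnings():
    warnings.simplefilter("error")
    try:
        np.array([453], dtype="uint8")  # out-of-bound conversion: rejected on NumPy >= 1.24
    except (DeprecationWarning, OverflowError) as exc:
        print("rejected:", exc)

print(np.array([453]).astype("uint8"))  # [197] -- explicit casts still wrap modulo 256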
4 changes: 2 additions & 2 deletions dependencies.yaml
@@ -234,7 +234,7 @@ dependencies:
       # Hard pin the patch version used during the build. This must be kept
       # in sync with the version pinned in get_arrow.cmake.
       - pyarrow==11.0.0.*
-      - numpy>=1.21,<1.24  # Temporarily upper bound numpy to avoid overflow deprecations
+      - numpy>=1.21
   build_python:
     common:
       - output_types: [conda, requirements, pyproject]
@@ -342,7 +342,7 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - fsspec>=0.6.0
-          - numpy>=1.21,<1.24  # Temporarily upper bound numpy to avoid overflow deprecations
+          - numpy>=1.21
           - pandas>=1.3,<1.6.0dev0
   run_cudf:
     common:
10 changes: 6 additions & 4 deletions python/cudf/cudf/core/column/numerical.py
@@ -768,10 +768,12 @@ def _normalize_find_and_replace_input(
     if len(col_to_normalize) == 1:
         if cudf._lib.scalar._is_null_host_scalar(col_to_normalize[0]):
             return normalized_column.astype(input_column_dtype)
-        else:
-            col_to_normalize_casted = input_column_dtype.type(
-                col_to_normalize[0]
-            )
+        if np.isinf(col_to_normalize[0]):
+            return normalized_column
+        col_to_normalize_casted = np.array(col_to_normalize[0]).astype(
+            input_column_dtype
+        )
+
         if not np.isnan(col_to_normalize_casted) and (
             col_to_normalize_casted != col_to_normalize[0]
         ):
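A hedged illustration of the pattern adopted above: the dtype scalar constructor rejects out-of-range values on NumPy >= 1.24, whereas np.array(value).astype(dtype) is an explicit cast that wraps. The np.isinf early return presumably exists because an infinite needle can never match a finite column, and casting it would itself warn.

import numpy as np

# np.int8(300)  # out-of-bound conversion: DeprecationWarning on 1.24, error on 2.x
print(np.array(300).astype(np.int8))  # 44 -- the explicit cast wraps instead
print(np.isinf(float("inf")))         # True -- guarded before any cast is attempted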
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_column.py
@@ -398,8 +398,8 @@ def test_column_view_string_slice(slc):
         cudf.core.column.as_column([], dtype="uint8"),
     ),
     (
-        cp.array([453], dtype="uint8"),
-        cudf.core.column.as_column([453], dtype="uint8"),
+        cp.array([255], dtype="uint8"),
+        cudf.core.column.as_column([255], dtype="uint8"),
     ),
 ],
 )
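The old test value 453 does not fit in uint8, so building the array would now trip the out-of-bound deprecation; 255 is the largest value that does fit.

import numpy as np

print(np.iinfo(np.uint8).max)  # 255 -- the new, in-range test value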
6 changes: 4 additions & 2 deletions python/cudf/cudf/tests/test_csv.py
@@ -151,7 +151,7 @@ def make_all_numeric_extremes_dataframe():
         if np.issubdtype(np_type, np.integer):
             itype = np.iinfo(np_type)
             extremes = [0, +1, -1, itype.min, itype.max]
-            df[gdf_dtype] = np.array(extremes * 4, dtype=np_type)[:20]
+            df[gdf_dtype] = np.array(extremes * 4).astype(np_type)[:20]
         else:
             ftype = np.finfo(np_type)
             extremes = [
@@ -324,6 +324,7 @@ def test_csv_reader_dtype_dict(use_names):
     assert_eq(gdf, pdf)


+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
 @pytest.mark.parametrize("use_names", [True, False])
 def test_csv_reader_dtype_extremes(use_names):
     # Save with the column header if not explicitly specifying a list of names
@@ -1433,7 +1434,7 @@ def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype):

     gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"])

-    expected = np.array(values, dtype=np_dtype)
+    expected = np.array(values).astype(np_dtype)
     actual = gdf["hex_int"].to_numpy()
     np.testing.assert_array_equal(expected, actual)
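The hexadecimal-overflow test intentionally feeds values such as 0xFFFFFFFF to signed dtypes, so the expected array must now be built with an explicit cast; a small sketch of the difference:

import numpy as np

# np.array([0xFFFFFFFF], dtype=np.int32)  # out-of-bound conversion: deprecated on 1.24
print(np.array([0xFFFFFFFF]).astype(np.int32))  # [-1] -- wraps to the two's-complement value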

@@ -2149,6 +2150,7 @@ def test_default_integer_bitwidth_partial(
     )


+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
 def test_default_integer_bitwidth_extremes(
     cudf_extreme_numeric_dataframe, default_integer_bitwidth
 ):
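The new filterwarnings markers silence the RuntimeWarning that NumPy >= 1.24 emits when non-finite floats are cast to integers; a minimal reproduction, assuming 1.24 semantics:

import warnings

import numpy as np

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    np.array([np.nan, np.inf]).astype(np.int64)

print(caught[0].message)  # invalid value encountered in cast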
12 changes: 4 additions & 8 deletions python/cudf/cudf/tests/test_feather.py
@@ -15,23 +15,19 @@
 @pytest.fixture(params=[0, 1, 10, 100])
 def pdf(request):
     types = NUMERIC_TYPES + ["bool"]
-    typer = {"col_" + val: val for val in types}
-    ncols = len(types)
     nrows = request.param

     # Create a pandas dataframe with random data of mixed types
     test_pdf = pd.DataFrame(
-        [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)],
-        columns=pd.Index([f"col_{typ}" for typ in types], name="foo"),
+        {
+            f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ)
+            for typ in types
+        }
     )
     # Delete the name of the column index, and rename the row index
     test_pdf.columns.name = None
     test_pdf.index.name = "index"

-    # Cast all the column dtypes to objects, rename them, and then cast to
-    # appropriate types
-    test_pdf = test_pdf.astype("object").astype(typer)
-
     # Create non-numeric categorical data otherwise may get typecasted
     data = [ascii_letters[np.random.randint(0, 52)] for i in range(nrows)]
     test_pdf["col_category"] = pd.Series(data, dtype="category")
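The same fixture rewrite recurs in test_json.py and test_parquet.py below. The old pattern generated values up to ncols * nrows and cast them through object, which overflows the narrow integer dtypes; a hedged sketch of the failure mode, assuming NumPy >= 1.24 conversion rules for object arrays:

import numpy as np
import pandas as pd

old = pd.Series(range(1490, 1505), dtype="object")
# old.astype("int8")  # 1500 is out of bounds for int8: deprecated on NumPy >= 1.24

# New pattern: values are drawn in [0, nrows) at the target dtype, in range by construction.
new = pd.Series(np.random.randint(0, 15, 15).astype("int8"))
print(new.dtype)  # int8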
12 changes: 4 additions & 8 deletions python/cudf/cudf/tests/test_json.py
@@ -31,23 +31,19 @@ def make_numeric_dataframe(nrows, dtype):
 @pytest.fixture(params=[0, 1, 10, 100])
 def pdf(request):
     types = NUMERIC_TYPES + DATETIME_TYPES + ["bool"]
-    typer = {"col_" + val: val for val in types}
-    ncols = len(types)
     nrows = request.param

     # Create a pandas dataframe with random data of mixed types
     test_pdf = pd.DataFrame(
-        [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)],
-        columns=pd.Index([f"col_{typ}" for typ in types], name="foo"),
+        {
+            f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ)
+            for typ in types
+        }
     )
     # Delete the name of the column index, and rename the row index
     test_pdf.columns.name = None
     test_pdf.index.name = "test_index"

-    # Cast all the column dtypes to objects, rename them, and then cast to
-    # appropriate types
-    test_pdf = test_pdf.astype("object").astype(typer)
-
     return test_pdf
6 changes: 5 additions & 1 deletion python/cudf/cudf/tests/test_numerical.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.

 import numpy as np
 import pandas as pd
@@ -194,6 +194,7 @@ def test_to_numeric_downcast_int(data, downcast):
     assert_eq(expected, got)


+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
 @pytest.mark.parametrize(
     "data",
     [
@@ -223,6 +224,7 @@ def test_to_numeric_downcast_float(data, downcast):
     assert_eq(expected, got)


+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
 @pytest.mark.parametrize(
     "data",
     [
@@ -245,6 +247,7 @@ def test_to_numeric_downcast_large_float(data, downcast):
     assert_eq(expected, got)


+@pytest.mark.filterwarnings("ignore:overflow encountered in cast")
 @pytest.mark.parametrize(
     "data",
     [
@@ -325,6 +328,7 @@ def test_to_numeric_downcast_string_float(data, downcast):
     assert_eq(expected, got)


+@pytest.mark.filterwarnings("ignore:overflow encountered in cast")
 @pytest.mark.parametrize(
     "data",
     [
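The "overflow encountered in cast" filters cover the sibling warning for finite values that exceed the target dtype's range; a minimal reproduction, assuming 1.24 semantics:

import warnings

import numpy as np

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    np.array([1e300]).astype(np.float32)  # far beyond float32's ~3.4e38 maximum

print(caught[0].message)  # overflow encountered in cast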
24 changes: 8 additions & 16 deletions python/cudf/cudf/tests/test_parquet.py
@@ -69,14 +69,14 @@ def simple_pdf(request):
         "float32",
         "float64",
     ]
-    typer = {"col_" + val: val for val in types}
-    ncols = len(types)
     nrows = request.param

     # Create a pandas dataframe with random data of mixed types
     test_pdf = pd.DataFrame(
-        [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)],
-        columns=pd.Index([f"col_{typ}" for typ in types], name="foo"),
+        {
+            f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ)
+            for typ in types
+        },
         # Need to ensure that this index is not a RangeIndex to get the
         # expected round-tripping behavior from Parquet reader/writer.
         index=pd.Index(list(range(nrows))),
@@ -85,10 +85,6 @@ def simple_pdf(request):
     test_pdf.columns.name = None
     test_pdf.index.name = "test_index"

-    # Cast all the column dtypes to objects, rename them, and then cast to
-    # appropriate types
-    test_pdf = test_pdf.astype("object").astype(typer)
-
     return test_pdf
@@ -115,14 +111,14 @@ def build_pdf(num_columns, day_resolution_timestamps):
         "datetime64[us]",
         "str",
     ]
-    typer = {"col_" + val: val for val in types}
-    ncols = len(types)
     nrows = num_columns.param

     # Create a pandas dataframe with random data of mixed types
     test_pdf = pd.DataFrame(
-        [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)],
-        columns=pd.Index([f"col_{typ}" for typ in types], name="foo"),
+        {
+            f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ)
+            for typ in types
+        },
         # Need to ensure that this index is not a RangeIndex to get the
         # expected round-tripping behavior from Parquet reader/writer.
         index=pd.Index(list(range(nrows))),
@@ -131,10 +127,6 @@ def build_pdf(num_columns, day_resolution_timestamps):
     test_pdf.columns.name = None
     test_pdf.index.name = "test_index"

-    # Cast all the column dtypes to objects, rename them, and then cast to
-    # appropriate types
-    test_pdf = test_pdf.astype(typer)
-
     # make datetime64's a little more interesting by increasing the range of
     # dates note that pandas will convert these to ns timestamps, so care is
     # taken to avoid overflowing a ns timestamp. There is also the ability to
12 changes: 6 additions & 6 deletions python/cudf/cudf/tests/test_rank.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.

 from itertools import chain, combinations_with_replacement, product
@@ -128,6 +128,7 @@ def test_rank_error_arguments(pdf):
 sort_dtype_args = [np.int32, np.int64, np.float32, np.float64]


+@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
 @pytest.mark.parametrize(
     "elem,dtype",
     list(
@@ -139,13 +140,12 @@ def test_rank_error_arguments(pdf):
 )
 def test_series_rank_combinations(elem, dtype):
     np.random.seed(0)
+    aa = np.fromiter(chain.from_iterable(elem), np.float64).astype(dtype)
     gdf = DataFrame()
-    gdf["a"] = aa = np.fromiter(chain.from_iterable(elem), np.float64).astype(
-        dtype
-    )
-    ranked_gs = gdf["a"].rank(method="first")
     df = pd.DataFrame()
+    gdf["a"] = aa
     df["a"] = aa
+    ranked_gs = gdf["a"].rank(method="first")
     ranked_ps = df["a"].rank(method="first")
     # Check
-    assert_eq(ranked_ps, ranked_gs.to_pandas())
+    assert_eq(ranked_ps, ranked_gs)
13 changes: 10 additions & 3 deletions python/cudf/cudf/tests/test_replace.py
@@ -944,8 +944,15 @@ def test_numeric_series_replace_dtype(series_dtype, replacement):
     psr = pd.Series([0, 1, 2, 3, 4, 5], dtype=series_dtype)
     sr = cudf.from_pandas(psr)

+    if sr.dtype.kind in "ui":
+        can_replace = np.array([replacement])[0].is_integer() and np.can_cast(
+            int(replacement), sr.dtype
+        )
+    else:
+        can_replace = np.can_cast(replacement, sr.dtype)
+
     # Both Scalar
-    if sr.dtype.type(replacement) != replacement:
+    if not can_replace:
         with pytest.raises(TypeError):
             sr.replace(1, replacement)
     else:
@@ -954,7 +961,7 @@ def test_numeric_series_replace_dtype(series_dtype, replacement):
         assert_eq(expect, got)

     # to_replace is a list, replacement is a scalar
-    if sr.dtype.type(replacement) != replacement:
+    if not can_replace:
         with pytest.raises(TypeError):

             sr.replace([2, 3], replacement)
@@ -974,7 +981,7 @@ def test_numeric_series_replace_dtype(series_dtype, replacement):
     # Both lists of equal length
     if (
         np.dtype(type(replacement)).kind == "f" and sr.dtype.kind in {"i", "u"}
-    ) or (sr.dtype.type(replacement) != replacement):
+    ) or (not can_replace):
         with pytest.raises(TypeError):
             sr.replace([2, 3], [replacement, replacement])
     else:
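A hedged sketch of the new can_replace predicate above, using the NumPy 1.24-era value-based rules (np.can_cast stopped accepting Python scalars in NumPy 2.0):

import numpy as np

print(np.can_cast(127, np.int8))        # True  -- the value fits
print(np.can_cast(128, np.int8))        # False -- out of range for int8
print(np.array([2.5])[0].is_integer())  # False -- fractional floats cannot replace into integer columns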
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_sparse_df.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.

 import numpy as np
@@ -7,7 +7,7 @@

 def test_to_dense_array():
     data = np.random.random(8)
-    mask = np.asarray([0b11010110], dtype=np.byte)
+    mask = np.asarray([0b11010110]).astype(np.byte)

     sr = Series.from_masked_array(data=data, mask=mask, null_count=3)
     assert sr.has_nulls
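The mask constant 0b11010110 is 214, above the np.byte (int8) maximum of 127, so constructing the array at that dtype would trip the deprecation while the explicit cast wraps:

import numpy as np

print(0b11010110)                                # 214
print(np.asarray([0b11010110]).astype(np.byte))  # [-42] -- explicit cast wraps modulo 256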
5 changes: 3 additions & 2 deletions python/cudf/cudf/tests/test_unaops.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.

 import itertools
 import operator
@@ -77,9 +77,10 @@ def generate_valid_scalar_unaop_combos():
     return results


+@pytest.mark.filterwarnings("ignore:overflow encountered in scalar negative")
 @pytest.mark.parametrize("slr,dtype,op", generate_valid_scalar_unaop_combos())
 def test_scalar_unary_operations(slr, dtype, op):
-    slr_host = cudf.dtype(dtype).type(slr)
+    slr_host = np.array([slr])[0].astype(cudf.dtype(dtype))
     slr_device = cudf.Scalar(slr, dtype=dtype)

     expect = op(slr_host)
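The new filter matches the warning NumPy raises when negating the most negative value of a signed type, which has no positive counterpart; a minimal reproduction, assuming 1.24 semantics:

import warnings

import numpy as np

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    -np.int8(-128)  # int8 has no +128

print(caught[0].message)  # overflow encountered in scalar negative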
3 changes: 2 additions & 1 deletion python/cudf/cudf/utils/queryutils.py
@@ -138,7 +138,8 @@ def query_compile(expr):
     key "args" is a sequence of name of the arguments.
     """

-    funcid = f"queryexpr_{np.uintp(hash(expr)):x}"
+    # hash returns in the semi-open interval [-2**63, 2**63)
+    funcid = f"queryexpr_{(hash(expr) + 2**63):x}"
     # Load cache
     compiled = _cache.get(funcid)
     # Cache not found
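Python's hash() is signed, so np.uintp(hash(expr)) was an out-of-bound conversion for negative hashes under NumPy >= 1.24; adding 2**63 maps [-2**63, 2**63) onto [0, 2**64), which formats cleanly as unsigned hex. A small sketch with a hypothetical hash value:

h = -42  # stand-in for a negative hash(expr)
print(f"queryexpr_{(h + 2**63):x}")  # queryexpr_7fffffffffffffd6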
4 changes: 2 additions & 2 deletions python/cudf/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
     "cmake>=3.23.1,!=3.25.0",
     "cython>=0.29,<0.30",
     "ninja",
-    "numpy>=1.21,<1.24",
+    "numpy>=1.21",
     "protoc-wheel",
     "pyarrow==11.0.0.*",
     "rmm==23.6.*",
@@ -32,7 +32,7 @@ dependencies = [
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
     "numba>=0.57",
-    "numpy>=1.21,<1.24",
+    "numpy>=1.21",
     "nvtx>=0.2.1",
     "packaging",
     "pandas>=1.3,<1.6.0dev0",
2 changes: 1 addition & 1 deletion python/cudf_kafka/pyproject.toml
@@ -4,7 +4,7 @@

 requires = [
     "cython>=0.29,<0.30",
-    "numpy>=1.21,<1.24",
+    "numpy>=1.21",
     "pyarrow==11.0.0.*",
     "setuptools",
     "wheel",
2 changes: 1 addition & 1 deletion python/dask_cudf/pyproject.toml
@@ -23,7 +23,7 @@ dependencies = [
     "dask==2023.3.2",
     "distributed==2023.3.2.1",
     "fsspec>=0.6.0",
-    "numpy>=1.21,<1.24",
+    "numpy>=1.21",
     "pandas>=1.3,<1.6.0dev0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
