Skip to content

Commit

Permalink
Enable pytest failures on warnings from cudf (Ensure values being set…
Browse files Browse the repository at this point in the history
… are cast to the object's type) (#1358)

In 24.04, cudf issues a `FutureWarning` when setting a value in a column would change the column's data type. The fix is to cast explicitly before the assignment: either cast the column to the value's type, or cast the value to the column's type. A second pair of eyes on the direction of each cast would be welcome, in case a data type was chosen intentionally.

Additionally, the test suite will now fail when a warning comes from cudf, as part of rapidsai/build-planning#26.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - H. Thomson Comer (https://github.com/thomcom)

URL: #1358
  • Loading branch information
mroeschke authored Mar 12, 2024
1 parent 47a7961 commit 6298419
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 21 deletions.
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/core/binpreds/basic_predicates.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import cudf

Expand Down Expand Up @@ -67,7 +67,7 @@ def _basic_intersects_count(lhs, rhs):
is_degenerate = _multipoints_is_degenerate(intersections)
# If all the points in the intersection are in the rhs
if len(is_degenerate) > 0:
sizes[is_degenerate] = 1
sizes[is_degenerate] = sizes.dtype.type(1)
return sizes


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import cupy as cp

Expand Down Expand Up @@ -231,6 +231,7 @@ def _postprocess_multipoint_rhs(
return count_result
hits = result_df["point_index_x"]
hits.index = count_result.iloc[result_df["rhs_index"]].index
count_result = count_result.astype(hits.dtype)
count_result.iloc[result_df["rhs_index"]] = hits
return count_result

Expand Down
14 changes: 7 additions & 7 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION
# Copyright (c) 2020-2024, NVIDIA CORPORATION

from functools import cached_property
from numbers import Integral
Expand Down Expand Up @@ -660,15 +660,15 @@ def _align_to_index(
index, how, sort, allow_non_unique
)
).astype("int32")
aligned_union_offsets[
aligned_union_offsets.isna()
] = Feature_Enum.NONE.value
aligned_union_offsets[aligned_union_offsets.isna()] = np.int32(
Feature_Enum.NONE.value
)
aligned_input_types = self._column._meta.input_types._align_to_index(
index, how, sort, allow_non_unique
).astype("int8")
aligned_input_types[
aligned_input_types.isna()
] = Feature_Enum.NONE.value
aligned_input_types[aligned_input_types.isna()] = np.int8(
Feature_Enum.NONE.value
)
column = GeoColumn(
(
self._column.points,
Expand Down
7 changes: 5 additions & 2 deletions python/cuspatial/cuspatial/tests/test_geodataframe.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
import sys

import geopandas as gpd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -321,8 +323,9 @@ def test_boolmask(gpdf, df_boolmask):


@pytest.mark.xfail(
sys.version_info.major >= 3 and sys.version_info.minor >= 11,
reason="Size discrepancies between Python versions. See "
"https://github.com/rapidsai/cuspatial/issues/1352"
"https://github.com/rapidsai/cuspatial/issues/1352",
)
def test_memory_usage(gs):
assert gs.memory_usage() == 224
Expand Down
10 changes: 5 additions & 5 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from enum import Enum
from numbers import Integral
Expand Down Expand Up @@ -27,7 +27,7 @@
np.random.seed(0)


class Test_Feature_Enum(Enum):
class Example_Feature_Enum(Enum):
POINT = 0
MULTIPOINT = 1
LINESTRING = 2
Expand All @@ -54,7 +54,7 @@ def random_multipolygon(size):


def generate_random_shapely_feature(
size: Integral, has_z: bool = False, obj_type: Test_Feature_Enum = None
size: Integral, has_z: bool = False, obj_type: Example_Feature_Enum = None
):
obj_type = obj_type.value if obj_type else np.random.randint(1, 7)
if obj_type == 1:
Expand All @@ -78,7 +78,7 @@ def generate_random_shapely_feature(
return random_multipolygon(size)


def generator(size: Integral, obj_type: Test_Feature_Enum = None):
def generator(size: Integral, obj_type: Example_Feature_Enum = None):
geos_list = []
for i in range(size):
geo = generate_random_shapely_feature(3, obj_type)
Expand Down Expand Up @@ -257,7 +257,7 @@ def test_getitem_lines():


def test_getitem_slice_same_index():
gps = gpd.GeoSeries(generator(3, Test_Feature_Enum.POINT))
gps = gpd.GeoSeries(generator(3, Example_Feature_Enum.POINT))
cus = cuspatial.from_geopandas(gps)
assert_eq_geo(cus[0:1].to_geopandas(), gps[0:1])
assert_eq_geo(cus[0:1].to_geopandas(), gps[0:1])
Expand Down
8 changes: 5 additions & 3 deletions python/cuspatial/cuspatial/utils/binpred_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import cupy as cp
import numpy as np
Expand Down Expand Up @@ -363,7 +363,7 @@ def _points_and_lines_to_multipoints(geoseries, offsets):
points = geoseries[points_mask]
lines = geoseries[lines_mask]
points_offsets = _zero_series(len(geoseries))
points_offsets[points_mask] = 1
points_offsets[points_mask] = points_offsets.dtype.type(1)
lines_series = geoseries[lines_mask]
lines_sizes = lines_series.sizes
xy = _zero_series(len(points.points.xy) + len(lines.lines.xy))
Expand All @@ -372,9 +372,10 @@ def _points_and_lines_to_multipoints(geoseries, offsets):
lines_sizes.index = points_offsets[lines_mask].index
points_offsets[lines_mask] = lines_series.sizes.values
sizes[lines_mask] = lines.sizes.values * 2
sizes[points_mask] = 2
sizes[points_mask] = sizes.dtype.type(2)
# TODO Inevitable host device copy
points_xy_mask = cp.array(np.repeat(points_mask, sizes.values_host))
xy = xy.astype(points.points.xy.dtype)
xy.iloc[points_xy_mask] = points.points.xy.reset_index(drop=True)
xy.iloc[~points_xy_mask] = lines.lines.xy.reset_index(drop=True)
collected_offsets = cudf.concat(
Expand Down Expand Up @@ -446,6 +447,7 @@ def _pli_features_rebuild_offsets(pli, features):
# Recompute the offsets for the new series
grouped_sizes = in_sizes.groupby(level=0).sum().sort_index()
out_sizes = _zero_series(len(pli[0]) - 1)
out_sizes = out_sizes.astype(grouped_sizes.dtype)
out_sizes.iloc[grouped_sizes.index] = grouped_sizes
offsets = cudf.concat([cudf.Series([0]), out_sizes.cumsum()])
return offsets
Expand Down
8 changes: 7 additions & 1 deletion python/cuspatial/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -120,3 +120,9 @@ wheel.packages = ["cuspatial"]
provider = "scikit_build_core.metadata.regex"
input = "cuspatial/VERSION"
regex = "(?P<value>.*)"

[tool.pytest.ini_options]
xfail_strict = true
filterwarnings = [
"error:::cudf"
]

0 comments on commit 6298419

Please sign in to comment.