Skip to content

Commit

Permalink
Reduce digitize test counts and move the tests to test_series.py
Browse files Browse the repository at this point in the history
  • Loading branch information
isVoid committed Apr 15, 2022
1 parent 1742805 commit 6a765c9
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 29 deletions.
29 changes: 0 additions & 29 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2468,35 +2468,6 @@ def test_arrow_handle_no_index_name(pdf, gdf):
assert_eq(expect, got)


@pytest.mark.parametrize("num_rows", [1, 3, 10, 100])
@pytest.mark.parametrize("num_bins", [1, 2, 4, 20])
@pytest.mark.parametrize("right", [True, False])
@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
@pytest.mark.parametrize("series_bins", [True, False])
def test_series_digitize(num_rows, num_bins, right, dtype, series_bins):
    """Check ``Series.digitize`` against ``np.digitize`` on random input.

    The bins are passed either as a ``cudf.Series`` or as the raw NumPy
    array, depending on ``series_bins``.
    """
    values = np.random.randint(0, 100, num_rows).astype(dtype)
    # Sort and de-duplicate the randomly drawn bin edges.
    raw_edges = np.random.randint(2, 95, num_bins).astype(dtype)
    edges = np.unique(np.sort(raw_edges))

    gpu_values = cudf.Series(values)
    bins_arg = cudf.Series(edges) if series_bins else edges
    result = gpu_values.digitize(bins_arg, right)

    expected = np.digitize(values, edges, right)
    np.testing.assert_array_equal(expected, result.to_numpy())


def test_series_digitize_invalid_bins():
    """``Series.digitize`` must raise ``ValueError`` when bins hold nulls."""
    values = cudf.Series(np.random.randint(0, 30, 80), dtype="int32")
    null_bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
    expected_msg = "`bins` cannot contain null entries."

    with pytest.raises(ValueError, match=expected_msg):
        values.digitize(null_bins)


def test_pandas_non_contiguious():
arr1 = np.random.sample([5000, 10])
assert arr1.flags["C_CONTIGUOUS"] is True
Expand Down
29 changes: 29 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1782,3 +1782,32 @@ def test_diff_many_dtypes(data):
gs = cudf.from_pandas(ps)
assert_eq(ps.diff(), gs.diff())
assert_eq(ps.diff(periods=2), gs.diff(periods=2))


@pytest.mark.parametrize("num_rows", [1, 100])
@pytest.mark.parametrize("num_bins", [1, 10])
@pytest.mark.parametrize("right", [True, False])
@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
@pytest.mark.parametrize("series_bins", [True, False])
def test_series_digitize(num_rows, num_bins, right, dtype, series_bins):
    """Check ``Series.digitize`` against ``np.digitize`` on random input.

    The bins are passed either as a ``cudf.Series`` or as the raw NumPy
    array, depending on ``series_bins``.
    """
    values = np.random.randint(0, 100, num_rows).astype(dtype)
    # Sort and de-duplicate the randomly drawn bin edges.
    raw_edges = np.random.randint(2, 95, num_bins).astype(dtype)
    edges = np.unique(np.sort(raw_edges))

    gpu_values = cudf.Series(values)
    bins_arg = cudf.Series(edges) if series_bins else edges
    result = gpu_values.digitize(bins_arg, right)

    expected = np.digitize(values, edges, right)
    np.testing.assert_array_equal(expected, result.to_numpy())


def test_series_digitize_invalid_bins():
    """``Series.digitize`` must raise ``ValueError`` when bins hold nulls."""
    values = cudf.Series(np.random.randint(0, 30, 80), dtype="int32")
    null_bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
    expected_msg = "`bins` cannot contain null entries."

    with pytest.raises(ValueError, match=expected_msg):
        values.digitize(null_bins)

0 comments on commit 6a765c9

Please sign in to comment.