Skip to content

Commit

Permalink
Merge branch 'branch-23.04' into add_docs_build
Browse files Browse the repository at this point in the history
  • Loading branch information
AyodeAwe authored Feb 3, 2023
2 parents 8542b76 + 21ef256 commit 4841cf2
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 28 deletions.
1 change: 0 additions & 1 deletion docs/cudf/source/api_docs/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ Computations / descriptive stats
DataFrame.prod
DataFrame.product
DataFrame.quantile
DataFrame.quantiles
DataFrame.rank
DataFrame.round
DataFrame.skew
Expand Down
20 changes: 14 additions & 6 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5173,13 +5173,21 @@ def from_arrow(cls, table):

if index_col:
if isinstance(index_col[0], dict):
out = out.set_index(
cudf.RangeIndex(
index_col[0]["start"],
index_col[0]["stop"],
name=index_col[0]["name"],
)
idx = cudf.RangeIndex(
index_col[0]["start"],
index_col[0]["stop"],
name=index_col[0]["name"],
)
if len(idx) == len(out):
# `idx` is built from the Arrow `pandas_metadata`, which
# many Arrow operations can leave out of date. Only set
# `idx` as the index when its length matches the length
# of the data; otherwise skip it, so that the default
# RangeIndex is kept.
# See the discussion here:
# https://github.com/apache/arrow/issues/15178
out = out.set_index(idx)
else:
out = out.set_index(index_col[0])

Expand Down
6 changes: 5 additions & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2496,7 +2496,11 @@ def round(self, decimals=0, how="half_even"):

cols = {
name: col.round(decimals[name], how=how)
if (name in decimals and _is_non_decimal_numeric_dtype(col.dtype))
if (
name in decimals
and _is_non_decimal_numeric_dtype(col.dtype)
and not is_bool_dtype(col.dtype)
)
else col.copy(deep=True)
for name, col in self._data.items()
}
Expand Down
53 changes: 33 additions & 20 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3813,17 +3813,22 @@ def test_ndim():
-3,
0,
5,
pd.Series([1, 4, 3, -6], index=["w", "x", "y", "z"]),
cudf.Series([-4, -2, 12], index=["x", "y", "z"]),
{"w": -1, "x": 15, "y": 2},
pd.Series(
[1, 4, 3, -6],
index=["floats", "ints", "floats_with_nan", "floats_same"],
),
cudf.Series(
[-4, -2, 12], index=["ints", "floats_with_nan", "floats_same"]
),
{"floats": -1, "ints": 15, "floats_will_nan": 2},
],
)
def test_dataframe_round(decimals):
pdf = pd.DataFrame(
gdf = cudf.DataFrame(
{
"w": np.arange(0.5, 10.5, 1),
"x": np.random.normal(-100, 100, 10),
"y": np.array(
"floats": np.arange(0.5, 10.5, 1),
"ints": np.random.normal(-100, 100, 10),
"floats_with_na": np.array(
[
14.123,
2.343,
Expand All @@ -3832,31 +3837,25 @@ def test_dataframe_round(decimals):
-8.302,
np.nan,
94.313,
-112.236,
None,
-8.029,
np.nan,
]
),
"z": np.repeat([-0.6459412758761901], 10),
"floats_same": np.repeat([-0.6459412758761901], 10),
"bools": np.random.choice([True, None, False], 10),
"strings": np.random.choice(["abc", "xyz", None], 10),
"struct": np.random.choice([{"abc": 1}, {"xyz": 2}, None], 10),
"list": [[1], [2], None, [4], [3]] * 2,
}
)
gdf = cudf.DataFrame.from_pandas(pdf)
pdf = gdf.to_pandas()

if isinstance(decimals, cudf.Series):
pdecimals = decimals.to_pandas()
else:
pdecimals = decimals

result = gdf.round(decimals)
expected = pdf.round(pdecimals)
assert_eq(result, expected)

# with nulls, maintaining existing null mask
for c in pdf.columns:
arr = pdf[c].to_numpy().astype("float64") # for pandas nulls
arr.ravel()[np.random.choice(10, 5, replace=False)] = np.nan
pdf[c] = gdf[c] = arr

result = gdf.round(decimals)
expected = pdf.round(pdecimals)

Expand Down Expand Up @@ -10012,3 +10011,17 @@ def test_dataframe_transpose_complex_types(data):
actual = gdf.T

assert_eq(expected, actual)


def test_dataframe_from_arrow_slice():
    """Compare ``cudf.DataFrame.from_arrow`` with pandas on a sliced table.

    An Arrow table is built from a small pandas frame (one string column,
    one integer column) and then sliced; converting the slice through cudf
    must give the same frame as ``Table.to_pandas``.
    """
    # NOTE(review): presumably guards the case where the slice's pandas
    # metadata no longer describes the sliced data -- confirm against
    # https://github.com/apache/arrow/issues/15178
    source_pdf = pd.DataFrame.from_dict(
        {"a": ["aa", "bb", "cc"] * 3, "b": [1, 2, 3] * 3}
    )
    sliced = pa.Table.from_pandas(source_pdf).slice(3, 7)

    assert_eq(sliced.to_pandas(), cudf.DataFrame.from_arrow(sliced))

0 comments on commit 4841cf2

Please sign in to comment.