Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement update() function #6883

Merged
merged 23 commits into from
Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2f58491
implementing update() function
skirui-source Dec 2, 2020
9c2b1ac
stash changes to switch branch
skirui-source Dec 9, 2020
7d31eb5
stash changes to switch branch
skirui-source Dec 9, 2020
b72a8d5
Made edits to update()
skirui-source Dec 15, 2020
a2bbab3
Made edits to update()
skirui-source Dec 15, 2020
73c3683
edits
skirui-source Dec 16, 2020
bff2cba
fixed conflicts in test_dataframe.py
skirui-source Dec 16, 2020
32730a0
All tests passing. ready for review
skirui-source Dec 18, 2020
abf4a58
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Dec 18, 2020
7e05ed5
Addressed Prem's review comments
skirui-source Jan 6, 2021
e587c52
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Jan 6, 2021
b923e16
Addressed review comments. Ready for review
skirui-source Jan 8, 2021
56b852c
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Jan 8, 2021
388b6f9
addressed review comments
skirui-source Jan 11, 2021
3448baf
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Jan 11, 2021
c6d6d59
added test for unequal indices
skirui-source Jan 13, 2021
fbb4c52
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Jan 13, 2021
d4a34a6
Fixed changes in Docstrings
skirui-source Jan 19, 2021
273c3af
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Jan 19, 2021
cf2648b
fixed typo
skirui-source Jan 21, 2021
5347363
fixed merge conflicts
skirui-source Jan 21, 2021
b5ad501
Recovered two deleted tests
skirui-source Jan 21, 2021
994fe70
Merge branch 'branch-0.18' of https://github.com/rapidsai/cudf into u…
skirui-source Jan 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,90 @@ def add(self, other, axis="columns", level=None, fill_value=None):

return self._apply_op("add", other, fill_value)


def update(
    self,
    other,
    join="left",
    overwrite=True,
    filter_func=None,
    errors="ignore",
):
    """
    Modify a DataFrame in place using non-NA values from another DataFrame.

    Aligns on indices. There is no return value.

    Parameters
    ----------
    other : DataFrame, or object coercible into a DataFrame
        Should have at least one matching index/column label with the
        original DataFrame. If a Series is passed, its name attribute must
        be set, and that will be used as the column name to align with the
        original DataFrame.
    join : {'left'}, default 'left'
        Only left join is implemented, keeping the index and
        columns of the original object.
    overwrite : bool, default True
        How to handle non-NA values for overlapping keys:

        - True: overwrite original DataFrame's values with values
          from `other`.
        - False: only update values that are NA in the original
          DataFrame.
    filter_func : None
        Not supported yet; any value other than None raises
        NotImplementedError.
    errors : {'raise', 'ignore'}, default 'ignore'
        If 'raise', will raise a ValueError if the DataFrame and `other`
        both contain non-NA data in the same place.

    Returns
    -------
    None : method directly changes calling object

    Raises
    ------
    ValueError
        - When ``errors='raise'`` and there's overlapping non-NA data.
        - When ``errors`` is not either 'ignore' or 'raise'
    NotImplementedError
        - If ``join != 'left'``
        - If ``filter_func`` is not None
    """
    # TODO: Support other joins
    if join != "left":
        raise NotImplementedError("Only left join is supported")
    if errors not in {"ignore", "raise"}:
        raise ValueError(
            "The parameter errors must be either 'ignore' or 'raise'"
        )
    if filter_func is not None:
        raise NotImplementedError("filter_func is not supported yet")

    if not isinstance(other, DataFrame):
        other = DataFrame(other)

    # Left-join semantics: conform `other` to this frame's labels. Compare
    # the label sets themselves (not a truthiness reduction of them) so that
    # any mismatch triggers the reindex; missing labels become NA and are
    # therefore ignored by the masks below.
    if not self.columns.equals(other.columns):
        other = other.reindex(self.columns, axis=1)
    if not self.index.equals(other.index):
        other = other.reindex(self.index, axis=0)

    for col in self.columns:
        this = self[col]
        that = other[col]

        if errors == "raise":
            mask_this = that.notna()
            mask_that = this.notna()
            if (mask_this & mask_that).any():
                raise ValueError("Data overlaps.")

        # `mask` marks the positions where the current value is KEPT.
        if overwrite:
            mask = that.isna()
        else:
            mask = this.notna()

        # don't overwrite columns unnecessarily
        if mask.all():
            continue

        self[col] = this.where(mask, that)


def __add__(self, other):
    """Implement ``self + other`` by delegating to ``_apply_op``."""
    result = self._apply_op("__add__", other)
    return result

Expand Down
60 changes: 60 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8191,3 +8191,63 @@ def test_agg_for_dataframe_with_string_columns(aggs):
),
):
gdf.agg(aggs)


@pytest.mark.parametrize("join", ["left"])
@pytest.mark.parametrize("overwrite", [True, False])
@pytest.mark.parametrize("filter_func", [None])
@pytest.mark.parametrize("errors", ["ignore"])
@pytest.mark.parametrize(
    "data",
    [
        {"a": [1, 2, 3], "b": [3, 4, 5]},
        {"a": [1.0, 2.0, 3.0], "b": [3.0, 4.0, 5.0]},
        {"a": [False, False, True], "b": [True, True, False]},
        {"a": [2.0, np.nan, 4.0], "b": [np.nan, np.nan, np.nan]},
        {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
    ],
)
@pytest.mark.parametrize(
    "data2",
    [
        {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
        {"a": [True, False, True], "b": [3.0, 4.0, 5.0]},
        {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
        {"a": [np.nan, 2.0, np.nan], "b": [2, np.nan, 5.0]},
    ],
)
def test_update_for_dataframes(
    data, data2, join, overwrite, filter_func, errors
):
    """Check that cudf ``update`` mutates a frame the same way pandas does."""
    pdf = pd.DataFrame(data)
    gdf = gd.DataFrame(data)

    other_pd = pd.DataFrame(data2)
    other_gd = gd.DataFrame(data2)

    # ``update`` works in place and returns None on both libraries, so the
    # return values carry no information; compare the mutated frames instead.
    pdf.update(
        other_pd,
        join=join,
        overwrite=overwrite,
        filter_func=filter_func,
        errors=errors,
    )
    gdf.update(
        other_gd,
        join=join,
        overwrite=overwrite,
        filter_func=filter_func,
        errors=errors,
    )

    # NOTE(review): dtype check relaxed on the assumption that the two
    # libraries may pick different result dtypes when mixing int/bool/float
    # inputs -- confirm against the project's assert_eq helper.
    assert_eq(pdf, gdf, check_dtype=False)

@pytest.mark.parametrize("join", ["right"])
def test_update_for_right_join(join):
    """Check that a non-left ``join`` is rejected with NotImplementedError."""
    gdf = gd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
    other_gd = gd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})

    with pytest.raises(
        NotImplementedError, match="Only left join is supported"
    ):
        gdf.update(other_gd, join=join)

@pytest.mark.parametrize("errors", ["raise"])
def test_update_for_data_overlap(errors):
    """Check that overlapping non-NA data raises the same error as pandas."""
    pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
    gdf = gd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})

    other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
    other_gd = gd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})

    # ``errors`` must be passed by keyword: the second positional parameter
    # of ``update`` is ``join``, so a positional "raise" would only trigger
    # the unsupported-join error and never reach the overlap check.
    assert_exceptions_equal(
        lambda: pdf.update(other_pd, errors=errors),
        lambda: gdf.update(other_gd, errors=errors),
    )
skirui-source marked this conversation as resolved.
Show resolved Hide resolved