Skip to content

Commit

Permalink
FIX-modin-project#2442: fixed Series assignment with different indices
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Chigarev <[email protected]>
  • Loading branch information
dchigarev committed Dec 9, 2020
1 parent 7c46bdd commit 80fef5e
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 29 deletions.
1 change: 1 addition & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1984,6 +1984,7 @@ def __setitem__(self, key, value):
if len(self.columns) == 0:
self._query_compiler = value._query_compiler.copy()
else:
value = value.reindex(self.index)
self._create_or_update_from_compiler(
self._query_compiler.concat(
1,
Expand Down
62 changes: 33 additions & 29 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
int_arg_keys,
int_arg_values,
create_test_dfs,
eval_general,
)

pd.DEFAULT_NPARTITIONS = 4
Expand All @@ -42,6 +43,17 @@
matplotlib.use("Agg")


def eval_setitem(md_df, pd_df, value, col=None, loc=None):
    """
    Run the same ``__setitem__`` on a Modin/pandas frame pair through ``eval_general``.

    Parameters
    ----------
    md_df, pd_df
        The two frames handed to ``eval_general``.
    value
        Value to assign. If it is callable, it is called with the frame being
        modified and its result is assigned; otherwise it is assigned as is.
    col
        Key to assign to. Overridden when `loc` is given.
    loc
        Position into ``pd_df.columns``; when not None, the key is taken
        from there instead of `col`.
    """
    if loc is not None:
        # Resolve the positional request to an actual column label.
        col = pd_df.columns[loc]

    # Normalize `value` to a callable so both frames go through one code path.
    value_getter = value if callable(value) else (lambda *args, **kwargs: value)

    # The key must be the resolved `col`; the previous `idx` was never defined
    # and raised NameError as soon as the lambda ran.
    eval_general(
        md_df, pd_df, lambda df: df.__setitem__(col, value_getter(df)), __inplace__=True
    )


@pytest.mark.parametrize(
"dates",
[
Expand Down Expand Up @@ -1075,38 +1087,20 @@ def test___getattr__(request, data):

@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test___setitem__(data):
modin_df = pd.DataFrame(data)
pandas_df = pandas.DataFrame(data)

modin_df.__setitem__(modin_df.columns[-1], 1)
pandas_df.__setitem__(pandas_df.columns[-1], 1)
df_equals(modin_df, pandas_df)

modin_df = pd.DataFrame(data)
pandas_df = pandas.DataFrame(data)

modin_df[modin_df.columns[-1]] = pd.DataFrame(modin_df[modin_df.columns[0]])
pandas_df[pandas_df.columns[-1]] = pandas.DataFrame(pandas_df[pandas_df.columns[0]])
df_equals(modin_df, pandas_df)

modin_df = pd.DataFrame(data)
pandas_df = pandas.DataFrame(data)
eval_setitem(*create_test_dfs(data), loc=-1, value=1)
# Build the value from the first column of the frame under test; `columns`
# alone is not defined in this scope, so it must be read from `df`.
eval_setitem(
*create_test_dfs(data), loc=-1, value=lambda df: type(df)(df[df.columns[0]])
)

rows = len(modin_df)
arr = np.arange(rows * 2).reshape(-1, 2)
modin_df[modin_df.columns[-1]] = arr
pandas_df[pandas_df.columns[-1]] = arr
df_equals(pandas_df, modin_df)
nrows = len(data[list(data.keys())[0]])
arr = np.arange(nrows * 2).reshape(-1, 2)

with pytest.raises(ValueError, match=r"Wrong number of items passed"):
modin_df["___NON EXISTENT COLUMN"] = arr
eval_setitem(*create_test_dfs(data), loc=-1, value=arr)
eval_setitem(*create_test_dfs(data), col="___NON EXISTENT COLUMN", value=arr)
eval_setitem(*create_test_dfs(data), loc=0, value=np.arange(nrows))

modin_df[modin_df.columns[0]] = np.arange(len(modin_df))
pandas_df[pandas_df.columns[0]] = np.arange(len(pandas_df))
df_equals(modin_df, pandas_df)

modin_df = pd.DataFrame(columns=modin_df.columns)
pandas_df = pandas.DataFrame(columns=pandas_df.columns)
modin_df = pd.DataFrame(columns=data.keys())
pandas_df = pandas.DataFrame(columns=data.keys())

for col in modin_df.columns:
modin_df[col] = np.arange(1000)
Expand Down Expand Up @@ -1168,6 +1162,16 @@ def test___setitem__(data):
pandas_df["b"] = pandas.Series([4, 5, 6, 7, 8])
df_equals(modin_df, pandas_df)

# from issue #2442
data = {"a": [1, 2, 3, 4]}
index = pandas.to_datetime(["2020-02-06", "2020-02-06", "2020-02-22", "2020-03-26"])

md_df, pd_df = create_test_dfs(data, index=index)
pd_df["b"] = pandas.Series(np.arange(4))
md_df["b"] = pd.Series(np.arange(4))

df_equals(md_df, pd_df)


def test___setitem__with_mismatched_partitions():
fname = "200kx99.csv"
Expand Down

0 comments on commit 80fef5e

Please sign in to comment.