Skip to content

Commit

Permalink
FIX-modin-project#2442: fixed pandas tests
Browse files (browse the repository at this point in the history)
Signed-off-by: Dmitry Chigarev <[email protected]>
  • Loading branch information
dchigarev committed Dec 9, 2020
1 parent: 445b2bc; commit: d133223
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 60 deletions.
41 changes: 23 additions & 18 deletions modin/backends/pandas/query_compiler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2168,21 +2168,25 @@ def getitem_row_array(self, key):
"""
return self.__constructor__(self._modin_frame.mask(row_numeric_idx=key))

def insert_item(self, axis, loc, value, **kwargs):
def insert_item(self, axis, loc, value):
"""
Insert the column/row defined by `value` at the specified `loc`
Insert new column/row defined by `value` at the specified `loc`
Parameters
----------
axis:
axis: int, axis to insert along
loc: int, position to insert `value`
value: PandasQueryCompiler, value to insert
Returns
-------
A new PandasQueryCompiler
"""
assert isinstance(value, type(self))

how = kwargs.get("join", "left")

def execute_concat(left, middle, *right):
return self.__constructor__(
left._concat(axis, [middle, *right], how, sort=False)
left._concat(axis, [middle, *right], "left", sort=False)
)

def get_kwargs(value):
Expand All @@ -2201,14 +2205,6 @@ def get_kwargs(value):
return execute_concat(self._modin_frame, value._modin_frame)

def setitem(self, axis, key, value):
if isinstance(value, type(self)) and not value.get_axis(axis).equals(
self.get_axis(axis)
):
value = value.reindex(axis, self.get_axis(axis))

return self._setitem(axis=axis, key=key, value=value)

def _setitem(self, axis, key, value):
"""Set the column defined by `key` to the `value` provided.
Args:
Expand All @@ -2218,7 +2214,14 @@ def _setitem(self, axis, key, value):
Returns:
A new QueryCompiler
"""
if isinstance(value, type(self)) and not value.get_axis(axis).equals(
self.get_axis(axis)
):
value = value.reindex(axis, self.get_axis(axis))

return self._setitem(axis=axis, key=key, value=value)

def _setitem(self, axis, key, value):
def setitem_builder(df, internal_indices=[]):
df = df.copy()
if len(internal_indices) == 1:
Expand Down Expand Up @@ -2367,10 +2370,12 @@ def insert(self, loc, column, value):
Returns:
A new PandasQueryCompiler with new data inserted.
"""
if isinstance(value, (type(self), pandas.Series)) and not value.index.equals(
self.index
):
value = value.reindex(axis=0, labels=self.index)
if isinstance(value, (type(self), pandas.Series)):
value = (
value
if value.index.equals(self.index)
else value.reindex(axis=0, labels=self.index)
)
elif is_list_like(value):
value = list(value)
else:
Expand Down
21 changes: 4 additions & 17 deletions modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -494,7 +494,8 @@ def applier(df, **kwargs):
def test_setitem_default(self):
def applier(df, lib, **kwargs):
df = df + 1
df["a"] = lib.Series(np.arange(3))
df["a"] = np.arange(3)
df["b"] = lib.Series(np.arange(3))
return df

run_and_compare(applier, data=self.data, force_lazy=False)
Expand All @@ -504,29 +505,15 @@ def applier(df, **kwargs):
df = df + 1
df.insert(2, "new_int", 10)
df.insert(2, "new_float", 5.5)
return df

run_and_compare(applier, data=self.data)

def test_insert_default(self):
def applier(df, **kwargs):
df = df + 1
df.insert(2, np.arange(3))
return df

run_and_compare(applier, data=self.data, force_lazy=False)

def test_insert_qc_lazy(self):
def applier(df, **kwargs):
df = df + 1
df.insert(loc=2, column="new_a", value=df["a"] + 1)
return df

run_and_compare(applier, data=self.data)

def test_insert_qc_default(self):
def test_insert_default(self):
def applier(df, lib, **kwargs):
df = df + 1
df.insert(2, np.arange(3))
df.insert(loc=2, column="new_a", value=lib.Series(np.arange(3)))
return df

Expand Down
21 changes: 9 additions & 12 deletions modin/pandas/dataframe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -924,24 +924,21 @@ def insert(self, loc, column, value, allow_duplicates=False):
value = value.squeeze(axis=1)

if not self._query_compiler.lazy_execution and len(self.index) == 0:
# Can't insert in distributed way in that case
value = try_cast_to_pandas(value)
try:
value = pandas.Series(value)
except (TypeError, ValueError, IndexError):
raise ValueError(
"Cannot insert into a DataFrame with no defined index "
"and a value that cannot be converted to a "
"Series"
)
if not hasattr(value, "index"):
try:
value = pandas.Series(value)
except (TypeError, ValueError, IndexError):
raise ValueError(
"Cannot insert into a DataFrame with no defined index "
"and a value that cannot be converted to a "
"Series"
)
new_index = value.index.copy()
new_columns = self.columns.insert(loc, column)
new_query_compiler = DataFrame(
value, index=new_index, columns=new_columns
)._query_compiler
elif len(self.columns) == 0 and loc == 0:
# if isinstance(value, (pandas.Series, Series)):
# value = value.reindex(index=self.index)
new_query_compiler = DataFrame(
data=value, columns=[column], index=self.index
)._query_compiler
Expand Down
7 changes: 7 additions & 0 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1164,9 +1164,16 @@ def test___setitem__(data):

# from issue #2442
data = {"a": [1, 2, 3, 4]}
# Index with duplicated timestamp
index = pandas.to_datetime(["2020-02-06", "2020-02-06", "2020-02-22", "2020-03-26"])

md_df, pd_df = create_test_dfs(data, index=index)
# Setting new column
pd_df["b"] = pandas.Series(np.arange(4))
md_df["b"] = pd.Series(np.arange(4))

df_equals(md_df, pd_df)
# Setting existing column
pd_df["b"] = pandas.Series(np.arange(4))
md_df["b"] = pd.Series(np.arange(4))

Expand Down
14 changes: 1 addition & 13 deletions modin/pandas/test/dataframe/test_map_metadata.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -921,19 +921,7 @@ def test_dropna_subset_error(data, axis, subset):


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
"astype",
[
"category",
pytest.param(
"int32",
marks=pytest.mark.xfail(
reason="Modin astype() does not raises ValueError at non-numeric argument when Pandas does."
),
),
"float",
],
)
@pytest.mark.parametrize("astype", ["category", "int32", "float"])
def test_insert_dtypes(data, astype):
modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)

Expand Down

0 comments on commit d133223

Please sign in to comment.