diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index a8a766406e1..7a5092fead9 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -2168,21 +2168,25 @@ def getitem_row_array(self, key): """ return self.__constructor__(self._modin_frame.mask(row_numeric_idx=key)) - def insert_item(self, axis, loc, value, **kwargs): + def insert_item(self, axis, loc, value): """ - Insert the column/row defined by `value` at the specified `loc` + Insert new column/row defined by `value` at the specified `loc` Parameters ---------- - axis: + axis: int, axis to insert along + loc: int, position to insert `value` + value: PandasQueryCompiler, value to insert + + Returns + ------- + A new PandasQueryCompiler """ assert isinstance(value, type(self)) - how = kwargs.get("join", "left") - def execute_concat(left, middle, *right): return self.__constructor__( - left._concat(axis, [middle, *right], how, sort=False) + left._concat(axis, [middle, *right], "left", sort=False) ) def get_kwargs(value): @@ -2201,14 +2205,6 @@ def get_kwargs(value): return execute_concat(self._modin_frame, value._modin_frame) def setitem(self, axis, key, value): - if isinstance(value, type(self)) and not value.get_axis(axis).equals( - self.get_axis(axis) - ): - value = value.reindex(axis, self.get_axis(axis)) - - return self._setitem(axis=axis, key=key, value=value) - - def _setitem(self, axis, key, value): """Set the column defined by `key` to the `value` provided. Args: @@ -2218,7 +2214,14 @@ def _setitem(self, axis, key, value): Returns: A new QueryCompiler """ + if isinstance(value, type(self)) and not value.get_axis(axis).equals( + self.get_axis(axis) + ): + value = value.reindex(axis, self.get_axis(axis)) + return self._setitem(axis=axis, key=key, value=value) + + def _setitem(self, axis, key, value): def setitem_builder(df, internal_indices=[]): df = df.copy() if len(internal_indices) == 1: @@ -2367,10 +2370,12 @@ def insert(self, loc, column, value): Returns: A new PandasQueryCompiler with new data inserted. """ - if isinstance(value, (type(self), pandas.Series)) and not value.index.equals( - self.index - ): - value = value.reindex(axis=0, labels=self.index) + if isinstance(value, (type(self), pandas.Series)): + value = ( + value + if value.index.equals(self.index) + else value.reindex(axis=0, labels=self.index) + ) elif is_list_like(value): value = list(value) else: diff --git a/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py b/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py index 43ead91649c..4507e46f084 100644 --- a/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py +++ b/modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py @@ -494,7 +494,8 @@ def applier(df, **kwargs): def test_setitem_default(self): def applier(df, lib, **kwargs): df = df + 1 - df["a"] = lib.Series(np.arange(3)) + df["a"] = np.arange(3) + df["b"] = lib.Series(np.arange(3)) return df run_and_compare(applier, data=self.data, force_lazy=False) @@ -504,29 +505,15 @@ def applier(df, **kwargs): df = df + 1 df.insert(2, "new_int", 10) df.insert(2, "new_float", 5.5) - return df - - run_and_compare(applier, data=self.data) - - def test_insert_default(self): - def applier(df, **kwargs): - df = df + 1 - df.insert(2, np.arange(3)) - return df - - run_and_compare(applier, data=self.data, force_lazy=False) - - def test_insert_qc_lazy(self): - def applier(df, **kwargs): - df = df + 1 df.insert(loc=2, column="new_a", value=df["a"] + 1) return df run_and_compare(applier, data=self.data) - def test_insert_qc_default(self): + def test_insert_default(self): def applier(df, lib, **kwargs): df = df + 1 + df.insert(2, np.arange(3)) df.insert(loc=2, column="new_a", value=lib.Series(np.arange(3))) return df diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index 3f9e3b408b6..5ed7b445672 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -924,24 +924,21 @@ def insert(self, loc, column, value, allow_duplicates=False): value = value.squeeze(axis=1) if not self._query_compiler.lazy_execution and len(self.index) == 0: - # Can't insert in distributed way in that case - value = try_cast_to_pandas(value) - try: - value = pandas.Series(value) - except (TypeError, ValueError, IndexError): - raise ValueError( - "Cannot insert into a DataFrame with no defined index " - "and a value that cannot be converted to a " - "Series" - ) + if not hasattr(value, "index"): + try: + value = pandas.Series(value) + except (TypeError, ValueError, IndexError): + raise ValueError( + "Cannot insert into a DataFrame with no defined index " + "and a value that cannot be converted to a " + "Series" + ) new_index = value.index.copy() new_columns = self.columns.insert(loc, column) new_query_compiler = DataFrame( value, index=new_index, columns=new_columns )._query_compiler elif len(self.columns) == 0 and loc == 0: - # if isinstance(value, (pandas.Series, Series)): - # value = value.reindex(index=self.index) new_query_compiler = DataFrame( data=value, columns=[column], index=self.index )._query_compiler diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py index a22b8f013e5..1f0966c1271 100644 --- a/modin/pandas/test/dataframe/test_indexing.py +++ b/modin/pandas/test/dataframe/test_indexing.py @@ -1164,9 +1164,16 @@ def test___setitem__(data): # from issue #2442 data = {"a": [1, 2, 3, 4]} + # Index with duplicated timestamp index = pandas.to_datetime(["2020-02-06", "2020-02-06", "2020-02-22", "2020-03-26"]) md_df, pd_df = create_test_dfs(data, index=index) + # Setting new column + pd_df["b"] = pandas.Series(np.arange(4)) + md_df["b"] = pd.Series(np.arange(4)) + + df_equals(md_df, pd_df) + # Setting existing column pd_df["b"] = pandas.Series(np.arange(4)) md_df["b"] = pd.Series(np.arange(4)) diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py index d0b7d1655d3..55e0e8e839c 100644 --- a/modin/pandas/test/dataframe/test_map_metadata.py +++ b/modin/pandas/test/dataframe/test_map_metadata.py @@ -921,19 +921,7 @@ def test_dropna_subset_error(data, axis, subset): @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) -@pytest.mark.parametrize( - "astype", - [ - "category", - pytest.param( - "int32", - marks=pytest.mark.xfail( - reason="Modin astype() does not raises ValueError at non-numeric argument when Pandas does." - ), - ), - "float", - ], -) +@pytest.mark.parametrize("astype", ["category", "int32", "float"]) def test_insert_dtypes(data, astype): modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)