modin-project · gshimansky · May 28, 2020 · May 27, 2020 · May 27, 2020
@@ -2059,6 +2059,41 @@ def __setattr__(self, key, value):
         object.__setattr__(self, key, value)
 
     def __setitem__(self, key, value):
+        if key not in self.columns:
+            # Handle new column case first
+            if isinstance(value, Series):
+                if len(self.columns) == 0:
+                    self._query_compiler = value._query_compiler.copy()
+                else:
+                    self._create_or_update_from_compiler(
+                        self._query_compiler.concat(1, value._query_compiler),
+                        inplace=True,
+                    )
+                # Now that the data is appended, we need to update the column name for
+                # that column to `key`, otherwise the name could be incorrect. Drop the
+                # last column name from the list (the appended value's name and append
+                # the new name.
+                self.columns = self.columns[:-1].append(pandas.Index([key]))
+            elif (
+                isinstance(value, np.ndarray)
+                and len(value.shape) > 1
+                and value.shape[1] != 1
+            ):
+                raise ValueError(
+                    "Wrong number of items passed %i, placement implies 1"
+                    % value.shape[1]
+                )
+            elif (
+                isinstance(value, (pandas.DataFrame, DataFrame)) and value.shape[1] != 1
+            ):
+                raise ValueError(
+                    "Wrong number of items passed %i, placement implies 1"
+                    % value.shape[1]
+                )
+            else:
+                self.insert(loc=len(self.columns), column=key, value=value)
+            return
+
         if not isinstance(key, str):
 
             def setitem_without_string_columns(df):
@@ -2073,22 +2108,8 @@ def setitem_without_string_columns(df):
             )
         if is_list_like(value):
             if isinstance(value, (pandas.DataFrame, DataFrame)):
-                if value.shape[1] != 1 and key not in self.columns:
-                    raise ValueError(
-                        "Wrong number of items passed %i, placement implies 1"
-                        % value.shape[1]
-                    )
                 value = value[value.columns[0]].values
             elif isinstance(value, np.ndarray):
-                if (
-                    len(value.shape) > 1
-                    and value.shape[1] != 1
-                    and key not in self.columns
-                ):
-                    raise ValueError(
-                        "Wrong number of items passed %i, placement implies 1"
-                        % value.shape[1]
-                    )
                 assert (
                     len(value.shape) < 3
                 ), "Shape of new values must be compatible with manager shape"
@@ -2097,23 +2118,8 @@ def setitem_without_string_columns(df):
                     value = value[: len(self)]
             if not isinstance(value, Series):
                 value = list(value)
-        if key not in self.columns:
-            if isinstance(value, Series):
-                if len(self.columns) == 0:
-                    self._query_compiler = value._query_compiler.copy()
-                else:
-                    self._create_or_update_from_compiler(
-                        self._query_compiler.concat(1, value._query_compiler),
-                        inplace=True,
-                    )
-                # Now that the data is appended, we need to update the column name for
-                # that column to `key`, otherwise the name could be incorrect. Drop the
-                # last column name from the list (the appended value's name and append
-                # the new name.
-                self.columns = self.columns[:-1].append(pandas.Index([key]))
-            else:
-                self.insert(loc=len(self.columns), column=key, value=value)
-        elif len(self.index) == 0:
+
+        if len(self.index) == 0:
             new_self = DataFrame({key: value}, columns=self.columns)
             self._update_inplace(new_self._query_compiler)
         else:

@@ -5131,14 +5131,30 @@ def test___setitem__(self, data):
 
         df_equals(modin_df, pandas_df)
 
-    def test_setitem_on_empty_df(self):
-        columns = ["id", "max_speed", "health"]
-        modin_df = pd.DataFrame(columns=columns)
-        pandas_df = pandas.DataFrame(columns=columns)
-        a = np.array(["one", "two"])
-
-        modin_df["id"] = a
-        pandas_df["id"] = a
+    @pytest.mark.parametrize(
+        "data",
+        [
+            {},
+            pytest.param(
+                {"id": [], "max_speed": [], "health": []},
+                marks=pytest.mark.xfail(
+                    reason="Throws an exception because generally assigning Series or other objects of length different from DataFrame does not work right now"
+                ),
+            ),
+        ],
+        ids=["empty", "empty_columns"],
+    )
+    @pytest.mark.parametrize(
+        "value", [np.array(["one", "two"]), [11, 22]], ids=["ndarray", "list"],
+    )
+    @pytest.mark.parametrize("convert_to_series", [False, True])
+    @pytest.mark.parametrize("new_col_id", [123, "new_col"], ids=["integer", "string"])
+    def test_setitem_on_empty_df(self, data, value, convert_to_series, new_col_id):
+        pandas_df = pandas.DataFrame(data)
+        modin_df = pd.DataFrame(data)
+
+        pandas_df[new_col_id] = pandas.Series(value) if convert_to_series else value
+        modin_df[new_col_id] = pd.Series(value) if convert_to_series else value
         df_equals(modin_df, pandas_df)
 
     @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)