REF/TST: method-specific files for test_append (pandas-dev#30503)

AlexKirko · Dec 29, 2019 · e997225 · e997225
1 parent 41eae40
commit e997225
Show file tree

Hide file tree

Showing 9 changed files with 369 additions and 370 deletions.
diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py
@@ -0,0 +1,179 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import DataFrame, Series, Timestamp
+import pandas.util.testing as tm
+
+
+class TestDataFrameAppend:
+    def test_append_empty_list(self):
+        # GH 28769: appending an empty list yields an equal but distinct frame
+        df = DataFrame()
+        result = df.append([])
+        expected = df
+        tm.assert_frame_equal(result, expected)
+        assert result is not df  # .append() should return a new object
+
+        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
+        result = df.append([])
+        expected = df
+        tm.assert_frame_equal(result, expected)
+        assert result is not df  # .append() should return a new object
+
+    def test_append_series_dict(self):  # a Series or dict appended as one row
+        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
+        # series is row 4 of df itself, so verify_integrity=True must reject it
+        series = df.loc[4]
+        msg = "Indexes have overlapping values"
+        with pytest.raises(ValueError, match=msg):
+            df.append(series, verify_integrity=True)
+        # with the name cleared and no ignore_index, a TypeError is expected
+        series.name = None
+        msg = "Can only append a Series if ignore_index=True"
+        with pytest.raises(TypeError, match=msg):
+            df.append(series, verify_integrity=True)
+        # reversed Series vs. the same data appended as a one-row DataFrame
+        result = df.append(series[::-1], ignore_index=True)
+        expected = df.append(
+            DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # dict: appending series.to_dict() must produce the same frame
+        result = df.append(series.to_dict(), ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+        # only the first 3 reversed entries; expected is realigned to result.columns
+        result = df.append(series[::-1][:3], ignore_index=True)
+        expected = df.append(
+            DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True
+        )
+        tm.assert_frame_equal(result, expected.loc[:, result.columns])
+
+        # can append when name set: no ignore_index needed once row.name = 5
+        row = df.loc[4]
+        row.name = 5
+        result = df.append(row)
+        expected = df.append(df[-1:], ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_list_of_series_dicts(self):  # a list of dicts appended as rows
+        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
+        # one dict per row of df, so the result should equal df appended to itself
+        dicts = [x.to_dict() for idx, x in df.iterrows()]
+
+        result = df.append(dicts, ignore_index=True)
+        expected = df.append(df, ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+
+        # different columns: the dicts add "peekaboo" and omit "qux"
+        dicts = [
+            {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4},
+            {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8},
+        ]
+        result = df.append(dicts, ignore_index=True, sort=True)
+        expected = df.append(DataFrame(dicts), ignore_index=True, sort=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_missing_cols(self):
+        # GH22252
+        # exercise the conditional branch in append method where the data
+        # to be appended is a list and does not contain all columns that are in
+        # the target DataFrame
+        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
+
+        dicts = [{"foo": 9}, {"bar": 10}]  # each dict sets one of the four columns
+        with tm.assert_produces_warning(None):  # i.e. no warning is expected
+            result = df.append(dicts, ignore_index=True, sort=True)
+
+        expected = df.append(DataFrame(dicts), ignore_index=True, sort=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_empty_dataframe(self):
+        # in every case below, appending an empty frame must equal a copy of df1
+        # Empty df append empty df
+        df1 = DataFrame()
+        df2 = DataFrame()
+        result = df1.append(df2)
+        expected = df1.copy()
+        tm.assert_frame_equal(result, expected)
+
+        # Non-empty df append empty df
+        df1 = DataFrame(np.random.randn(5, 2))
+        df2 = DataFrame()
+        result = df1.append(df2)
+        expected = df1.copy()
+        tm.assert_frame_equal(result, expected)
+
+        # Empty df with columns append empty df
+        df1 = DataFrame(columns=["bar", "foo"])
+        df2 = DataFrame()
+        result = df1.append(df2)
+        expected = df1.copy()
+        tm.assert_frame_equal(result, expected)
+
+        # Non-Empty df with columns append empty df
+        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
+        df2 = DataFrame()
+        result = df1.append(df2)
+        expected = df1.copy()
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_dtypes(self):
+
+        # GH 5754
+        # row appends of different dtypes (so need to do by-item)
+        # can sometimes infer the correct type
+        # case 1: datetime column + empty frame -> result equals a copy of df1
+        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
+        df2 = DataFrame()
+        result = df1.append(df2)
+        expected = df1.copy()
+        tm.assert_frame_equal(result, expected)
+        # case 2: datetime + str -> expected column holds both values as given
+        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
+        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
+        result = df1.append(df2)
+        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
+        tm.assert_frame_equal(result, expected)
+        # case 3: datetime + NaN -> expected dtype is M8[ns]
+        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
+        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
+        result = df1.append(df2)
+        expected = DataFrame(
+            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
+        )
+        tm.assert_frame_equal(result, expected)
+        # case 4: datetime + NaN stored as object -> expected dtype is still M8[ns]
+        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
+        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
+        result = df1.append(df2)
+        expected = DataFrame(
+            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
+        )
+        tm.assert_frame_equal(result, expected)
+        # case 5: NaN + datetime (operands swapped) -> expected dtype is M8[ns]
+        df1 = DataFrame({"bar": np.nan}, index=range(1))
+        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
+        result = df1.append(df2)
+        expected = DataFrame(
+            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
+        )
+        tm.assert_frame_equal(result, expected)
+        # case 6: datetime + int stored as object -> expected Series sets no dtype
+        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
+        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
+        result = df1.append(df2)
+        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"]
+    )
+    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
+        # GH 30238: a row appended back onto its own frame must keep its Timestamp
+        tz = tz_naive_fixture
+        df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)])
+        result = df.append(df.iloc[0]).iloc[-1]  # last row is the appended one
+        expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0)
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -62,32 +62,15 @@ def test_cov(self, float_frame, float_string_frame):
 class TestDataFrameCorr:
     # DataFrame.corr(), as opposed to DataFrame.corrwith
 
-    @staticmethod
-    def _check_method(frame, method="pearson"):
-        correls = frame.corr(method=method)
-        expected = frame["A"].corr(frame["C"], method=method)
-        tm.assert_almost_equal(correls["A"]["C"], expected)
-
-    @td.skip_if_no_scipy
-    def test_corr_pearson(self, float_frame):
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][5:10] = np.nan
-
-        self._check_method(float_frame, "pearson")
-
+    @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"])
     @td.skip_if_no_scipy
-    def test_corr_kendall(self, float_frame):
+    def test_corr_scipy_method(self, float_frame, method):
         float_frame["A"][:5] = np.nan
         float_frame["B"][5:10] = np.nan
 
-        self._check_method(float_frame, "kendall")
-
-    @td.skip_if_no_scipy
-    def test_corr_spearman(self, float_frame):
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][5:10] = np.nan
-
-        self._check_method(float_frame, "spearman")
+        correls = float_frame.corr(method=method)
+        expected = float_frame["A"].corr(float_frame["C"], method=method)
+        tm.assert_almost_equal(correls["A"]["C"], expected)
 
     # ---------------------------------------------------------------------
 

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
@@ -113,6 +113,15 @@ def test_rank2(self):
         exp = DataFrame({"a": [3.5, 1.0, 3.5, 5.0, 6.0, 7.0, 2.0]})
         tm.assert_frame_equal(df.rank(), exp)
 
+    def test_rank_does_not_mutate(self):
+        # GH#18521
+        # Check rank does not mutate DataFrame
+        df = DataFrame(np.random.randn(10, 3), dtype="float64")
+        expected = df.copy()
+        df.rank()  # return value is discarded; only df itself is checked below
+        result = df
+        tm.assert_frame_equal(result, expected)
+
     def test_rank_mixed_frame(self, float_string_frame):
         float_string_frame["datetime"] = datetime.now()
         float_string_frame["timedelta"] = timedelta(days=1, seconds=1)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -1248,15 +1248,6 @@ def test_matmul(self):
     # ---------------------------------------------------------------------
     # Unsorted
 
-    def test_series_nat_conversion(self):
-        # GH 18521
-        # Check rank does not mutate DataFrame
-        df = DataFrame(np.random.randn(10, 3), dtype="float64")
-        expected = df.copy()
-        df.rank()
-        result = df
-        tm.assert_frame_equal(result, expected)
-
     def test_series_broadcasting(self):
         # smoke test for numpy warnings
         # GH 16378, GH 16306