Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

REF/TST: method-specific files for test_append #30503

Merged
merged 4 commits into from
Dec 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions pandas/tests/frame/methods/test_append.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Series, Timestamp
import pandas.util.testing as tm


class TestDataFrameAppend:
    """Tests for ``DataFrame.append`` with lists, Series, dicts and frames."""

    def test_append_empty_list(self):
        """Appending ``[]`` returns an equal frame that is a distinct object."""
        # GH 28769
        df = DataFrame()
        result = df.append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # .append() should return a new object

        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
        result = df.append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # .append() should return a new object

    def test_append_series_dict(self):
        """Append a single Series or dict row, including the error paths."""
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        # The row keeps its label (4), which already exists in df.index.
        series = df.loc[4]
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            df.append(series, verify_integrity=True)

        # An unnamed Series has no row label to use unless ignore_index=True.
        series.name = None
        msg = "Can only append a Series if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df.append(series, verify_integrity=True)

        # Reversed Series: compared against appending the equivalent one-row frame.
        result = df.append(series[::-1], ignore_index=True)
        expected = df.append(
            DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True
        )
        tm.assert_frame_equal(result, expected)

        # dict
        result = df.append(series.to_dict(), ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # Series covering only a subset of the columns
        result = df.append(series[::-1][:3], ignore_index=True)
        expected = df.append(
            DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True
        )
        # expected was built with sort=True, so realign it to result's columns.
        tm.assert_frame_equal(result, expected.loc[:, result.columns])

        # can append when name set
        row = df.loc[4]
        row.name = 5
        result = df.append(row)
        expected = df.append(df[-1:], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_of_series_dicts(self):
        """Append a list of dicts, with matching and with different columns."""
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        # Only the row values are needed; the index label is discarded.
        dicts = [row.to_dict() for _, row in df.iterrows()]

        result = df.append(dicts, ignore_index=True)
        expected = df.append(df, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # different columns
        dicts = [
            {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4},
            {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8},
        ]
        result = df.append(dicts, ignore_index=True, sort=True)
        expected = df.append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_missing_cols(self):
        """Append a list of dicts that lack some of the frame's columns."""
        # GH22252
        # exercise the conditional branch in append method where the data
        # to be appended is a list and does not contain all columns that are in
        # the target DataFrame
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [{"foo": 9}, {"bar": 10}]
        # The append itself must not emit any warning.
        with tm.assert_produces_warning(None):
            result = df.append(dicts, ignore_index=True, sort=True)

        expected = df.append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_empty_dataframe(self):
        """Appending an empty DataFrame leaves the left-hand frame unchanged."""

        # Empty df append empty df
        df1 = DataFrame()
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

    def test_append_dtypes(self):
        """Check the resulting column when rows of differing dtypes are appended."""

        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        # Timestamp column + empty frame
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Timestamp + str
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
        result = df1.append(df2)
        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
        tm.assert_frame_equal(result, expected)

        # Timestamp + NaN: expected to stay datetime64[ns]
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
        result = df1.append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        tm.assert_frame_equal(result, expected)

        # Timestamp + object-dtype NaN: expected to stay datetime64[ns]
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
        result = df1.append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        tm.assert_frame_equal(result, expected)

        # NaN + Timestamp (reverse order): expected datetime64[ns]
        df1 = DataFrame({"bar": np.nan}, index=range(1))
        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
        result = df1.append(df2)
        expected = DataFrame(
            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
        )
        tm.assert_frame_equal(result, expected)

        # Timestamp + object-dtype integer
        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
        result = df1.append(df2)
        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"]
    )
    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
        """Appending a frame's own Timestamp row gives that Timestamp back.

        ``tz_naive_fixture`` is a fixture defined outside this file that
        supplies the ``tz`` argument passed to ``Timestamp``.
        """
        # GH 30238
        tz = tz_naive_fixture
        df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)])
        result = df.append(df.iloc[0]).iloc[-1]
        expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0)
        tm.assert_series_equal(result, expected)
27 changes: 5 additions & 22 deletions pandas/tests/frame/methods/test_cov_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,32 +62,15 @@ def test_cov(self, float_frame, float_string_frame):
class TestDataFrameCorr:
# DataFrame.corr(), as opposed to DataFrame.corrwith

@staticmethod
def _check_method(frame, method="pearson"):
correls = frame.corr(method=method)
expected = frame["A"].corr(frame["C"], method=method)
tm.assert_almost_equal(correls["A"]["C"], expected)

@td.skip_if_no_scipy
def test_corr_pearson(self, float_frame):
float_frame["A"][:5] = np.nan
float_frame["B"][5:10] = np.nan

self._check_method(float_frame, "pearson")

@pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"])
@td.skip_if_no_scipy
def test_corr_kendall(self, float_frame):
def test_corr_scipy_method(self, float_frame, method):
float_frame["A"][:5] = np.nan
float_frame["B"][5:10] = np.nan

self._check_method(float_frame, "kendall")

@td.skip_if_no_scipy
def test_corr_spearman(self, float_frame):
float_frame["A"][:5] = np.nan
float_frame["B"][5:10] = np.nan

self._check_method(float_frame, "spearman")
correls = float_frame.corr(method=method)
expected = float_frame["A"].corr(float_frame["C"], method=method)
tm.assert_almost_equal(correls["A"]["C"], expected)

# ---------------------------------------------------------------------

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/methods/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,15 @@ def test_rank2(self):
exp = DataFrame({"a": [3.5, 1.0, 3.5, 5.0, 6.0, 7.0, 2.0]})
tm.assert_frame_equal(df.rank(), exp)

def test_rank_does_not_mutate(self):
# GH#18521
# Check rank does not mutate DataFrame
df = DataFrame(np.random.randn(10, 3), dtype="float64")
expected = df.copy()
df.rank()
result = df
tm.assert_frame_equal(result, expected)

def test_rank_mixed_frame(self, float_string_frame):
float_string_frame["datetime"] = datetime.now()
float_string_frame["timedelta"] = timedelta(days=1, seconds=1)
Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,15 +1248,6 @@ def test_matmul(self):
# ---------------------------------------------------------------------
# Unsorted

def test_series_nat_conversion(self):
# GH 18521
# Check rank does not mutate DataFrame
df = DataFrame(np.random.randn(10, 3), dtype="float64")
expected = df.copy()
df.rank()
result = df
tm.assert_frame_equal(result, expected)

def test_series_broadcasting(self):
# smoke test for numpy warnings
# GH 16378, GH 16306
Expand Down
Loading