Skip to content

Commit

Permalink
[ENH] remove empty (#1213)
Browse files Browse the repository at this point in the history
* fix for remove_empty

* fix interrogate

* fix for remove_empty

* fix for remove_empty

* fix for remove_empty

* add tests for reset_index=False

* Update tests/functions/test_remove_empty.py

Co-authored-by: Jeremy Goh <[email protected]>

Co-authored-by: Jeremy Goh <[email protected]>
Co-authored-by: Eric Ma <[email protected]>
  • Loading branch information
3 people authored Nov 28, 2022
1 parent e31bac3 commit 68b8bb0
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
22 changes: 10 additions & 12 deletions janitor/functions/remove_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,16 @@


@pf.register_dataframe_method
def remove_empty(df: pd.DataFrame) -> pd.DataFrame:
def remove_empty(df: pd.DataFrame, reset_index: bool = True) -> pd.DataFrame:
"""Drop all rows and columns that are completely null.
This method also resets the index (by default) since it doesn't make sense
to preserve the index of a completely empty row.
This method mutates the original DataFrame.
This method does not mutate the original DataFrame.
Implementation is inspired from [StackOverflow][so].
[so]: https://stackoverflow.com/questions/38884538/python-pandas-find-all-rows-where-all-values-are-nan
Example:
>>> import numpy as np
Expand All @@ -37,12 +35,12 @@ def remove_empty(df: pd.DataFrame) -> pd.DataFrame:
1 2.0 4.0
:param df: The pandas DataFrame object.
:param reset_index: Determines if the index is reset.
Default is `True`.
:returns: A pandas DataFrame.
""" # noqa: E501
nanrows = df.index[df.isna().all(axis=1)]
df = df.drop(index=nanrows).reset_index(drop=True)

nancols = df.columns[df.isna().all(axis=0)]
df = df.drop(columns=nancols)

return df
outcome = df.isna()
outcome = df.loc[~outcome.all(axis=1), ~outcome.all(axis=0)]
if reset_index:
return outcome.reset_index(drop=True)
return outcome
17 changes: 15 additions & 2 deletions tests/functions/test_remove_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
@given(df=df_strategy())
@settings(deadline=None)
def test_remove_empty(df):
# This test ensures that there are no columns that are completely null.
"""This test ensures that there are no columns that are completely null"""
df = df.remove_empty()
for col in df.columns:
assert not pd.isna(df[col]).all()
Expand All @@ -21,11 +21,24 @@ def test_remove_empty(df):

@pytest.mark.functions
def test_index_after_remove_empty():
    """Check that `remove_empty` hands back a frame indexed 0..n-1."""
    frame = pd.DataFrame(
        {
            "a": [1, np.nan, np.nan, 3, np.nan, 6],
            "b": [1, np.nan, 1, 3, np.nan, 6],
        }
    )
    cleaned = frame.remove_empty()
    # Called without `reset_index`, so the surviving rows are expected to be
    # renumbered consecutively from zero.
    expected_index = np.arange(len(cleaned))
    assert np.array_equal(cleaned.index.to_numpy(), expected_index)


@pytest.mark.functions
def test_reset_index_false():
    """Check that `reset_index=False` keeps the original row labels."""
    frame = pd.DataFrame(
        {
            "a": [1, np.nan, np.nan, 3, np.nan, 6],
            "b": [1, np.nan, 1, 3, np.nan, 6],
        }
    )
    cleaned = frame.remove_empty(reset_index=False)
    # Rows holding at least one non-null value. The frame uses the default
    # integer index, so their positions double as their labels.
    has_data = frame.notna().any(axis=1).to_numpy()
    expected_labels = np.flatnonzero(has_data)
    assert np.array_equal(expected_labels, cleaned.index.to_numpy())

0 comments on commit 68b8bb0

Please sign in to comment.