Skip to content

Commit

Permalink
TEST-#2260: use recommended pandas testing api (#2273)
Browse files Browse the repository at this point in the history
* TEST-#2260: use recommended pandas testing api

Signed-off-by: Anatoly Myachev <[email protected]>

* TEST-#2260: replace getSeriesData with test_data

Signed-off-by: Anatoly Myachev <[email protected]>

* TEST-#2260: remove assert_categories_equal

Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev authored Oct 22, 2020
1 parent 544de0d commit b514d6f
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 71 deletions.
2 changes: 1 addition & 1 deletion modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ class TestDatasetSize(EnvironmentVariable, type=str):
"""

varname = "MODIN_TEST_DATASET_SIZE"
choices = ("small", "normal", "big")
choices = ("Small", "Normal", "Big")


def _check_vars():
Expand Down
92 changes: 49 additions & 43 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pytest
import numpy as np
import pandas
import pandas.util.testing as tm
from pandas.testing import assert_index_equal
import matplotlib
import modin.pandas as pd
import sys
Expand Down Expand Up @@ -521,13 +521,15 @@ def test_reindex_like():


def test_rename_sanity():
test_data = pandas.DataFrame(tm.getSeriesData())
mapping = {"A": "a", "B": "b", "C": "c", "D": "d"}
source_df = pandas.DataFrame(test_data["int_data"])[
["col1", "index", "col3", "col4"]
]
mapping = {"col1": "a", "index": "b", "col3": "c", "col4": "d"}

modin_df = pd.DataFrame(test_data)
df_equals(modin_df.rename(columns=mapping), test_data.rename(columns=mapping))
modin_df = pd.DataFrame(source_df)
df_equals(modin_df.rename(columns=mapping), source_df.rename(columns=mapping))

renamed2 = test_data.rename(columns=str.lower)
renamed2 = source_df.rename(columns=str.lower)
df_equals(modin_df.rename(columns=str.lower), renamed2)

modin_df = pd.DataFrame(renamed2)
Expand All @@ -539,20 +541,20 @@ def test_rename_sanity():
# gets sorted alphabetically
df = pandas.DataFrame(data)
modin_df = pd.DataFrame(data)
tm.assert_index_equal(
assert_index_equal(
modin_df.rename(index={"foo": "bar", "bar": "foo"}).index,
df.rename(index={"foo": "bar", "bar": "foo"}).index,
)

tm.assert_index_equal(
assert_index_equal(
modin_df.rename(index=str.upper).index, df.rename(index=str.upper).index
)

# Using the `mapper` functionality with `axis`
tm.assert_index_equal(
assert_index_equal(
modin_df.rename(str.upper, axis=0).index, df.rename(str.upper, axis=0).index
)
tm.assert_index_equal(
assert_index_equal(
modin_df.rename(str.upper, axis=1).columns,
df.rename(str.upper, axis=1).columns,
)
Expand All @@ -562,18 +564,18 @@ def test_rename_sanity():
modin_df.rename()

# partial columns
renamed = test_data.rename(columns={"C": "foo", "D": "bar"})
modin_df = pd.DataFrame(test_data)
tm.assert_index_equal(
modin_df.rename(columns={"C": "foo", "D": "bar"}).index,
test_data.rename(columns={"C": "foo", "D": "bar"}).index,
renamed = source_df.rename(columns={"col3": "foo", "col4": "bar"})
modin_df = pd.DataFrame(source_df)
assert_index_equal(
modin_df.rename(columns={"col3": "foo", "col4": "bar"}).index,
source_df.rename(columns={"col3": "foo", "col4": "bar"}).index,
)

# other axis
renamed = test_data.T.rename(index={"C": "foo", "D": "bar"})
tm.assert_index_equal(
test_data.T.rename(index={"C": "foo", "D": "bar"}).index,
modin_df.T.rename(index={"C": "foo", "D": "bar"}).index,
renamed = source_df.T.rename(index={"col3": "foo", "col4": "bar"})
assert_index_equal(
source_df.T.rename(index={"col3": "foo", "col4": "bar"}).index,
modin_df.T.rename(index={"col3": "foo", "col4": "bar"}).index,
)

# index with name
Expand All @@ -583,7 +585,7 @@ def test_rename_sanity():

renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
modin_renamed = modin_df.rename(index={"foo": "bar", "bar": "foo"})
tm.assert_index_equal(renamed.index, modin_renamed.index)
assert_index_equal(renamed.index, modin_renamed.index)

assert renamed.index.name == modin_renamed.index.name

Expand All @@ -608,13 +610,13 @@ def test_rename_multiindex():
index={"foo1": "foo3", "bar2": "bar3"},
columns={"fizz1": "fizz3", "buzz2": "buzz3"},
)
tm.assert_index_equal(renamed.index, modin_renamed.index)
assert_index_equal(renamed.index, modin_renamed.index)

renamed = df.rename(
index={"foo1": "foo3", "bar2": "bar3"},
columns={"fizz1": "fizz3", "buzz2": "buzz3"},
)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)
assert renamed.index.names == modin_renamed.index.names
assert renamed.columns.names == modin_renamed.columns.names

Expand All @@ -626,68 +628,72 @@ def test_rename_multiindex():
modin_renamed = modin_df.rename(
columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0
)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)
renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz")
modin_renamed = modin_df.rename(
columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz"
)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)

renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1)
modin_renamed = modin_df.rename(
columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1
)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)
renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz")
modin_renamed = modin_df.rename(
columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz"
)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)

# function
func = str.upper
renamed = df.rename(columns=func, level=0)
modin_renamed = modin_df.rename(columns=func, level=0)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)
renamed = df.rename(columns=func, level="fizz")
modin_renamed = modin_df.rename(columns=func, level="fizz")
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)

renamed = df.rename(columns=func, level=1)
modin_renamed = modin_df.rename(columns=func, level=1)
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)
renamed = df.rename(columns=func, level="buzz")
modin_renamed = modin_df.rename(columns=func, level="buzz")
tm.assert_index_equal(renamed.columns, modin_renamed.columns)
assert_index_equal(renamed.columns, modin_renamed.columns)

# index
renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
modin_renamed = modin_df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
tm.assert_index_equal(modin_renamed.index, renamed.index)
assert_index_equal(modin_renamed.index, renamed.index)


@pytest.mark.skip(reason="Pandas does not pass this test")
def test_rename_nocopy():
test_data = pandas.DataFrame(tm.getSeriesData())
modin_df = pd.DataFrame(test_data)
modin_renamed = modin_df.rename(columns={"C": "foo"}, copy=False)
source_df = pandas.DataFrame(test_data["int_data"])[
["col1", "index", "col3", "col4"]
]
modin_df = pd.DataFrame(source_df)
modin_renamed = modin_df.rename(columns={"col3": "foo"}, copy=False)
modin_renamed["foo"] = 1
assert (modin_df["C"] == 1).all()
assert (modin_df["col3"] == 1).all()


def test_rename_inplace():
test_data = pandas.DataFrame(tm.getSeriesData())
modin_df = pd.DataFrame(test_data)
source_df = pandas.DataFrame(test_data["int_data"])[
["col1", "index", "col3", "col4"]
]
modin_df = pd.DataFrame(source_df)

df_equals(
modin_df.rename(columns={"C": "foo"}),
test_data.rename(columns={"C": "foo"}),
modin_df.rename(columns={"col3": "foo"}),
source_df.rename(columns={"col3": "foo"}),
)

frame = test_data.copy()
frame = source_df.copy()
modin_frame = modin_df.copy()
frame.rename(columns={"C": "foo"}, inplace=True)
modin_frame.rename(columns={"C": "foo"}, inplace=True)
frame.rename(columns={"col3": "foo"}, inplace=True)
modin_frame.rename(columns={"col3": "foo"}, inplace=True)

df_equals(modin_frame, frame)

Expand Down Expand Up @@ -752,7 +758,7 @@ def test_rename_axis():


def test_rename_axis_inplace():
test_frame = pandas.DataFrame(tm.getSeriesData())
test_frame = pandas.DataFrame(test_data["int_data"])
modin_df = pd.DataFrame(test_frame)

result = test_frame.copy()
Expand Down
17 changes: 9 additions & 8 deletions modin/pandas/test/dataframe/test_map_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pytest
import numpy as np
import pandas
import pandas.util.testing as tm
from pandas.testing import assert_index_equal
import matplotlib
import modin.pandas as pd
from modin.utils import get_current_backend
Expand All @@ -27,6 +27,7 @@
df_is_empty,
arg_keys,
name_contains,
test_data,
test_data_values,
test_data_keys,
test_data_with_duplicates_values,
Expand Down Expand Up @@ -120,13 +121,13 @@ def test_indexing():
def test_empty_df():
df = pd.DataFrame(index=["a", "b"])
df_is_empty(df)
tm.assert_index_equal(df.index, pd.Index(["a", "b"]))
assert_index_equal(df.index, pd.Index(["a", "b"]))
assert len(df.columns) == 0

df = pd.DataFrame(columns=["a", "b"])
df_is_empty(df)
assert len(df.index) == 0
tm.assert_index_equal(df.columns, pd.Index(["a", "b"]))
assert_index_equal(df.columns, pd.Index(["a", "b"]))

df = pd.DataFrame()
df_is_empty(df)
Expand All @@ -135,13 +136,13 @@ def test_empty_df():

df = pd.DataFrame(index=["a", "b"])
df_is_empty(df)
tm.assert_index_equal(df.index, pd.Index(["a", "b"]))
assert_index_equal(df.index, pd.Index(["a", "b"]))
assert len(df.columns) == 0

df = pd.DataFrame(columns=["a", "b"])
df_is_empty(df)
assert len(df.index) == 0
tm.assert_index_equal(df.columns, pd.Index(["a", "b"]))
assert_index_equal(df.columns, pd.Index(["a", "b"]))

df = pd.DataFrame()
df_is_empty(df)
Expand Down Expand Up @@ -439,7 +440,7 @@ def test_append(data):


def test_astype():
td = pandas.DataFrame(tm.getSeriesData())
td = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]]
modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)
expected_df = pandas.DataFrame(td.values, index=td.index, columns=td.columns)

Expand All @@ -459,13 +460,13 @@ def test_astype():
expected_df_casted = expected_df.astype("category")
df_equals(modin_df_casted, expected_df_casted)

dtype_dict = {"A": np.int32, "B": np.int64, "C": str}
dtype_dict = {"col1": np.int32, "index": np.int64, "col3": str}
modin_df_casted = modin_df.astype(dtype_dict)
expected_df_casted = expected_df.astype(dtype_dict)
df_equals(modin_df_casted, expected_df_casted)

# Ignore lint because this is testing bad input
bad_dtype_dict = {"B": np.int32, "B": np.int64, "B": str} # noqa F601
bad_dtype_dict = {"index": np.int32, "index": np.int64, "index": str} # noqa F601
modin_df_casted = modin_df.astype(bad_dtype_dict)
expected_df_casted = expected_df.astype(bad_dtype_dict)
df_equals(modin_df_casted, expected_df_casted)
Expand Down
30 changes: 11 additions & 19 deletions modin/pandas/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
import numpy as np
import math
import pandas
from pandas.util.testing import (
assert_almost_equal,
from pandas.testing import (
assert_series_equal,
assert_frame_equal,
assert_categorical_equal,
assert_index_equal,
assert_extension_array_equal,
)
import modin.pandas as pd
from modin.utils import to_pandas
Expand Down Expand Up @@ -422,8 +423,7 @@

def categories_equals(left, right):
assert (left.ordered and right.ordered) or (not left.ordered and not right.ordered)
is_category_ordered = left.ordered
assert_categorical_equal(left, right, check_category_order=is_category_ordered)
assert_extension_array_equal(left, right)


def df_categories_equals(df1, df2):
Expand All @@ -439,12 +439,10 @@ def df_categories_equals(df1, df2):

categories_columns = df1.select_dtypes(include="category").columns
for column in categories_columns:
is_category_ordered = df1[column].dtype.ordered
assert_categorical_equal(
assert_extension_array_equal(
df1[column].values,
df2[column].values,
check_dtype=False,
check_category_order=is_category_ordered,
)


Expand All @@ -458,12 +456,6 @@ def df_equals(df1, df2):
Returns:
True if df1 is equal to df2.
"""
types_for_almost_equals = (
pandas.core.indexes.range.RangeIndex,
pandas.core.indexes.base.Index,
np.recarray,
)

# Gets AttributeError if modin's groupby object is not imported like this
from modin.pandas.groupby import DataFrameGroupBy

Expand Down Expand Up @@ -522,12 +514,10 @@ def df_equals(df1, df2):
check_categorical=False,
)
df_categories_equals(df1, df2)
elif isinstance(df1, types_for_almost_equals) and isinstance(
df2, types_for_almost_equals
):
assert_almost_equal(df1, df2, check_dtype=False)
elif isinstance(df1, pandas.Index) and isinstance(df2, pandas.Index):
assert_index_equal(df1, df2)
elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
assert_almost_equal(df1, df2, check_dtype=False, check_series_type=False)
assert_series_equal(df1, df2, check_dtype=False, check_series_type=False)
elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
for g1, g2 in zip(df1, df2):
assert g1[0] == g2[0]
Expand All @@ -543,6 +533,8 @@ def df_equals(df1, df2):
elif isinstance(df1, pandas.core.arrays.numpy_.PandasArray):
assert isinstance(df2, pandas.core.arrays.numpy_.PandasArray)
assert df1 == df2
elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
np.testing.assert_array_equal(df1, df2)
else:
if df1 != df2:
np.testing.assert_almost_equal(df1, df2)
Expand Down

0 comments on commit b514d6f

Please sign in to comment.