diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 167fe746af4..5dbf65f2070 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -204,7 +204,7 @@ class TestDatasetSize(EnvironmentVariable, type=str): """ varname = "MODIN_TEST_DATASET_SIZE" - choices = ("small", "normal", "big") + choices = ("Small", "Normal", "Big") def _check_vars(): diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py index fe2128b8782..4604f03ffbf 100644 --- a/modin/pandas/test/dataframe/test_indexing.py +++ b/modin/pandas/test/dataframe/test_indexing.py @@ -14,7 +14,7 @@ import pytest import numpy as np import pandas -import pandas.util.testing as tm +from pandas.testing import assert_index_equal import matplotlib import modin.pandas as pd import sys @@ -521,13 +521,15 @@ def test_reindex_like(): def test_rename_sanity(): - test_data = pandas.DataFrame(tm.getSeriesData()) - mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} + source_df = pandas.DataFrame(test_data["int_data"])[ + ["col1", "index", "col3", "col4"] + ] + mapping = {"col1": "a", "index": "b", "col3": "c", "col4": "d"} - modin_df = pd.DataFrame(test_data) - df_equals(modin_df.rename(columns=mapping), test_data.rename(columns=mapping)) + modin_df = pd.DataFrame(source_df) + df_equals(modin_df.rename(columns=mapping), source_df.rename(columns=mapping)) - renamed2 = test_data.rename(columns=str.lower) + renamed2 = source_df.rename(columns=str.lower) df_equals(modin_df.rename(columns=str.lower), renamed2) modin_df = pd.DataFrame(renamed2) @@ -539,20 +541,20 @@ def test_rename_sanity(): # gets sorted alphabetical df = pandas.DataFrame(data) modin_df = pd.DataFrame(data) - tm.assert_index_equal( + assert_index_equal( modin_df.rename(index={"foo": "bar", "bar": "foo"}).index, df.rename(index={"foo": "bar", "bar": "foo"}).index, ) - tm.assert_index_equal( + assert_index_equal( modin_df.rename(index=str.upper).index, df.rename(index=str.upper).index ) # Using the `mapper` functionality with `axis` - tm.assert_index_equal( + assert_index_equal( modin_df.rename(str.upper, axis=0).index, df.rename(str.upper, axis=0).index ) - tm.assert_index_equal( + assert_index_equal( modin_df.rename(str.upper, axis=1).columns, df.rename(str.upper, axis=1).columns, ) @@ -562,18 +564,18 @@ def test_rename_sanity(): modin_df.rename() # partial columns - renamed = test_data.rename(columns={"C": "foo", "D": "bar"}) - modin_df = pd.DataFrame(test_data) - tm.assert_index_equal( - modin_df.rename(columns={"C": "foo", "D": "bar"}).index, - test_data.rename(columns={"C": "foo", "D": "bar"}).index, + renamed = source_df.rename(columns={"col3": "foo", "col4": "bar"}) + modin_df = pd.DataFrame(source_df) + assert_index_equal( + modin_df.rename(columns={"col3": "foo", "col4": "bar"}).index, + source_df.rename(columns={"col3": "foo", "col4": "bar"}).index, ) # other axis - renamed = test_data.T.rename(index={"C": "foo", "D": "bar"}) - tm.assert_index_equal( - test_data.T.rename(index={"C": "foo", "D": "bar"}).index, - modin_df.T.rename(index={"C": "foo", "D": "bar"}).index, + renamed = source_df.T.rename(index={"col3": "foo", "col4": "bar"}) + assert_index_equal( + source_df.T.rename(index={"col3": "foo", "col4": "bar"}).index, + modin_df.T.rename(index={"col3": "foo", "col4": "bar"}).index, ) # index with name @@ -583,7 +585,7 @@ def test_rename_sanity(): renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) modin_renamed = modin_df.rename(index={"foo": "bar", "bar": "foo"}) - tm.assert_index_equal(renamed.index, modin_renamed.index) + assert_index_equal(renamed.index, modin_renamed.index) assert renamed.index.name == modin_renamed.index.name @@ -608,13 +610,13 @@ def test_rename_multiindex(): index={"foo1": "foo3", "bar2": "bar3"}, columns={"fizz1": "fizz3", "buzz2": "buzz3"}, ) - tm.assert_index_equal(renamed.index, modin_renamed.index) + assert_index_equal(renamed.index, modin_renamed.index) renamed = df.rename( index={"foo1": "foo3", "bar2": "bar3"}, columns={"fizz1": "fizz3", "buzz2": "buzz3"}, ) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) assert renamed.index.names == modin_renamed.index.names assert renamed.columns.names == modin_renamed.columns.names @@ -626,68 +628,72 @@ def test_rename_multiindex(): modin_renamed = modin_df.rename( columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0 ) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") modin_renamed = modin_df.rename( columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz" ) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) modin_renamed = modin_df.rename( columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1 ) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") modin_renamed = modin_df.rename( columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz" ) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) # function func = str.upper renamed = df.rename(columns=func, level=0) modin_renamed = modin_df.rename(columns=func, level=0) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) renamed = df.rename(columns=func, level="fizz") modin_renamed = modin_df.rename(columns=func, level="fizz") - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) renamed = df.rename(columns=func, level=1) modin_renamed = modin_df.rename(columns=func, level=1) - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) renamed = df.rename(columns=func, level="buzz") modin_renamed = modin_df.rename(columns=func, level="buzz") - tm.assert_index_equal(renamed.columns, modin_renamed.columns) + assert_index_equal(renamed.columns, modin_renamed.columns) # index renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) modin_renamed = modin_df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) - tm.assert_index_equal(modin_renamed.index, renamed.index) + assert_index_equal(modin_renamed.index, renamed.index) @pytest.mark.skip(reason="Pandas does not pass this test") def test_rename_nocopy(): - test_data = pandas.DataFrame(tm.getSeriesData()) - modin_df = pd.DataFrame(test_data) - modin_renamed = modin_df.rename(columns={"C": "foo"}, copy=False) + source_df = pandas.DataFrame(test_data["int_data"])[ + ["col1", "index", "col3", "col4"] + ] + modin_df = pd.DataFrame(source_df) + modin_renamed = modin_df.rename(columns={"col3": "foo"}, copy=False) modin_renamed["foo"] = 1 - assert (modin_df["C"] == 1).all() + assert (modin_df["col3"] == 1).all() def test_rename_inplace(): - test_data = pandas.DataFrame(tm.getSeriesData()) - modin_df = pd.DataFrame(test_data) + source_df = pandas.DataFrame(test_data["int_data"])[ + ["col1", "index", "col3", "col4"] + ] + modin_df = pd.DataFrame(source_df) df_equals( - modin_df.rename(columns={"C": "foo"}), - test_data.rename(columns={"C": "foo"}), + modin_df.rename(columns={"col3": "foo"}), + source_df.rename(columns={"col3": "foo"}), ) - frame = test_data.copy() + frame = source_df.copy() modin_frame = modin_df.copy() - frame.rename(columns={"C": "foo"}, inplace=True) - modin_frame.rename(columns={"C": "foo"}, inplace=True) + frame.rename(columns={"col3": "foo"}, inplace=True) + modin_frame.rename(columns={"col3": "foo"}, inplace=True) df_equals(modin_frame, frame) @@ -752,7 +758,7 @@ def test_rename_axis(): def test_rename_axis_inplace(): - test_frame = pandas.DataFrame(tm.getSeriesData()) + test_frame = pandas.DataFrame(test_data["int_data"]) modin_df = pd.DataFrame(test_frame) result = test_frame.copy() diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py index 5960612d37c..d0b7d1655d3 100644 --- a/modin/pandas/test/dataframe/test_map_metadata.py +++ b/modin/pandas/test/dataframe/test_map_metadata.py @@ -14,7 +14,7 @@ import pytest import numpy as np import pandas -import pandas.util.testing as tm +from pandas.testing import assert_index_equal import matplotlib import modin.pandas as pd from modin.utils import get_current_backend @@ -27,6 +27,7 @@ df_is_empty, arg_keys, name_contains, + test_data, test_data_values, test_data_keys, test_data_with_duplicates_values, @@ -120,13 +121,13 @@ def test_indexing(): def test_empty_df(): df = pd.DataFrame(index=["a", "b"]) df_is_empty(df) - tm.assert_index_equal(df.index, pd.Index(["a", "b"])) + assert_index_equal(df.index, pd.Index(["a", "b"])) assert len(df.columns) == 0 df = pd.DataFrame(columns=["a", "b"]) df_is_empty(df) assert len(df.index) == 0 - tm.assert_index_equal(df.columns, pd.Index(["a", "b"])) + assert_index_equal(df.columns, pd.Index(["a", "b"])) df = pd.DataFrame() df_is_empty(df) @@ -135,13 +136,13 @@ def test_empty_df(): df = pd.DataFrame(index=["a", "b"]) df_is_empty(df) - tm.assert_index_equal(df.index, pd.Index(["a", "b"])) + assert_index_equal(df.index, pd.Index(["a", "b"])) assert len(df.columns) == 0 df = pd.DataFrame(columns=["a", "b"]) df_is_empty(df) assert len(df.index) == 0 - tm.assert_index_equal(df.columns, pd.Index(["a", "b"])) + assert_index_equal(df.columns, pd.Index(["a", "b"])) df = pd.DataFrame() df_is_empty(df) @@ -439,7 +440,7 @@ def test_append(data): def test_astype(): - td = pandas.DataFrame(tm.getSeriesData()) + td = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]] modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns) expected_df = pandas.DataFrame(td.values, index=td.index, columns=td.columns) @@ -459,13 +460,13 @@ def test_astype(): expected_df_casted = expected_df.astype("category") df_equals(modin_df_casted, expected_df_casted) - dtype_dict = {"A": np.int32, "B": np.int64, "C": str} + dtype_dict = {"col1": np.int32, "index": np.int64, "col3": str} modin_df_casted = modin_df.astype(dtype_dict) expected_df_casted = expected_df.astype(dtype_dict) df_equals(modin_df_casted, expected_df_casted) # Ignore lint because this is testing bad input - bad_dtype_dict = {"B": np.int32, "B": np.int64, "B": str} # noqa F601 + bad_dtype_dict = {"index": np.int32, "index": np.int64, "index": str} # noqa F601 modin_df_casted = modin_df.astype(bad_dtype_dict) expected_df_casted = expected_df.astype(bad_dtype_dict) df_equals(modin_df_casted, expected_df_casted) diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py index 93d96737d56..4704542ad34 100644 --- a/modin/pandas/test/utils.py +++ b/modin/pandas/test/utils.py @@ -15,10 +15,11 @@ import numpy as np import math import pandas -from pandas.util.testing import ( - assert_almost_equal, +from pandas.testing import ( + assert_series_equal, assert_frame_equal, - assert_categorical_equal, + assert_index_equal, + assert_extension_array_equal, ) import modin.pandas as pd from modin.utils import to_pandas @@ -422,8 +423,7 @@ def categories_equals(left, right): assert (left.ordered and right.ordered) or (not left.ordered and not right.ordered) - is_category_ordered = left.ordered - assert_categorical_equal(left, right, check_category_order=is_category_ordered) + assert_extension_array_equal(left, right) def df_categories_equals(df1, df2): @@ -439,12 +439,10 @@ def df_categories_equals(df1, df2): categories_columns = df1.select_dtypes(include="category").columns for column in categories_columns: - is_category_ordered = df1[column].dtype.ordered - assert_categorical_equal( + assert_extension_array_equal( df1[column].values, df2[column].values, check_dtype=False, - check_category_order=is_category_ordered, ) @@ -458,12 +456,6 @@ def df_equals(df1, df2): Returns: True if df1 is equal to df2. """ - types_for_almost_equals = ( - pandas.core.indexes.range.RangeIndex, - pandas.core.indexes.base.Index, - np.recarray, - ) - # Gets AttributError if modin's groupby object is not import like this from modin.pandas.groupby import DataFrameGroupBy @@ -522,12 +514,10 @@ def df_equals(df1, df2): check_categorical=False, ) df_categories_equals(df1, df2) - elif isinstance(df1, types_for_almost_equals) and isinstance( - df2, types_for_almost_equals - ): - assert_almost_equal(df1, df2, check_dtype=False) + elif isinstance(df1, pandas.Index) and isinstance(df2, pandas.Index): + assert_index_equal(df1, df2) elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series): - assert_almost_equal(df1, df2, check_dtype=False, check_series_type=False) + assert_series_equal(df1, df2, check_dtype=False, check_series_type=False) elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types): for g1, g2 in zip(df1, df2): assert g1[0] == g2[0] @@ -543,6 +533,8 @@ def df_equals(df1, df2): elif isinstance(df1, pandas.core.arrays.numpy_.PandasArray): assert isinstance(df2, pandas.core.arrays.numpy_.PandasArray) assert df1 == df2 + elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray): + np.testing.assert_array_equal(df1, df2) else: if df1 != df2: np.testing.assert_almost_equal(df1, df2)