Restrict the keyword arguments that parser_func forwards to _read to the
parameter names of pandas.read_csv, and add tests that call read_csv and
read_table from inside a decorator that does not use functools.wraps.

NOTE(review): the line breaks and indentation of this patch were reconstructed
from the hunk headers and Python block structure; confirm the context lines
against the target files before applying.

diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index b2349769be9..8e2c1bdd85d 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -103,9 +103,15 @@ def parser_func(
         memory_map=False,
         float_precision=None,
     ):
-        _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
-        if kwargs.get("sep", sep) is False:
-            kwargs["sep"] = "\t"
+        # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args
+        _pd_read_csv_signature = {
+            val.name for val in inspect.signature(pandas.read_csv).parameters.values()
+        }
+        _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
+        if f_locals.get("sep", sep) is False:
+            f_locals["sep"] = "\t"
+
+        kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
         return _read(**kwargs)
 
     return parser_func
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index a5709c230f8..93429806fbc 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -43,6 +43,7 @@
     IO_OPS_DATA_DIR,
     io_ops_bad_exc,
     eval_io_from_str,
+    dummy_decorator,
 )
 
 from modin.config import Engine, Backend, IsExperimental
@@ -1460,6 +1461,28 @@ def test_from_sas():
     df_equals(modin_df, pandas_df)
 
 
+def test_from_csv_within_decorator(make_csv_file):
+    make_csv_file()
+
+    @dummy_decorator()
+    def wrapped_read_csv(file, method):
+        if method == "pandas":
+            return pandas.read_csv(file)
+
+        if method == "modin":
+            return pd.read_csv(file)
+
+    pandas_df = wrapped_read_csv(TEST_CSV_FILENAME, method="pandas")
+    modin_df = wrapped_read_csv(TEST_CSV_FILENAME, method="modin")
+
+    df_equals(modin_df, pandas_df)
+
+    pandas_df = wrapped_read_csv(Path(TEST_CSV_FILENAME), method="pandas")
+    modin_df = wrapped_read_csv(Path(TEST_CSV_FILENAME), method="modin")
+
+    df_equals(modin_df, pandas_df)
+
+
 @pytest.mark.parametrize("nrows", [35, None])
 def test_from_csv_sep_none(make_csv_file, nrows):
     make_csv_file()
@@ -1642,6 +1665,28 @@ def test_from_table(make_csv_file):
     df_equals(modin_df, pandas_df)
 
 
+def test_from_table_within_decorator(make_csv_file):
+    make_csv_file(delimiter="\t")
+
+    @dummy_decorator()
+    def wrapped_read_table(file, method):
+        if method == "pandas":
+            return pandas.read_table(file)
+
+        if method == "modin":
+            return pd.read_table(file)
+
+    pandas_df = wrapped_read_table(TEST_CSV_FILENAME, method="pandas")
+    modin_df = wrapped_read_table(TEST_CSV_FILENAME, method="modin")
+
+    df_equals(modin_df, pandas_df)
+
+    pandas_df = wrapped_read_table(Path(TEST_CSV_FILENAME), method="pandas")
+    modin_df = wrapped_read_table(Path(TEST_CSV_FILENAME), method="modin")
+
+    df_equals(modin_df, pandas_df)
+
+
 @pytest.mark.skipif(Engine.get() == "Python", reason="Using pandas implementation")
 def test_from_csv_s3(make_csv_file):
     dataset_url = "s3://noaa-ghcn-pds/csv/1788.csv"
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 22a47401bf4..6a007ab6d99 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -1045,3 +1045,18 @@ def check(*a, **kw):
         ), f"Unexpected open handles left for: {', '.join(item[0] for item in leaks)}"
 
     return check
+
+
+def dummy_decorator():
+    """A problematic decorator that does not use `functools.wraps`. This introduces unwanted local variables for
+    inspect.currentframe. This decorator is used in test_io to test `read_csv` and `read_table`
+    """
+
+    def wrapper(method):
+        def wrapped_function(self, *args, **kwargs):
+            result = method(self, *args, **kwargs)
+            return result
+
+        return wrapped_function
+
+    return wrapper