From b6acdc596294b0a56bfd5cb006d0b3b44144017c Mon Sep 17 00:00:00 2001 From: Alexander Myskov Date: Wed, 25 Nov 2020 07:30:17 -0600 Subject: [PATCH] TEST-#2294: add iteration parameters tests Signed-off-by: Alexander Myskov --- modin/pandas/test/test_io.py | 63 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py index 8f67858f311..8d2625c17e3 100644 --- a/modin/pandas/test/test_io.py +++ b/modin/pandas/test/test_io.py @@ -759,9 +759,9 @@ def test_read_csv_nans_handling( skip_blank_lines, ): eval_io( - filepath_or_buffer=pytest.csvs_names["test_read_csv_nans"], fn_name="read_csv", # read_csv kwargs + filepath_or_buffer=pytest.csvs_names["test_read_csv_nans"], na_values=na_values, keep_default_na=keep_default_na, na_filter=na_filter, @@ -829,6 +829,37 @@ def test_read_csv_datetime( cache_dates=cache_dates, ) + # Iteration tests + @pytest.mark.parametrize("iterator", [True, False]) + def test_read_csv_iteration(self, make_csv_file, iterator): + filename = pytest.csvs_names["test_read_csv_regular"] + + # Tests __next__ and correctness of reader as an iterator + # Use larger chunksize to read through file quicker + rdf_reader = pd.read_csv(filename, chunksize=500, iterator=iterator) + pd_reader = pandas.read_csv(filename, chunksize=500, iterator=iterator) + + for modin_df, pd_df in zip(rdf_reader, pd_reader): + df_equals(modin_df, pd_df) + + # Tests that get_chunk works correctly + rdf_reader = pd.read_csv(filename, chunksize=1, iterator=iterator) + pd_reader = pandas.read_csv(filename, chunksize=1, iterator=iterator) + + modin_df = rdf_reader.get_chunk(1) + pd_df = pd_reader.get_chunk(1) + + df_equals(modin_df, pd_df) + + # Tests that read works correctly + rdf_reader = pd.read_csv(filename, chunksize=1, iterator=iterator) + pd_reader = pandas.read_csv(filename, chunksize=1, iterator=iterator) + + modin_df = rdf_reader.read() + pd_df = pd_reader.read() + + df_equals(modin_df, pd_df) + def test_from_parquet(make_parquet_file): make_parquet_file(NROWS) @@ -1459,36 +1490,6 @@ def test_from_csv_default(make_csv_file): df_equals(modin_df, pandas_df) -def test_from_csv_chunksize(make_csv_file): - make_csv_file() - - # Tests __next__ and correctness of reader as an iterator - # Use larger chunksize to read through file quicker - rdf_reader = pd.read_csv(TEST_CSV_FILENAME, chunksize=500) - pd_reader = pandas.read_csv(TEST_CSV_FILENAME, chunksize=500) - - for modin_df, pd_df in zip(rdf_reader, pd_reader): - df_equals(modin_df, pd_df) - - # Tests that get_chunk works correctly - rdf_reader = pd.read_csv(TEST_CSV_FILENAME, chunksize=1) - pd_reader = pandas.read_csv(TEST_CSV_FILENAME, chunksize=1) - - modin_df = rdf_reader.get_chunk(1) - pd_df = pd_reader.get_chunk(1) - - df_equals(modin_df, pd_df) - - # Tests that read works correctly - rdf_reader = pd.read_csv(TEST_CSV_FILENAME, chunksize=1) - pd_reader = pandas.read_csv(TEST_CSV_FILENAME, chunksize=1) - - modin_df = rdf_reader.read() - pd_df = pd_reader.read() - - df_equals(modin_df, pd_df) - - @pytest.mark.parametrize("names", [list("XYZ"), None]) @pytest.mark.parametrize("skiprows", [1, 2, 3, 4, None]) def test_from_csv_skiprows_names(names, skiprows):