From df828190a434b5e6f7870b2874bef126d7881a7b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Oct 2023 02:26:56 -0700 Subject: [PATCH] TST: change pyarrow skips to xfails (#55637) Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- .../tests/io/parser/common/test_chunksize.py | 23 +++++++++++++++++-- .../io/parser/common/test_file_buffer_url.py | 22 ++++++++++++++---- pandas/tests/io/parser/common/test_float.py | 7 +++++- .../tests/io/parser/common/test_iterator.py | 9 +++++++- pandas/tests/io/parser/common/test_verbose.py | 4 +++- pandas/tests/io/parser/dtypes/test_empty.py | 11 +++++++-- pandas/tests/io/parser/test_comment.py | 10 +++++++- pandas/tests/io/parser/test_converters.py | 10 +++++++- pandas/tests/io/parser/test_dialect.py | 9 +++++++- pandas/tests/io/parser/test_multi_thread.py | 5 +++- pandas/tests/io/parser/test_quoting.py | 22 +++++++++++++++--- pandas/tests/io/parser/test_skiprows.py | 19 +++++++++++++-- .../io/parser/usecols/test_parse_dates.py | 17 ++++++++++++-- 13 files changed, 146 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index d407f98029e8d..0f42aa81e4b37 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -16,9 +16,13 @@ ) import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +@xfail_pyarrow # The 'chunksize' option is not supported @pytest.mark.parametrize("index_col", [0, "index"]) def test_read_chunksize_with_index(all_parsers, index_col): parser = all_parsers @@ -51,6 +55,7 @@ def test_read_chunksize_with_index(all_parsers, index_col): tm.assert_frame_equal(chunks[2], expected[4:]) +@xfail_pyarrow # AssertionError: Regex pattern did not match @pytest.mark.parametrize("chunksize", [1.3, "foo", 0]) def test_read_chunksize_bad(all_parsers, chunksize): data = """index,A,B,C,D @@ -69,6 +74,7 @@ def test_read_chunksize_bad(all_parsers, chunksize): pass +@xfail_pyarrow # The 'nrows' option is not supported @pytest.mark.parametrize("chunksize", [2, 8]) def test_read_chunksize_and_nrows(all_parsers, chunksize): # see gh-15755 @@ -88,6 +94,7 @@ def test_read_chunksize_and_nrows(all_parsers, chunksize): tm.assert_frame_equal(concat(reader), expected) +@xfail_pyarrow # The 'chunksize' option is not supported def test_read_chunksize_and_nrows_changing_size(all_parsers): data = """index,A,B,C,D foo,2,3,4,5 @@ -109,6 +116,7 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers): reader.get_chunk(size=3) +@xfail_pyarrow # The 'chunksize' option is not supported def test_get_chunk_passed_chunksize(all_parsers): parser = all_parsers data = """A,B,C @@ -124,6 +132,7 @@ def test_get_chunk_passed_chunksize(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # The 'chunksize' option is not supported @pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}]) def test_read_chunksize_compat(all_parsers, kwargs): # see gh-12185 @@ -141,6 +150,7 @@ def test_read_chunksize_compat(all_parsers, kwargs): tm.assert_frame_equal(concat(reader), result) +@xfail_pyarrow # The 'chunksize' option is not supported def test_read_chunksize_jagged_names(all_parsers): # see gh-23509 parser = all_parsers @@ -171,7 +181,11 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch): data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers) # Coercions should work without warnings. - with tm.assert_produces_warning(None): + warn = None + if parser.engine == "pyarrow": + warn = DeprecationWarning + depr_msg = "Passing a BlockManager to DataFrame" + with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False): with monkeypatch.context() as m: m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic) result = parser.read_csv(StringIO(data)) @@ -180,6 +194,7 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch): assert result.a.dtype == float +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_warn_if_chunks_have_mismatched_type(all_parsers): warning_type = None parser = all_parsers @@ -207,6 +222,7 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers): assert df.a.dtype == object +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported @pytest.mark.parametrize("iterator", [True, False]) def test_empty_with_nrows_chunksize(all_parsers, iterator): # see gh-9535 @@ -225,6 +241,7 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_read_csv_memory_growth_chunksize(all_parsers): # see gh-24805 # @@ -242,6 +259,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers): pass +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_chunksize_with_usecols_second_block_shorter(all_parsers): # GH#21211 parser = all_parsers @@ -267,6 +285,7 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers): tm.assert_frame_equal(result, expected_frames[i]) +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_chunksize_second_block_shorter(all_parsers): # GH#21211 parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 5ee629947db48..5d5814e880f8b 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -22,9 +22,11 @@ from pandas import DataFrame import pandas._testing as tm -# TODO(1.4) Please xfail individual tests at release time -# instead of skip -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) + +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.network @@ -60,6 +62,7 @@ def test_local_file(all_parsers, csv_dir_path): pytest.skip("Failing on: " + " ".join(platform.uname())) +@xfail_pyarrow # AssertionError: DataFrame.index are different def test_path_path_lib(all_parsers): parser = all_parsers df = tm.makeDataFrame() @@ -67,6 +70,7 @@ def test_path_path_lib(all_parsers): tm.assert_frame_equal(df, result) +@xfail_pyarrow # AssertionError: DataFrame.index are different def test_path_local_path(all_parsers): parser = all_parsers df = tm.makeDataFrame() @@ -206,10 +210,14 @@ def test_no_permission(all_parsers): "in-quoted-field", ], ) -def test_eof_states(all_parsers, data, kwargs, expected, msg): +def test_eof_states(all_parsers, data, kwargs, expected, msg, request): # see gh-10728, gh-10548 parser = all_parsers + if parser.engine == "pyarrow" and "\r" not in data: + mark = pytest.mark.xfail(reason="The 'comment' option is not supported") + request.applymarker(mark) + if expected is None: with pytest.raises(ParserError, match=msg): parser.read_csv(StringIO(data), **kwargs) @@ -218,6 +226,7 @@ def test_eof_states(all_parsers, data, kwargs, expected, msg): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators def test_temporary_file(all_parsers): # see gh-13398 parser = all_parsers @@ -347,6 +356,7 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding): assert not handle.closed +@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_memory_map_compression(all_parsers, compression): """ Support memory map for compressed files. @@ -365,6 +375,7 @@ def test_memory_map_compression(all_parsers, compression): ) +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_context_manager(all_parsers, datapath): # make sure that opened files are closed parser = all_parsers @@ -381,6 +392,7 @@ def test_context_manager(all_parsers, datapath): assert reader.handles.handle.closed +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_context_manageri_user_provided(all_parsers, datapath): # make sure that user-provided handles are not closed parser = all_parsers @@ -396,6 +408,7 @@ def test_context_manageri_user_provided(all_parsers, datapath): assert not reader.handles.handle.closed +@xfail_pyarrow # ParserError: Empty CSV file def test_file_descriptor_leak(all_parsers, using_copy_on_write): # GH 31488 parser = all_parsers @@ -404,6 +417,7 @@ def test_file_descriptor_leak(all_parsers, using_copy_on_write): parser.read_csv(path) +@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_memory_map(all_parsers, csv_dir_path): mmap_file = os.path.join(csv_dir_path, "test_mmap.csv") parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py index 8ec372420a0f0..63ad3bcb249ea 100644 --- a/pandas/tests/io/parser/common/test_float.py +++ b/pandas/tests/io/parser/common/test_float.py @@ -12,9 +12,13 @@ from pandas import DataFrame import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block def test_float_parser(all_parsers): # see gh-9565 parser = all_parsers @@ -46,6 +50,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999]) def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request): # GH#38753 diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index 58e5886aedd6b..26619857bd231 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -12,9 +12,13 @@ ) import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # ValueError: The 'iterator' option is not supported def test_iterator(all_parsers): # see gh-6607 data = """index,A,B,C,D @@ -37,6 +41,7 @@ def test_iterator(all_parsers): tm.assert_frame_equal(last_chunk, expected[3:]) +@xfail_pyarrow # ValueError: The 'iterator' option is not supported def test_iterator2(all_parsers): parser = all_parsers data = """A,B,C @@ -56,6 +61,7 @@ def test_iterator2(all_parsers): tm.assert_frame_equal(result[0], expected) +@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_iterator_stop_on_chunksize(all_parsers): # gh-3967: stopping iteration when chunksize is specified parser = all_parsers @@ -77,6 +83,7 @@ def test_iterator_stop_on_chunksize(all_parsers): tm.assert_frame_equal(concat(result), expected) +@xfail_pyarrow # AssertionError: Regex pattern did not match @pytest.mark.parametrize( "kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}] ) diff --git a/pandas/tests/io/parser/common/test_verbose.py b/pandas/tests/io/parser/common/test_verbose.py index 335065db974dc..bcfb9cd4032ad 100644 --- a/pandas/tests/io/parser/common/test_verbose.py +++ b/pandas/tests/io/parser/common/test_verbose.py @@ -6,9 +6,10 @@ import pytest -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # ValueError: The 'verbose' option is not supported def test_verbose_read(all_parsers, capsys): parser = all_parsers data = """a,b,c,d @@ -32,6 +33,7 @@ def test_verbose_read(all_parsers, capsys): assert captured.out == "Filled 3 NA values in column a\n" +@xfail_pyarrow # ValueError: The 'verbose' option is not supported def test_verbose_read2(all_parsers, capsys): parser = all_parsers data = """a,b,c,d diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index 1f709a3cd8f28..8759c52485533 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -17,10 +17,10 @@ ) import pandas._testing as tm -# TODO(1.4): Change me into individual xfails at release time -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_dtype_all_columns_empty(all_parsers): # see gh-12048 parser = all_parsers @@ -30,6 +30,7 @@ def test_dtype_all_columns_empty(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_pass_dtype(all_parsers): parser = all_parsers @@ -42,6 +43,7 @@ def test_empty_pass_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_index_pass_dtype(all_parsers): parser = all_parsers @@ -56,6 +58,7 @@ def test_empty_with_index_pass_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_multi_index_pass_dtype(all_parsers): parser = all_parsers @@ -72,6 +75,7 @@ def test_empty_with_multi_index_pass_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): parser = all_parsers @@ -84,6 +88,7 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): parser = all_parsers @@ -96,6 +101,7 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): # see gh-9424 parser = all_parsers @@ -165,6 +171,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): ), ], ) +@xfail_pyarrow # CSV parse error: Empty CSV file or block def test_empty_dtype(all_parsers, dtype, expected): # see gh-14712 parser = all_parsers diff --git a/pandas/tests/io/parser/test_comment.py b/pandas/tests/io/parser/test_comment.py index 5b738446ea441..1724f3390ea46 100644 --- a/pandas/tests/io/parser/test_comment.py +++ b/pandas/tests/io/parser/test_comment.py @@ -10,9 +10,10 @@ from pandas import DataFrame import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # ValueError: The 'comment' option is not supported @pytest.mark.parametrize("na_values", [None, ["NaN"]]) def test_comment(all_parsers, na_values): parser = all_parsers @@ -27,6 +28,7 @@ def test_comment(all_parsers, na_values): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported @pytest.mark.parametrize( "read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}] ) @@ -58,6 +60,7 @@ def test_line_comment(all_parsers, read_kwargs, request): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported def test_comment_skiprows(all_parsers): parser = all_parsers data = """# empty @@ -76,6 +79,7 @@ def test_comment_skiprows(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported def test_comment_header(all_parsers): parser = all_parsers data = """# empty @@ -93,6 +97,7 @@ def test_comment_header(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported def test_comment_skiprows_header(all_parsers): parser = all_parsers data = """# empty @@ -114,6 +119,7 @@ def test_comment_skiprows_header(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported @pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"]) def test_custom_comment_char(all_parsers, comment_char): parser = all_parsers @@ -126,6 +132,7 @@ def test_custom_comment_char(all_parsers, comment_char): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported @pytest.mark.parametrize("header", ["infer", None]) def test_comment_first_line(all_parsers, header): # see gh-4623 @@ -141,6 +148,7 @@ def test_comment_first_line(all_parsers, header): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'comment' option is not supported def test_comment_char_in_default_value(all_parsers, request): # GH#34002 if all_parsers.engine == "c": diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py index 85f3db0398080..b16542bbdecec 100644 --- a/pandas/tests/io/parser/test_converters.py +++ b/pandas/tests/io/parser/test_converters.py @@ -15,9 +15,10 @@ ) import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # ValueError: The 'converters' option is not supported def test_converters_type_must_be_dict(all_parsers): parser = all_parsers data = """index,A,B,C,D @@ -28,6 +29,7 @@ def test_converters_type_must_be_dict(all_parsers): parser.read_csv(StringIO(data), converters=0) +@xfail_pyarrow # ValueError: The 'converters' option is not supported @pytest.mark.parametrize("column", [3, "D"]) @pytest.mark.parametrize( "converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer. @@ -47,6 +49,7 @@ def test_converters(all_parsers, column, converter): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'converters' option is not supported def test_converters_no_implicit_conv(all_parsers): # see gh-2184 parser = all_parsers @@ -60,6 +63,7 @@ def test_converters_no_implicit_conv(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'converters' option is not supported def test_converters_euro_decimal_format(all_parsers): # see gh-583 converters = {} @@ -85,6 +89,7 @@ def test_converters_euro_decimal_format(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'converters' option is not supported def test_converters_corner_with_nans(all_parsers): parser = all_parsers data = """id,score,days @@ -152,6 +157,7 @@ def convert_score(x): tm.assert_frame_equal(results[0], results[1]) +@xfail_pyarrow # ValueError: The 'converters' option is not supported @pytest.mark.parametrize("conv_f", [lambda x: x, str]) def test_converter_index_col_bug(all_parsers, conv_f): # see gh-1835 , GH#40589 @@ -166,6 +172,7 @@ def test_converter_index_col_bug(all_parsers, conv_f): tm.assert_frame_equal(rs, xp) +@xfail_pyarrow # ValueError: The 'converters' option is not supported def test_converter_identity_object(all_parsers): # GH#40589 parser = all_parsers @@ -177,6 +184,7 @@ def test_converter_identity_object(all_parsers): tm.assert_frame_equal(rs, xp) +@xfail_pyarrow # ValueError: The 'converters' option is not supported def test_converter_multi_index(all_parsers): # GH 42446 parser = all_parsers diff --git a/pandas/tests/io/parser/test_dialect.py b/pandas/tests/io/parser/test_dialect.py index 7d2bb6c083cda..fbea895435699 100644 --- a/pandas/tests/io/parser/test_dialect.py +++ b/pandas/tests/io/parser/test_dialect.py @@ -13,7 +13,10 @@ from pandas import DataFrame import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @pytest.fixture @@ -30,6 +33,7 @@ def custom_dialect(): return dialect_name, dialect_kwargs +@xfail_pyarrow # ValueError: The 'dialect' option is not supported def test_dialect(all_parsers): parser = all_parsers data = """\ @@ -52,6 +56,7 @@ def test_dialect(all_parsers): tm.assert_frame_equal(df, exp) +@xfail_pyarrow # ValueError: The 'dialect' option is not supported def test_dialect_str(all_parsers): dialect_name = "mydialect" parser = all_parsers @@ -79,6 +84,7 @@ class InvalidDialect: parser.read_csv(StringIO(data), dialect=InvalidDialect) +@xfail_pyarrow # ValueError: The 'dialect' option is not supported @pytest.mark.parametrize( "arg", [None, "doublequote", "escapechar", "skipinitialspace", "quotechar", "quoting"], @@ -118,6 +124,7 @@ def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, val tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'dialect' option is not supported @pytest.mark.parametrize( "kwargs,warning_klass", [ diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index c5b757d619e7a..da9b9bddd30cd 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -13,15 +13,17 @@ from pandas import DataFrame import pandas._testing as tm +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") + # We'll probably always skip these for pyarrow # Maybe we'll add our own tests for pyarrow too pytestmark = [ pytest.mark.single_cpu, pytest.mark.slow, - pytest.mark.usefixtures("pyarrow_skip"), ] +@xfail_pyarrow # ValueError: Found non-unique column index def test_multi_thread_string_io_read_csv(all_parsers): # see gh-11786 parser = all_parsers @@ -116,6 +118,7 @@ def reader(arg): return final_dataframe +@xfail_pyarrow # ValueError: The 'nrows' option is not supported def test_multi_thread_path_multipart_read_csv(all_parsers): # see gh-11786 num_tasks = 4 diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index b8b05af609aa2..a677d9caa4b19 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -14,7 +14,10 @@ from pandas import DataFrame import pandas._testing as tm -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize( @@ -28,6 +31,7 @@ ({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'), ], ) +@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block def test_bad_quote_char(all_parsers, kwargs, msg): data = "1,2,3" parser = all_parsers @@ -43,6 +47,7 @@ def test_bad_quote_char(all_parsers, kwargs, msg): (10, 'bad "quoting" value'), # quoting must be in the range [0, 3] ], ) +@xfail_pyarrow # ValueError: The 'quoting' option is not supported def test_bad_quoting(all_parsers, quoting, msg): data = "1,2,3" parser = all_parsers @@ -60,6 +65,7 @@ def test_quote_char_basic(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'quoting' option is not supported @pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"]) def test_quote_char_various(all_parsers, quote_char): parser = all_parsers @@ -72,6 +78,7 @@ def test_quote_char_various(all_parsers, quote_char): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'quoting' option is not supported @pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE]) @pytest.mark.parametrize("quote_char", ["", None]) def test_null_quote_char(all_parsers, quoting, quote_char): @@ -112,6 +119,7 @@ def test_null_quote_char(all_parsers, quoting, quote_char): ({"quotechar": '"', "quoting": csv.QUOTE_NONNUMERIC}, [[1.0, 2.0, "foo"]]), ], ) +@xfail_pyarrow # ValueError: The 'quoting' option is not supported def test_quoting_various(all_parsers, kwargs, exp_data): data = '1,2,"foo"' parser = all_parsers @@ -125,10 +133,14 @@ def test_quoting_various(all_parsers, kwargs, exp_data): @pytest.mark.parametrize( "doublequote,exp_data", [(True, [[3, '4 " 5']]), (False, [[3, '4 " 5"']])] ) -def test_double_quote(all_parsers, doublequote, exp_data): +def test_double_quote(all_parsers, doublequote, exp_data, request): parser = all_parsers data = 'a,b\n3,"4 "" 5"' + if parser.engine == "pyarrow" and not doublequote: + mark = pytest.mark.xfail(reason="Mismatched result") + request.applymarker(mark) + result = parser.read_csv(StringIO(data), quotechar='"', doublequote=doublequote) expected = DataFrame(exp_data, columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -146,11 +158,15 @@ def test_quotechar_unicode(all_parsers, quotechar): @pytest.mark.parametrize("balanced", [True, False]) -def test_unbalanced_quoting(all_parsers, balanced): +def test_unbalanced_quoting(all_parsers, balanced, request): # see gh-22789. parser = all_parsers data = 'a,b,c\n1,2,"3' + if parser.engine == "pyarrow" and not balanced: + mark = pytest.mark.xfail(reason="Mismatched result") + request.applymarker(mark) + if balanced: # Re-balance the quoting and read in without errors. expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 4b509edc36925..9146af3f969e6 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -17,10 +17,13 @@ ) import pandas._testing as tm -# XFAIL ME PLS once hanging tests issues identified -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +@xfail_pyarrow # ValueError: skiprows argument must be an integer @pytest.mark.parametrize("skiprows", [list(range(6)), 6]) def test_skip_rows_bug(all_parsers, skiprows): # see gh-505 @@ -48,6 +51,7 @@ def test_skip_rows_bug(all_parsers, skiprows): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_deep_skip_rows(all_parsers): # see gh-4382 parser = all_parsers @@ -63,6 +67,7 @@ def test_deep_skip_rows(all_parsers): tm.assert_frame_equal(result, condensed_result) +@xfail_pyarrow def test_skip_rows_blank(all_parsers): # see gh-9832 parser = all_parsers @@ -122,6 +127,7 @@ def test_skip_rows_blank(all_parsers): ), ], ) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_row_with_newline(all_parsers, data, kwargs, expected): # see gh-12775 and gh-10911 parser = all_parsers @@ -129,6 +135,7 @@ def test_skip_row_with_newline(all_parsers, data, kwargs, expected): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_row_with_quote(all_parsers): # see gh-12775 and gh-10911 parser = all_parsers @@ -170,6 +177,7 @@ def test_skip_row_with_quote(all_parsers): ), ], ) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data): # see gh-12775 and gh-10911 parser = all_parsers @@ -179,6 +187,7 @@ def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: The 'delim_whitespace' option is not supported @pytest.mark.parametrize( "lineterminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR" ) @@ -216,6 +225,7 @@ def test_skiprows_lineterminator(all_parsers, lineterminator, request): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_skiprows_infield_quote(all_parsers): # see gh-14459 parser = all_parsers @@ -226,6 +236,7 @@ def test_skiprows_infield_quote(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: skiprows argument must be an integer @pytest.mark.parametrize( "kwargs,expected", [ @@ -241,6 +252,7 @@ def test_skip_rows_callable(all_parsers, kwargs, expected): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_rows_callable_not_in(all_parsers): parser = all_parsers data = "0,a\n1,b\n2,c\n3,d\n4,e" @@ -252,6 +264,7 @@ def test_skip_rows_callable_not_in(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_rows_skip_all(all_parsers): parser = all_parsers data = "a\n1\n2\n3\n4\n5" @@ -261,6 +274,7 @@ def test_skip_rows_skip_all(all_parsers): parser.read_csv(StringIO(data), skiprows=lambda x: True) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_rows_bad_callable(all_parsers): msg = "by zero" parser = all_parsers @@ -270,6 +284,7 @@ def test_skip_rows_bad_callable(all_parsers): parser.read_csv(StringIO(data), skiprows=lambda x: 1 / 0) +@xfail_pyarrow # ValueError: skiprows argument must be an integer def test_skip_rows_and_n_rows(all_parsers): # GH#44021 data = """a,b diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index 069ac2a69d224..f223810772225 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -15,9 +15,15 @@ # TODO(1.4): Change these to xfails whenever parse_dates support(which was # intentionally disable to keep small PR sizes) is added back -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +# pytestmark = pytest.mark.usefixtures("pyarrow_skip") + +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" +) +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +@xfail_pyarrow # TypeError: expected bytes, int found @pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) def test_usecols_with_parse_dates(all_parsers, usecols): # see gh-9755 @@ -36,6 +42,7 @@ def test_usecols_with_parse_dates(all_parsers, usecols): tm.assert_frame_equal(result, expected) +@xfail_pyarrow # pyarrow.lib.ArrowKeyError: Column 'fdate' in include_columns def test_usecols_with_parse_dates2(all_parsers): # see gh-13604 parser = all_parsers @@ -131,13 +138,19 @@ def test_usecols_with_parse_dates4(all_parsers): list("acd"), # Names span only the selected columns. ], ) -def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names): +def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names, request): # see gh-9755 s = """0,1,2014-01-01,09:00,4 0,1,2014-01-02,10:00,4""" parse_dates = [[1, 2]] parser = all_parsers + if parser.engine == "pyarrow" and not (len(names) == 3 and usecols[0] == 0): + mark = pytest.mark.xfail( + reason="Length mismatch in some cases, UserWarning in other" + ) + request.applymarker(mark) + cols = { "a": [0, 0], "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],