From cc7c717796cab19ba0830283f5c4f4e596dcc298 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 22 Jun 2023 08:52:12 -0700 Subject: [PATCH] TST/CLN: use fixture for data path in all xml tests (#53790) TST/CLN: use fixture path for all xml tests --- pandas/tests/io/xml/conftest.py | 5 ++ pandas/tests/io/xml/test_to_xml.py | 64 +++++++++++--------------- pandas/tests/io/xml/test_xml_dtypes.py | 29 ++++++------ 3 files changed, 47 insertions(+), 51 deletions(-) diff --git a/pandas/tests/io/xml/conftest.py b/pandas/tests/io/xml/conftest.py index 53725ebef3616..510e22fb32e77 100644 --- a/pandas/tests/io/xml/conftest.py +++ b/pandas/tests/io/xml/conftest.py @@ -29,3 +29,8 @@ def kml_cta_rail_lines(xml_data_path): @pytest.fixture def xsl_flatten_doc(xml_data_path): return xml_data_path / "flatten_doc.xsl" + + +@pytest.fixture +def xsl_row_field_output(xml_data_path): + return xml_data_path / "row_field_output.xsl" diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 04194a68ed512..3a16e8c2b94ae 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -171,9 +171,8 @@ def parser(request): # FILE OUTPUT -def test_file_output_str_read(datapath, parser, from_file_expected): - filename = datapath("io", "data", "xml", "books.xml") - df_file = read_xml(filename, parser=parser) +def test_file_output_str_read(xml_books, parser, from_file_expected): + df_file = read_xml(xml_books, parser=parser) with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, parser=parser) @@ -185,9 +184,8 @@ def test_file_output_str_read(datapath, parser, from_file_expected): assert output == from_file_expected -def test_file_output_bytes_read(datapath, parser, from_file_expected): - filename = datapath("io", "data", "xml", "books.xml") - df_file = read_xml(filename, parser=parser) +def test_file_output_bytes_read(xml_books, parser, from_file_expected): + df_file = read_xml(xml_books, parser=parser) with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, parser=parser) @@ -199,9 +197,8 @@ def test_file_output_bytes_read(datapath, parser, from_file_expected): assert output == from_file_expected -def test_str_output(datapath, parser, from_file_expected): - filename = datapath("io", "data", "xml", "books.xml") - df_file = read_xml(filename, parser=parser) +def test_str_output(xml_books, parser, from_file_expected): + df_file = read_xml(xml_books, parser=parser) output = df_file.to_xml(parser=parser) output = equalize_decl(output) @@ -222,7 +219,7 @@ def test_wrong_file_path(parser, geom_df): # INDEX -def test_index_false(datapath, parser): +def test_index_false(xml_books, parser): expected = """\ @@ -249,8 +246,7 @@ def test_index_false(datapath, parser): """ - filename = datapath("io", "data", "xml", "books.xml") - df_file = read_xml(filename, parser=parser) + df_file = read_xml(xml_books, parser=parser) with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, index=False, parser=parser) @@ -262,7 +258,7 @@ def test_index_false(datapath, parser): assert output == expected -def test_index_false_rename_row_root(datapath, parser): +def test_index_false_rename_row_root(xml_books, parser): expected = """\ @@ -289,8 +285,7 @@ def test_index_false_rename_row_root(datapath, parser): """ - filename = datapath("io", "data", "xml", "books.xml") - df_file = read_xml(filename, parser=parser) + df_file = read_xml(xml_books, parser=parser) with tm.ensure_clean("test.xml") as path: df_file.to_xml( @@ -831,9 +826,8 @@ def test_namespace_prefix_and_default(parser, geom_df): """ -def test_encoding_option_str(datapath, parser): - filename = datapath("io", "data", "xml", "baby_names.xml") - df_file = read_xml(filename, parser=parser, encoding="ISO-8859-1").head(5) +def test_encoding_option_str(xml_baby_names, parser): + df_file = read_xml(xml_baby_names, parser=parser, encoding="ISO-8859-1").head(5) output = df_file.to_xml(encoding="ISO-8859-1", parser=parser) @@ -848,9 +842,8 @@ def test_encoding_option_str(datapath, parser): @td.skip_if_no("lxml") -def test_correct_encoding_file(datapath): - filename = datapath("io", "data", "xml", "baby_names.xml") - df_file = read_xml(filename, encoding="ISO-8859-1", parser="lxml") +def test_correct_encoding_file(xml_baby_names): + df_file = read_xml(xml_baby_names, encoding="ISO-8859-1", parser="lxml") with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, index=False, encoding="ISO-8859-1", parser="lxml") @@ -858,9 +851,8 @@ def test_correct_encoding_file(datapath): @td.skip_if_no("lxml") @pytest.mark.parametrize("encoding", ["UTF-8", "UTF-16", "ISO-8859-1"]) -def test_wrong_encoding_option_lxml(datapath, parser, encoding): - filename = datapath("io", "data", "xml", "baby_names.xml") - df_file = read_xml(filename, encoding="ISO-8859-1", parser="lxml") +def test_wrong_encoding_option_lxml(xml_baby_names, parser, encoding): + df_file = read_xml(xml_baby_names, encoding="ISO-8859-1", parser="lxml") with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, index=False, encoding=encoding, parser=parser) @@ -988,22 +980,22 @@ def test_unknown_parser(geom_df): @td.skip_if_no("lxml") -def test_stylesheet_file_like(datapath, mode, geom_df): - xsl = datapath("io", "data", "xml", "row_field_output.xsl") - - with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: +def test_stylesheet_file_like(xsl_row_field_output, mode, geom_df): + with open( + xsl_row_field_output, mode, encoding="utf-8" if mode == "r" else None + ) as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected @td.skip_if_no("lxml") -def test_stylesheet_io(datapath, mode, geom_df): - xsl_path = datapath("io", "data", "xml", "row_field_output.xsl") - +def test_stylesheet_io(xsl_row_field_output, mode, geom_df): # note: By default the bodies of untyped functions are not checked, # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl_path, mode, encoding="utf-8" if mode == "r" else None) as f: + with open( + xsl_row_field_output, mode, encoding="utf-8" if mode == "r" else None + ) as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1015,10 +1007,10 @@ def test_stylesheet_io(datapath, mode, geom_df): @td.skip_if_no("lxml") -def test_stylesheet_buffered_reader(datapath, mode, geom_df): - xsl = datapath("io", "data", "xml", "row_field_output.xsl") - - with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: +def test_stylesheet_buffered_reader(xsl_row_field_output, mode, geom_df): + with open( + xsl_row_field_output, mode, encoding="utf-8" if mode == "r" else None + ) as f: xsl_obj = f.read() output = geom_df.to_xml(stylesheet=xsl_obj) diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index d62b9fa27e264..911b540dbc380 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -194,12 +194,13 @@ def test_dtype_float(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_wrong_dtype(datapath, parser, iterparse): - filename = datapath("io", "data", "xml", "books.xml") +def test_wrong_dtype(xml_books, parser, iterparse): with pytest.raises( ValueError, match=('Unable to parse string "Everyday Italian" at position 0') ): - read_xml(filename, dtype={"title": "Int64"}, parser=parser, iterparse=iterparse) + read_xml( + xml_books, dtype={"title": "Int64"}, parser=parser, iterparse=iterparse + ) def test_both_dtype_converters(parser): @@ -279,25 +280,24 @@ def test_converters_date(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_wrong_converters_type(datapath, parser, iterparse): - filename = datapath("io", "data", "xml", "books.xml") +def test_wrong_converters_type(xml_books, parser, iterparse): with pytest.raises(TypeError, match=("Type converters must be a dict or subclass")): - read_xml(filename, converters={"year", str}, parser=parser, iterparse=iterparse) + read_xml( + xml_books, converters={"year", str}, parser=parser, iterparse=iterparse + ) -def test_callable_func_converters(datapath, parser, iterparse): - filename = datapath("io", "data", "xml", "books.xml") +def test_callable_func_converters(xml_books, parser, iterparse): with pytest.raises(TypeError, match=("'float' object is not callable")): read_xml( - filename, converters={"year": float()}, parser=parser, iterparse=iterparse + xml_books, converters={"year": float()}, parser=parser, iterparse=iterparse ) -def test_callable_str_converters(datapath, parser, iterparse): - filename = datapath("io", "data", "xml", "books.xml") +def test_callable_str_converters(xml_books, parser, iterparse): with pytest.raises(TypeError, match=("'str' object is not callable")): read_xml( - filename, converters={"year": "float"}, parser=parser, iterparse=iterparse + xml_books, converters={"year": "float"}, parser=parser, iterparse=iterparse ) @@ -471,9 +471,8 @@ def test_day_first_parse_dates(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_wrong_parse_dates_type(datapath, parser, iterparse): - filename = datapath("io", "data", "xml", "books.xml") +def test_wrong_parse_dates_type(xml_books, parser, iterparse): with pytest.raises( TypeError, match=("Only booleans, lists, and dictionaries are accepted") ): - read_xml(filename, parse_dates={"date"}, parser=parser, iterparse=iterparse) + read_xml(xml_books, parse_dates={"date"}, parser=parser, iterparse=iterparse)