From 95d2e832e6ba786d4d509d2ecd9660b6680712df Mon Sep 17 00:00:00 2001 From: rjzamora Date: Wed, 6 Oct 2021 08:18:54 -0700 Subject: [PATCH] address code review --- python/cudf/cudf/tests/test_gcs.py | 2 +- python/cudf/cudf/tests/test_s3.py | 22 ++++++---------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index 290402cfd25..db53529b22f 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -47,7 +47,7 @@ def mock_size(*args): # Since we are monkey-patching, we cannot use # use_python_file_object=True, because the pyarrow # `open_input_file` command will fail (since it doesn't - # use the monkey-pathced `open` definition) + # use the monkey-patched `open` definition) got = cudf.read_csv("gcs://{}".format(fpath), use_python_file_object=False) assert_eq(pdf, got) diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 050a5b2e5c2..added0887f1 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -168,12 +168,14 @@ def test_read_csv_arrow_nativefile(s3_base, s3so, pdf): @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) -def test_read_csv_byte_range(s3_base, s3so, pdf, bytes_per_thread): +@pytest.mark.parametrize("use_python_file_object", [True, False]) +def test_read_csv_byte_range( + s3_base, s3so, pdf, bytes_per_thread, use_python_file_object +): # Write to buffer fname = "test_csv_reader_byte_range.csv" bname = "csv" buffer = pdf.to_csv(index=False) - expect = pdf.iloc[-2:].reset_index(drop=True) # Use fsspec file object with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got = cudf.read_csv( "s3://{}/{}".format(bname, fname), storage_options=s3so, byte_range=(74, 73), bytes_per_thread=bytes_per_thread, header=None, names=["Integer", "Float", "Integer2", "String", "Boolean"], - use_python_file_object=False, + use_python_file_object=use_python_file_object, ) - assert_eq(expect, got) - # Use Arrow PythonFile object - with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): - got = cudf.read_csv( - "s3://{}/{}".format(bname, fname), - storage_options=s3so, - byte_range=(74, 73), - bytes_per_thread=bytes_per_thread, - header=None, - names=["Integer", "Float", "Integer2", "String", "Boolean"], - use_python_file_object=True, - ) - assert_eq(expect, got) + assert_eq(pdf.iloc[-2:].reset_index(drop=True), got) @pytest.mark.parametrize("chunksize", [None, 3])