From 81494a977138ea481cd5236440b55f9bb6edc620 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan
Date: Mon, 5 Jul 2021 14:14:21 +0530
Subject: [PATCH] add unit test for repeated column names

---
Notes (ignored by git am; not part of the commit message):
  The hunk below adds test_csv_reader_repeated_column_name. The test reads
  one CSV buffer whose header repeats the name "A" and also contains the
  literal names "A.1", "A.2" and "A.4", once with pd.read_csv and once with
  cudf.read_csv, then compares the two resulting `.columns` with assert_eq.
  Only the column labels are compared, not the cell values.

 python/cudf/cudf/tests/test_csv.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index c19fde8b5d6..5511a65d0a4 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -1261,6 +1261,17 @@ def test_csv_reader_column_names(names):
     assert list(df) == list(names)
 
 
+def test_csv_reader_repeated_column_name():
+    buffer = """A,A,A.1,A,A.2,A,A.4,A,A
+                1,2,3.1,4,a.2,a,a.4,a,a
+                2,4,6.1,8,b.2,b,b.4,b,b"""
+
+    # pandas and cudf to have same repeated column names
+    pdf = pd.read_csv(StringIO(buffer))
+    gdf = cudf.read_csv(StringIO(buffer))
+    assert_eq(pdf.columns, gdf.columns)
+
+
 def test_csv_reader_bools_false_positives(tmpdir):
     # values that are equal to ["True", "TRUE", "False", "FALSE"]
     # when using ints to detect bool values