Skip to content

Commit

Permalink
pandas vs cudf csv parser differences
Browse files Browse the repository at this point in the history
Bool literals give parsing errors when parsed as int.
"0" and "1" give parsing errors when parsed as bool in pandas.
  • Loading branch information
karthikeyann committed Nov 2, 2022
1 parent 99eedc0 commit dc228e5
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions python/cudf/cudf/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,17 +767,18 @@ def test_csv_reader_bools(tmpdir, names, dtypes, data, trues, falses):


def test_csv_reader_bools_custom():
names = ["text", "int"]
dtypes = ["str", "int"]
trues = ["foo"]
falses = ["bar"]
names = ["text", "bool"]
dtypes = {"text": "str", "bool": "bool"}
trues = ["foo", "1"]
falses = ["bar", "0"]
lines = [
",".join(names),
"true,true",
"false,false",
"foo,foo",
"bar,bar",
"0,0",
"1,1",
]
buffer = "\n".join(lines)

Expand All @@ -789,6 +790,9 @@ def test_csv_reader_bools_custom():
true_values=trues,
false_values=falses,
)

# Note: bool literals give parsing errors when parsed as int.
# "0" and "1" give parsing errors when parsed as bool in pandas.
expected = pd.read_csv(
StringIO(buffer),
names=names,
Expand Down

0 comments on commit dc228e5

Please sign in to comment.