Skip to content

Commit

Permalink
Add test for reading orc data with empty sum statistics
Browse files (browse the repository at this point in the history)
  • Loading branch information
ayushdg committed Sep 7, 2021
1 parent 77ae6cb commit f7758a1
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions python/cudf/cudf/tests/test_orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,3 +1203,24 @@ def test_names_in_struct_dtype_nesting(datadir):
edf = cudf.DataFrame(expect.to_pandas())
# test schema
assert edf.dtypes.equals(got.dtypes)


def test_statistics_sum_overflow():
    """Check the ``sum`` statistic reported for int64 columns near the limits.

    Two rows are written to an in-memory ORC file with three BigInt columns:

    * ``a``: int64 max followed by 1  (true total is above the int64 max)
    * ``b``: int64 min followed by -1 (true total is below the int64 min)
    * ``c``: int64 min followed by 1  (true total is int64 min + 1)

    For ``a`` and ``b`` the test expects ``sum`` to be ``None``; for ``c`` it
    expects the exact total. The same expectations are applied to the first
    entry of both the file-level and the stripe-level statistics.
    """
    int64_limits = np.iinfo(np.int64)
    int64_max, int64_min = int64_limits.max, int64_limits.min

    orc_buffer = BytesIO()
    schema = po.Struct(a=po.BigInt(), b=po.BigInt(), c=po.BigInt())
    with po.Writer(orc_buffer, schema) as orc_writer:
        for row in ((int64_max, int64_min, int64_min), (1, -1, 1)):
            orc_writer.write(row)

    file_level, stripe_level = cudf.io.orc.read_orc_statistics([orc_buffer])

    # File-level statistics are checked first, then stripe-level, with the
    # same three expectations for each.
    for stats in (file_level, stripe_level):
        first_entry = stats[0]
        assert first_entry["a"]["sum"] is None
        assert first_entry["b"]["sum"] is None
        assert first_entry["c"]["sum"] == int64_min + 1

0 comments on commit f7758a1

Please sign in to comment.