From 5184a8ac6761b88003b709c55ce7e90f6b523214 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Mon, 6 Feb 2023 11:09:29 +0000
Subject: [PATCH] Catch OverflowError in to_csv and provide useful advice

Closes #12690.
---
Note: write_csv re-raises the caught OverflowError as a new OverflowError
that names the chunksize in use; the new error is raised "from" the caught
one so the original exception stays attached as its __cause__.

 python/cudf/cudf/_lib/csv.pyx     | 10 ++++++++--
 python/cudf/cudf/utils/ioutils.py |  5 ++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index eb6683aed31..dc856c14081 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -533,8 +533,14 @@ def write_csv(
         .build()
     )
 
-    with nogil:
-        cpp_write_csv(options)
+    try:
+        with nogil:
+            cpp_write_csv(options)
+    except OverflowError as e:
+        raise OverflowError(
+            f"Writing CSV file with chunksize={rows_per_chunk} failed. "
+            "Consider providing a smaller chunksize argument."
+        ) from e
 
 
 cdef data_type _get_cudf_data_type_from_dtype(object dtype) except +:
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 56e2e539e01..924cc62fb15 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1245,7 +1245,10 @@
 Notes
 -----
 - Follows the standard of Pandas csv.QUOTE_NONNUMERIC for all output.
-- If `to_csv` leads to memory errors consider setting the `chunksize` argument.
+- The default behaviour is to write all rows of the dataframe at once.
+  This can lead to memory or overflow errors for large tables. If this
+  happens, consider setting the ``chunksize`` argument to some
+  reasonable fraction of the total rows in the dataframe.
 
 Examples
 --------