diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index eb6683aed31..dc856c14081 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -533,8 +533,14 @@ def write_csv(
         .build()
     )
 
-    with nogil:
-        cpp_write_csv(options)
+    try:
+        with nogil:
+            cpp_write_csv(options)
+    except OverflowError:
+        raise OverflowError(
+            f"Writing CSV file with chunksize={rows_per_chunk} failed. "
+            "Consider providing a smaller chunksize argument."
+        )
 
 
 cdef data_type _get_cudf_data_type_from_dtype(object dtype) except +:
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 56e2e539e01..924cc62fb15 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1245,7 +1245,10 @@
 Notes
 -----
 - Follows the standard of Pandas csv.QUOTE_NONNUMERIC for all output.
-- If `to_csv` leads to memory errors consider setting the `chunksize` argument.
+- The default behaviour is to write all rows of the dataframe at once.
+  This can lead to memory or overflow errors for large tables. If this
+  happens, consider setting the ``chunksize`` argument to some
+  reasonable fraction of the total rows in the dataframe.
 
 Examples
 --------
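
For context, a minimal sketch (not part of the patch) of how the new error surfaces through the public API. It assumes cuDF's `DataFrame.to_csv`, whose `chunksize` keyword is the public counterpart of the internal `rows_per_chunk` argument patched above; the row count at which the overflow actually occurs depends on the data, so the frame below is only illustrative.

    import cudf
    import numpy as np

    # Illustrative frame; real overflows need far larger tables.
    df = cudf.DataFrame({"a": np.arange(1_000_000), "b": np.arange(1_000_000)})

    try:
        # Default path: the whole table is written as a single chunk,
        # which can overflow libcudf's internal offsets for large frames.
        df.to_csv("out.csv")
    except OverflowError:
        # With this patch the error names the failing chunksize and
        # suggests the remedy: retry with a fraction of the total rows.
        df.to_csv("out.csv", chunksize=len(df) // 10)

The retry is illustrative; the substantive change is that users now see which argument to adjust instead of a bare OverflowError propagating from the Cython layer.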