From 5184a8ac6761b88003b709c55ce7e90f6b523214 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 6 Feb 2023 11:09:29 +0000
Subject: [PATCH] Catch OverflowError in to_csv and provide useful advice

Closes #12690.
---
 python/cudf/cudf/_lib/csv.pyx     | 10 ++++++++--
 python/cudf/cudf/utils/ioutils.py |  5 ++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index eb6683aed31..dc856c14081 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -533,8 +533,14 @@ def write_csv(
         .build()
     )
 
-    with nogil:
-        cpp_write_csv(options)
+    try:
+        with nogil:
+            cpp_write_csv(options)
+    except OverflowError as e:
+        raise OverflowError(
+            f"Writing CSV file with chunksize={rows_per_chunk} failed. "
+            "Consider providing a smaller chunksize argument."
+        )
 
 
 cdef data_type _get_cudf_data_type_from_dtype(object dtype) except +:
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 56e2e539e01..924cc62fb15 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1245,7 +1245,10 @@
 Notes
 -----
 - Follows the standard of Pandas csv.QUOTE_NONNUMERIC for all output.
-- If `to_csv` leads to memory errors consider setting the `chunksize` argument.
+- By default all rows of the dataframe are written in a single chunk,
+  which can cause memory or overflow errors for large tables. If this
+  happens, set the ``chunksize`` argument (rows written per chunk) to
+  a reasonable fraction of the total number of rows in the dataframe.
 
 Examples
 --------