Catch OverflowError in to_csv and provide useful advice
Closes #12690.
wence- committed Feb 6, 2023
1 parent 69b6c07 commit 5184a8a
Showing 2 changed files with 12 additions and 3 deletions.
10 changes: 8 additions & 2 deletions python/cudf/cudf/_lib/csv.pyx
@@ -533,8 +533,14 @@ def write_csv(
         .build()
     )
 
-    with nogil:
-        cpp_write_csv(options)
+    try:
+        with nogil:
+            cpp_write_csv(options)
+    except OverflowError as e:
+        raise OverflowError(
+            f"Writing CSV file with chunksize={rows_per_chunk} failed. "
+            "Consider providing a smaller chunksize argument."
+        )
 
 
 cdef data_type _get_cudf_data_type_from_dtype(object dtype) except +:
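For context, a minimal sketch of how the new behaviour looks from the Python API (the small DataFrame, the output path, and the retry chunk size are illustrative only; real overflows occur only for very large outputs, and chunksize is assumed to be forwarded to the Cython layer as rows_per_chunk, as the error message implies):

    import cudf

    df = cudf.DataFrame({"x": range(10)})  # stand-in for a much larger table

    try:
        # By default all rows are written in a single libcudf call.
        df.to_csv("out.csv", index=False)
    except OverflowError:
        # The new message suggests retrying with a smaller chunk size.
        df.to_csv("out.csv", index=False, chunksize=len(df) // 2)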
5 changes: 4 additions & 1 deletion python/cudf/cudf/utils/ioutils.py
@@ -1245,7 +1245,10 @@
 Notes
 -----
 - Follows the standard of Pandas csv.QUOTE_NONNUMERIC for all output.
-- If `to_csv` leads to memory errors consider setting the `chunksize` argument.
+- The default behaviour is to write all rows of the dataframe at once.
+  This can lead to memory or overflow errors for large tables. If this
+  happens, consider setting the ``chunksize`` argument to some
+  reasonable fraction of the total rows in the dataframe.
 
 Examples
 --------
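As a hedged illustration of the updated docstring advice (the divisor of 10 is arbitrary, and df and out.csv are placeholders):

    import cudf

    df = cudf.DataFrame({"a": range(1000), "b": range(1000)})

    # Write roughly a tenth of the rows per chunk instead of the whole
    # table at once; any reasonable fraction of the total rows works.
    chunksize = max(1, len(df) // 10)
    df.to_csv("out.csv", index=False, chunksize=chunksize)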
