Skip to content
/ cudf Public
forked from rapidsai/cudf

Commit

Permalink
Deprecate line_terminator in to_csv
Browse files Browse the repository at this point in the history
Pandas 1.5 deprecated line_terminator in favour of lineterminator (to
align with read_csv), and removed it in 2.0. Align the cuDF API by
preferring lineterminator and providing a deprecation warning for use
of line_terminator. Closes rapidsai#12894.
  • Loading branch information
wence- committed Mar 7, 2023
1 parent 6d1f8e3 commit 0831da2
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 58 deletions.
14 changes: 7 additions & 7 deletions python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

import sys
from io import StringIO
Expand Down Expand Up @@ -54,12 +54,12 @@ def csv_writer_test(pdf):
],
"columns": ALL_POSSIBLE_VALUES,
"index": [True, False],
"line_terminator": ["\n", "\r", "\r\n"],
"lineterminator": ["\n", "\r", "\r\n"],
"chunksize": ALL_POSSIBLE_VALUES,
},
)
def csv_writer_test_params(
pdf, sep, header, na_rep, columns, index, line_terminator, chunksize
pdf, sep, header, na_rep, columns, index, lineterminator, chunksize
):
gdf = cudf.from_pandas(pdf)

Expand All @@ -69,7 +69,7 @@ def csv_writer_test_params(
na_rep=na_rep,
columns=columns,
index=index,
line_terminator=line_terminator,
lineterminator=lineterminator,
chunksize=chunksize,
)
gd_buffer = gdf.to_csv(
Expand All @@ -78,7 +78,7 @@ def csv_writer_test_params(
na_rep=na_rep,
columns=columns,
index=index,
line_terminator=line_terminator,
lineterminator=lineterminator,
chunksize=chunksize,
)

Expand All @@ -90,13 +90,13 @@ def csv_writer_test_params(
StringIO(gd_buffer),
delimiter=sep,
na_values=na_rep,
lineterminator=line_terminator,
lineterminator=lineterminator,
)
expected = pd.read_csv(
StringIO(pd_buffer),
delimiter=sep,
na_values=na_rep,
lineterminator=line_terminator,
lineterminator=lineterminator,
)
if not header:
# TODO: Remove renaming columns once the following bug is fixed:
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def write_csv(
object sep=",",
object na_rep="",
bool header=True,
object line_terminator="\n",
object lineterminator="\n",
int rows_per_chunk=8,
bool index=True,
):
Expand All @@ -488,7 +488,7 @@ def write_csv(
)
cdef bool include_header_c = header
cdef char delim_c = ord(sep)
cdef string line_term_c = line_terminator.encode()
cdef string line_term_c = lineterminator.encode()
cdef string na_c = na_rep.encode()
cdef int rows_per_chunk_c = rows_per_chunk
cdef vector[string] col_names
Expand Down
27 changes: 20 additions & 7 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import inspect
import itertools
import numbers
import os
import pickle
import re
import sys
Expand Down Expand Up @@ -604,7 +605,6 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin):
def __init__(
self, data=None, index=None, columns=None, dtype=None, nan_as_null=True
):

super().__init__()

if isinstance(columns, (Series, cudf.BaseIndex)):
Expand Down Expand Up @@ -918,7 +918,7 @@ def _init_from_dict_like(

if len(data):
self._data.multiindex = True
for (i, col_name) in enumerate(data):
for i, col_name in enumerate(data):
self._data.multiindex = self._data.multiindex and isinstance(
col_name, tuple
)
Expand Down Expand Up @@ -1199,7 +1199,6 @@ def __setitem__(self, arg, value):
if is_scalar(value):
self._data[col_name][scatter_map] = value
else:

self._data[col_name][scatter_map] = column.as_column(
value
)[scatter_map]
Expand Down Expand Up @@ -5445,7 +5444,6 @@ def interpolate(
downcast=None,
**kwargs,
):

if all(dt == np.dtype("object") for dt in self.dtypes):
raise TypeError(
"Cannot interpolate with all object-dtype "
Expand Down Expand Up @@ -6358,13 +6356,29 @@ def to_csv(
index=True,
encoding=None,
compression=None,
line_terminator="\n",
lineterminator=None,
line_terminator=None,
chunksize=None,
storage_options=None,
):
"""{docstring}"""
from cudf.io import csv

if line_terminator is not None:
warnings.warn(
"line_terminator is a deprecated keyword argument, "
"use lineterminator instead.",
FutureWarning,
)
if lineterminator is not None:
warnings.warn(
f"Ignoring {line_terminator=} in favour "
f"of {lineterminator=}"
)
else:
lineterminator = line_terminator
if lineterminator is None:
lineterminator = os.linesep
return csv.to_csv(
self,
path_or_buf=path_or_buf,
Expand All @@ -6373,7 +6387,7 @@ def to_csv(
columns=columns,
header=header,
index=index,
line_terminator=line_terminator,
lineterminator=lineterminator,
chunksize=chunksize,
encoding=encoding,
compression=compression,
Expand Down Expand Up @@ -6738,7 +6752,6 @@ def append(
current_cols = self._data.to_pandas_index()
combined_columns = other.index.to_pandas()
if len(current_cols):

if cudf.utils.dtypes.is_mixed_with_object_dtype(
current_cols, combined_columns
):
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/io/csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
# Copyright (c) 2018-2023, NVIDIA CORPORATION.

from collections import abc
from io import BytesIO, StringIO
Expand Down Expand Up @@ -155,7 +155,7 @@ def to_csv(
index=True,
encoding=None,
compression=None,
line_terminator="\n",
lineterminator="\n",
chunksize=None,
storage_options=None,
):
Expand Down Expand Up @@ -233,7 +233,7 @@ def to_csv(
sep=sep,
na_rep=na_rep,
header=header,
line_terminator=line_terminator,
lineterminator=lineterminator,
rows_per_chunk=rows_per_chunk,
index=index,
)
Expand All @@ -244,7 +244,7 @@ def to_csv(
sep=sep,
na_rep=na_rep,
header=header,
line_terminator=line_terminator,
lineterminator=lineterminator,
rows_per_chunk=rows_per_chunk,
index=index,
)
Expand Down
Loading

0 comments on commit 0831da2

Please sign in to comment.