From 77ae61049ca9dab114493875e7806068930c6df6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 18:32:42 -0800 Subject: [PATCH 1/2] CLN: remove py2-legacy UnicodeReader, UnicodeWriter --- pandas/io/common.py | 16 ---------------- pandas/io/formats/csvs.py | 7 ++----- pandas/io/parsers.py | 25 +++++++------------------ 3 files changed, 9 insertions(+), 39 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index a01011cd7d4e4..d8fb338dd519b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -2,7 +2,6 @@ import bz2 import codecs -import csv import gzip from io import BufferedIOBase, BytesIO import mmap @@ -17,9 +16,7 @@ List, Mapping, Optional, - TextIO, Tuple, - Type, Union, ) from urllib.parse import ( # noqa @@ -597,16 +594,3 @@ def next(self) -> bytes: def close(self): self.reader.close() - - -# Keeping these class for now because it provides a necessary convenience -# for "dropping" the "encoding" argument from our I/O arguments when -# creating a Unicode I/O object. 
-def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): - return csv.reader(f, dialect=dialect, **kwds) - - -def UnicodeWriter( - f: TextIO, dialect: Type[csv.Dialect] = csv.excel, encoding: str = "utf-8", **kwds -): - return csv.writer(f, dialect=dialect, **kwds) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index ae5d1d30bcddb..f25e2ff6fc4ad 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -22,7 +22,6 @@ from pandas.core.dtypes.missing import notna from pandas.io.common import ( - UnicodeWriter, _get_compression_method, _get_handle, _infer_compression, @@ -196,10 +195,8 @@ def save(self): escapechar=self.escapechar, quotechar=self.quotechar, ) - if self.encoding == "ascii": - self.writer = csvlib.writer(f, **writer_kwargs) - else: - self.writer = UnicodeWriter(f, encoding=self.encoding, **writer_kwargs) + # Note: self.encoding is irrelevant here + self.writer = csvlib.writer(f, **writer_kwargs) self._save() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a887a537a2201..6cd806f2e8a7e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -34,6 +34,7 @@ is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, + is_file_like, is_float, is_integer, is_integer_dtype, @@ -62,13 +63,11 @@ from pandas.io.common import ( _NA_VALUES, BaseIterator, - UnicodeReader, UTF8Recoder, _get_handle, _infer_compression, _validate_header_arg, get_filepath_or_buffer, - is_file_like, ) from pandas.io.date_converters import generic_parser @@ -2431,23 +2430,13 @@ class MyDialect(csv.Dialect): self.line_pos += 1 sniffed = csv.Sniffer().sniff(line) dia.delimiter = sniffed.delimiter - if self.encoding is not None: - self.buf.extend( - list( - UnicodeReader( - StringIO(line), dialect=dia, encoding=self.encoding - ) - ) - ) - else: - self.buf.extend(list(csv.reader(StringIO(line), dialect=dia))) - if self.encoding is not None: - reader = UnicodeReader( - f, dialect=dia, encoding=self.encoding, strict=True 
- ) - else: - reader = csv.reader(f, dialect=dia, strict=True) + # Note: self.encoding is irrelevant here + line_rdr = csv.reader(StringIO(line), dialect=dia) + self.buf.extend(list(line_rdr)) + + # Note: self.encoding is irrelevant here + reader = csv.reader(f, dialect=dia, strict=True) else: From 2512d8f1c988664830f99d3c2f9800d4c027c6f9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 08:12:07 -0800 Subject: [PATCH 2/2] remove unnecessary writer_kwargs --- pandas/io/formats/csvs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index f25e2ff6fc4ad..6a837f5a3a79d 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -5,7 +5,7 @@ import csv as csvlib from io import StringIO import os -from typing import Any, Dict, List +from typing import List import warnings from zipfile import ZipFile @@ -187,7 +187,9 @@ def save(self): close = True try: - writer_kwargs: Dict[str, Any] = dict( + # Note: self.encoding is irrelevant here + self.writer = csvlib.writer( + f, lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -195,8 +197,6 @@ def save(self): escapechar=self.escapechar, quotechar=self.quotechar, ) - # Note: self.encoding is irrelevant here - self.writer = csvlib.writer(f, **writer_kwargs) self._save()