Skip to content

Commit

Permalink
DEPR: pd.read_table
Browse files Browse the repository at this point in the history
- pd.read_table is deprecated and replaced by pd.read_csv.

- add whatsnew note

- change tests to assert that read_table emits the deprecation FutureWarning

- change DataFrame.from_csv to use pandas.read_csv instead of
  pandas.read_table

- change pandas.read_clipboard to use pandas.read_csv instead
  of pandas.read_table
  • Loading branch information
dahlbaek committed Jul 20, 2018
1 parent 1f6ddc4 commit c75f12a
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 43 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ Deprecations
- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`).
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
- :func:`pandas.read_table` is deprecated. Use :func:`pandas.read_csv` instead (:issue:`21948`)
-

.. _whatsnew_0240.prior_deprecations:
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,11 +1592,11 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
"for from_csv when changing your function calls",
FutureWarning, stacklevel=2)

from pandas.io.parsers import read_table
return read_table(path, header=header, sep=sep,
parse_dates=parse_dates, index_col=index_col,
encoding=encoding, tupleize_cols=tupleize_cols,
infer_datetime_format=infer_datetime_format)
from pandas.io.parsers import read_csv
return read_csv(path, header=header, sep=sep,
parse_dates=parse_dates, index_col=index_col,
encoding=encoding, tupleize_cols=tupleize_cols,
infer_datetime_format=infer_datetime_format)

def to_sparse(self, fill_value=None, kind='block'):
"""
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/clipboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
r"""
Read text from clipboard and pass to read_table. See read_table for the
Read text from clipboard and pass to read_csv. See read_csv for the
full argument list
Parameters
Expand All @@ -31,7 +31,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
'reading from clipboard only supports utf-8 encoding')

from pandas.io.clipboard import clipboard_get
from pandas.io.parsers import read_table
from pandas.io.parsers import read_csv
text = clipboard_get()

# try to decode (if needed on PY3)
Expand All @@ -51,7 +51,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
# that this came from excel and set 'sep' accordingly
lines = text[:10000].split('\n')[:-1][:10]

# Need to remove leading white space, since read_table
# Need to remove leading white space, since read_csv
# accepts:
# a b
# 0 1 2
Expand Down Expand Up @@ -80,7 +80,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
if kwargs.get('engine') == 'python' and PY2:
text = text.encode('utf-8')

return read_table(StringIO(text), sep=sep, **kwargs)
return read_csv(StringIO(text), sep=sep, **kwargs)


def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
Expand Down
31 changes: 26 additions & 5 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@
""" % (_parser_params % (_sep_doc.format(default="','"), _engine_doc))

_read_table_doc = """
.. deprecated:: 0.24.0
Use :func:`pandas.read_csv` instead, passing `sep='\\t'` if necessary.
Read general delimited file into DataFrame
%s
Expand Down Expand Up @@ -535,9 +539,13 @@ def _read(filepath_or_buffer, kwds):
}


def _make_parser_function(name, sep=','):
def _make_parser_function(name, default_sep=','):

default_sep = sep
# prepare read_table deprecation
if name == "read_table":
sep = False
else:
sep = default_sep

def parser_f(filepath_or_buffer,
sep=sep,
Expand Down Expand Up @@ -606,11 +614,24 @@ def parser_f(filepath_or_buffer,
memory_map=False,
float_precision=None):

# deprecate read_table
if name == "read_table":
if sep is False and delimiter is None:
warnings.warn("read_table is deprecated, use read_csv "
"instead, passing sep='\\t'.",
FutureWarning, stacklevel=2)
else:
warnings.warn("read_table is deprecated, use read_csv "
"instead.",
FutureWarning, stacklevel=2)
if sep is False:
sep = default_sep

# Alias sep -> delimiter.
if delimiter is None:
delimiter = sep

if delim_whitespace and delimiter is not default_sep:
if delim_whitespace and delimiter != default_sep:
raise ValueError("Specified a delimiter with both sep and"
" delim_whitespace=True; you can only"
" specify one.")
Expand Down Expand Up @@ -682,10 +703,10 @@ def parser_f(filepath_or_buffer,
return parser_f


read_csv = _make_parser_function('read_csv', sep=',')
read_csv = _make_parser_function('read_csv', default_sep=',')
read_csv = Appender(_read_csv_doc)(read_csv)

read_table = _make_parser_function('read_table', sep='\t')
read_table = _make_parser_function('read_table', default_sep='\t')
read_table = Appender(_read_table_doc)(read_table)


Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from pandas.io.parsers import read_table
from pandas.io.parsers import read_csv


@pytest.fixture
Expand All @@ -17,7 +17,7 @@ def jsonl_file(datapath):
@pytest.fixture
def salaries_table(datapath):
"""DataFrame with the salaries dataset"""
return read_table(datapath('io', 'parser', 'data', 'salaries.csv'))
return read_csv(datapath('io', 'parser', 'data', 'salaries.csv'), sep='\t')


@pytest.fixture
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import numpy as np
import pandas as pd
from pandas import (DataFrame, Series, Index, Timestamp, MultiIndex,
date_range, NaT, read_table)
date_range, NaT, read_csv)
from pandas.compat import (range, zip, lrange, StringIO, PY3,
u, lzip, is_platform_windows,
is_platform_32bit)
Expand Down Expand Up @@ -1225,8 +1225,8 @@ def test_to_string(self):
lines = result.split('\n')
header = lines[0].strip().split()
joined = '\n'.join(re.sub(r'\s+', ' ', x).strip() for x in lines[1:])
recons = read_table(StringIO(joined), names=header,
header=None, sep=' ')
recons = read_csv(StringIO(joined), names=header,
header=None, sep=' ')
tm.assert_series_equal(recons['B'], biggie['B'])
assert recons['A'].count() == biggie['A'].count()
assert (np.abs(recons['A'].dropna() -
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas import DataFrame
from pandas.io.parsers import read_csv, read_table
from pandas.io.parsers import read_csv
from pandas.compat import BytesIO, StringIO


Expand Down Expand Up @@ -44,7 +44,7 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
if mode != 'explicit':
compression = mode

url_table = read_table(url, compression=compression, engine=engine)
url_table = read_csv(url, sep='\t', compression=compression, engine=engine)
tm.assert_frame_equal(url_table, salaries_table)


Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/io/parser/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def read_table(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
kwds['low_memory'] = self.low_memory
return read_table(*args, **kwds)
with tm.assert_produces_warning(FutureWarning):
df = read_table(*args, **kwds)
return df


class TestCParserLowMemory(BaseParser, CParserTests):
Expand All @@ -88,7 +90,9 @@ def read_table(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
kwds['low_memory'] = True
return read_table(*args, **kwds)
with tm.assert_produces_warning(FutureWarning):
df = read_table(*args, **kwds)
return df


class TestPythonParser(BaseParser, PythonParserTests):
Expand All @@ -103,7 +107,9 @@ def read_csv(self, *args, **kwds):
def read_table(self, *args, **kwds):
kwds = kwds.copy()
kwds['engine'] = self.engine
return read_table(*args, **kwds)
with tm.assert_produces_warning(FutureWarning):
df = read_table(*args, **kwds)
return df


class TestUnsortedUsecols(object):
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/io/parser/test_unsupported.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from pandas.compat import StringIO
from pandas.errors import ParserError
from pandas.io.parsers import read_csv, read_table
from pandas.io.parsers import read_csv

import pytest

Expand Down Expand Up @@ -43,24 +43,24 @@ def test_c_engine(self):

# specify C engine with unsupported options (raise)
with tm.assert_raises_regex(ValueError, msg):
read_table(StringIO(data), engine='c',
sep=None, delim_whitespace=False)
read_csv(StringIO(data), engine='c',
sep=None, delim_whitespace=False)
with tm.assert_raises_regex(ValueError, msg):
read_table(StringIO(data), engine='c', sep=r'\s')
read_csv(StringIO(data), engine='c', sep=r'\s')
with tm.assert_raises_regex(ValueError, msg):
read_table(StringIO(data), engine='c', quotechar=chr(128))
read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128))
with tm.assert_raises_regex(ValueError, msg):
read_table(StringIO(data), engine='c', skipfooter=1)
read_csv(StringIO(data), engine='c', skipfooter=1)

# specify C-unsupported options without python-unsupported options
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), sep=None, delim_whitespace=False)
read_csv(StringIO(data), sep=None, delim_whitespace=False)
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), quotechar=chr(128))
read_csv(StringIO(data), sep=r'\s')
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), sep=r'\s')
read_csv(StringIO(data), sep='\t', quotechar=chr(128))
with tm.assert_produces_warning(parsers.ParserWarning):
read_table(StringIO(data), skipfooter=1)
read_csv(StringIO(data), skipfooter=1)

text = """ A B C D E
one two three four
Expand All @@ -70,9 +70,9 @@ def test_c_engine(self):
msg = 'Error tokenizing data'

with tm.assert_raises_regex(ParserError, msg):
read_table(StringIO(text), sep='\\s+')
read_csv(StringIO(text), sep='\\s+')
with tm.assert_raises_regex(ParserError, msg):
read_table(StringIO(text), engine='c', sep='\\s+')
read_csv(StringIO(text), engine='c', sep='\\s+')

msg = "Only length-1 thousands markers supported"
data = """A|B|C
Expand Down
27 changes: 25 additions & 2 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def test_iterator(self):

@pytest.mark.parametrize('reader, module, error_class, fn_ext', [
(pd.read_csv, 'os', FileNotFoundError, 'csv'),
(pd.read_table, 'os', FileNotFoundError, 'csv'),
(pd.read_fwf, 'os', FileNotFoundError, 'txt'),
(pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
(pd.read_feather, 'feather', Exception, 'feather'),
Expand All @@ -148,9 +147,16 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
with pytest.raises(error_class):
reader(path)

def test_read_non_existant_read_table(self):
pytest.importorskip('os')

path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv')
with pytest.raises(FileNotFoundError):
with tm.assert_produces_warning(FutureWarning):
pd.read_table(path)

@pytest.mark.parametrize('reader, module, path', [
(pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
(pd.read_table, 'os', ('io', 'data', 'iris.csv')),
(pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
(pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
(pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
Expand All @@ -169,6 +175,23 @@ def test_read_fspath_all(self, reader, module, path, datapath):
mypath = CustomFSPath(path)
result = reader(mypath)
expected = reader(path)

if path.endswith('.pickle'):
# categorical
tm.assert_categorical_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)

def test_read_fspath_all_read_table(self, datapath):
pytest.importorskip('os')
path = datapath('io', 'data', 'iris.csv')

mypath = CustomFSPath(path)
with tm.assert_produces_warning(FutureWarning):
result = pd.read_table(mypath)
with tm.assert_produces_warning(FutureWarning):
expected = pd.read_table(path)

if path.endswith('.pickle'):
# categorical
tm.assert_categorical_equal(result, expected)
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np

from pandas.core.index import Index, MultiIndex
from pandas import Panel, DataFrame, Series, notna, isna, Timestamp
from pandas import Panel, DataFrame, Series, notna, isna, Timestamp, read_csv

from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
import pandas.core.common as com
Expand Down Expand Up @@ -512,14 +512,13 @@ def f(x):
pytest.raises(com.SettingWithCopyError, f, result)

def test_xs_level_multiple(self):
from pandas import read_table
text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

df = read_table(StringIO(text), sep=r'\s+', engine='python')
df = read_csv(StringIO(text), sep=r'\s+', engine='python')

result = df.xs(('a', 4), level=['one', 'four'])
expected = df.xs('a').xs(4, level='four')
Expand Down Expand Up @@ -547,14 +546,13 @@ def f(x):
tm.assert_frame_equal(rs, xp)

def test_xs_level0(self):
from pandas import read_table
text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

df = read_table(StringIO(text), sep=r'\s+', engine='python')
df = read_csv(StringIO(text), sep=r'\s+', engine='python')

result = df.xs('a', level=0)
expected = df.xs('a')
Expand Down

0 comments on commit c75f12a

Please sign in to comment.