Skip to content

Commit

Permalink
BUG: wrong index name during read_csv if using usecols
Browse files Browse the repository at this point in the history
Closes #4201

If user passes usecols and not names, then ensure that the
inferred names refer to the used columns, not the document's
columns.
  • Loading branch information
guyrt committed Sep 27, 2013
1 parent 2b5e525 commit 4bef0e0
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ Bug Fixes
- Fixed wrong check for overlapping in ``DatetimeIndex.union`` (:issue:`4564`)
- Fixed conflict between thousands separator and date parser in csv_parser (:issue:`4678`)
- Fix appending when dtypes are not the same (error showing mixing float/np.datetime64) (:issue:`4993`)
- Fixed wrong index name during ``read_csv`` if using ``usecols``. Applies to the C parser only. (:issue:`4201`)

pandas 0.12.0
-------------
Expand Down
19 changes: 15 additions & 4 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Module contains tools for processing files into DataFrames or other objects
"""
from __future__ import print_function
from pandas.compat import range, lrange, StringIO, lzip, zip
from pandas.compat import range, lrange, StringIO, lzip, zip, string_types
from pandas import compat
import re
import csv
Expand All @@ -15,7 +15,6 @@
import datetime
import pandas.core.common as com
from pandas.core.config import get_option
from pandas import compat
from pandas.io.date_converters import generic_parser
from pandas.io.common import get_filepath_or_buffer

Expand All @@ -24,7 +23,7 @@
import pandas.lib as lib
import pandas.tslib as tslib
import pandas.parser as _parser
from pandas.tseries.period import Period


_parser_params = """Also supports optionally iterating or breaking of the file
into chunks.
Expand Down Expand Up @@ -982,7 +981,19 @@ def __init__(self, src, **kwds):
else:
self.names = lrange(self._reader.table_width)

# XXX
# If the names were inferred (not passed by user) and usecols is defined,
# then ensure names refer to the used columns, not the document's columns.
if self.usecols and passed_names:
col_indices = []
for u in self.usecols:
if isinstance(u, string_types):
col_indices.append(self.names.index(u))
else:
col_indices.append(u)
self.names = [n for i, n in enumerate(self.names) if i in col_indices]
if len(self.names) < len(self.usecols):
raise ValueError("Usecols do not match names.")

self._set_noconvert_columns()

self.orig_names = self.names
Expand Down
26 changes: 26 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1865,6 +1865,32 @@ def test_parse_integers_above_fp_precision(self):

self.assertTrue(np.array_equal(result['Numbers'], expected['Numbers']))

def test_usecols_index_col_conflict(self):
    """Issue 4201: ``index_col`` must be resolved against ``usecols``.

    The header row is ``SecId,Time,Price,P2,P3``; every case below reads
    only a subset of those columns and checks that the resulting index
    (and its name) comes from the selected columns.
    """
    data = """SecId,Time,Price,P2,P3
10000,2013-5-11,100,10,1
500,2013-5-12,101,11,1
"""

    # Single-column index: 'Time' is column 1 of the document but
    # position 0 among the used columns.
    dates = [datetime(2013, 5, 11), datetime(2013, 5, 12)]
    expected = DataFrame({'Price': [100, 101]}, index=dates)
    expected.index.name = 'Time'

    # (usecols, index_col) pairs: used columns given by label or by
    # position, index column given by position or by label.
    single_index_cases = [
        (['Time', 'Price'], 0),
        (['Time', 'Price'], 'Time'),
        ([1, 2], 'Time'),
        ([1, 2], 0),
    ]
    for used, idx in single_index_cases:
        df = pd.read_csv(StringIO(data), usecols=used,
                         parse_dates=True, index_col=idx)
        tm.assert_frame_equal(expected, df)

    # Multi-column index selected by label from the used columns.
    full = DataFrame({'P3': [1, 1], 'Price': (100, 101), 'P2': (10, 11)})
    expected = full.set_index(['Price', 'P2'])
    df = pd.read_csv(StringIO(data), usecols=['Price', 'P2', 'P3'],
                     parse_dates=True, index_col=['Price', 'P2'])
    tm.assert_frame_equal(expected, df)


class TestPythonParser(ParserTests, unittest.TestCase):

Expand Down

0 comments on commit 4bef0e0

Please sign in to comment.