Skip to content

Commit

Permalink
BUG: pandas-dev#7757 Fix CSV parsing of singleton list header
Browse files Browse the repository at this point in the history
Write test for header with len 1 that fails

Fix header list manipulation causing problems with singleton lists
  • Loading branch information
Carter Green committed Jul 27, 2017
1 parent f9a552d commit 4648a7c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 14 deletions.
31 changes: 19 additions & 12 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -535,18 +535,25 @@ cdef class TextReader:
self.parser_start = 0
self.header = []
else:
if isinstance(header, list) and len(header):
# need to artificially skip the final line
# which is still a header line
header = list(header)
header.append(header[-1] + 1)

self.parser.header_start = header[0]
self.parser.header_end = header[-1]
self.parser.header = header[0]
self.parser_start = header[-1] + 1
self.has_mi_columns = 1
self.header = header
if isinstance(header, list):
if len(header) > 1:
# need to artificially skip the final line
# which is still a header line
header = list(header)
header.append(header[-1] + 1)

self.parser.header_start = header[0]
self.parser.header_end = header[-1]
self.parser.header = header[0]
self.parser_start = header[-1] + 1
self.has_mi_columns = 1
self.header = header
else:
self.parser.header_start = header[0]
self.parser.header_end = header[0]
self.parser.header = header[0]
self.parser_start = header[0] + 1
self.header = header
else:
self.parser.header_start = header
self.parser.header_end = header
Expand Down
8 changes: 6 additions & 2 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2281,8 +2281,12 @@ def _infer_columns(self):

# we have a mi columns, so read an extra line
if isinstance(header, (list, tuple, np.ndarray)):
have_mi_columns = True
header = list(header) + [header[-1] + 1]
if len(header) > 1:
have_mi_columns = True
header = list(header) + [header[-1] + 1]
# Don't add second element to header if it's of length 1
else:
have_mi_columns = False
else:
have_mi_columns = False
header = [header]
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/io/parser/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,10 @@ def test_non_int_header(self):
self.read_csv(StringIO(data), sep=',', header=['a', 'b'])
with tm.assert_raises_regex(ValueError, msg):
self.read_csv(StringIO(data), sep=',', header='string_header')

def test_singleton_header(self):
    # GH 7757: a one-element ``header`` list should produce ordinary
    # flat column labels taken from that single row.
    raw = """a,b,c\n0,1,2\n1,2,3"""
    want = pd.DataFrame({'a': [0, 1], 'b': [1, 2], 'c': [2, 3]})
    got = self.read_csv(StringIO(raw), header=[0])
    tm.assert_frame_equal(got, want)

0 comments on commit 4648a7c

Please sign in to comment.