Skip to content

Commit

Permalink
Merge pull request #9834 from evanpw/skip_rows_blank
Browse files Browse the repository at this point in the history
BUG: skiprows doesn't handle blank lines properly when engine='c'
  • Loading branch information
jreback committed Apr 8, 2015
2 parents a4ae0cf + e67893f commit 9e4e447
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 12 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,4 @@ Bug Fixes
- Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`)

- Fixed bug where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (:issue:`9671`)
- Bug in ``read_csv`` and ``read_table`` when using the ``skiprows`` parameter if blank lines are present (:issue:`9832`)
22 changes: 22 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,28 @@ def test_deep_skiprows(self):
condensed_data = self.read_csv(StringIO(condensed_text))
tm.assert_frame_equal(data, condensed_data)

def test_skiprows_blank(self):
    """Check that ``skiprows`` counts blank lines as skipped rows (GH 9832)."""
    # GH 9832
    # The two empty lines in the fixture are essential: ``skiprows=6`` has to
    # consume four '#foo' lines *and* two blank lines so that exactly the
    # three dated rows remain.  Without them, the first two data rows would
    # be skipped and the test would not exercise the blank-line handling.
    text = """#foo,a,b,c
#foo,a,b,c

#foo,a,b,c
#foo,a,b,c

1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
    data = self.read_csv(StringIO(text), skiprows=6, header=None,
                         index_col=0, parse_dates=True)

    # With header=None the remaining columns keep their positional labels
    # (1, 2, 3) and the index takes the label of the column it came from (0).
    expected = DataFrame(np.arange(1., 10.).reshape((3, 3)),
                         columns=[1, 2, 3],
                         index=[datetime(2000, 1, 1), datetime(2000, 1, 2),
                                datetime(2000, 1, 3)])
    expected.index.name = 0
    tm.assert_frame_equal(data, expected)

def test_detect_string_na(self):
data = """A,B
foo,bar
Expand Down
18 changes: 6 additions & 12 deletions pandas/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,11 +757,9 @@ int tokenize_delimited(parser_t *self, size_t line_limit)
case START_RECORD:
// start of record
if (skip_this_line(self, self->file_lines)) {
self->state = SKIP_LINE;
if (c == '\n') {
END_LINE()
}
else {
self->state = SKIP_LINE;
END_LINE();
}
break;
}
Expand Down Expand Up @@ -1093,11 +1091,9 @@ int tokenize_delim_customterm(parser_t *self, size_t line_limit)
case START_RECORD:
// start of record
if (skip_this_line(self, self->file_lines)) {
self->state = SKIP_LINE;
if (c == self->lineterminator) {
END_LINE()
}
else {
self->state = SKIP_LINE;
END_LINE();
}
break;
}
Expand Down Expand Up @@ -1391,11 +1387,9 @@ int tokenize_whitespace(parser_t *self, size_t line_limit)
case START_RECORD:
// start of record
if (skip_this_line(self, self->file_lines)) {
self->state = SKIP_LINE;
if (c == '\n') {
END_LINE()
}
else {
self->state = SKIP_LINE;
END_LINE();
}
break;
} else if (c == '\n') {
Expand Down

0 comments on commit 9e4e447

Please sign in to comment.