Skip to content

Commit

Permalink
fread parse error (#2708)
Browse files Browse the repository at this point in the history
At `src/core/read/fread/fread_thread_context.cc:210`, if `j<ncols` and `tch` reaches the end of the line under the current quote rule, the while loop exits and the read ends in an IOError.

The code added in this PR fixes this by decrementing `tch` (`tch--`) back to the previous `sep` and trying to parse again with the next quote rule. If the current row has only one column, decrementing `tch` will never hit a `sep` in the current row and would run into the previous row; the `if` check inside the `while` loop prevents this.

Closes #2680
  • Loading branch information
pradkrish authored May 25, 2021
1 parent de8b0f8 commit fe64f42
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
15 changes: 15 additions & 0 deletions src/core/read/fread/fread_thread_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,21 @@ void FreadThreadContext::read_chunk(
}
parse_ctx_.target ++;
j++;
if (tch < parse_ctx_.eof && j < ncols && *tch=='\n' && sep!=' ') {
const char* prev_tch = tch;
while (*tch!=sep) {
tch--;
if (*tch=='\n') {
tch = prev_tch;
break;
}
}
if (*tch==sep) {
tch++;
++ptype_iter;
continue;
}
}
if (tch < parse_ctx_.eof && *tch==sep) { tch++; continue; }
if (fill && (tch == parse_ctx_.eof || *tch=='\n' || *tch=='\r') && j <= ncols) {
// All parsers have already stored NA to target; except for string
Expand Down
5 changes: 5 additions & 0 deletions tests/fread/test-fread-issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,11 @@ def test_issue2523():
with pytest.raises(IOError):
dt.fread("{\n \"cells\": [\n {\n\"import numpy \\n\",\n")

# Note: Realised after adding this test that it's similar to test_issue1036
def test_issue2680():
# Input: 500 plain tab-separated rows, followed by 500 rows whose middle
# field starts with a double-quoted phrase and then continues with more
# text after the closing quote.
src = '1\tWild Hogs (2007)\tAdevnture\n' * 500 + '2\t"Great Performances" Cats (1998)\tMusical\n' * 500
# The source is read with fill=True.
DT = dt.fread(src, fill=True)
# Row 900 lies in the second (quoted) half of the input; the quote
# characters are expected to be kept verbatim inside the string value.
assert DT.to_tuples()[900] == (2, '"Great Performances" Cats (1998)', 'Musical')

def test_issue934():
DT = dt.fread("A,B,C\n1,2,3\n3,4,5\n0,0,\"moo\n\n")
Expand Down
4 changes: 3 additions & 1 deletion tests/fread/test-fread-large.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ def test_h2o3_smalldata(f):
return
if "test_pubdev3589" in f:
params["sep"] = "\n"
if "single_quotes_mixed.csv" in f or "single_quotes_with_escaped_quotes.csv" in f:
if ("single_quotes_mixed.csv" in f or
"single_quotes_with_escaped_quotes.csv" in f or
"single_quotes_with_escaped_quotes_custom_escapechar.csv" in f):
params["quotechar"] = "'"
with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand Down

0 comments on commit fe64f42

Please sign in to comment.