From d75728fa6957d105ee8bf3dced3c5487ba4af09b Mon Sep 17 00:00:00 2001 From: arunsrinivasan Date: Sun, 11 Oct 2015 14:30:28 +0200 Subject: [PATCH] Closes #1384. fread handles empty lines with just spaces correctly. --- README.md | 2 ++ inst/tests/tests.Rraw | 5 +++++ src/fread.c | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 63153953a5..174d9d5573 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,8 @@ 12. `merge.data.table` always resets class to `c("data.table", "data.frame")` in result to be consistent with `merge.data.frame`, [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks @ladida771. + 13. `fread` now correctly reads text input containing an otherwise empty line that holds only spaces, e.g., fread('a,b\n1,2\n '), [#1384](https://github.com/Rdatatable/data.table/issues/1384). Thanks to @ladida771. + #### NOTES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 04925923fb..07692d74e0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7047,6 +7047,11 @@ test(1571, names(tstrsplit(X$a, "", fixed=TRUE, give.names=TRUE)), paste("V", 1: test(1572, fread('"abcd efgh." ijkl.\tmnop "qrst uvwx."\t45\n', quote=""), setDT(read.table(text='"abcd efgh." ijkl.\tmnop "qrst uvwx."\t45\n', sep="\t", stringsAsFactors=FALSE, quote=""))) +# Fix for #1384, fread with empty new line, initial checks failed due to extra spaces. +test(1573, fread('a,b + 1,2 + '), data.table(a=1L, b=2L)) + ########################## diff --git a/src/fread.c b/src/fread.c index 9d587270b9..696eed36ab 100644 --- a/src/fread.c +++ b/src/fread.c @@ -984,7 +984,7 @@ SEXP readfile(SEXP input, SEXP separg, SEXP nrowsarg, SEXP headerarg, SEXP nastr case 1: ch = pos + 1*(eof-pos)/3; str="+ middle"; break; case 2: ch = pos + 2*(eof-pos)/3; str="+ last"; break; // 2/3 way through rather than end ... 
easier } - if (j) { + if (j) { // we may have landed inside quoted field containing embedded sep and/or embedded \n // find next \n and see if 5 good lines follow. If not try next \n, and so on, until we find the real \n // We don't know which line number this is because we jumped straight to it @@ -1041,6 +1041,7 @@ SEXP readfile(SEXP input, SEXP separg, SEXP nrowsarg, SEXP headerarg, SEXP nastr } while (ch