diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 5146bd35dff30..1c4effc31833f 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -201,7 +201,8 @@ I/O
 ^^^
 
 - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
-
+- Bug in :func:`read_csv` in which passing a CSV with at least one very large (i.e. more than 2^31 - 1 bytes) column along with ``low_memory=False`` would cause an integer overflow. The result was an always-unsuccessful attempt to allocate an enormous buffer, followed by an "Out of memory" report (:issue:`16798`).
+- Bug in :func:`read_csv` in which some error paths assigned error messages to the internal tokenizer's ``error_msg`` field without first allocating the memory. When this happened as part of exception handling, it resulted in a double ``free`` and the program halted due to a ``SIGSEGV`` (:issue:`16798`).
 - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
 
 Plotting
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 88c695a3faf27..72e5fb7f12c7f 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -137,7 +137,7 @@ cdef extern from "parser/tokenizer.h":
         int64_t words_len
         int64_t words_cap
 
-        char *pword_start    # pointer to stream start of current field
+        char *pword_start        # pointer to stream start of current field
         int64_t word_start       # position start of current field
 
         int64_t *line_start      # position in words for start of line
@@ -177,9 +177,9 @@ cdef extern from "parser/tokenizer.h":
        # thousands separator (comma, period)
        char thousands

-        int header           # Boolean: 1: has header, 0: no header
-        int64_t header_start # header row start
-        int64_t header_end   # header row end
+        int header               # Boolean: 1: has header, 0: no header
+        int64_t header_start     # header row start
+        int64_t header_end       # header row end
 
         void *skipset
         PyObject *skipfunc
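
Note on the first new whatsnew entry above: the "more than 2^31 - 1 bytes" failure is the classic pattern of carrying a byte count in a 32-bit signed integer, which wraps negative once a single column grows past that limit and turns the subsequent allocation request into nonsense. The C snippet below is only an illustrative sketch of that arithmetic, not the actual code in pandas' tokenizer; the variable names are invented, and the behaviour of the narrowing cast assumes a typical two's-complement platform.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* A single CSV column slightly larger than 2^31 - 1 bytes. */
        size_t column_bytes = ((size_t)1 << 31) + 42;

        /* Buggy pattern: the byte count is carried in a 32-bit signed
         * integer, so it wraps negative and the later buffer allocation
         * request is meaningless (the allocation fails, "Out of memory"). */
        int32_t needed32 = (int32_t)(column_bytes + 1);

        /* Fixed pattern: carry the count in a 64-bit type end to end. */
        int64_t needed64 = (int64_t)(column_bytes + 1);

        printf("32-bit count: %" PRId32 "\n", needed32);  /* negative: overflowed */
        printf("64-bit count: %" PRId64 "\n", needed64);  /* 2147483691: correct  */
        return 0;
    }

The second entry describes the related memory-safety pitfall: pointing ``error_msg`` at storage that was never allocated for it, so that the cleanup run during exception handling ends up ``free``-ing the same memory twice, which is what produced the ``SIGSEGV``.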