diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index d5166b76892..44581bbc184 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -193,16 +193,18 @@ bool OpenZipArchive(zip_archive_s *dst, const uint8_t *raw, size_t len) memset(dst, 0, sizeof(zip_archive_s)); // Find the end of central directory if (len >= sizeof(zip_eocd_s) + 2) { - for (size_t i = len - sizeof(zip_eocd_s) - 2; i + sizeof(zip_eocd_s) + 2 + 0xffff >= len; i--) { + for (ptrdiff_t i = len - sizeof(zip_eocd_s) - 2; + i + sizeof(zip_eocd_s) + 2 + 0xffff >= len && i >= 0; + i--) { const zip_eocd_s *eocd = reinterpret_cast<const zip_eocd_s *>(raw + i); if (eocd->sig == 0x06054b50 && eocd->disk_id == eocd->start_disk // multi-file archives not supported && eocd->num_entries == eocd->total_entries && eocd->cdir_size >= sizeof(zip_cdfh_s) * eocd->num_entries && eocd->cdir_offset < len && - i + *reinterpret_cast<const uint16_t *>(eocd + 1) <= len) { + i + *reinterpret_cast<const uint16_t *>(eocd + 1) <= static_cast<ptrdiff_t>(len)) { const zip_cdfh_s *cdfh = reinterpret_cast<const zip_cdfh_s *>(raw + eocd->cdir_offset); dst->eocd = eocd; - if (i >= sizeof(zip64_eocdl)) { + if (i >= static_cast<ptrdiff_t>(sizeof(zip64_eocdl))) { const zip64_eocdl *eocdl = reinterpret_cast<const zip64_eocdl *>(raw + i - sizeof(zip64_eocdl)); if (eocdl->sig == 0x07064b50) { dst->eocdl = eocdl; } diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 2bea6cc46bd..925369048cb 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -979,6 +979,22 @@ def test_csv_reader_filepath_or_buffer(tmpdir, path_or_buf, src): assert_eq(expect, got) +def test_small_zip(tmpdir): + df = pd.DataFrame( + { + "a": [1997] * 2, + "b": ["Ford"] * 2, + "c": ["Super, luxurious truck"] * 2, + } + ) + + fname = tmpdir.join("small_zip_file.zip") + df.to_csv(fname, index=False) + + got = cudf.read_csv(fname) + assert_eq(df, got) + + def test_csv_reader_carriage_return(tmpdir): rows = 1000 names = ["int_row", "int_double_row"]