Skip to content

Commit

Permalink
gzip_stream: workaround camlzip bug with 4GB+ inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
rr0gi committed Jun 14, 2024
1 parent efe0842 commit 1b2ef50
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
7 changes: 7 additions & 0 deletions dune
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
memory_gperftools
memory_jemalloc
test
test_gzip
test_httpev)
(preprocess
(per_module
Expand Down Expand Up @@ -70,6 +71,12 @@
(libraries devkit extlib extunix libevent ocamlnet_lite ounit2 unix yojson)
(modules test test_httpev))

; Standalone executable built from the single module test_gzip.
; uses 8GB+ RAM, so do not run as part of test suite
(executable
(name test_gzip)
(libraries devkit extlib)
(modules test_gzip))

; Attach ./test.exe to the runtest alias so that it is executed by `dune runtest`.
(rule
(alias runtest)
(action (run ./test.exe)))
9 changes: 6 additions & 3 deletions gzip_stream.ml
Original file line number Diff line number Diff line change
Expand Up @@ -188,15 +188,18 @@ let open_out ?(level = 6) oc =
out_crc = Int32.zero;
char_buffer = Bytes.create 1 }

let rec output oz buf pos len =
if pos < 0 || len < 0 || pos + len > Bytes.length buf then
(* Upper bound on the number of input bytes handed to a single
   [Zlib.deflate] call; larger writes are split into several calls.
   As the name says, this works around a 32-bit length limit in
   camlzip/zlib that breaks on 4GB+ inputs.

   The value is 2^31 where native ints are wide enough to hold it. With
   31/32-bit ints the literal product [2 * 1024 * 1024 * 1024] wraps around
   (to 0 with 31-bit ints), which would make every chunk empty and stall the
   writer, so [max_int] is used there instead: no buffer can be longer than
   that anyway, and it is still below the 32-bit limit. *)
let workaround_camlzip_zlib_uint32_limit =
  if Sys.int_size > 32 then 1 lsl 31 else max_int

let rec output oz buf pos full_len =
if pos < 0 || full_len < 0 || pos + full_len > Bytes.length buf then
invalid_arg "Gzip_stream.output";
(* If output buffer is full, flush it *)
if oz.out_avail = 0 then begin
ignore (IO.really_output oz.out_chan oz.out_buffer 0 oz.out_pos);
oz.out_pos <- 0;
oz.out_avail <- Bytes.length oz.out_buffer
end;
let len = min full_len workaround_camlzip_zlib_uint32_limit in
let (_, used_in, used_out) =
try
Zlib.deflate oz.out_stream buf pos len
Expand All @@ -208,7 +211,7 @@ let rec output oz buf pos len =
oz.out_avail <- oz.out_avail - used_out;
oz.out_size <- Int32.add oz.out_size (Int32.of_int used_in);
oz.out_crc <- Zlib.update_crc oz.out_crc buf pos used_in;
if used_in < len then output oz buf (pos + used_in) (len - used_in)
if used_in < full_len then output oz buf (pos + used_in) (full_len - used_in)

let output_char oz c =
Bytes.set oz.char_buffer 0 c;
Expand Down
23 changes: 23 additions & 0 deletions test_gzip.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
open ExtLib
open Devkit

(* 2^32 bytes (4 GiB). Despite the name this is one more than the largest
   unsigned 32-bit value, so [max_u32 - 1] is the true uint32 maximum. *)
let max_u32 = 1 lsl 32

(** [test len] round-trips a string of [len] ['a'] characters through
    [Gzip_io]: it is compressed into an in-memory string, decompressed
    again, and the result is checked for both length and content.
    The three sizes are logged via [Log.main].
    @raise Failure ["test <len> failed"] if the decompressed data differs
    from the input in length or in any character. *)
let test len =
  let fail () = failwith (Printf.sprintf "test %d failed" len) in
  let original = String.make len 'a' in
  let compressed =
    let gz_out = Gzip_io.output (IO.output_string ()) in
    IO.nwrite_string gz_out original;
    IO.close_out gz_out
  in
  let restored =
    let gz_in = Gzip_io.input (IO.input_string compressed) in
    let contents = IO.read_all gz_in in
    IO.close_in gz_in;
    contents
  in
  Memory.reclaim ();
  Log.main #info "original length %d compressed length %d uncompressed length %d" len (String.length compressed) (String.length restored);
  if String.length restored <> len then fail ();
  (* Disabled variant kept for reference: write the data to a file instead.
     let io = Gzip_io.output_ch (Out_channel.open_bin "tempfile.gz") in
     IO.nwrite_string io data;
     IO.close_out io; *)
  String.iter (fun ch -> if ch <> 'a' then fail ()) restored

(* Entry point: exercise tiny inputs, sizes around a 1024-byte boundary, and
   sizes immediately below, at and above [max_u32]. *)
let () =
  let sizes = [ 0; 1; 1023; 1024; 1025; max_u32 - 1; max_u32; max_u32 + 1 ] in
  List.iter test sizes

0 comments on commit 1b2ef50

Please sign in to comment.