From bda40f805ace44df932571b4de93c3d79400993b Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Sun, 29 Jan 2017 00:41:09 -0300 Subject: [PATCH] Zlib: split into Adler32, CRC32, Flate, Gzip and Zlib types Crystal provides access to the Adler32, CRC32, DEFLATE, gzip and zlib algorithms/formats. It currently does so by binding to zlib (also known as libz). However, zlib is just an implementation detail: this shouldn't leak to type and method names because if we eventually decide to change the library used to implement these, or maybe implement stuff in pure Crystal, we'll be stuck with this name. So, here we split the contents of the Zlib module into: - Adler32, for the Adler32 checksum algorithm - CRC32, for the CRC32 checksum algorithm - Flate: for the DEFLATE compression format (RFC 1951), providing Reader and Writer types (Inflate and Deflate could also work, but Reader and Writer are more obvious and consistent.) Flate is also the name used by Go to provide the same functionality, so it will be familiar to some. - Gzip: for the gzip archive format (RFC 1952), which is just a small wrapper (header and checksum) around the DEFLATE format. Reader and Writer are provided, together with access to the first gzip header. - Zlib: for the zlib archive format (RFC 1950), which is just a small wrapper around the DEFLATE format too (here the format is also named the same as the C library, which brings a lot of confusion). Reader and Writer are provided. By doing this we also remove the need to know how to use the zlib C library, which requires users to provide a cryptic `windowBits` argument to choose the desired format. Finally, we rename HTTP::DeflateHandler to HTTP::CompressHandler because that's what it does: it compresses responses in either gzip or DEFLATE, but not always DEFLATE (so the name was misleading). All of this is a big breaking change, but should be easy to upgrade existing code and makes the standard library more consistent and organized. 
--- spec/std/adler32_spec.cr | 16 ++ spec/std/crc32_spec.cr | 16 ++ spec/std/flate/flate_spec.cr | 47 ++++++ spec/std/gzip/gzip_spec.cr | 46 ++++++ ...ndler_spec.cr => compress_handler_spec.cr} | 12 +- spec/std/http/server/server_spec.cr | 4 +- spec/std/zip/zip_spec.cr | 6 +- spec/std/zlib/deflate_spec.cr | 65 --------- spec/std/zlib/inflate_spec.cr | 69 --------- spec/std/zlib/reader_spec.cr | 69 +++++++++ spec/std/zlib/stress_spec.cr | 24 +-- spec/std/zlib/writer_spec.cr | 71 +++++++++ spec/std/zlib/zlib_spec.cr | 28 ---- src/adler32/adler32.cr | 27 ++++ src/crc32/crc32.cr | 27 ++++ src/docs_main.cr | 5 + src/flate/flate.cr | 33 +++++ src/flate/reader.cr | 138 ++++++++++++++++++ src/flate/writer.cr | 97 ++++++++++++ src/gzip/gzip.cr | 20 +++ src/gzip/header.cr | 103 +++++++++++++ src/gzip/reader.cr | 138 ++++++++++++++++++ src/gzip/writer.cr | 114 +++++++++++++++ src/http/common.cr | 7 +- src/http/server.cr | 2 +- ...deflate_handler.cr => compress_handler.cr} | 9 +- src/{zlib/lib_zlib.cr => lib_z/lib_z.cr} | 63 +------- src/zip/checksum_reader.cr | 9 +- src/zip/checksum_writer.cr | 6 +- src/zip/file_info.cr | 5 +- src/zip/reader.cr | 16 +- src/zip/writer.cr | 2 +- src/zip/zip.cr | 3 +- src/zlib.cr | 55 ------- src/zlib/deflate.cr | 132 ----------------- src/zlib/inflate.cr | 132 ----------------- src/zlib/reader.cr | 99 +++++++++++++ src/zlib/writer.cr | 120 +++++++++++++++ src/zlib/zlib.cr | 15 ++ 39 files changed, 1266 insertions(+), 584 deletions(-) create mode 100644 spec/std/adler32_spec.cr create mode 100644 spec/std/crc32_spec.cr create mode 100644 spec/std/flate/flate_spec.cr create mode 100644 spec/std/gzip/gzip_spec.cr rename spec/std/http/server/handlers/{deflate_handler_spec.cr => compress_handler_spec.cr} (89%) delete mode 100644 spec/std/zlib/deflate_spec.cr delete mode 100644 spec/std/zlib/inflate_spec.cr create mode 100644 spec/std/zlib/reader_spec.cr create mode 100644 spec/std/zlib/writer_spec.cr delete mode 100644 spec/std/zlib/zlib_spec.cr 
create mode 100644 src/adler32/adler32.cr create mode 100644 src/crc32/crc32.cr create mode 100644 src/flate/flate.cr create mode 100644 src/flate/reader.cr create mode 100644 src/flate/writer.cr create mode 100644 src/gzip/gzip.cr create mode 100644 src/gzip/header.cr create mode 100644 src/gzip/reader.cr create mode 100644 src/gzip/writer.cr rename src/http/server/handlers/{deflate_handler.cr => compress_handler.cr} (77%) rename src/{zlib/lib_zlib.cr => lib_z/lib_z.cr} (55%) delete mode 100644 src/zlib.cr delete mode 100644 src/zlib/deflate.cr delete mode 100644 src/zlib/inflate.cr create mode 100644 src/zlib/reader.cr create mode 100644 src/zlib/writer.cr create mode 100644 src/zlib/zlib.cr diff --git a/spec/std/adler32_spec.cr b/spec/std/adler32_spec.cr new file mode 100644 index 000000000000..450ee398d3d8 --- /dev/null +++ b/spec/std/adler32_spec.cr @@ -0,0 +1,16 @@ +require "spec" +require "adler32" + +describe Adler32 do + it "should be able to calculate adler32" do + adler = Adler32.checksum("foo").to_s(16) + adler.should eq("2820145") + end + + it "should be able to calculate adler32 combined" do + adler1 = Adler32.checksum("hello") + adler2 = Adler32.checksum(" world!") + combined = Adler32.combine(adler1, adler2, " world!".size) + Adler32.checksum("hello world!").should eq(combined) + end +end diff --git a/spec/std/crc32_spec.cr b/spec/std/crc32_spec.cr new file mode 100644 index 000000000000..cfc5ef1b0853 --- /dev/null +++ b/spec/std/crc32_spec.cr @@ -0,0 +1,16 @@ +require "spec" +require "crc32" + +describe CRC32 do + it "should be able to calculate crc32" do + crc = CRC32.checksum("foo").to_s(16) + crc.should eq("8c736521") + end + + it "should be able to calculate crc32 combined" do + crc1 = CRC32.checksum("hello") + crc2 = CRC32.checksum(" world!") + combined = CRC32.combine(crc1, crc2, " world!".size) + CRC32.checksum("hello world!").should eq(combined) + end +end diff --git a/spec/std/flate/flate_spec.cr b/spec/std/flate/flate_spec.cr new file 
mode 100644 index 000000000000..43984e8a0232 --- /dev/null +++ b/spec/std/flate/flate_spec.cr @@ -0,0 +1,47 @@ +require "spec" +require "flate" + +module Flate + describe Writer do + it "should be able to write" do + message = "this is a test string !!!!\n" + io = IO::Memory.new + writer = Writer.new(io) + writer.print message + writer.close + + io.rewind + reader = Reader.new(io) + reader.gets_to_end.should eq(message) + end + + it "can be closed without sync" do + io = IO::Memory.new + writer = Writer.new(io) + writer.close + writer.closed?.should be_true + io.closed?.should be_false + + expect_raises IO::Error, "closed stream" do + writer.print "a" + end + end + + it "can be closed with sync (1)" do + io = IO::Memory.new + writer = Writer.new(io, sync_close: true) + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + + it "can be closed with sync (2)" do + io = IO::Memory.new + writer = Writer.new(io) + writer.sync_close = true + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + end +end diff --git a/spec/std/gzip/gzip_spec.cr b/spec/std/gzip/gzip_spec.cr new file mode 100644 index 000000000000..750fd91a6c0f --- /dev/null +++ b/spec/std/gzip/gzip_spec.cr @@ -0,0 +1,46 @@ +require "spec" +require "gzip" + +describe Gzip do + it "writes and reads to memory" do + io = IO::Memory.new + + time = Time.new(2016, 1, 2) + os = 4_u8 + extra = Bytes[1, 2, 3] + name = "foo.txt" + comment = "some comment" + contents = "hello world" + + Gzip::Writer.open(io) do |gzip| + header = gzip.header + header.modification_time = time + header.os = os + header.extra = extra + header.name = name + header.comment = comment + + io.bytesize.should eq(0) + gzip.flush + io.bytesize.should_not eq(0) + + gzip.print contents + end + + io.rewind + + Gzip::Reader.open(io) do |gzip| + header = gzip.header.not_nil! 
+ header.modification_time.should eq(time) + header.os.should eq(os) + header.extra.should eq(extra) + header.name.should eq(name) + header.comment.should eq(comment) + + # Reading zero bytes is OK + gzip.read(Bytes.empty).should eq(0) + + gzip.gets_to_end.should eq(contents) + end + end +end diff --git a/spec/std/http/server/handlers/deflate_handler_spec.cr b/spec/std/http/server/handlers/compress_handler_spec.cr similarity index 89% rename from spec/std/http/server/handlers/deflate_handler_spec.cr rename to spec/std/http/server/handlers/compress_handler_spec.cr index 60e2765d045d..a676708c7181 100644 --- a/spec/std/http/server/handlers/deflate_handler_spec.cr +++ b/spec/std/http/server/handlers/compress_handler_spec.cr @@ -1,14 +1,14 @@ require "spec" require "http/server" -describe HTTP::DeflateHandler do +describe HTTP::CompressHandler do it "doesn't deflates if doesn't have 'deflate' in Accept-Encoding header" do io = IO::Memory.new request = HTTP::Request.new("GET", "/") response = HTTP::Server::Response.new(io) context = HTTP::Server::Context.new(request, response) - handler = HTTP::DeflateHandler.new + handler = HTTP::CompressHandler.new handler.next = HTTP::Handler::Proc.new do |ctx| ctx.response.print "Hello" end @@ -27,7 +27,7 @@ describe HTTP::DeflateHandler do response = HTTP::Server::Response.new(io) context = HTTP::Server::Context.new(request, response) - handler = HTTP::DeflateHandler.new + handler = HTTP::CompressHandler.new handler.next = HTTP::Handler::Proc.new do |ctx| ctx.response.print "Hello" end @@ -39,7 +39,7 @@ describe HTTP::DeflateHandler do body = response2.body io2 = IO::Memory.new - deflate = Zlib::Deflate.new(io2) + deflate = Flate::Writer.new(io2) deflate.print "Hello" deflate.close io2.rewind @@ -55,7 +55,7 @@ describe HTTP::DeflateHandler do response = HTTP::Server::Response.new(io) context = HTTP::Server::Context.new(request, response) - handler = HTTP::DeflateHandler.new + handler = HTTP::CompressHandler.new handler.next = 
HTTP::Handler::Proc.new do |ctx| ctx.response.print "Hello" end @@ -67,7 +67,7 @@ describe HTTP::DeflateHandler do body = response2.body io2 = IO::Memory.new - deflate = Zlib::Deflate.gzip(io2) + deflate = Gzip::Writer.new(io2) deflate.print "Hello" deflate.close io2.rewind diff --git a/spec/std/http/server/server_spec.cr b/spec/std/http/server/server_spec.cr index 682467c1fb80..236ce34ad21c 100644 --- a/spec/std/http/server/server_spec.cr +++ b/spec/std/http/server/server_spec.cr @@ -310,7 +310,7 @@ module HTTP server = Server.new("0.0.0.0", 0, [ ErrorHandler.new, LogHandler.new, - DeflateHandler.new, + CompressHandler.new, StaticFileHandler.new("."), ] ) @@ -329,7 +329,7 @@ module HTTP server = Server.new(0, [ ErrorHandler.new, LogHandler.new, - DeflateHandler.new, + CompressHandler.new, StaticFileHandler.new("."), ] ) diff --git a/spec/std/zip/zip_spec.cr b/spec/std/zip/zip_spec.cr index e9af41929b20..051c5cb12743 100644 --- a/spec/std/zip/zip_spec.cr +++ b/spec/std/zip/zip_spec.cr @@ -59,19 +59,19 @@ describe Zip do io = IO::Memory.new text = "contents of foo" - crc32 = Zlib.crc32(text) + crc32 = CRC32.checksum(text) Zip::Writer.open(io) do |zip| entry = Zip::Writer::Entry.new("foo.txt") entry.compression_method = Zip::CompressionMethod::STORED - entry.crc32 = crc32.to_u32 + entry.crc32 = crc32 entry.compressed_size = text.bytesize.to_u32 entry.uncompressed_size = text.bytesize.to_u32 zip.add entry, &.print(text) entry = Zip::Writer::Entry.new("bar.txt") entry.compression_method = Zip::CompressionMethod::STORED - entry.crc32 = crc32.to_u32 + entry.crc32 = crc32 entry.compressed_size = text.bytesize.to_u32 entry.uncompressed_size = text.bytesize.to_u32 zip.add entry, &.print(text) diff --git a/spec/std/zlib/deflate_spec.cr b/spec/std/zlib/deflate_spec.cr deleted file mode 100644 index 809cce556839..000000000000 --- a/spec/std/zlib/deflate_spec.cr +++ /dev/null @@ -1,65 +0,0 @@ -require "spec" -require "zlib" - -module Zlib - describe Deflate do - it "should be 
able to deflate" do - message = "this is a test string !!!!\n" - io = IO::Memory.new - deflate = Deflate.new(io) - deflate.print message - deflate.close - - io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq(message) - end - - it "can be closed without sync" do - io = IO::Memory.new - deflate = Deflate.new(io) - deflate.close - deflate.closed?.should be_true - io.closed?.should be_false - - expect_raises IO::Error, "closed stream" do - deflate.print "a" - end - end - - it "can be closed with sync (1)" do - io = IO::Memory.new - deflate = Deflate.new(io, sync_close: true) - deflate.close - deflate.closed?.should be_true - io.closed?.should be_true - end - - it "can be closed with sync (2)" do - io = IO::Memory.new - deflate = Deflate.new(io) - deflate.sync_close = true - deflate.close - deflate.closed?.should be_true - io.closed?.should be_true - end - - it "can be flushed" do - io = IO::Memory.new - deflate = Deflate.new(io) - - deflate.print "this" - io.to_slice.hexstring.should eq("789c") - - deflate.flush - (io.to_slice.hexstring.size > 4).should be_true - - deflate.print " is a test string !!!!\n" - deflate.close - - io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq("this is a test string !!!!\n") - end - end -end diff --git a/spec/std/zlib/inflate_spec.cr b/spec/std/zlib/inflate_spec.cr deleted file mode 100644 index b5e7d9153e6a..000000000000 --- a/spec/std/zlib/inflate_spec.cr +++ /dev/null @@ -1,69 +0,0 @@ -require "spec" -require "zlib" - -module Zlib - describe Inflate do - it "should be able to inflate" do - io = IO::Memory.new - "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| - io.write_byte match[0].to_u8(16) - end - io.rewind - - inflate = Inflate.new(io) - - str = String::Builder.build do |builder| - IO.copy(inflate, builder) - end - - str.should eq("this is a test string !!!!\n") - inflate.read(Bytes.new(10)).should eq(0) - end - - it "can be closed without sync" do 
- io = IO::Memory.new("") - inflate = Inflate.new(io) - inflate.close - inflate.closed?.should be_true - io.closed?.should be_false - - expect_raises IO::Error, "closed stream" do - inflate.gets - end - end - - it "can be closed with sync (1)" do - io = IO::Memory.new("") - inflate = Inflate.new(io, sync_close: true) - inflate.close - inflate.closed?.should be_true - io.closed?.should be_true - end - - it "can be closed with sync (2)" do - io = IO::Memory.new("") - inflate = Inflate.new(io) - inflate.sync_close = true - inflate.close - inflate.closed?.should be_true - io.closed?.should be_true - end - - it "should not inflate from empty stream" do - io = IO::Memory.new("") - inflate = Inflate.new(io) - inflate.read_byte.should be_nil - end - - it "should not freeze when reading empty slice" do - io = IO::Memory.new - "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| - io.write_byte match[0].to_u8(16) - end - io.rewind - inflate = Inflate.new(io) - slice = Bytes.new(0) - inflate.read(slice).should eq(0) - end - end -end diff --git a/spec/std/zlib/reader_spec.cr b/spec/std/zlib/reader_spec.cr new file mode 100644 index 000000000000..e5633616c830 --- /dev/null +++ b/spec/std/zlib/reader_spec.cr @@ -0,0 +1,69 @@ +require "spec" +require "zlib" + +module Zlib + describe Reader do + it "should be able to read" do + io = IO::Memory.new + "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| + io.write_byte match[0].to_u8(16) + end + io.rewind + + reader = Reader.new(io) + + str = String::Builder.build do |builder| + IO.copy(reader, builder) + end + + str.should eq("this is a test string !!!!\n") + reader.read(Bytes.new(10)).should eq(0) + end + + it "can be closed without sync" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io) + reader.close + reader.closed?.should be_true + io.closed?.should be_false + + expect_raises IO::Error, "closed stream" do + 
reader.gets + end + end + + it "can be closed with sync (1)" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io, sync_close: true) + reader.close + reader.closed?.should be_true + io.closed?.should be_true + end + + it "can be closed with sync (2)" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io) + reader.sync_close = true + reader.close + reader.closed?.should be_true + io.closed?.should be_true + end + + it "should not read from empty stream" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io) + reader.read_byte.should be_nil + end + + it "should not freeze when reading empty slice" do + io = IO::Memory.new + "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| + io.write_byte match[0].to_u8(16) + end + io.rewind + reader = Reader.new(io) + slice = Bytes.empty + reader.read(slice).should eq(0) + end + end +end diff --git a/spec/std/zlib/stress_spec.cr b/spec/std/zlib/stress_spec.cr index 9401d8191138..dd0005c8b55e 100644 --- a/spec/std/zlib/stress_spec.cr +++ b/spec/std/zlib/stress_spec.cr @@ -3,34 +3,34 @@ require "zlib" module Zlib describe Zlib do - it "inflate deflate should be inverse with random string" do + it "write read should be inverse with random string" do expected = String.build do |io| 1_000_000.times { rand(2000).to_i.to_s(32, io) } end io = IO::Memory.new - deflate = Deflate.new(io) - deflate.print expected - deflate.close + writer = Writer.new(io) + writer.print expected + writer.close io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq(expected) + reader = Reader.new(io) + reader.gets_to_end.should eq(expected) end - it "inflate deflate should be inverse (utf-8)" do + it "write read should be inverse (utf-8)" do expected = "日本さん語日本さん語" io = IO::Memory.new - deflate = Deflate.new(io) - deflate.print expected - deflate.close + writer = Writer.new(io) + writer.print expected + 
writer.close io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq(expected) + reader = Reader.new(io) + reader.gets_to_end.should eq(expected) end end end diff --git a/spec/std/zlib/writer_spec.cr b/spec/std/zlib/writer_spec.cr new file mode 100644 index 000000000000..ec3be134d662 --- /dev/null +++ b/spec/std/zlib/writer_spec.cr @@ -0,0 +1,71 @@ +require "spec" +require "zlib" + +module Zlib + describe Writer do + it "should be able to write" do + message = "this is a test string !!!!\n" + io = IO::Memory.new + + writer = Writer.new(io) + + io.bytesize.should eq(0) + writer.flush + io.bytesize.should_not eq(0) + + writer.print message + writer.close + + io.rewind + reader = Reader.new(io) + reader.gets_to_end.should eq(message) + end + + it "can be closed without sync" do + io = IO::Memory.new + writer = Writer.new(io) + writer.close + writer.closed?.should be_true + io.closed?.should be_false + + expect_raises IO::Error, "closed stream" do + writer.print "a" + end + end + + it "can be closed with sync (1)" do + io = IO::Memory.new + writer = Writer.new(io, sync_close: true) + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + + it "can be closed with sync (2)" do + io = IO::Memory.new + writer = Writer.new(io) + writer.sync_close = true + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + + it "can be flushed" do + io = IO::Memory.new + writer = Writer.new(io) + + writer.print "this" + io.to_slice.hexstring.should eq("789c") + + writer.flush + (io.to_slice.hexstring.size > 4).should be_true + + writer.print " is a test string !!!!\n" + writer.close + + io.rewind + reader = Reader.new(io) + reader.gets_to_end.should eq("this is a test string !!!!\n") + end + end +end diff --git a/spec/std/zlib/zlib_spec.cr b/spec/std/zlib/zlib_spec.cr deleted file mode 100644 index 8de3faec1d09..000000000000 --- a/spec/std/zlib/zlib_spec.cr +++ /dev/null @@ -1,28 +0,0 @@ -require "spec" -require "zlib" - 
-describe Zlib do - it "should be able to calculate adler32" do - adler = Zlib.adler32("foo").to_s(16) - adler.should eq("2820145") - end - - it "should be able to calculate adler32 combined" do - adler1 = Zlib.adler32("hello") - adler2 = Zlib.adler32(" world!") - combined = Zlib.adler32_combine(adler1, adler2, " world!".size) - Zlib.adler32("hello world!").should eq(combined) - end - - it "should be able to calculate crc32" do - crc = Zlib.crc32("foo").to_s(16) - crc.should eq("8c736521") - end - - it "should be able to calculate crc32 combined" do - crc1 = Zlib.crc32("hello") - crc2 = Zlib.crc32(" world!") - combined = Zlib.crc32_combine(crc1, crc2, " world!".size) - Zlib.crc32("hello world!").should eq(combined) - end -end diff --git a/src/adler32/adler32.cr b/src/adler32/adler32.cr new file mode 100644 index 000000000000..7bca2d16ecdd --- /dev/null +++ b/src/adler32/adler32.cr @@ -0,0 +1,27 @@ +require "lib_z" + +module Adler32 + def self.initial : UInt32 + LibZ.adler32(0, nil, 0).to_u32 + end + + def self.checksum(slice : Bytes) : UInt32 + update(slice, initial) + end + + def self.checksum(string : String) : UInt32 + checksum(string.to_slice) + end + + def self.update(slice : Bytes, adler32 : UInt32) : UInt32 + LibZ.adler32(adler32, slice, slice.size).to_u32 + end + + def self.update(string : String, adler32 : UInt32) : UInt32 + update(string.to_slice, adler32) + end + + def self.combine(adler1 : UInt32, adler2 : UInt32, len) : UInt32 + LibZ.adler32_combine(adler1, adler2, len).to_u32 + end +end diff --git a/src/crc32/crc32.cr b/src/crc32/crc32.cr new file mode 100644 index 000000000000..89b1b4f49cbc --- /dev/null +++ b/src/crc32/crc32.cr @@ -0,0 +1,27 @@ +require "lib_z" + +module CRC32 + def self.initial : UInt32 + LibZ.crc32(0, nil, 0).to_u32 + end + + def self.checksum(slice : Bytes) : UInt32 + update(slice, initial) + end + + def self.checksum(string : String) : UInt32 + checksum(string.to_slice) + end + + def self.update(slice : Bytes, crc32 : UInt32) : 
UInt32 + LibZ.crc32(crc32, slice, slice.size).to_u32 + end + + def self.update(string : String, crc32 : UInt32) : UInt32 + update(string.to_slice, crc32) + end + + def self.combine(crc1 : UInt32, crc2 : UInt32, len) : UInt32 + LibZ.crc32_combine(crc1, crc2, len).to_u32 + end +end diff --git a/src/docs_main.cr b/src/docs_main.cr index e537cfb75d67..e1a42fa3abee 100644 --- a/src/docs_main.cr +++ b/src/docs_main.cr @@ -32,14 +32,18 @@ require "./thread" require "./xml" require "./yaml" require "./benchmark" +require "./adler32" require "./array" require "./bit_array" require "./box" require "./colorize" require "./complex" +require "./crc32" require "./deque" require "./dl" require "./file_utils" +require "./flate" +require "./gzip" require "./ini" require "./levenshtein" require "./option_parser" @@ -52,3 +56,4 @@ require "./string_scanner" require "./tempfile" require "./uri" require "./zip" +require "./zlib" diff --git a/src/flate/flate.cr b/src/flate/flate.cr new file mode 100644 index 000000000000..27550952cbf0 --- /dev/null +++ b/src/flate/flate.cr @@ -0,0 +1,33 @@ +require "lib_z" +require "./*" + +# The Flate module contains readers and writers of DEFLATE format compressed +# data, as specified in [RFC 1951](https://www.ietf.org/rfc/rfc1951.txt). +# +# See `Gzip`, `Zip` and `Zlib` for modules that provide access +# to DEFLATE-based file formats. 
+module Flate + NO_COMPRESSION = 0 + BEST_SPEED = 1 + BEST_COMPRESSION = 9 + DEFAULT_COMPRESSION = -1 + + enum Strategy + FILTERED = 1 + HUFFMAN_ONLY = 2 + RLE = 3 + FIXED = 4 + DEFAULT = 0 + end + + class Error < Exception + def initialize(ret, stream) + if msg = stream.msg + error_msg = String.new(msg) + super("flate: #{error_msg} #{ret}") + else + super("flate: #{ret}") + end + end + end +end diff --git a/src/flate/reader.cr b/src/flate/reader.cr new file mode 100644 index 000000000000..d3e44e6fe711 --- /dev/null +++ b/src/flate/reader.cr @@ -0,0 +1,138 @@ +# A read-only `IO` object to decompress data in the DEFLATE format. +# +# Instances of this class wrap another IO object. When you read from this instance +# instance, it reads data from the underlying IO, decompresses it, and returns +# it to the caller. +class Flate::Reader + include IO + + # If `#sync_close?` is `true`, closing this IO will close the underlying IO. + property? sync_close : Bool + + # Returns `true` if this reader is closed. + getter? closed = false + + # Peeked bytes from the underlying IO + @peek : Bytes? + + # Creates an instance of Flate::Reader. + def initialize(@io : IO, @sync_close : Bool = false, @dict : Bytes? = nil) + @buf = uninitialized UInt8[1] # input buffer used by zlib + @stream = LibZ::ZStream.new + @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } + @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } + ret = LibZ.inflateInit2(pointerof(@stream), -LibZ::MAX_BITS, LibZ.zlibVersion, sizeof(LibZ::ZStream)) + if ret != LibZ::Error::OK + raise Flate::Error.new(ret, @stream) + end + + @end = false + end + + # Creates an instance of Flate::Reader, yields it to the given block, and closes + # it at its end. + def self.new(input : IO, sync_close : Bool = false, dict : Bytes? 
= nil) + reader = new input, sync_close: sync_close, dict: dict + yield reader ensure reader.close + end + + # Creates an instance of Flate::Reader for the gzip format. + # has written. + def self.gzip(input, sync_close : Bool = false) : self + new input, wbits: GZIP, sync_close: sync_close + end + + # Creates an instance of Flate::Reader for the gzip format, yields it to the given block, and closes + # it at its end. + def self.gzip(input, sync_close : Bool = false) + reader = gzip input, sync_close: sync_close + yield reader ensure reader.close + end + + # Always raises `IO::Error` because this is a read-only `IO`. + def write(slice : Bytes) + raise IO::Error.new "can't write to Flate::Reader" + end + + # See `IO#read`. + def read(slice : Bytes) + check_open + + return 0 if slice.empty? + return 0 if @end + + while true + if @stream.avail_in == 0 + # Try to peek into the underlying IO, so we can feed more + # data into zlib + @peek = @io.peek + if peek = @peek + @stream.next_in = peek + @stream.avail_in = peek.size + else + # If peeking is not possible, we are cautious and + # read byte per byte to avoid reading more data beyond + # the compressed data (for example, if the compressed stream + # is part of a zip/gzip file). 
+ @stream.next_in = @buf.to_unsafe + @stream.avail_in = @io.read(@buf.to_slice).to_u32 + end + return 0 if @stream.avail_in == 0 + end + + old_avail_in = @stream.avail_in + + @stream.avail_out = slice.size.to_u32 + @stream.next_out = slice.to_unsafe + + ret = LibZ.inflate(pointerof(@stream), LibZ::Flush::NO_FLUSH) + read_bytes = slice.size - @stream.avail_out + + # If we were able to peek, skip the used bytes in the underlying IO + avail_in_diff = old_avail_in - @stream.avail_in + if @peek && avail_in_diff > 0 + @io.skip(avail_in_diff) + end + + case ret + when LibZ::Error::NEED_DICT + if dict = @dict + ret = LibZ.inflateSetDictionary(pointerof(@stream), dict, dict.size) + next if ret == LibZ::Error::OK + end + + raise Flate::Error.new(ret, @stream) + when LibZ::Error::DATA_ERROR, + LibZ::Error::MEM_ERROR + raise Flate::Error.new(ret, @stream) + when LibZ::Error::STREAM_END + @end = true + return read_bytes + else + # LibZ.inflate might not write any data to the output slice because + # it might need more input. We can know this happened because *ret* + # is not STREAM_END. + if read_bytes == 0 + next + else + return read_bytes + end + end + end + end + + # Closes this reader. + def close + return if @closed + @closed = true + + LibZ.inflateEnd(pointerof(@stream)) + + @io.close if @sync_close + end + + # :nodoc: + def inspect(io) + to_s(io) + end +end diff --git a/src/flate/writer.cr b/src/flate/writer.cr new file mode 100644 index 000000000000..7e9d8b76f844 --- /dev/null +++ b/src/flate/writer.cr @@ -0,0 +1,97 @@ +# A write-only `IO` object to compress data in the DEFLATE format. +# +# Instances of this class wrap another IO object. When you write to this +# instance, it compresses the data and writes it to the underlying IO. +# +# **Note**: unless created with a block, `close` must be invoked after all +# data has been written to a Flate::Writer instance. 
+class Flate::Writer + include IO + + # If `#sync_close?` is `true`, closing this IO will close the underlying IO. + property? sync_close : Bool + + # Creates an instance of Flate::Writer. `close` must be invoked after all data + # has written. + def initialize(@output : IO, level : Int32 = Flate::DEFAULT_COMPRESSION, + strategy : Flate::Strategy = Flate::Strategy::DEFAULT, + @sync_close : Bool = false, @dict : Bytes? = nil) + unless -1 <= level <= 9 + raise ArgumentError.new("invalid Flate level: #{level} (must be in -1..9)") + end + + @buf = uninitialized UInt8[8192] # output buffer used by zlib + @stream = LibZ::ZStream.new + @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } + @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } + @closed = false + ret = LibZ.deflateInit2(pointerof(@stream), level, LibZ::Z_DEFLATED, -LibZ::MAX_BITS, LibZ::DEF_MEM_LEVEL, + strategy.value, LibZ.zlibVersion, sizeof(LibZ::ZStream)) + if ret != LibZ::Error::OK + raise Flate::Error.new(ret, @stream) + end + end + + # Creates an instance of Flate::Writer, yields it to the given block, and closes + # it at its end. + def self.new(output : IO, level : Int32 = Flate::DEFAULT_COMPRESSION, + strategy : Flate::Strategy = Flate::Strategy::DEFAULT, + sync_close : Bool = false, dict : Bytes? = nil) + writer = new(output, level: level, strategy: strategy, sync_close: sync_close, dict: dict) + yield writer ensure writer.close + end + + # Always raises `IO::Error` because this is a write-only `IO`. + def read(slice : Bytes) + raise "can't read from Flate::Writer" + end + + # See `IO#write`. + def write(slice : Bytes) + check_open + + @stream.avail_in = slice.size + @stream.next_in = slice + consume_output LibZ::Flush::NO_FLUSH + end + + # See `IO#flush`. + def flush + return if @closed + + consume_output LibZ::Flush::SYNC_FLUSH + end + + # Closes this writer. Must be invoked after all data has been written. 
+ def close + return if @closed + @closed = true + + @stream.avail_in = 0 + @stream.next_in = Pointer(UInt8).null + consume_output LibZ::Flush::FINISH + LibZ.deflateEnd(pointerof(@stream)) + + @output.close if @sync_close + end + + # Returns `true` if this IO is closed. + def closed? + @closed + end + + # :nodoc: + def inspect(io) + to_s(io) + end + + private def consume_output(flush) + loop do + @stream.next_out = @buf.to_unsafe + @stream.avail_out = @buf.size.to_u32 + LibZ.deflate(pointerof(@stream), flush) # no bad return value + @output.write(@buf.to_slice[0, @buf.size - @stream.avail_out]) + break if @stream.avail_out != 0 + end + end +end diff --git a/src/gzip/gzip.cr b/src/gzip/gzip.cr new file mode 100644 index 000000000000..ff492d3080c7 --- /dev/null +++ b/src/gzip/gzip.cr @@ -0,0 +1,20 @@ +require "flate" +require "crc32" + +# The Gzip module contains readers and writers of gzip format compressed +# data, as specified in [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt). +module Gzip + NO_COMPRESSION = Flate::NO_COMPRESSION + BEST_SPEED = Flate::BEST_SPEED + BEST_COMPRESSION = Flate::BEST_COMPRESSION + DEFAULT_COMPRESSION = Flate::DEFAULT_COMPRESSION + + private ID1 = 0x1f_u8 + private ID2 = 0x8b_u8 + private DEFLATE = 8_u8 + + class Error < Exception + end +end + +require "./*" diff --git a/src/gzip/header.cr b/src/gzip/header.cr new file mode 100644 index 000000000000..25c399e707ec --- /dev/null +++ b/src/gzip/header.cr @@ -0,0 +1,103 @@ +# A header in a gzip stream. +class Gzip::Header + property modification_time : Time + property os : UInt8 + property extra = Bytes.empty + property name : String? + property comment : String? 
+ + # :nodoc: + @[Flags] + enum Flg : UInt8 + TEXT + HCRC + EXTRA + NAME + COMMENT + end + + # :nodoc: + def initialize + @modification_time = Time.new + @os = 255_u8 # Unknown + end + + # :nodoc: + def initialize(first_byte : UInt8, io : IO) + header = uninitialized UInt8[10] + header[0] = first_byte + io.read_fully(header.to_slice + 1) + + if header[0] != ID1 || header[1] != ID2 || header[2] != DEFLATE + raise Error.new("invalid gzip header") + end + + flg = Flg.new(header[3]) + + seconds = IO::ByteFormat::LittleEndian.decode(Int32, header.to_slice[4, 4]) + @modification_time = Time.epoch(seconds).to_local + + xfl = header[8] + @os = header[9] + + if flg.extra? + xlen = io.read_byte.not_nil! + @extra = Bytes.new(xlen) + io.read_fully(@extra) + end + + if flg.name? + @name = io.gets('\0', chomp: true) + end + + if flg.comment? + @comment = io.gets('\0', chomp: true) + end + + if flg.hcrc? + crc16 = io.read_bytes(UInt16, IO::ByteFormat::LittleEndian) + # TODO check crc16 + end + end + + # :nodoc: + def to_io(io) + # header + io.write_byte ID1 + io.write_byte ID2 + + # compression method + io.write_byte DEFLATE + + # flg + flg = Flg::None + flg |= Flg::EXTRA if @extra + flg |= Flg::NAME if @name + flg |= Flg::COMMENT if @comment + io.write_byte flg.value + + # time + io.write_bytes(modification_time.epoch.to_u32, IO::ByteFormat::LittleEndian) + + # xfl + io.write_byte 0_u8 + + # os + io.write_byte os + + if extra = @extra + io.write_byte extra.size.to_u8 + io.write(extra) + end + + if name = @name + io << name + io.write_byte 0_u8 + end + + if comment = @comment + io << comment + io.write_byte 0_u8 + end + end +end diff --git a/src/gzip/reader.cr b/src/gzip/reader.cr new file mode 100644 index 000000000000..80982431bfc0 --- /dev/null +++ b/src/gzip/reader.cr @@ -0,0 +1,138 @@ +# A read-only `IO` object to decompress data in the gzip format. +# +# Instances of this class wrap another IO object. 
When you read from this +# instance, it reads data from the underlying IO, decompresses it, and returns +# it to the caller. +# +# NOTE: A gzip stream can contain zero or more members. If it contains +# no members, `header` will be `nil`. If it contains one or more +# members, only the first header will be recorded here. This is +# because gzipping multiple members is not common as one usually +# combines gzip with tar. If, however, multiple members are present +# then reading from this reader will return the concatenation of +# all the members. +# +# ### Example: decompress a gzip file +# +# ``` +# require "gzip" +# +# File.write("file.gzip", Bytes[31, 139, 8, 0, 0, 0, 0, 0, 0, 3, 75, 76, 74, 6, 0, 194, 65, 36, 53, 3, 0, 0, 0]) +# +# string = File.open("file.gzip") do |file| +# Gzip::Reader.open(file) do |gzip| +# gzip.gets_to_end +# end +# end +# string # => "abc" +# ``` +class Gzip::Reader + include IO + + # Whether to close the enclosed `IO` when closing this reader. + property? sync_close = false + + # Returns `true` if this reader is closed. + getter? closed = false + + # Returns the first header in the gzip stream, if any. + getter header : Header? + + @flate_io : Flate::Reader? + + # Creates a new reader from the given *io*. + def initialize(@io : IO, @sync_close = false) + @crc32 = CRC32.initial # CRC32 of the data read so far + @isize = 0_u32 # Total size of the data read so far + + first_byte = @io.read_byte + + # A gzip file could be empty (have no members), so + # we account for that case + return unless first_byte + + @header = Header.new(first_byte, @io) + @flate_io = Flate::Reader.new(@io) + end + + # Creates a new reader from the given *filename*. + def self.new(filename : String) + new(::File.new(filename), sync_close: true) + end + + # Creates a new reader from the given *io*, yields it to the given block, + # and closes it at the end.
+ def self.open(io : IO, sync_close = false) + reader = new(io, sync_close: sync_close) + yield reader ensure reader.close + end + + # Creates a new reader from the given *filename*, yields it to the given block, + # and closes it at the end. + def self.open(filename : String) + reader = new(filename) + yield reader ensure reader.close + end + + # See `IO#read`. + def read(slice : Bytes) + check_open + + return 0 if slice.empty? + + while true + flate_io = @flate_io + return 0 unless flate_io # no member left to read (empty or fully consumed stream) + + read_bytes = flate_io.read(slice) + if read_bytes == 0 # member exhausted: its trailer (CRC32, then size) follows + crc32 = @io.read_bytes(UInt32, IO::ByteFormat::LittleEndian) + isize = @io.read_bytes(UInt32, IO::ByteFormat::LittleEndian) + + if crc32 != @crc32 + raise Gzip::Error.new("CRC32 checksum mismatch") + end + + if isize != @isize + raise Gzip::Error.new("isize mismatch") + end + + # Reset checksum and total size for next entry + @crc32 = CRC32.initial + @isize = 0_u32 + + # Check if another header with data comes + first_byte = @io.read_byte + if first_byte + Header.new(first_byte, @io) # parsed and discarded; only the first header is kept + @flate_io = Flate::Reader.new(@io) + else + @flate_io = nil + break + end + else + # Update CRC32 and total data size + @crc32 = CRC32.update(slice[0, read_bytes], @crc32) + @isize += read_bytes + + break + end + end + + read_bytes + end + + # Always raises `IO::Error` because this is a read-only `IO`. + def write(slice : Bytes) : Nil + raise IO::Error.new("can't write to Gzip::Reader") + end + + # Closes this reader. + def close + return if @closed + @closed = true + + @flate_io.try &.close + @io.close if @sync_close + end +end diff --git a/src/gzip/writer.cr b/src/gzip/writer.cr new file mode 100644 index 000000000000..3f2ea3bea3ca --- /dev/null +++ b/src/gzip/writer.cr @@ -0,0 +1,114 @@ +# A write-only `IO` object to compress data in the gzip format. +# +# Instances of this class wrap another IO object. When you write to this +# instance, it compresses the data and writes it to the underlying IO.
+# +# **Note**: unless created with a block, `close` must be invoked after all +# data has been written to a Gzip::Writer instance. +# +# ### Example: compress a file +# +# ``` +# require "gzip" +# +# File.write("file.txt", "abc") +# +# File.open("./file.txt", "r") do |input_file| +# File.open("./file.gzip", "w") do |output_file| +# Gzip::Writer.open(output_file) do |gzip| +# IO.copy(input_file, gzip) +# end +# end +# end +# ``` +class Gzip::Writer + include IO + + # Whether to close the enclosed `IO` when closing this writer. + property? sync_close = false + + # Returns `true` if this writer is closed. + getter? closed = false + + # The header to write to the gzip stream. It will be + # written just before the first write to this writer. + # Changes to the header after the first write are + # ignored. + getter header = Header.new + + # Creates a new writer to the given *io*. + def initialize(@io : IO, @level = Gzip::DEFAULT_COMPRESSION, @sync_close = false) + @crc32 = CRC32.initial # CRC32 of written data + @isize = 0 # Total size of written data + end + + # Creates a new writer to the given *filename*. + def self.new(filename : String, level = Gzip::DEFAULT_COMPRESSION) + new(::File.new(filename, "w"), level: level, sync_close: true) + end + + # Creates a new writer to the given *io*, yields it to the given block, + # and closes it at the end. + def self.open(io : IO, level = Gzip::DEFAULT_COMPRESSION, sync_close = false) + writer = new(io, level: level, sync_close: sync_close) + yield writer ensure writer.close + end + + # Creates a new writer to the given *filename*, yields it to the given block, + # and closes it at the end. + def self.open(filename : String, level = Gzip::DEFAULT_COMPRESSION) + writer = new(filename, level: level) + yield writer ensure writer.close + end + + # Always raises `IO::Error` because this is a write-only `IO`. + def read(slice : Bytes) + raise IO::Error.new("can't read from Gzip::Writer") + end + + # See `IO#write`.
+ def write(slice : Bytes) : Nil + check_open + + flate_io = write_header + flate_io.write(slice) + + # Update CRC32 and total data size + @crc32 = CRC32.update(slice, @crc32) + @isize += slice.size + end + + # Flushes data, forcing writing the gzip header if no + # data has been written yet. + # + # See `IO#flush`. + def flush + check_open + + flate_io = write_header + flate_io.flush + end + + # Closes this writer. Must be invoked after all data has been written. + def close + return if @closed + @closed = true + + flate_io = write_header + flate_io.close + + @io.write_bytes @crc32, IO::ByteFormat::LittleEndian # trailer: CRC32 of everything passed to `write` + @io.write_bytes @isize, IO::ByteFormat::LittleEndian # trailer: total number of bytes passed to `write` + + @io.close if @sync_close + end + + private def write_header + flate_io = @flate_io + unless flate_io # first use: create the compressor and emit the gzip header + flate_io = @flate_io = Flate::Writer.new(@io, level: @level) + header.to_io(@io) + end + flate_io + end +end diff --git a/src/http/common.cr b/src/http/common.cr index 43c1ac58a2d7..2aa5e4a03acf 100644 --- a/src/http/common.cr +++ b/src/http/common.cr @@ -1,5 +1,6 @@ {% if !flag?(:without_zlib) %} - require "zlib" + require "flate" + require "gzip" {% end %} module HTTP @@ -40,9 +41,9 @@ module HTTP encoding = headers["Content-Encoding"]?
case encoding when "gzip" - body = Zlib::Inflate.gzip(body, sync_close: true) + body = Gzip::Reader.new(body, sync_close: true) when "deflate" - body = Zlib::Inflate.new(body, sync_close: true) + body = Flate::Reader.new(body, sync_close: true) end {% end %} end diff --git a/src/http/server.cr b/src/http/server.cr index 1936ce93c822..d5ea3553aa12 100644 --- a/src/http/server.cr +++ b/src/http/server.cr @@ -66,7 +66,7 @@ require "./common" # HTTP::Server.new("127.0.0.1", 8080, [ # HTTP::ErrorHandler.new, # HTTP::LogHandler.new, -# HTTP::DeflateHandler.new, +# HTTP::CompressHandler.new, # HTTP::StaticFileHandler.new("."), # ]).listen # ``` diff --git a/src/http/server/handlers/deflate_handler.cr b/src/http/server/handlers/compress_handler.cr similarity index 77% rename from src/http/server/handlers/deflate_handler.cr rename to src/http/server/handlers/compress_handler.cr index 83912d9ad369..b140785dd8e3 100644 --- a/src/http/server/handlers/deflate_handler.cr +++ b/src/http/server/handlers/compress_handler.cr @@ -1,10 +1,11 @@ {% if !flag?(:without_zlib) %} - require "zlib" + require "flate" + require "gzip" {% end %} # A handler that configures an `HTTP::Server::Response` to compress the response # output, either using gzip or deflate, depending on the `Accept-Encoding` request header. 
-class HTTP::DeflateHandler +class HTTP::CompressHandler include HTTP::Handler def call(context) @@ -15,10 +16,10 @@ class HTTP::DeflateHandler if request_headers.includes_word?("Accept-Encoding", "gzip") context.response.headers["Content-Encoding"] = "gzip" - context.response.output = Zlib::Deflate.gzip(context.response.output, sync_close: true) + context.response.output = Gzip::Writer.new(context.response.output, sync_close: true) elsif request_headers.includes_word?("Accept-Encoding", "deflate") context.response.headers["Content-Encoding"] = "deflate" - context.response.output = Zlib::Deflate.new(context.response.output, sync_close: true) + context.response.output = Flate::Writer.new(context.response.output, sync_close: true) end call_next(context) diff --git a/src/zlib/lib_zlib.cr b/src/lib_z/lib_z.cr similarity index 55% rename from src/zlib/lib_zlib.cr rename to src/lib_z/lib_z.cr index d6b51c47f8f8..1f3f51945cfd 100644 --- a/src/zlib/lib_zlib.cr +++ b/src/lib_z/lib_z.cr @@ -6,8 +6,6 @@ lib LibZ alias Long = LibC::Long alias ULong = LibC::ULong alias SizeT = LibC::SizeT - alias Double = LibC::Double - alias BitcntT = ULong alias Bytef = UInt8 @@ -37,36 +35,6 @@ lib LibZ reserved : Long end - struct GZHeader - text : Int32 - time : UInt64 - xflags : Int32 - os : Int32 - extra : UInt8* - extra_len : UInt32 - extra_max : UInt32 - name : UInt8* - name_max : UInt32 - comment : UInt8* - comm_max : UInt32 - hcrc : Int32 - done : Int32 - end - - enum Strategy - FILTERED = 1 - HUFFMAN_ONLY = 2 - RLE = 3 - FIXED = 4 - DEFAULT_STRATEGY = 0 - end - - # compression level - NO_COMPRESSION = 0 - BEST_SPEED = 1 - BEST_COMPRESSION = 9 - DEFAULT_COMPRESSION = -1 - # error codes enum Error OK = 0 @@ -95,37 +63,22 @@ lib LibZ Z_DEFLATED = 8 fun deflateInit2 = deflateInit2_(stream : ZStream*, level : Int32, method : Int32, - window_bits : Int32, mem_level : Int32, strategy : Strategy, + window_bits : Int32, mem_level : Int32, strategy : Int32, version : UInt8*, stream_size : 
Int32) : Error fun deflate(stream : ZStream*, flush : Flush) : Error fun deflateEnd(stream : ZStream*) : Int32 - fun deflateReset(stream : ZStream*) : Int32 - fun deflateParams(stream : ZStream*, level : Int32, strategy : Strategy) : Int32 - fun deflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt32) : Int32 + fun deflateReset(stream : ZStream*) : Error + fun deflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt) : Int fun inflateInit2 = inflateInit2_(stream : ZStream*, window_bits : Int32, version : UInt8*, stream_size : Int32) : Error fun inflate(stream : ZStream*, flush : Flush) : Error fun inflateEnd(stream : ZStream*) : Int32 fun inflateReset(stream : ZStream*) : Int32 - fun inflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt32) : Int32 + fun inflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt) : Int - alias GZFile = Void* + alias InFunc = Void*, UInt8** -> UInt + alias OutFunc = Void*, UInt8*, UInt -> Int - fun gzdopen(fd : Int32, mode : UInt8*) : GZFile - fun gzbuffer(file : GZFile, size : UInt32) : Int32 - fun gzsetparams(file : GZFile, level : Int32, strategy : Strategy) : Int32 - fun gzread(file : GZFile, buf : UInt8*, len : UInt32) : Int32 - fun gzwrite(file : GZFile, buf : UInt8*, len : UInt32) : Int32 - fun gzflush(file : GZFile, flush : Flush) : Int32 - fun gzseek(file : GZFile, offset : LibC::SizeT, whence : Int32) : Int32 - fun gzrewind(file : GZFile) : Int32 - fun gztell(file : GZFile) : LibC::SizeT - fun gzoffset(file : GZFile) : LibC::SizeT - fun gzeof(file : GZFile) : Int32 - fun gzdirect(file : GZFile) : Int32 - fun gzclose(file : GZFile) : Int32 - fun gzclose_r(file : GZFile) : Int32 - fun gzclose_w(file : GZFile) : Int32 - fun gzerror(file : GZFile, errnum : Int32*) : UInt8* - fun gzclearerr(file : GZFile) + fun inflateBackInit = inflateBackInit_(stream : ZStream*, window_bits : Int, window : UInt8*, version : UInt8*, stream_size : Int) : Int + fun inflateBack(stream 
: ZStream*, in : InFunc, in_desc : Void*, out : OutFunc, out_desc : Void*) : Int end diff --git a/src/zip/checksum_reader.cr b/src/zip/checksum_reader.cr index af5389ba4146..c8e0e3891f4c 100644 --- a/src/zip/checksum_reader.cr +++ b/src/zip/checksum_reader.cr @@ -5,12 +5,9 @@ module Zip private class ChecksumReader include IO - def initialize(@io : IO, @filename : String, verify @expected_crc32 : UInt32? = nil) - @crc32 = LibC::ULong.new(0) - end + getter crc32 = CRC32.initial - def crc32 - @crc32.to_u32 + def initialize(@io : IO, @filename : String, verify @expected_crc32 : UInt32? = nil) end def read(slice : Bytes) @@ -20,7 +17,7 @@ module Zip raise Zip::Error.new("checksum failed for entry #{@filename} (expected #{expected_crc32}, got #{crc32}") end else - @crc32 = Zlib.crc32(slice[0, read_bytes], @crc32) + @crc32 = CRC32.update(slice[0, read_bytes], @crc32) end read_bytes end diff --git a/src/zip/checksum_writer.cr b/src/zip/checksum_writer.cr index 4b810b45bac0..626a4f049be6 100644 --- a/src/zip/checksum_writer.cr +++ b/src/zip/checksum_writer.cr @@ -5,7 +5,7 @@ module Zip include IO getter count = 0_u32 - getter crc32 = LibC::ULong.new(0) + getter crc32 = CRC32.initial getter! io : IO def initialize(@compute_crc32 = false) @@ -17,13 +17,13 @@ module Zip def write(slice : Bytes) @count += slice.size - @crc32 = Zlib.crc32(slice, @crc32) if @compute_crc32 + @crc32 = CRC32.update(slice, @crc32) if @compute_crc32 io.write(slice) end def io=(@io) @count = 0_u32 - @crc32 = LibC::ULong.new(0) + @crc32 = CRC32.initial end end end diff --git a/src/zip/file_info.cr b/src/zip/file_info.cr index 35129f44c627..cee14d4b5de9 100644 --- a/src/zip/file_info.cr +++ b/src/zip/file_info.cr @@ -115,12 +115,13 @@ module Zip::FileInfo io = IO::Sized.new(io, compressed_size) unless is_sized when .deflated? if compressed_size == 0 && bit_3_set? 
- io = IO::Delimited.new(io, DEFLATE_END_SIGNATURE) + # Read until we end decompressing the deflate data, + # which has an unknown size else io = IO::Sized.new(io, compressed_size) unless is_sized end - io = Zlib::Inflate.new(io, wbits: Zlib::ZIP) + io = Flate::Reader.new(io) else raise "Unsupported compression method: #{compression_method}" end diff --git a/src/zip/reader.cr b/src/zip/reader.cr index 515d47099740..003c797cfbef 100644 --- a/src/zip/reader.cr +++ b/src/zip/reader.cr @@ -58,7 +58,7 @@ class Zip::Reader # are no more entries. # # After reading a next entry, previous entries can no - # longer be read (their IO will be closed.) + # longer be read (their `IO` will be closed.) def next_entry : Entry? return nil if @reached_end @@ -109,7 +109,15 @@ class Zip::Reader private def skip_data_descriptor(entry) if entry.compression_method.deflated? && entry.bit_3_set? - read_data_descriptor(entry) + # The data descriptor signature is optional: if we + # find it, we read the data descriptor info normally; + # otherwise, the first four bytes are the crc32 value. + signature = read UInt32 + if signature == FileInfo::DATA_DESCRIPTOR_SIGNATURE + read_data_descriptor(entry) + else + read_data_descriptor(entry, crc32: signature) + end @read_data_descriptor = true else @read_data_descriptor = false @@ -117,8 +125,8 @@ class Zip::Reader end end - private def read_data_descriptor(entry) - entry.crc32 = read UInt32 + private def read_data_descriptor(entry, crc32 = nil) + entry.crc32 = crc32 || (read UInt32) entry.compressed_size = read UInt32 entry.uncompressed_size = read UInt32 verify_checksum(entry) diff --git a/src/zip/writer.cr b/src/zip/writer.cr index 425d888d0d91..be1724fc6b70 100644 --- a/src/zip/writer.cr +++ b/src/zip/writer.cr @@ -102,7 +102,7 @@ class Zip::Writer yield @uncompressed_size_counter when .deflated? 
@compressed_size_counter.io = @io - io = Zlib::Deflate.new(@compressed_size_counter, wbits: Zlib::ZIP) + io = Flate::Writer.new(@compressed_size_counter) @uncompressed_size_counter.io = io yield @uncompressed_size_counter io.close diff --git a/src/zip/zip.cr b/src/zip/zip.cr index 75876cce8c83..31e666bba536 100644 --- a/src/zip/zip.cr +++ b/src/zip/zip.cr @@ -1,4 +1,5 @@ -require "zlib" +require "flate" +require "crc32" require "./*" # The Zip module contains readers and writers of the zip diff --git a/src/zlib.cr b/src/zlib.cr deleted file mode 100644 index c97acbb27771..000000000000 --- a/src/zlib.cr +++ /dev/null @@ -1,55 +0,0 @@ -require "./zlib/*" - -# The Zlib module provides access to the [zlib library](http://zlib.net/) for -# lossless data compression and decompression in zlib and gzip format: -# -# * `Zlib::Deflate` for compression -# * `Zlib::Inflate` for decompression -module Zlib - GZIP = LibZ::MAX_BITS + 16 - ZIP = -LibZ::MAX_BITS - - # Returns the linked zlib version. - def self.version : String - String.new LibZ.zlibVersion - end - - def self.adler32(data, adler) - slice = data.to_slice - LibZ.adler32(adler, slice, slice.size) - end - - def self.adler32(data) - adler = LibZ.adler32(0, nil, 0) - adler32(data, adler) - end - - def self.adler32_combine(adler1, adler2, len) - LibZ.adler32_combine(adler1, adler2, len) - end - - def self.crc32(data, crc) - slice = data.to_slice - LibZ.crc32(crc, slice, slice.size) - end - - def self.crc32(data) - crc = LibZ.crc32(0, nil, 0) - crc32(data, crc) - end - - def self.crc32_combine(crc1, crc2, len) - LibZ.crc32_combine(crc1, crc2, len) - end - - class Error < Exception - def initialize(ret, stream) - if msg = stream.msg - error_msg = String.new(msg) - super("inflate: #{error_msg} #{ret}") - else - super("inflate: #{ret}") - end - end - end -end diff --git a/src/zlib/deflate.cr b/src/zlib/deflate.cr deleted file mode 100644 index cfc52372e4e3..000000000000 --- a/src/zlib/deflate.cr +++ /dev/null @@ -1,132 +0,0 @@ 
-# A write-only `IO` object to compress data in zlib or gzip format. -# -# Instances of this class wrap another `IO` object. When you write to this -# instance, it compresses the data and writes it to the underlying `IO`. -# -# NOTE: unless created with a block, `close` must be invoked after all -# data has been written to a `Zlib::Deflate` instance. -# -# ### Example: compress a file -# -# ``` -# require "zlib" -# -# File.write("file.txt", "abc") -# -# File.open("./file.txt", "r") do |input_file| -# File.open("./file.gzip", "w") do |output_file| -# Zlib::Deflate.gzip(output_file) do |deflate| -# IO.copy(input_file, deflate) -# end -# end -# end -# ``` -# -# See also: `Zlib::Inflate` for decompressing data. -class Zlib::Deflate - include IO - - # If `#sync_close?` is `true`, closing this `IO` will close the underlying `IO`. - property? sync_close : Bool - - # Creates an instance of `Zlib::Deflate`. `close` must be invoked - # after all data has been written. - def initialize(@output : IO, level = LibZ::DEFAULT_COMPRESSION, wbits = LibZ::MAX_BITS, - mem_level = LibZ::DEF_MEM_LEVEL, strategy = LibZ::Strategy::DEFAULT_STRATEGY, - @sync_close : Bool = false) - @buf = uninitialized UInt8[8192] # output buffer used by zlib - @stream = LibZ::ZStream.new - @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } - @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } - @closed = false - ret = LibZ.deflateInit2(pointerof(@stream), level, LibZ::Z_DEFLATED, wbits, mem_level, - strategy, LibZ.zlibVersion, sizeof(LibZ::ZStream)) - if ret != LibZ::Error::OK - raise Zlib::Error.new(ret, @stream) - end - end - - # Creates an instance of `Zlib::Deflate`, yields it to the given block, - # and closes it at its end. 
- def self.new(output : IO, level = LibZ::DEFAULT_COMPRESSION, wbits = LibZ::MAX_BITS, - mem_level = LibZ::DEF_MEM_LEVEL, strategy = LibZ::Strategy::DEFAULT_STRATEGY, - sync_close : Bool = false) - deflate = new(output, level: level, wbits: wbits, mem_level: mem_level, strategy: strategy, sync_close: sync_close) - begin - yield deflate - ensure - deflate.close - end - end - - # Creates an instance of `Zlib::Deflate` for the gzip format. `close` - # must be invoked after all data has written. - def self.gzip(output, sync_close : Bool = false) : self - new output, wbits: GZIP, sync_close: sync_close - end - - # Creates an instance of `Zlib::Deflate` for the gzip format, yields it - # to the given block, and closes it at its end. - def self.gzip(output, sync_close : Bool = false) - deflate = gzip(output, sync_close: sync_close) - begin - yield deflate - ensure - deflate.close - end - end - - # Always raises: this is a write-only `IO`. - def read(slice : Bytes) - raise "can't read from Zlib::Deflate" - end - - # See `IO#write`. - def write(slice : Bytes) - check_open - - @stream.avail_in = slice.size - @stream.next_in = slice - consume_output LibZ::Flush::NO_FLUSH - end - - # See `IO#flush`. - def flush - return if @closed - - consume_output LibZ::Flush::SYNC_FLUSH - end - - # Closes this `IO`. Must be invoked after all data has been written. - def close - return if @closed - @closed = true - - @stream.avail_in = 0 - @stream.next_in = Pointer(UInt8).null - consume_output LibZ::Flush::FINISH - LibZ.deflateEnd(pointerof(@stream)) - - @output.close if @sync_close - end - - # Returns `true` if this `IO` is closed. - def closed? 
- @closed - end - - # :nodoc: - def inspect(io) - to_s(io) - end - - private def consume_output(flush) - loop do - @stream.next_out = @buf.to_unsafe - @stream.avail_out = @buf.size.to_u32 - LibZ.deflate(pointerof(@stream), flush) # no bad return value - @output.write(@buf.to_slice[0, @buf.size - @stream.avail_out]) - break if @stream.avail_out != 0 - end - end -end diff --git a/src/zlib/inflate.cr b/src/zlib/inflate.cr deleted file mode 100644 index edaaa92a8271..000000000000 --- a/src/zlib/inflate.cr +++ /dev/null @@ -1,132 +0,0 @@ -# A read-only `IO` object to decompress data in zlib or gzip format. -# -# Instances of this class wrap another IO object. When you read from this instance -# instance, it reads data from the underlying IO, decompresses it, and returns -# it to the caller. -# -# ### Example: decompress text a file -# -# ``` -# require "zlib" -# -# File.write("file.gzip", Bytes[31, 139, 8, 0, 0, 0, 0, 0, 0, 3, 75, 76, 74, 6, 0, 194, 65, 36, 53, 3, 0, 0, 0]) -# -# string = File.open("file.gzip", "r") do |file| -# Zlib::Inflate.gzip(file) do |inflate| -# inflate.gets_to_end -# end -# end -# string # => "abc" -# ``` -# -# See also: `Zlib::Deflate` for compressing data. -class Zlib::Inflate - include IO - - # If `#sync_close?` is `true`, closing this `IO` will close the underlying `IO`. - property? sync_close : Bool - - # Creates an instance of `Zlib::Inflate`. 
- def initialize(@input : IO, wbits = LibZ::MAX_BITS, @sync_close : Bool = false) - @buf = uninitialized UInt8[8192] # input buffer used by zlib - @stream = LibZ::ZStream.new - @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } - @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } - ret = LibZ.inflateInit2(pointerof(@stream), wbits, LibZ.zlibVersion, sizeof(LibZ::ZStream)) - if ret != LibZ::Error::OK - raise Zlib::Error.new(ret, @stream) - end - @closed = false - end - - # Creates an instance of `Zlib::Inflate`, yields it to the given block, - # and closes it at its end. - def self.new(input : IO, wbits = LibZ::MAX_BITS, sync_close : Bool = false) - inflate = new input, wbits: wbits, sync_close: sync_close - begin - yield inflate - ensure - inflate.close - end - end - - # Creates an instance of `Zlib::Inflate` for the gzip format. `close` - # must be invoked after all data has written. - def self.gzip(input, sync_close : Bool = false) : self - new input, wbits: GZIP, sync_close: sync_close - end - - # Creates an instance of `Zlib::Inflate` for the gzip format, yields it - # to the given block, and closes it at its end. - def self.gzip(input, sync_close : Bool = false) - inflate = gzip input, sync_close: sync_close - begin - yield inflate - ensure - inflate.close - end - end - - # Always raises: this is a read-only `IO`. - def write(slice : Bytes) - raise IO::Error.new "Can't write to InflateIO" - end - - # See `IO#read`. - def read(slice : Bytes) - check_open - - return 0 if slice.empty? 
- - while true - if @stream.avail_in == 0 - @stream.next_in = @buf.to_unsafe - @stream.avail_in = @input.read(@buf.to_slice).to_u32 - return 0 if @stream.avail_in == 0 - end - - @stream.avail_out = slice.size.to_u32 - @stream.next_out = slice.to_unsafe - - ret = LibZ.inflate(pointerof(@stream), LibZ::Flush::NO_FLUSH) - read_bytes = slice.size - @stream.avail_out - case ret - when LibZ::Error::NEED_DICT, - LibZ::Error::DATA_ERROR, - LibZ::Error::MEM_ERROR - raise Zlib::Error.new(ret, @stream) - when LibZ::Error::STREAM_END - return read_bytes - else - # LibZ.inflate might not write any data to the output slice because - # it might need more input. We can know this happened because *ret* - # is not STREAM_END. - if read_bytes == 0 - next - else - return read_bytes - end - end - end - end - - # Closes this `IO`. - def close - return if @closed - @closed = true - - LibZ.inflateEnd(pointerof(@stream)) - - @input.close if @sync_close - end - - # Returns `true` if this `IO` is closed. - def closed? - @closed - end - - # :nodoc: - def inspect(io) - to_s(io) - end -end diff --git a/src/zlib/reader.cr b/src/zlib/reader.cr new file mode 100644 index 000000000000..b06122730f19 --- /dev/null +++ b/src/zlib/reader.cr @@ -0,0 +1,99 @@ +# A read-only `IO` object to decompress data in the zlib format. +# +# Instances of this class wrap another IO object. When you read from this +# instance, it reads data from the underlying IO, decompresses it, and returns +# it to the caller. +class Zlib::Reader + include IO + + # Whether to close the enclosed `IO` when closing this reader. + property? sync_close = false + + # Returns `true` if this reader is closed. + getter? closed = false + + # Creates a new reader from the given *io*. + def initialize(@io : IO, @sync_close = false, dict : Bytes?
= nil) + Zlib::Reader.read_header(io, dict) + @flate_io = Flate::Reader.new(@io, dict: dict) + @adler32 = Adler32.initial # running ADLER-32 of the decompressed data + @end = false # set once the compressed data is exhausted + end + + # Creates a new reader from the given *io*, yields it to the given block, + # and closes it at the end. + def self.open(io : IO, sync_close = false, dict : Bytes? = nil) + reader = new(io, sync_close: sync_close, dict: dict) + yield reader ensure reader.close + end + + protected def self.read_header(io, dict) + cmf = io.read_byte || invalid_header + + cm = cmf & 0xF # low four bits of CMF hold the compression method + if cm != 8 # the compression method must be 8 + invalid_header + end + + flg = io.read_byte || invalid_header + + # CMF and FLG, when viewed as a 16-bit unsigned integer stored + # in MSB order (CMF*256 + FLG), must be a multiple of 31 + unless (cmf.to_u16*256 + flg.to_u16).divisible_by?(31) + invalid_header + end + + fdict = flg.bit(5) == 1 # bit 5 of FLG: a preset dictionary is required + if fdict + unless dict + raise Zlib::Error.new("missing dictionary") + end + + checksum = io.read_bytes(UInt32, IO::ByteFormat::BigEndian) + dict_checksum = Adler32.checksum(dict) + if checksum != dict_checksum + raise Zlib::Error.new("dictionary ADLER-32 checksum mismatch") + end + end + end + + # See `IO#read`. + def read(slice : Bytes) + check_open + + return 0 if slice.empty? + return 0 if @end + + read_bytes = @flate_io.read(slice) + if read_bytes == 0 + # Check ADLER-32 + @end = true + @flate_io.close + adler32 = @io.read_bytes(UInt32, IO::ByteFormat::BigEndian) + if adler32 != @adler32 + raise Zlib::Error.new("ADLER-32 checksum mismatch") + end + else + # Update ADLER-32 checksum + @adler32 = Adler32.update(slice[0, read_bytes], @adler32) + end + read_bytes + end + + # Always raises `IO::Error` because this is a read-only `IO`.
+ def write(slice : Bytes) + raise IO::Error.new "can't write to Zlib::Reader" + end + + def close + return if @closed + @closed = true + + @flate_io.close + @io.close if @sync_close + end + + protected def self.invalid_header + raise Zlib::Error.new("invalid header") + end +end diff --git a/src/zlib/writer.cr b/src/zlib/writer.cr new file mode 100644 index 000000000000..5b80a4709ed2 --- /dev/null +++ b/src/zlib/writer.cr @@ -0,0 +1,120 @@ +# A write-only `IO` object to compress data in the zlib format. +# +# Instances of this class wrap another IO object. When you write to this +# instance, it compresses the data and writes it to the underlying IO. +# +# **Note**: unless created with a block, `close` must be invoked after all +# data has been written to a Zlib::Writer instance. +class Zlib::Writer + include IO + + # Whether to close the enclosed `IO` when closing this writer. + property? sync_close = false + + # Returns `true` if this writer is closed. + getter? closed = false + + # Creates a new writer to the given *io*. + def initialize(@io : IO, @level = Zlib::DEFAULT_COMPRESSION, @sync_close = false, @dict : Bytes? = nil) + @wrote_header = false + @adler32 = Adler32.initial + @flate_io = Flate::Writer.new(@io, level: level, dict: @dict) + end + + # Creates a new writer to the given *filename*. + def self.new(filename : String, level = Zlib::DEFAULT_COMPRESSION, dict : Bytes? = nil) + new(::File.new(filename, "w"), level: level, sync_close: true, dict: dict) + end + + # Creates a new writer to the given *io*, yields it to the given block, + # and closes it at the end. + def self.open(io : IO, level = Zlib::DEFAULT_COMPRESSION, sync_close = false, dict : Bytes? = nil) + writer = new(io, level: level, sync_close: sync_close, dict: dict) + yield writer ensure writer.close + end + + # Creates a new writer to the given *filename*, yields it to the given block, + # and closes it at the end.
+ def self.open(filename : String, level = Zlib::DEFAULT_COMPRESSION, dict : Bytes? = nil) + writer = new(filename, level: level, dict: dict) + yield writer ensure writer.close + end + + # Always raises `IO::Error` because this is a write-only `IO`. + def read(slice : Bytes) + raise IO::Error.new("can't read from Zlib::Writer") + end + + # See `IO#write`. + def write(slice : Bytes) : Nil + check_open + + write_header unless @wrote_header + + @flate_io.write(slice) + @adler32 = Adler32.update(slice, @adler32) + end + + # Flushes data, forcing writing the zlib header if no + # data has been written yet. + # + # See `IO#flush`. + def flush + check_open + + write_header unless @wrote_header + @flate_io.flush + end + + # Closes this writer. Must be invoked after all data has been written. + def close + return if @closed + @closed = true + + write_header unless @wrote_header + + @flate_io.close + + @io.write_bytes(@adler32, IO::ByteFormat::BigEndian) # trailer: ADLER-32 of everything passed to `write` + + @io.close if @sync_close + end + + private def write_header + @wrote_header = true + + # CMF byte: 7 for window size, 8 for compression method (deflate) + cmf = 0x78_u8 + @io.write_byte cmf + + dict = @dict + + flg = 0_u8 + + if dict + flg |= 1 << 5 # bit 5 marks that a dictionary checksum follows the header + end + + case @level # the top two bits of FLG are chosen from the compression level + when 0..1 + flg |= 0 << 6 + when 2..5 + flg |= 1 << 6 + when 6, -1 + flg |= 2 << 6 + else + flg |= 3 << 6 + end + + # CMF and FLG, when viewed as a 16-bit unsigned integer stored + # in MSB order (CMF*256 + FLG), must be a multiple of 31 + flg += 31 - (cmf.to_u16*256 + flg.to_u16).remainder(31) + + @io.write_byte flg + + if dict + dict_checksum = Adler32.checksum(dict) + @io.write_bytes(dict_checksum, IO::ByteFormat::BigEndian) + end + end +end diff --git a/src/zlib/zlib.cr b/src/zlib/zlib.cr new file mode 100644 index 000000000000..7aecd62fa12f --- /dev/null +++ b/src/zlib/zlib.cr @@ -0,0 +1,15 @@ +require "flate" +require "adler32" +require "./*" + +# The Zlib module contains readers and writers of zlib format compressed +# data, as specified in [RFC
1950](https://www.ietf.org/rfc/rfc1950.txt). +module Zlib + NO_COMPRESSION = Flate::NO_COMPRESSION + BEST_SPEED = Flate::BEST_SPEED + BEST_COMPRESSION = Flate::BEST_COMPRESSION + DEFAULT_COMPRESSION = Flate::DEFAULT_COMPRESSION + + class Error < Exception + end +end