Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make pkg_zip compression configurable #737

Merged
merged 4 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 34 additions & 8 deletions pkg/private/zip/build_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

import argparse
import datetime
import logging
import os
import sys
import zipfile

from pkg.private import build_info
Expand Down Expand Up @@ -46,6 +48,12 @@ def _create_argument_parser():
parser.add_argument(
'-m', '--mode',
help='The file system mode to use for files added into the zip.')
parser.add_argument(
'-c', '--compression_type',
help='The compression type to use')
parser.add_argument(
'-l', '--compression_level',
help='The compression level to use')
parser.add_argument('--manifest',
help='manifest of contents to add to the layer.',
required=True)
Expand All @@ -71,7 +79,7 @@ def parse_date(ts):

class ZipWriter(object):

def __init__(self, output_path: str, time_stamp: int, default_mode: int):
def __init__(self, output_path: str, time_stamp: int, default_mode: int, compression_type: str, compression_level: int):
"""Create a writer.

You must close() after use or use in a 'with' statement.
Expand All @@ -84,7 +92,15 @@ def __init__(self, output_path: str, time_stamp: int, default_mode: int):
self.output_path = output_path
self.time_stamp = time_stamp
self.default_mode = default_mode
self.zip_file = zipfile.ZipFile(self.output_path, mode='w')
compressions = {
"deflated": zipfile.ZIP_DEFLATED,
"lzma": zipfile.ZIP_LZMA,
"bzip2": zipfile.ZIP_BZIP2,
"stored": zipfile.ZIP_STORED
}
self.compression_type = compressions[compression_type]
self.compression_level = compression_level
self.zip_file = zipfile.ZipFile(self.output_path, mode='w', compression=self.compression_type)

def __enter__(self):
return self
Expand All @@ -96,6 +112,15 @@ def close(self):
self.zip_file.close()
self.zip_file = None

def writestr(self, entry_info, content: str, compresslevel: int):
  """Write one entry to the archive, honouring compresslevel when possible.

  Args:
    entry_info: zip entry metadata, forwarded unchanged to ZipFile.writestr.
    content: the data to store for the entry.
    compresslevel: compression level to request for this entry. It is only
      forwarded on Python 3.7 and newer.
  """
  if sys.version_info >= (3, 7):
    self.zip_file.writestr(entry_info, content, compresslevel=compresslevel)
    return

  # Python 3.6 and lower don't support compresslevel
  self.zip_file.writestr(entry_info, content)
  # 6 is treated as "nothing custom was requested", so stay quiet for it.
  if compresslevel != 6:
    # logging.warn is a deprecated alias; logging.warning is the supported name.
    logging.warning("Custom compresslevel is not supported with python < 3.7")

def make_zipinfo(self, path: str, mode: str):
"""Create a Zipinfo.

Expand Down Expand Up @@ -141,10 +166,10 @@ def add_manifest_entry(self, entry):
entry_info = self.make_zipinfo(path=dst_path, mode=mode)

if entry_type == manifest.ENTRY_IS_FILE:
entry_info.compress_type = zipfile.ZIP_DEFLATED
entry_info.compress_type = self.compression_type
# Using utf-8 for the file names is for python <3.7 compatibility.
with open(src.encode('utf-8'), 'rb') as src_content:
self.zip_file.writestr(entry_info, src_content.read())
self.writestr(entry_info, src_content.read(), compresslevel=self.compression_level)
elif entry_type == manifest.ENTRY_IS_DIR:
entry_info.compress_type = zipfile.ZIP_STORED
# Set directory bits
Expand All @@ -158,7 +183,7 @@ def add_manifest_entry(self, entry):
elif entry_type == manifest.ENTRY_IS_TREE:
self.add_tree(src, dst_path, mode)
elif entry_type == manifest.ENTRY_IS_EMPTY_FILE:
entry_info.compress_type = zipfile.ZIP_DEFLATED
entry_info.compress_type = zipfile.ZIP_STORED
self.zip_file.writestr(entry_info, '')
else:
raise Exception('Unknown type for manifest entry:', entry)
Expand Down Expand Up @@ -213,9 +238,9 @@ def add_tree(self, tree_top: str, destpath: str, mode: int):
else:
f_mode = mode
entry_info = self.make_zipinfo(path=path, mode=f_mode)
entry_info.compress_type = zipfile.ZIP_DEFLATED
entry_info.compress_type = self.compression_type
with open(content_path, 'rb') as src:
self.zip_file.writestr(entry_info, src.read())
self.writestr(entry_info, src.read(), compresslevel=self.compression_level)
else:
# Implicitly created directory
dir_path = path
Expand Down Expand Up @@ -266,10 +291,11 @@ def main(args):
default_mode = None
if args.mode:
default_mode = int(args.mode, 8)
compression_level = int(args.compression_level)

manifest = _load_manifest(args.directory, args.manifest)
with ZipWriter(
args.output, time_stamp=ts, default_mode=default_mode) as zip_out:
args.output, time_stamp=ts, default_mode=default_mode, compression_type=args.compression_type, compression_level=compression_level) as zip_out:
for entry in manifest:
zip_out.add_manifest_entry(entry)

Expand Down
12 changes: 12 additions & 0 deletions pkg/private/zip/zip.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def _pkg_zip_impl(ctx):
args.add("-d", substitute_package_variables(ctx, ctx.attr.package_dir))
args.add("-t", ctx.attr.timestamp)
args.add("-m", ctx.attr.mode)
args.add("-c", str(ctx.attr.compression_type))
args.add("-l", ctx.attr.compression_level)
inputs = []
if ctx.attr.stamp == 1 or (ctx.attr.stamp == -1 and
ctx.attr.private_stamp_detect):
Expand Down Expand Up @@ -115,6 +117,16 @@ Jan 1, 1980 will be rounded up and the precision in the zip file is
limited to a granularity of 2 seconds.""",
default = 315532800,
),
"compression_level": attr.int(
default = 6,
doc = "The compression level to use, 1 is the fastest, 9 gives the smallest results. 0 skips compression, depending on the method used"
),
"compression_type": attr.string(
default = "deflated",
doc = """The compression to use. Note that lzma and bzip2 might not be supported by all readers.
The list of compressions is the same as Python's ZipFile: https://docs.python.org/3/library/zipfile.html#zipfile.ZIP_STORED""",
values = ["deflated", "lzma", "bzip2", "stored"]
),

# Common attributes
"out": attr.output(
Expand Down
35 changes: 35 additions & 0 deletions tests/zip/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,37 @@ pkg_zip(
package_variables = ":my_package_variables",
)

# Different compressions
# Each target below packages the same loremipsum.txt test file and varies only
# the compression settings passed to pkg_zip.

# Deflate with an explicit compression_level of 3.
pkg_zip(
name = "test_zip_deflated_level_3",
srcs = [
"//tests:testdata/loremipsum.txt",
],
compression_type = "deflated",
compression_level = 3,
)
# bzip2 compression; compression_level is left unset.
pkg_zip(
name = "test_zip_bzip2",
srcs = [
"//tests:testdata/loremipsum.txt",
],
compression_type = "bzip2",
)
# lzma compression; compression_level is left unset.
pkg_zip(
name = "test_zip_lzma",
srcs = [
"//tests:testdata/loremipsum.txt",
],
compression_type = "lzma",
)
# "stored" compression type; compression_level is left unset.
pkg_zip(
name = "test_zip_stored",
srcs = [
"//tests:testdata/loremipsum.txt",
],
compression_type = "stored",
)

py_test(
name = "zip_test",
srcs = [
Expand All @@ -252,6 +283,10 @@ py_test(
":test_zip_permissions.zip",
":test_zip_timestamp.zip",
":test_zip_tree.zip",
":test_zip_deflated_level_3",
":test_zip_bzip2",
":test_zip_lzma",
":test_zip_stored",
],
python_version = "PY3",
deps = [
Expand Down
28 changes: 28 additions & 0 deletions tests/zip/zip_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import datetime
import filecmp
import os
import sys
import unittest
import zipfile

Expand Down Expand Up @@ -123,6 +124,33 @@ def test_zip_tree(self):
{"filename": "generate_tree/b/e"},
])

def test_compression_deflated(self):
  """Check the deflated, level-3 archive entry's CRC and size."""
  # Python 3.6 doesn't support setting compresslevel, so the file size differs
  level_is_applied = sys.version_info >= (3, 7)
  expected_size = 312 if level_is_applied else 309
  expected_entries = [
      {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": expected_size},
  ]
  self.assertZipFileContent("test_zip_deflated_level_3.zip", expected_entries)

def test_compression_bzip2(self):
  """Check the bzip2 archive entry's CRC and size."""
  expected_entries = [
      {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 340},
  ]
  self.assertZipFileContent("test_zip_bzip2.zip", expected_entries)

def test_compression_lzma(self):
  """Check the lzma archive entry's CRC and size."""
  expected_entries = [
      {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 378},
  ]
  self.assertZipFileContent("test_zip_lzma.zip", expected_entries)

def test_compression_stored(self):
  """Check the stored archive entry's CRC and size."""
  expected_entries = [
      {"filename": "loremipsum.txt", "crc": LOREM_CRC, "size": 543},
  ]
  self.assertZipFileContent("test_zip_stored.zip", expected_entries)



if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions tests/zip/zip_test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ def assertZipFileContent(self, zip_file, content):
elif "isexe" in expected:
got_mode = (info.external_attr >> 16) & UNIX_RX_BITS
self.assertEqual(oct(got_mode), oct(UNIX_RX_BITS))
elif "size" in expected:
self.assertEqual(info.compress_size, expected["size"])

else:
if "attr" in expected:
Expand Down