Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support headerless format in getFrameParameters and decompress #217

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .cargo/config
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
[target.x86_64-apple-darwin]
rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"]

[target.aarch64-apple-darwin]
rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"]
Comment on lines +4 to +5
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These flags are needed to compile on Apple M1 (the aarch64-apple-darwin target).

8 changes: 4 additions & 4 deletions c-ext/decompressor.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,15 +299,15 @@ PyObject *Decompressor_decompress(ZstdDecompressor *self, PyObject *args,
goto finally;
}

decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);

if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
ZSTD_frameHeader frameHeader;
if (ZSTD_getFrameHeader_advanced(&frameHeader, source.buf, source.len, self->format) != 0) {
PyErr_SetString(ZstdError,
"error determining content size from frame header");
goto finally;
}
decompressedSize=frameHeader.frameContentSize;
/* Special case of empty frame. */
else if (0 == decompressedSize) {
if (0 == decompressedSize) {
result = PyBytes_FromStringAndSize("", 0);
goto finally;
}
Expand Down
10 changes: 6 additions & 4 deletions c-ext/frameparams.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,21 @@ extern PyObject *ZstdError;

FrameParametersObject *get_frame_parameters(PyObject *self, PyObject *args,
PyObject *kwargs) {
static char *kwlist[] = {"data", NULL};
static char *kwlist[] = {"data", "format", NULL};

Py_buffer source;

ZSTD_frameHeader header;
ZSTD_format_e format = ZSTD_f_zstd1;
FrameParametersObject *result = NULL;
size_t zresult;

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters",
kwlist, &source)) {
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:get_frame_parameters",
kwlist, &source, &format)) {
return NULL;
}

zresult = ZSTD_getFrameHeader(&header, source.buf, source.len);
zresult = ZSTD_getFrameHeader_advanced(&header, source.buf, source.len, format);

if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "cannot get frame parameters: %s",
Expand Down
32 changes: 23 additions & 9 deletions rust-ext/src/decompressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,17 +176,31 @@ impl ZstdDecompressor {

self.setup_dctx(py, true)?;

let output_size =
unsafe { zstd_sys::ZSTD_getFrameContentSize(buffer.buf_ptr(), buffer.len_bytes()) };
let mut header = zstd_sys::ZSTD_frameHeader {
frameContentSize: 0,
windowSize: 0,
blockSizeMax: 0,
frameType: zstd_sys::ZSTD_frameType_e::ZSTD_frame,
headerSize: 0,
dictID: 0,
checksumFlag: 0,
_reserved1: 0,
_reserved2: 0,
};
let zresult = unsafe {
zstd_sys::ZSTD_getFrameHeader_advanced(&mut header, buffer.buf_ptr(), buffer.len_bytes(), self.format)
};

if zresult != 0 {
return Err(ZstdError::new_err(
"error determining content size from frame header"
))
}

let (output_buffer_size, output_size) =
if output_size == zstd_sys::ZSTD_CONTENTSIZE_ERROR as _ {
return Err(ZstdError::new_err(
"error determining content size from frame header",
));
} else if output_size == 0 {
if header.frameContentSize == 0 {
return Ok(PyBytes::new(py, &[]));
} else if output_size == zstd_sys::ZSTD_CONTENTSIZE_UNKNOWN as _ {
} else if header.frameContentSize == zstd_sys::ZSTD_CONTENTSIZE_UNKNOWN as _ {
if max_output_size == 0 {
return Err(ZstdError::new_err(
"could not determine content size in frame header",
Expand All @@ -195,7 +209,7 @@ impl ZstdDecompressor {

(max_output_size, 0)
} else {
(output_size as _, output_size)
(header.frameContentSize as _, header.frameContentSize)
};

let mut dest_buffer: Vec<u8> = Vec::new();
Expand Down
15 changes: 12 additions & 3 deletions rust-ext/src/frame_parameters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use {
crate::ZstdError,
pyo3::{buffer::PyBuffer, prelude::*, wrap_pyfunction},
pyo3::{buffer::PyBuffer, prelude::*, wrap_pyfunction, exceptions::{PyValueError}},
};

#[pyclass(module = "zstandard.backend_rust")]
Expand Down Expand Up @@ -67,10 +67,19 @@ fn frame_header_size(data: PyBuffer<u8>) -> PyResult<usize> {
}

#[pyfunction]
fn get_frame_parameters(py: Python, buffer: PyBuffer<u8>) -> PyResult<Py<FrameParameters>> {
#[pyo3(signature = (buffer, format=zstd_sys::ZSTD_format_e::ZSTD_f_zstd1 as u32))]
fn get_frame_parameters(py: Python, buffer: PyBuffer<u8>, format: u32) -> PyResult<Py<FrameParameters>> {
let raw_data = unsafe {
std::slice::from_raw_parts::<u8>(buffer.buf_ptr() as *const _, buffer.len_bytes())
};
let format = if format == zstd_sys::ZSTD_format_e::ZSTD_f_zstd1 as _ {
zstd_sys::ZSTD_format_e::ZSTD_f_zstd1
} else if format == zstd_sys::ZSTD_format_e::ZSTD_f_zstd1_magicless as _ {
zstd_sys::ZSTD_format_e::ZSTD_f_zstd1_magicless
} else {
return Err(PyValueError::new_err(format!("invalid format value")));
};


let mut header = zstd_sys::ZSTD_frameHeader {
frameContentSize: 0,
Expand All @@ -84,7 +93,7 @@ fn get_frame_parameters(py: Python, buffer: PyBuffer<u8>) -> PyResult<Py<FramePa
_reserved2: 0,
};
let zresult = unsafe {
zstd_sys::ZSTD_getFrameHeader(&mut header, raw_data.as_ptr() as *const _, raw_data.len())
zstd_sys::ZSTD_getFrameHeader_advanced(&mut header, raw_data.as_ptr() as *const _, raw_data.len(), format)
};

if unsafe { zstd_sys::ZSTD_isError(zresult) } != 0 {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_compressor_fuzzing.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ def test_data_equivalence(self, original, threads, use_dict):
dctx = zstd.ZstdDecompressor(**kwargs)

for i, frame in enumerate(result):
self.assertEqual(dctx.decompress(frame), original[i])
self.assertEqual(dctx.decompress(frame.tobytes()), original[i])
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the Rust backend, the test was erroring out because frame wasn't a buffer but a zstandard.backend_rust.BufferSegment. I'm not sure what the best way to fix it is.
Converting with tobytes() does work, so I added the conversion.



@unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
Expand Down
10 changes: 10 additions & 0 deletions tests/test_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,16 @@ def test_attributes(self):
self.assertEqual(params.dict_id, 15)
self.assertTrue(params.has_checksum)

def test_attributes_without_header(self):
# Set multiple things without magic header
params = zstd.get_frame_parameters(
b"\x45\x40\x0f\x10\x00", format=zstd.FORMAT_ZSTD1_MAGICLESS
)
self.assertEqual(params.content_size, 272)
self.assertEqual(params.window_size, 262144)
self.assertEqual(params.dict_id, 15)
self.assertTrue(params.has_checksum)

def test_input_types(self):
v = zstd.FRAME_HEADER + b"\x00\x00"

Expand Down
10 changes: 10 additions & 0 deletions tests/test_decompressor_decompress.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ def test_input_types(self):
for source in sources:
self.assertEqual(dctx.decompress(source), b"foo")

def test_headerless(self):
compression_params = zstd.ZstdCompressionParameters(
format=zstd.FORMAT_ZSTD1_MAGICLESS,
)
cctx = zstd.ZstdCompressor(compression_params=compression_params)
compressed = cctx.compress(b"foo")

dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
self.assertEqual(dctx.decompress(compressed), b"foo")

def test_no_content_size_in_frame(self):
cctx = zstd.ZstdCompressor(write_content_size=False)
compressed = cctx.compress(b"foobar")
Expand Down
2 changes: 1 addition & 1 deletion zstandard/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ class FrameParameters(object):
def estimate_decompression_context_size() -> int: ...
def frame_content_size(data: ByteString) -> int: ...
def frame_header_size(data: ByteString) -> int: ...
def get_frame_parameters(data: ByteString) -> FrameParameters: ...
def get_frame_parameters(data: ByteString, format: Optional[int]) -> FrameParameters: ...
def train_dictionary(
dict_size: int,
samples: list[ByteString],
Expand Down
17 changes: 9 additions & 8 deletions zstandard/backend_cffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2558,7 +2558,7 @@ def frame_header_size(data):
return zresult


def get_frame_parameters(data):
def get_frame_parameters(data, format=FORMAT_ZSTD1):
"""
Parse a zstd frame header into frame parameters.

Expand All @@ -2569,13 +2569,15 @@ def get_frame_parameters(data):

:param data:
Data from which to read frame parameters.
:param format:
Set the format of data for the decoder.
:return:
:py:class:`FrameParameters`
"""
params = ffi.new("ZSTD_frameHeader *")

data_buffer = ffi.from_buffer(data)
zresult = lib.ZSTD_getFrameHeader(params, data_buffer, len(data_buffer))
zresult = lib.ZSTD_getFrameHeader_advanced(params, data_buffer, len(data_buffer), format)
if lib.ZSTD_isError(zresult):
raise ZstdError(
"cannot get frame parameters: %s" % _zstd_error(zresult)
Expand Down Expand Up @@ -3820,13 +3822,12 @@ def decompress(

data_buffer = ffi.from_buffer(data)

output_size = lib.ZSTD_getFrameContentSize(
data_buffer, len(data_buffer)
)

if output_size == lib.ZSTD_CONTENTSIZE_ERROR:
params = ffi.new("ZSTD_frameHeader *")
zresult = lib.ZSTD_getFrameHeader_advanced(params, data_buffer, len(data_buffer), self._format)
if zresult != 0:
raise ZstdError("error determining content size from frame header")
elif output_size == 0:
output_size = params.frameContentSize
if output_size == 0:
return b""
elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
if not max_output_size:
Expand Down
Loading