Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use nvcomp defaults for algo options. #450

Merged
merged 4 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions python/kvikio/kvikio/_lib/libnvcomp_ll.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ from kvikio._lib.nvcomp_ll_cxx_api cimport (
nvcompBatchedLZ4CompressGetTempSize,
nvcompBatchedLZ4DecompressAsync,
nvcompBatchedLZ4DecompressGetTempSize,
nvcompBatchedLZ4DefaultOpts,
nvcompBatchedLZ4GetDecompressSizeAsync,
nvcompBatchedLZ4Opts_t,
)
Expand All @@ -371,20 +372,24 @@ class nvCompBatchAlgorithmLZ4(nvCompBatchAlgorithm):

HEADER_SIZE_BYTES: size_t = sizeof(uint32_t)

def __init__(self, data_type: int = 0, has_header: bool = True):
def __init__(self, data_type: int = None, has_header: bool = True):
madsbk marked this conversation as resolved.
Show resolved Hide resolved
"""Initialize the codec.
Parameters
----------
data_type: int
Source data type.
data_type: int or None
Source data type. If None, uses nvcomp default options.
has_header: bool
Whether the compressed data has a header.
This enables data compatibility between numcodecs LZ4 codec,
which has the header and nvCOMP LZ4 codec which does not
require the header.
"""
self.options = nvcompBatchedLZ4Opts_t(data_type)
if data_type is None:
self.options = nvcompBatchedLZ4DefaultOpts
else:
self.options = nvcompBatchedLZ4Opts_t(data_type)

self.has_header = has_header

# Note on LZ4 header structure: numcodecs LZ4 codec prepends
Expand Down Expand Up @@ -621,6 +626,7 @@ from kvikio._lib.nvcomp_ll_cxx_api cimport (
nvcompBatchedGdeflateCompressGetTempSize,
nvcompBatchedGdeflateDecompressAsync,
nvcompBatchedGdeflateDecompressGetTempSize,
nvcompBatchedGdeflateDefaultOpts,
nvcompBatchedGdeflateGetDecompressSizeAsync,
nvcompBatchedGdeflateOpts_t,
)
Expand All @@ -633,8 +639,11 @@ class nvCompBatchAlgorithmGdeflate(nvCompBatchAlgorithm):

options: nvcompBatchedGdeflateOpts_t

def __init__(self, algo: int = 0):
self.options = nvcompBatchedGdeflateOpts_t(algo)
def __init__(self, algo: int = None):
madsbk marked this conversation as resolved.
Show resolved Hide resolved
if algo is None:
self.options = nvcompBatchedGdeflateDefaultOpts
else:
self.options = nvcompBatchedGdeflateOpts_t(algo)

def _get_comp_temp_size(
self,
Expand Down Expand Up @@ -756,6 +765,7 @@ from kvikio._lib.nvcomp_ll_cxx_api cimport (
nvcompBatchedZstdCompressGetTempSize,
nvcompBatchedZstdDecompressAsync,
nvcompBatchedZstdDecompressGetTempSize,
nvcompBatchedZstdDefaultOpts,
nvcompBatchedZstdGetDecompressSizeAsync,
nvcompBatchedZstdOpts_t,
)
Expand All @@ -769,7 +779,7 @@ class nvCompBatchAlgorithmZstd(nvCompBatchAlgorithm):
options: nvcompBatchedZstdOpts_t

def __init__(self):
self.options = nvcompBatchedZstdOpts_t(0)
self.options = nvcompBatchedZstdDefaultOpts

def _get_comp_temp_size(
self,
Expand Down Expand Up @@ -891,6 +901,7 @@ from kvikio._lib.nvcomp_ll_cxx_api cimport (
nvcompBatchedSnappyCompressGetTempSize,
nvcompBatchedSnappyDecompressAsync,
nvcompBatchedSnappyDecompressGetTempSize,
nvcompBatchedSnappyDefaultOpts,
nvcompBatchedSnappyGetDecompressSizeAsync,
nvcompBatchedSnappyOpts_t,
)
Expand All @@ -904,7 +915,7 @@ class nvCompBatchAlgorithmSnappy(nvCompBatchAlgorithm):
options: nvcompBatchedSnappyOpts_t

def __init__(self):
self.options = nvcompBatchedSnappyOpts_t(0)
self.options = nvcompBatchedSnappyDefaultOpts

def _get_comp_temp_size(
self,
Expand Down Expand Up @@ -1026,6 +1037,7 @@ from kvikio._lib.nvcomp_ll_cxx_api cimport (
nvcompBatchedDeflateCompressGetTempSize,
nvcompBatchedDeflateDecompressAsync,
nvcompBatchedDeflateDecompressGetTempSize,
nvcompBatchedDeflateDefaultOpts,
nvcompBatchedDeflateGetDecompressSizeAsync,
nvcompBatchedDeflateOpts_t,
)
Expand All @@ -1038,14 +1050,17 @@ class nvCompBatchAlgorithmDeflate(nvCompBatchAlgorithm):

options: nvcompBatchedDeflateOpts_t

def __init__(self, algo: int = 0):
self.options = nvcompBatchedDeflateOpts_t(algo)
def __init__(self, algo: int = None):
madsbk marked this conversation as resolved.
Show resolved Hide resolved
if algo is None:
self.options = nvcompBatchedDeflateDefaultOpts
else:
self.options = nvcompBatchedDeflateOpts_t(algo)

def _get_comp_temp_size(
self,
size_t batch_size,
size_t max_uncompressed_chunk_bytes,
) -> (nvcompStatus_t, size_t):
) -> tuple[nvcompStatus_t, size_t]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, how did this ever work? I guess Cython is less strict with parsing.

Copy link
Member

@jakirkham jakirkham Aug 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are valid. The type format we had before is called a ctuple and has been around in Cython longer.

Support for typing.Tuple is relatively new.

Here's an example in the docs (please see the 2nd entry).

cdef size_t temp_bytes = 0

err = nvcompBatchedDeflateCompressGetTempSize(
Expand Down
10 changes: 10 additions & 0 deletions python/kvikio/kvikio/_lib/nvcomp_ll_cxx_api.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ cdef extern from "nvcomp/lz4.h" nogil:
ctypedef struct nvcompBatchedLZ4Opts_t:
nvcompType_t data_type

cdef nvcompBatchedLZ4Opts_t nvcompBatchedLZ4DefaultOpts

# Compression API.
cdef nvcompStatus_t nvcompBatchedLZ4CompressGetTempSize(
size_t batch_size,
Expand Down Expand Up @@ -109,6 +111,8 @@ cdef extern from "nvcomp/gdeflate.h" nogil:
ctypedef struct nvcompBatchedGdeflateOpts_t:
int algo

cdef nvcompBatchedGdeflateOpts_t nvcompBatchedGdeflateDefaultOpts

# Compression API.
cdef nvcompStatus_t nvcompBatchedGdeflateCompressGetTempSize(
size_t batch_size,
Expand Down Expand Up @@ -171,6 +175,8 @@ cdef extern from "nvcomp/zstd.h" nogil:
ctypedef struct nvcompBatchedZstdOpts_t:
int reserved

cdef nvcompBatchedZstdOpts_t nvcompBatchedZstdDefaultOpts

# Compression API.
cdef nvcompStatus_t nvcompBatchedZstdCompressGetTempSize(
size_t batch_size,
Expand Down Expand Up @@ -233,6 +239,8 @@ cdef extern from "nvcomp/snappy.h" nogil:
ctypedef struct nvcompBatchedSnappyOpts_t:
int reserved

cdef nvcompBatchedSnappyOpts_t nvcompBatchedSnappyDefaultOpts

# Compression API.
cdef nvcompStatus_t nvcompBatchedSnappyCompressGetTempSize(
size_t batch_size,
Expand Down Expand Up @@ -296,6 +304,8 @@ cdef extern from "nvcomp/deflate.h" nogil:
ctypedef struct nvcompBatchedDeflateOpts_t:
int algo

cdef nvcompBatchedDeflateOpts_t nvcompBatchedDeflateDefaultOpts

# Compression API.
cdef nvcompStatus_t nvcompBatchedDeflateCompressGetTempSize(
size_t batch_size,
Expand Down