-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prepare data processing release (#78)
- Loading branch information
1 parent
3815f50
commit 72f5cb1
Showing
15 changed files
with
323 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
Encryption module for cshelve. | ||
""" | ||
from functools import partial | ||
from logging import Logger | ||
from typing import Dict | ||
|
||
from ._data_processing import DataProcessing | ||
from .exceptions import UnknownEncryptionAlgorithmError | ||
|
||
|
||
# Key of the configuration entry that names the algorithm to use.
ALGORITHMS_NAME_KEY = "algorithm" | ||
# NOTE(review): not referenced anywhere in the visible part of this encryption
# module; presumably carried over from the compression module - confirm.
COMPRESSION_LEVEL_KEY = "level" | ||
|
||
|
||
def configure(
    logger: Logger, data_processing: DataProcessing, config: Dict[str, str]
) -> None:
    """
    Configure the encryption algorithm.

    Nothing is registered when `config` is empty or does not name an
    algorithm. Otherwise the encryption function is added as a pre-processing
    step and the decryption function as a post-processing step of
    `data_processing`.

    :param logger: Logger used to report the configuration progress.
    :param data_processing: Object receiving the crypt/decrypt functions.
    :param config: Encryption settings; the algorithm name is read from the
        `algorithm` key.
    :raises UnknownEncryptionAlgorithmError: If the requested algorithm is
        not one of the supported ones.
    """
    # Encryption is not configured, silently return.
    if not config:
        return

    if ALGORITHMS_NAME_KEY not in config:
        # This module handles encryption, so the message must say so
        # (it previously mentioned compression).
        logger.info("No encryption algorithm specified.")
        return

    algorithm = config[ALGORITHMS_NAME_KEY]

    # Maps an algorithm name to the factory building its crypt/decrypt pair.
    supported_algorithms = {
        "aes256": _aes256,
    }

    encryption = supported_algorithms.get(algorithm)
    if not encryption:
        raise UnknownEncryptionAlgorithmError(
            f"Unsupported encryption algorithm: {algorithm}"
        )

    logger.debug(f"Configuring encryption algorithm: {algorithm}")
    crypt_fct, decrypt_fct = encryption(config)
    data_processing.add_pre_processing(crypt_fct)
    data_processing.add_post_processing(decrypt_fct)
    logger.debug(f"Encryption algorithm {algorithm} configured.")
|
||
|
||
def _aes256(config: Dict[str, str]): | ||
""" | ||
Configure aes256 encryption. | ||
""" | ||
import zlib | ||
|
||
crypt = lambda x: x | ||
decrypt = lambda x: x | ||
|
||
return crypt, decrypt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
Compression Configuration | ||
========================= | ||
|
||
*cshelve* supports compression to reduce the size of stored data. | ||
This is particularly useful when working with large datasets or when network transfer time needs to be reduced. | ||
The compression algorithm can be configured using a configuration file. | ||
|
||
Configuration File | ||
################## | ||
|
||
The compression settings are specified in an INI file. | ||
Below is an example configuration file named `config.ini`: | ||
|
||
.. code-block:: ini | ||
[default] | ||
provider = in-memory | ||
persist-key = compression | ||
exists = true | ||
[compression] | ||
algorithm = zlib | ||
level = 1 | ||
In this example, the `algorithm` is set to `zlib`, and the `compression level <https://docs.python.org/3/library/zlib.html>`_ is set to `1`. | ||
|
||
Supported Algorithms | ||
##################### | ||
|
||
Currently, *cshelve* supports the following compression algorithms: | ||
|
||
- `zlib`: A widely-used compression library. | ||
|
||
Using Compression | ||
################# | ||
|
||
Once compression is configured in the `config.ini` file as shown above, *cshelve* automatically compresses data before storing it and decompresses it when retrieving it. | ||
The application code doesn't need to be updated: | ||
|
||
.. code-block:: python | ||
import cshelve | ||
with cshelve.open('config.ini') as db: | ||
db['data'] = 'This is some data that will be compressed.' | ||
with cshelve.open('config.ini') as db: | ||
data = db['data'] | ||
print(data) # Output: This is some data that will be compressed. | ||
In this example, the data is compressed before being stored and decompressed when retrieved, thanks to the configuration. | ||
|
||
Error Handling | ||
############## | ||
|
||
If an unsupported compression algorithm is specified, *cshelve* will raise an `UnknownCompressionAlgorithmError`. | ||
Ensure that the algorithm specified in the configuration file is supported. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
examples/asterix-and-obelix-database/azure-compression.ini
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[default] | ||
provider = azure-blob | ||
account_url = https://dscccccccccccccc.blob.core.windows.net | ||
auth_type = passwordless | ||
container_name = example-asterix-and-obelix-database-compression | ||
|
||
[logging] | ||
http = true | ||
|
||
[compression] | ||
algorithm = zlib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
""" | ||
This module tests the data processing modules. | ||
""" | ||
import pickle | ||
import zlib | ||
import cshelve | ||
|
||
from helpers import unique_key | ||
|
||
|
||
def test_compression():
    """
    Check that a value written through cshelve is stored zlib-compressed.
    """
    config_path = "tests/configurations/in-memory/compression.ini"
    key = unique_key + "test_writeback"
    expected = "this must be compressed"

    with cshelve.open(config_path) as db:
        db[key] = expected

        # Fetch the stored bytes directly from the nested backing objects,
        # then undo the compression and the pickling by hand.
        raw = db.dict.db.db[key.encode()]
        assert pickle.loads(zlib.decompress(raw)) == expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
""" | ||
Test the compression module. | ||
""" | ||
from unittest.mock import Mock | ||
import zlib | ||
|
||
import pytest | ||
|
||
from cshelve import UnknownCompressionAlgorithmError | ||
from cshelve._compression import configure | ||
from cshelve._data_processing import DataProcessing | ||
|
||
|
||
@pytest.fixture
def data_processing():
    """
    Provide a newly created DataProcessing instance.
    """
    pipeline = DataProcessing()
    return pipeline
|
||
|
||
def test_no_compression(data_processing):
    """
    Check that an empty config registers no processing step at all.
    """
    configure(Mock(), data_processing, {})

    assert len(data_processing.post_processing) == 0
    assert len(data_processing.pre_processing) == 0
|
||
|
||
def test_default_zlib_config(data_processing):
    """
    Check that zlib is wired in when it is the configured algorithm.
    Without an explicit level, the default compression level is expected.
    """
    log = Mock()
    settings = {"algorithm": "zlib"}

    configure(log, data_processing, settings)

    assert len(data_processing.post_processing) == 1
    assert len(data_processing.pre_processing) == 1

    compress_step = data_processing.pre_processing[0]
    decompress_step = data_processing.post_processing[0]
    assert compress_step.func == zlib.compress
    assert decompress_step.func == zlib.decompress
    assert compress_step.keywords["level"] == zlib.Z_DEFAULT_COMPRESSION
    assert decompress_step.keywords == {}

    # Remember which objects were registered first.
    compress_step_id = id(compress_step)
    decompress_step_id = id(decompress_step)

    # A second configuration must append new steps, not reorder existing ones.
    configure(log, data_processing, settings)

    assert len(data_processing.post_processing) == 2
    assert len(data_processing.pre_processing) == 2
    # The steps registered first are still at the head of each list.
    assert id(data_processing.pre_processing[0]) == compress_step_id
    assert id(data_processing.post_processing[0]) == decompress_step_id
|
||
|
||
def test_zlib_level(data_processing):
    """
    Check that a level given in the config reaches the zlib compress step.
    """
    level = 5
    log = Mock()

    configure(log, data_processing, {"algorithm": "zlib", "level": level})

    compress_step = data_processing.pre_processing[0]
    assert compress_step.keywords["level"] == level
|
||
|
||
def test_unknowned_algorithm(data_processing):
    """
    Check that an unknown algorithm name makes `configure` raise.
    """
    log = Mock()
    settings = {"algorithm": "unknow"}

    with pytest.raises(UnknownCompressionAlgorithmError):
        configure(log, data_processing, settings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.