Skip to content

Commit

Permalink
Prepare data processing release (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
florian-vuillemot authored Dec 16, 2024
1 parent 3815f50 commit 72f5cb1
Show file tree
Hide file tree
Showing 15 changed files with 323 additions and 8 deletions.
3 changes: 2 additions & 1 deletion cshelve/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pathlib import Path
import shelve

from .data_processing import DataProcessing
from ._data_processing import DataProcessing
from ._database import _Database
from ._compression import configure as _configure_compression
from ._factory import factory as _factory
Expand All @@ -25,6 +25,7 @@
KeyNotFoundError,
ReadOnlyError,
UnknownCompressionAlgorithmError,
UnknownEncryptionAlgorithmError,
UnknownProviderError,
)

Expand Down
3 changes: 1 addition & 2 deletions cshelve/_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from logging import Logger
from typing import Dict

from .data_processing import DataProcessing
from ._data_processing import DataProcessing
from .exceptions import UnknownCompressionAlgorithmError


Expand All @@ -24,7 +24,6 @@ def configure(
return

if ALGORITHMS_NAME_KEY not in config:
# Inform the user in case of missing configuration.
logger.info("No compression algorithm specified.")
return

Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion cshelve/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections.abc import MutableMapping
from concurrent.futures import ThreadPoolExecutor

from .data_processing import DataProcessing
from ._data_processing import DataProcessing
from .provider_interface import ProviderInterface
from ._flag import can_create, can_write, clear_db
from .exceptions import (
Expand Down
57 changes: 57 additions & 0 deletions cshelve/_encryption.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Encryption module for cshelve.
"""
from functools import partial
from logging import Logger
from typing import Dict

from ._data_processing import DataProcessing
from .exceptions import UnknownEncryptionAlgorithmError


# Configuration key under which the encryption algorithm name is read.
ALGORITHMS_NAME_KEY = "algorithm"
# NOTE(review): not referenced anywhere in this module; looks copied from the
# compression module — confirm nothing imports it before removing.
COMPRESSION_LEVEL_KEY = "level"


def configure(
    logger: Logger, data_processing: DataProcessing, config: Dict[str, str]
) -> None:
    """
    Configure the encryption algorithm.

    The encryption function is registered as a pre-processing step and the
    decryption function as a post-processing step on `data_processing`.
    Nothing is registered when `config` is empty or does not contain the
    algorithm key.

    :param logger: Logger used to report the configuration progress.
    :param data_processing: Object on which the processing steps are registered.
    :param config: Encryption configuration; the algorithm name is read from
        the `ALGORITHMS_NAME_KEY` entry.
    :raises UnknownEncryptionAlgorithmError: If the algorithm is not supported.
    """
    # Encryption is not configured, silently return.
    if not config:
        return

    if ALGORITHMS_NAME_KEY not in config:
        # Inform the user in case of missing configuration.
        logger.info("No encryption algorithm specified.")
        return

    algorithm = config[ALGORITHMS_NAME_KEY]

    # Maps an algorithm name to the factory building its (crypt, decrypt) pair.
    supported_algorithms = {
        "aes256": _aes256,
    }

    encryption = supported_algorithms.get(algorithm)
    if encryption is None:
        raise UnknownEncryptionAlgorithmError(
            f"Unsupported encryption algorithm: {algorithm}"
        )

    logger.debug(f"Configuring encryption algorithm: {algorithm}")
    crypt_fct, decrypt_fct = encryption(config)
    data_processing.add_pre_processing(crypt_fct)
    data_processing.add_post_processing(decrypt_fct)
    logger.debug(f"Encryption algorithm {algorithm} configured.")


def _aes256(config: Dict[str, str]):
"""
Configure aes256 encryption.
"""
import zlib

crypt = lambda x: x
decrypt = lambda x: x

return crypt, decrypt
2 changes: 2 additions & 0 deletions cshelve/_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def factory(logger: Logger, provider: str) -> ProviderInterface:


def _factory(logger: Logger, provider: str):
logger.info(f"Loading provider {provider}")

if provider == "azure-blob":
from ._azure_blob_storage import AzureBlobStorage

Expand Down
8 changes: 8 additions & 0 deletions cshelve/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ class UnknownCompressionAlgorithmError(RuntimeError):
pass


class UnknownEncryptionAlgorithmError(RuntimeError):
    """Raised when the requested encryption algorithm is not a supported one."""


class KeyNotFoundError(KeyError):
"""
Raised when a resource is not found.
Expand Down
57 changes: 57 additions & 0 deletions doc/source/compression.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
Compression Configuration
=========================

*cshelve* supports compression to reduce the size of stored data.
This is particularly useful when working with large datasets or to reduce network time.
The compression algorithm can be configured using a configuration file.

Configuration File
##################

The compression settings are specified in an INI file.
Below is an example configuration file named `config.ini`:

.. code-block:: ini

    [default]
    provider = in-memory
    persist-key = compression
    exists = true

    [compression]
    algorithm = zlib
    level = 1

In this example, the `algorithm` is set to `zlib`, and the `compression level <https://docs.python.org/3/library/zlib.html>`_ is set to `1`.

Supported Algorithms
#####################

Currently, *cshelve* supports the following compression algorithms:

- `zlib`: A widely-used compression library.

Using Compression
#################

Once compression is configured in the `config.ini` file as shown above, *cshelve* automatically compresses data before storing it and decompresses it when retrieving it.
The application code doesn't need to be updated:

.. code-block:: python

    import cshelve

    with cshelve.open('config.ini') as db:
        db['data'] = 'This is some data that will be compressed.'

    with cshelve.open('config.ini') as db:
        data = db['data']
        print(data) # Output: This is some data that will be compressed.

In this example, the data is compressed before being stored and decompressed when retrieved, thanks to the configuration.

Error Handling
##############

If an unsupported compression algorithm is specified, *cshelve* will raise an `UnknownCompressionAlgorithmError`.
Ensure that the algorithm specified in the configuration file is supported.
7 changes: 4 additions & 3 deletions doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ Table of contents
.. toctree::
:maxdepth: 1

introduction
tutorial
in-memory
azure-blob
compression
in-memory
introduction
logging
tutorial
writeback


Expand Down
11 changes: 11 additions & 0 deletions examples/asterix-and-obelix-database/azure-compression.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[default]
provider = azure-blob
account_url = https://dscccccccccccccc.blob.core.windows.net
auth_type = passwordless
container_name = example-asterix-and-obelix-database-compression

[logging]
http = true

[compression]
algorithm = zlib
3 changes: 3 additions & 0 deletions examples/asterix-and-obelix-database/run-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,8 @@ run_test $DB
DB=./azure-passwordless.ini
run_test $DB

DB=./azure-compression.ini
run_test $DB

echo "All tests passed."
exit 0
24 changes: 24 additions & 0 deletions tests/end-to-end/test_data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
This module tests the data processing modules.
"""
import pickle
import zlib
import cshelve

from helpers import unique_key


def test_compression():
    """
    Ensure the data is compressed.

    A value is written through cshelve, then the bytes held by the underlying
    storage are read directly and must zlib-decompress into the pickled value.
    """
    compressed_configuration = "tests/configurations/in-memory/compression.ini"
    # Suffix the key with this test's own name.
    key_pattern = unique_key + "test_compression"
    data = "this must be compressed"

    with cshelve.open(compressed_configuration) as db:
        db[key_pattern] = data

        # Reach into the wrapped storage instead of reading through cshelve,
        # so the assertion sees the stored bytes rather than the decoded value.
        stored = db.dict.db.db[key_pattern.encode()]
        assert pickle.loads(zlib.decompress(stored)) == data
86 changes: 86 additions & 0 deletions tests/units/test_compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Test the compression module.
"""
from unittest.mock import Mock
import zlib

import pytest

from cshelve import UnknownCompressionAlgorithmError
from cshelve._compression import configure
from cshelve._data_processing import DataProcessing


@pytest.fixture
def data_processing():
    """Provide a newly built DataProcessing instance."""
    instance = DataProcessing()
    return instance


def test_no_compression(data_processing):
    """
    Ensure nothing is configured when the config is empty.
    """
    empty_config = {}

    configure(Mock(), data_processing, empty_config)

    # Neither direction of the pipeline may have received a step.
    assert len(data_processing.post_processing) == 0
    assert len(data_processing.pre_processing) == 0


def test_default_zlib_config(data_processing):
    """
    Ensure zlib is configured when requested.
    Without an explicit level, the default compression level must be used.
    """
    logger = Mock()
    config = {"algorithm": "zlib"}

    configure(logger, data_processing, config)

    assert len(data_processing.post_processing) == 1
    assert len(data_processing.pre_processing) == 1

    compress_step = data_processing.pre_processing[0]
    decompress_step = data_processing.post_processing[0]

    assert compress_step.func == zlib.compress
    assert decompress_step.func == zlib.decompress
    assert compress_step.keywords["level"] == zlib.Z_DEFAULT_COMPRESSION
    assert decompress_step.keywords == {}

    # Configuring a second time must add steps without disturbing the order.
    configure(logger, data_processing, config)

    assert len(data_processing.post_processing) == 2
    assert len(data_processing.pre_processing) == 2
    # The steps registered first must still be the first ones applied.
    assert data_processing.pre_processing[0] is compress_step
    assert data_processing.post_processing[0] is decompress_step


def test_zlib_level(data_processing):
    """
    Ensure the zlib compression level can be configured.
    """
    expected_level = 5
    logger = Mock()

    configure(logger, data_processing, {"algorithm": "zlib", "level": expected_level})

    compress_step = data_processing.pre_processing[0]
    assert compress_step.keywords["level"] == expected_level


def test_unknowned_algorithm(data_processing):
    """
    Ensure an exception is raised when an unknown algorithm is provided.
    """
    unsupported_config = {"algorithm": "unknow"}
    logger = Mock()

    with pytest.raises(UnknownCompressionAlgorithmError):
        configure(logger, data_processing, unsupported_config)
2 changes: 1 addition & 1 deletion tests/units/test_database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest.mock import Mock

import pytest
from cshelve.data_processing import DataProcessing
from cshelve._data_processing import DataProcessing
from cshelve._database import _Database
from cshelve._in_memory import InMemory
from cshelve.exceptions import CanNotCreateDBError, DBDoesNotExistsError, ReadOnlyError
Expand Down
Loading

0 comments on commit 72f5cb1

Please sign in to comment.