From 5be3cbc6c4c0764ec47a6a50b9dd7e6ab67d1736 Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sat, 21 Dec 2024 12:51:42 +0100 Subject: [PATCH 01/11] encryption retrieve key --- .devcontainer/Dockerfile | 2 +- CHANGELOG.md | 4 ++ cshelve/__init__.py | 7 ++++ cshelve/_encryption.py | 69 ++++++++++++++++++++++++++++---- cshelve/exceptions.py | 16 ++++++++ doc/source/encryption.rst | 83 +++++++++++++++++++++++++++++++++++++++ pyproject.toml | 4 ++ uv.lock | 36 ++++++++++++++++- 8 files changed, 210 insertions(+), 11 deletions(-) create mode 100644 doc/source/encryption.rst diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index e7da7b7..b737b43 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -12,7 +12,7 @@ RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash USER vscode # Copy the project files then build the virtual environment -COPY --chown=vscode:vscode pyproject.toml uv.lock /build/package +COPY --chown=vscode:vscode . /build/package RUN cd /build/package && uv venv --python 3.9 && uv sync --dev # Copy the project files to the workspace diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eb4490..a86c895 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [0.9.0] - 2024- +### Added +- Allow data encryption + ## [0.8.0] - 2024-12-17 ### Added - Allow data compression diff --git a/cshelve/__init__.py b/cshelve/__init__.py index e17dd20..af8da35 100644 --- a/cshelve/__init__.py +++ b/cshelve/__init__.py @@ -13,6 +13,7 @@ from ._data_processing import DataProcessing from ._database import _Database from ._compression import configure as _configure_compression +from ._encryption import configure as _configure_encryption from ._factory import factory as _factory from ._parser import load as _config_loader from ._parser import use_local_shelf @@ -22,7 +23,9 @@ CanNotCreateDBError, ConfigurationError, DBDoesNotExistsError, + EncryptionKeyNotDefinedError, KeyNotFoundError, + NoEncryptionKeyError, 
ReadOnlyError, UnknownCompressionAlgorithmError, UnknownEncryptionAlgorithmError, @@ -37,11 +40,14 @@ "ConfigurationError", "DataProcessing", "DBDoesNotExistsError", + "EncryptionKeyNotDefinedError", "KeyNotFoundError", + "NoEncryptionKeyError", "open", "ReadOnlyError", "ResourceNotFoundError", "UnknownCompressionAlgorithmError", + "UnknownEncryptionAlgorithmError", "UnknownProviderError", ] @@ -76,6 +82,7 @@ def __init__( # Data processing object used to apply pre and post processing to the data. data_processing = DataProcessing() _configure_compression(logger, data_processing, config.compression) + _configure_encryption(logger, data_processing, config.encryption) # The CloudDatabase object is the class that interacts with the cloud storage backend. # This class doesn't perform or respect the shelve.Shelf logic and interface so we need to wrap it. diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index dedd46e..687add3 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -1,16 +1,24 @@ """ Encryption module for cshelve. """ +import os from functools import partial from logging import Logger from typing import Dict from ._data_processing import DataProcessing -from .exceptions import UnknownEncryptionAlgorithmError +from .exceptions import ( + UnknownEncryptionAlgorithmError, + NoEncryptionKeyError, + EncryptionKeyNotDefinedError, +) +# Key that can be defined in the INI file. ALGORITHMS_NAME_KEY = "algorithm" -COMPRESSION_LEVEL_KEY = "level" +## User can provide the key via the ini file or env variable. 
+KEY_KEY = "key" +ENVIRONMENT_KEY = "environment_key" def configure( @@ -24,18 +32,20 @@ def configure( return if ALGORITHMS_NAME_KEY not in config: - logger.info("No compression algorithm specified.") + logger.info("No encryption algorithm specified.") return algorithm = config[ALGORITHMS_NAME_KEY] + key = _get_key(logger, config) + supported_algorithms = { "aes256": _aes256, } if encryption := supported_algorithms.get(algorithm): logger.debug(f"Configuring encryption algorithm: {algorithm}") - crypt_fct, decrypt_fct = encryption(config) + crypt_fct, decrypt_fct = encryption(config, key) data_processing.add_pre_processing(crypt_fct) data_processing.add_post_processing(decrypt_fct) logger.debug(f"Encryption algorithm {algorithm} configured.") @@ -45,13 +55,56 @@ def configure( ) -def _aes256(config: Dict[str, str]): +def _get_key(logger, config) -> bytes: + if env_key := config.get(ENVIRONMENT_KEY): + if key := os.environ.get(env_key): + return key.encode() + logger.error( + f"Encryption key is configured to use use environment variable but environment variable '{ENVIRONMENT_KEY}' doesn't not exists." + ) + raise EncryptionKeyNotDefinedError( + f"Environment variable '{ENVIRONMENT_KEY}' not found." + ) + + if key := config.get(KEY_KEY): + logger.info( + "Encryption is based on a key defined in the config file and not an environment variable." + ) + return key.encode() + + logger.error("Encryption is specified without key.") + raise NoEncryptionKeyError("Encryption is specified without key.") + + +def _aes256(config: Dict[str, str], key: bytes): """ Configure aes256 encryption. 
""" - import zlib + from Crypto.Cipher import AES - crypt = lambda x: x - decrypt = lambda x: x + crypt = partial(_crypt, AES, key) + decrypt = partial(_decrypt, AES, key) return crypt, decrypt + + +def _crypt(AES, key: bytes, data: bytes) -> bytes: + cipher = AES.new(key, AES.MODE_EAX) + ciphertext, tag = cipher.encrypt_and_digest(data) + + res = bytes() + res += tag + res += cipher.nonce + res += ciphertext + + return res + + +def _decrypt(AES, key: bytes, data: bytes) -> bytes: + tag = data[:16] + nonce = data[16 : 16 + 12] + + cipher = AES.new(key, AES.MODE_EAX, nonce=nonce) + plaintext = cipher.decrypt(data) + + return plaintext diff --git a/cshelve/exceptions.py b/cshelve/exceptions.py index 68fa29a..65c8e1b 100644 --- a/cshelve/exceptions.py +++ b/cshelve/exceptions.py @@ -30,6 +30,22 @@ class UnknownEncryptionAlgorithmError(RuntimeError): pass +class NoEncryptionKeyError(RuntimeError): + """ + Raised when there is no encryption key provided. + """ + + pass + + +class EncryptionKeyNotDefinedError(RuntimeError): + """ + Raised when there is no encryption key defined as environment variable. + """ + + pass + + class KeyNotFoundError(KeyError): """ Raised when a resource is not found. diff --git a/doc/source/encryption.rst b/doc/source/encryption.rst new file mode 100644 index 0000000..b8153a7 --- /dev/null +++ b/doc/source/encryption.rst @@ -0,0 +1,83 @@ +Encryption Configuration +======================== + +*cshelve* supports data encryption before sending data to the provider. +This is particularly useful when user want to ensure the non modification of the data and reduce potential attacks via pickles. + +Installation +############ + +The encryption functionality is not natively installed, to install it, run: + +.. code-block:: console + + $ pip install cshelve[encryption] + + +Configuration File +################## + +The encryption settings are specified in an INI file. +Below is an example configuration file named `config.ini`: + +.. 
code-block:: ini + + [default] + provider = in-memory + persist-key = compression + exists = true + + [encryption] + algorithm = aes256 + # Development configuration putting the key in the config file. + key = "my encryption key" + +In this example, the `algorithm` is set to `aes256`, and the `encryption key` is set to `my encryption key`. + +For security purpose its better to not put the key in the config file and use environment variable to provide it. +Here the same example using an environment variable named `ENCRYPTION_KEY`: + +.. code-block:: ini + + [default] + provider = in-memory + persist-key = compression + exists = true + + [encryption] + algorithm = aes256 + # Here the environment variable containing the encryption key is named ENCRYPTION_KEY. + environment_key = ENCRYPTION_KEY + + +Supported Algorithms +##################### + +Currently, *cshelve* supports the following encryption algorithms: + +- `aes256`: A widely-used symetric encryption library. + +Using Encryption +################# + +Once encryption is configured as previously in the `config.ini` file, it will automatically crypt data before storing it and decrypt data when retrieving it. +The application code doesn't need to be updated: + +.. code-block:: python + + import cshelve + + with cshelve.open('config.ini') as db: + db['data'] = 'This is some data that will be encrypt.' + + with cshelve.open('config.ini') as db: + data = db['data'] + print(data) # Output: This is some data that will be encrypt. + +In this example, the data is encrypt before being stored and decrypt when retrieved, thanks to the configuration. + +Error Handling +############## + +If an unsupported compression algorithm is specified, *cshelve* will raise an `UnknownEncryptionAlgorithmError`. +Ensure that the algorithm specified in the configuration file is supported. 
diff --git a/pyproject.toml b/pyproject.toml index 92889c4..847f432 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dev-dependencies = [ "ruff>=0.6.9", "sphinx-autobuild>=2024.10.3", "sphinx>=7.4.7", + "pycryptodome>=3.21.0", ] [tool.uv.sources] @@ -50,3 +51,6 @@ azure-blob = [ "azure-identity>=1.19.0", "azure-storage-blob>=12.23.1", ] +encryption = [ + "pycryptodome>=3.21.0", +] diff --git a/uv.lock b/uv.lock index a080a2f..5580429 100644 --- a/uv.lock +++ b/uv.lock @@ -401,14 +401,17 @@ wheels = [ [[package]] name = "cshelve" -version = "0.4.1" -source = { virtual = "." } +version = "0.8.0" +source = { editable = "." } [package.optional-dependencies] azure-blob = [ { name = "azure-identity" }, { name = "azure-storage-blob" }, ] +encryption = [ + { name = "pycryptodome" }, +] [package.dev-dependencies] dev = [ @@ -429,6 +432,7 @@ dev = [ requires-dist = [ { name = "azure-identity", marker = "extra == 'azure-blob'", specifier = ">=1.19.0" }, { name = "azure-storage-blob", marker = "extra == 'azure-blob'", specifier = ">=12.23.1" }, + { name = "pycryptodome", marker = "extra == 'encryption'", specifier = ">=3.21.0" }, ] [package.metadata.requires-dev] @@ -927,6 +931,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, ] +[[package]] +name = "pycryptodome" +version = "3.21.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/52/13b9db4a913eee948152a079fe58d035bd3d1a519584155da8e786f767e6/pycryptodome-3.21.0.tar.gz", hash = "sha256:f7787e0d469bdae763b876174cf2e6c0f7be79808af26b1da96f1a64bcf47297", size = 4818071 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a7/88/5e83de10450027c96c79dc65ac45e9d0d7a7fef334f39d3789a191f33602/pycryptodome-3.21.0-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:2480ec2c72438430da9f601ebc12c518c093c13111a5c1644c82cdfc2e50b1e4", size = 2495937 }, + { url = "https://files.pythonhosted.org/packages/66/e1/8f28cd8cf7f7563319819d1e172879ccce2333781ae38da61c28fe22d6ff/pycryptodome-3.21.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:de18954104667f565e2fbb4783b56667f30fb49c4d79b346f52a29cb198d5b6b", size = 1634629 }, + { url = "https://files.pythonhosted.org/packages/6a/c1/f75a1aaff0c20c11df8dc8e2bf8057e7f73296af7dfd8cbb40077d1c930d/pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de4b7263a33947ff440412339cb72b28a5a4c769b5c1ca19e33dd6cd1dcec6e", size = 2168708 }, + { url = "https://files.pythonhosted.org/packages/ea/66/6f2b7ddb457b19f73b82053ecc83ba768680609d56dd457dbc7e902c41aa/pycryptodome-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0714206d467fc911042d01ea3a1847c847bc10884cf674c82e12915cfe1649f8", size = 2254555 }, + { url = "https://files.pythonhosted.org/packages/2c/2b/152c330732a887a86cbf591ed69bd1b489439b5464806adb270f169ec139/pycryptodome-3.21.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d85c1b613121ed3dbaa5a97369b3b757909531a959d229406a75b912dd51dd1", size = 2294143 }, + { url = "https://files.pythonhosted.org/packages/55/92/517c5c498c2980c1b6d6b9965dffbe31f3cd7f20f40d00ec4069559c5902/pycryptodome-3.21.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:8898a66425a57bcf15e25fc19c12490b87bd939800f39a03ea2de2aea5e3611a", size = 2160509 }, + { url = "https://files.pythonhosted.org/packages/39/1f/c74288f54d80a20a78da87df1818c6464ac1041d10988bb7d982c4153fbc/pycryptodome-3.21.0-cp36-abi3-musllinux_1_2_i686.whl", hash = "sha256:932c905b71a56474bff8a9c014030bc3c882cee696b448af920399f730a650c2", size 
= 2329480 }, + { url = "https://files.pythonhosted.org/packages/39/1b/d0b013bf7d1af7cf0a6a4fce13f5fe5813ab225313755367b36e714a63f8/pycryptodome-3.21.0-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:18caa8cfbc676eaaf28613637a89980ad2fd96e00c564135bf90bc3f0b34dd93", size = 2254397 }, + { url = "https://files.pythonhosted.org/packages/14/71/4cbd3870d3e926c34706f705d6793159ac49d9a213e3ababcdade5864663/pycryptodome-3.21.0-cp36-abi3-win32.whl", hash = "sha256:280b67d20e33bb63171d55b1067f61fbd932e0b1ad976b3a184303a3dad22764", size = 1775641 }, + { url = "https://files.pythonhosted.org/packages/43/1d/81d59d228381576b92ecede5cd7239762c14001a828bdba30d64896e9778/pycryptodome-3.21.0-cp36-abi3-win_amd64.whl", hash = "sha256:b7aa25fc0baa5b1d95b7633af4f5f1838467f1815442b22487426f94e0d66c53", size = 1812863 }, + { url = "https://files.pythonhosted.org/packages/25/b3/09ff7072e6d96c9939c24cf51d3c389d7c345bf675420355c22402f71b68/pycryptodome-3.21.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:2cb635b67011bc147c257e61ce864879ffe6d03342dc74b6045059dfbdedafca", size = 1691593 }, + { url = "https://files.pythonhosted.org/packages/a8/91/38e43628148f68ba9b68dedbc323cf409e537fd11264031961fd7c744034/pycryptodome-3.21.0-pp27-pypy_73-win32.whl", hash = "sha256:4c26a2f0dc15f81ea3afa3b0c87b87e501f235d332b7f27e2225ecb80c0b1cdd", size = 1765997 }, + { url = "https://files.pythonhosted.org/packages/08/16/ae464d4ac338c1dd41f89c41f9488e54f7d2a3acf93bb920bb193b99f8e3/pycryptodome-3.21.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d5ebe0763c982f069d3877832254f64974139f4f9655058452603ff559c482e8", size = 1615855 }, + { url = "https://files.pythonhosted.org/packages/1e/8c/b0cee957eee1950ce7655006b26a8894cee1dc4b8747ae913684352786eb/pycryptodome-3.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ee86cbde706be13f2dec5a42b52b1c1d1cbb90c8e405c68d0755134735c8dc6", size = 1650018 }, + { url = 
"https://files.pythonhosted.org/packages/93/4d/d7138068089b99f6b0368622e60f97a577c936d75f533552a82613060c58/pycryptodome-3.21.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fd54003ec3ce4e0f16c484a10bc5d8b9bd77fa662a12b85779a2d2d85d67ee0", size = 1687977 }, + { url = "https://files.pythonhosted.org/packages/96/02/90ae1ac9f28be4df0ed88c127bf4acc1b102b40053e172759d4d1c54d937/pycryptodome-3.21.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5dfafca172933506773482b0e18f0cd766fd3920bd03ec85a283df90d8a17bc6", size = 1788273 }, + { url = "https://files.pythonhosted.org/packages/04/cf/72831e972d2bd94f7ea8d8364b00f2bac2e848a601d6cff12376543152bb/pycryptodome-3.21.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:590ef0898a4b0a15485b05210b4a1c9de8806d3ad3d47f74ab1dc07c67a6827f", size = 1615737 }, + { url = "https://files.pythonhosted.org/packages/ce/b2/7b8b846ed3340cf266cc1fc57cc308fb4e569847f728e18d8e7c89954973/pycryptodome-3.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f35e442630bc4bc2e1878482d6f59ea22e280d7121d7adeaedba58c23ab6386b", size = 1649932 }, + { url = "https://files.pythonhosted.org/packages/95/87/de5181de6e82aadd94ff6f1f6a58164b199f9bb953897682aa3bd0773c2f/pycryptodome-3.21.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff99f952db3db2fbe98a0b355175f93ec334ba3d01bbde25ad3a5a33abc02b58", size = 1687888 }, + { url = "https://files.pythonhosted.org/packages/33/c2/c7b6f7a9a7eb50f478804b933e64de5dcdc6726881d9004e0cb857a8b8ff/pycryptodome-3.21.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8acd7d34af70ee63f9a849f957558e49a98f8f1634f86a59d2be62bb8e93f71c", size = 1788556 }, +] + [[package]] name = "pygments" version = "2.18.0" From 099d6b48a283efb650a8453fdac8f65467b7c894 Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sat, 21 Dec 2024 15:55:29 +0100 Subject: [PATCH 
02/11] add structure to keep encryption data --- cshelve/__init__.py | 3 +- cshelve/_encryption.py | 70 ++++++++++---- cshelve/_parser.py | 10 +- cshelve/exceptions.py | 8 ++ doc/source/encryption.rst | 3 + tests/configurations/in-memory/encryption.ini | 8 ++ tests/end-to-end/test_data_processing.py | 21 +++- tests/units/test_encryption.py | 96 +++++++++---------- 8 files changed, 150 insertions(+), 69 deletions(-) create mode 100644 tests/configurations/in-memory/encryption.ini diff --git a/cshelve/__init__.py b/cshelve/__init__.py index af8da35..120c96c 100644 --- a/cshelve/__init__.py +++ b/cshelve/__init__.py @@ -22,6 +22,7 @@ AuthTypeError, CanNotCreateDBError, ConfigurationError, + DataCorruptionError, DBDoesNotExistsError, EncryptionKeyNotDefinedError, KeyNotFoundError, @@ -38,7 +39,7 @@ "AuthTypeError", "CanNotCreateDBError", "ConfigurationError", - "DataProcessing", + "DataCorruptionError", "DBDoesNotExistsError", "EncryptionKeyNotDefinedError", "KeyNotFoundError", diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index 687add3..d317d97 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -2,6 +2,8 @@ Encryption module for cshelve. """ import os +import struct +from collections import namedtuple from functools import partial from logging import Logger from typing import Dict @@ -11,6 +13,7 @@ UnknownEncryptionAlgorithmError, NoEncryptionKeyError, EncryptionKeyNotDefinedError, + DataCorruptionError, ) @@ -21,6 +24,16 @@ ENVIRONMENT_KEY = "environment_key" +# Normally the 'tag' is using 16 bytes and the 'nonce' 12 bytes. +# But, by security and for the futur, we keep their length in a this dedicated data structure. +# With also keep the algorithm as a unsigned char. +MessageInformation = namedtuple( + "MessageInformation", ["algorithm", "len_tag", "len_nonce", "message"] +) +# Hold the encrypted message. 
+Message = namedtuple("Message", ["tag", "nonce", "encrypted_data"]) + + def configure( logger: Logger, data_processing: DataProcessing, config: Dict[str, str] ) -> None: @@ -40,12 +53,13 @@ def configure( key = _get_key(logger, config) supported_algorithms = { - "aes256": _aes256, + "aes256": (_aes256, 1), } - if encryption := supported_algorithms.get(algorithm): + if algorithm := supported_algorithms.get(algorithm): + fct, algo_signature = algorithm logger.debug(f"Configuring encryption algorithm: {algorithm}") - crypt_fct, decrypt_fct = encryption(config, key) + crypt_fct, decrypt_fct = fct(algo_signature, config, key) data_processing.add_pre_processing(crypt_fct) data_processing.add_post_processing(decrypt_fct) logger.debug(f"Encryption algorithm {algorithm} configured.") @@ -76,35 +90,55 @@ def _get_key(logger, config) -> bytes: raise NoEncryptionKeyError("Encryption is specified without key.") -def _aes256(config: Dict[str, str], key: bytes): +def _aes256(signature, config: Dict[str, str], key: bytes): """ Configure aes256 encryption. 
""" from Crypto.Cipher import AES - crypt = partial(_crypt, AES, key) - decrypt = partial(_decrypt, AES, key) + crypt = partial(_crypt, signature, AES, key) + decrypt = partial(_decrypt, signature, AES, key) return crypt, decrypt -def _crypt(AES, key: bytes, data: bytes) -> bytes: +def _crypt(signature, AES, key: bytes, data: bytes) -> bytes: cipher = AES.new(key, AES.MODE_EAX) - ciphertext, tag = cipher.encrypt_and_digest(data) + encrypted_data, tag = cipher.encrypt_and_digest(data) + + message = Message(tag=tag, nonce=cipher.nonce, encrypted_data=encrypted_data) + + info = MessageInformation( + algorithm=signature, + len_tag=len(tag), + len_nonce=len(cipher.nonce), + message=message.tag + message.nonce + message.encrypted_data, + ) + + return struct.pack( + f" bytes: + message_len = len(bytes) - 8 * 3 # 3 bytes (b) + info = MessageInformation._make(struct.unpack(f" bytes: - tag = data[:16] - nonce = data[16 : 16 + 12] + plaintext = cipher.decrypt(message.encrypted_data) - cipher = AES.new(key, AES.MODE_EAX, nonce=nonce) - plaintext = cipher.decrypt(data) + try: + cipher.verify(message.tag) + except ValueError: + raise DataCorruptionError("The encrypted data was modified.") return plaintext diff --git a/cshelve/_parser.py b/cshelve/_parser.py index 6057328..cf83879 100644 --- a/cshelve/_parser.py +++ b/cshelve/_parser.py @@ -21,10 +21,14 @@ LOGGING_KEY_STORE = "logging" # Compression configuration section. COMPRESSION_KEY_STORE = "compression" +# Encryption configuration section. +ENCRYPTION_KEY_STORE = "encryption" # Tuple containing the provider name and its configuration. 
-Config = namedtuple("Config", ["provider", "default", "logging", "compression"]) +Config = namedtuple( + "Config", ["provider", "default", "logging", "compression", "encryption"] +) def use_local_shelf(filename: Path) -> bool: @@ -47,6 +51,9 @@ def load(logger: Logger, filename: Path) -> Tuple[str, Dict[str, str]]: compression_config = ( config[COMPRESSION_KEY_STORE] if COMPRESSION_KEY_STORE in config else {} ) + encryption_config = ( + config[ENCRYPTION_KEY_STORE] if ENCRYPTION_KEY_STORE in config else {} + ) logger.debug(f"Configuration file '{filename}' loaded.") return Config( @@ -54,4 +61,5 @@ def load(logger: Logger, filename: Path) -> Tuple[str, Dict[str, str]]: default=c, logging=logging_config, compression=compression_config, + encryption=encryption_config, ) diff --git a/cshelve/exceptions.py b/cshelve/exceptions.py index 65c8e1b..8eefa53 100644 --- a/cshelve/exceptions.py +++ b/cshelve/exceptions.py @@ -46,6 +46,14 @@ class EncryptionKeyNotDefinedError(RuntimeError): pass +class DataCorruptionError(RuntimeError): + """ + Raised when a data is not accessible due to a corruption. + """ + + pass + + class KeyNotFoundError(KeyError): """ Raised when a resource is not found. diff --git a/doc/source/encryption.rst b/doc/source/encryption.rst index b8153a7..8de449c 100644 --- a/doc/source/encryption.rst +++ b/doc/source/encryption.rst @@ -4,6 +4,8 @@ Encryption Configuration *cshelve* supports data encryption before sending data to the provider. This is particularly useful when user want to ensure the non modification of the data and reduce potential attacks via pickles. +**Note:** Only the value (the pickle) is encrypted, not the key. + Installation ############ @@ -68,6 +70,7 @@ The application code doesn't need to be updated: import cshelve with cshelve.open('config.ini') as db: + # The message will be encrypted but not the key 'data'. db['data'] = 'This is some data that will be encrypt.' 
with cshelve.open('config.ini') as db: diff --git a/tests/configurations/in-memory/encryption.ini b/tests/configurations/in-memory/encryption.ini new file mode 100644 index 0000000..a416126 --- /dev/null +++ b/tests/configurations/in-memory/encryption.ini @@ -0,0 +1,8 @@ +[default] +provider = in-memory +persist-key = encryption +exists = true + +[encryption] +algorithm = aes256 +key = 'best key ever' diff --git a/tests/end-to-end/test_data_processing.py b/tests/end-to-end/test_data_processing.py index e7ebe2a..735e423 100644 --- a/tests/end-to-end/test_data_processing.py +++ b/tests/end-to-end/test_data_processing.py @@ -3,6 +3,9 @@ """ import pickle import zlib + +import pytest + import cshelve from helpers import unique_key @@ -13,7 +16,7 @@ def test_compression(): Ensure the data is compressed. """ compressed_configuration = "tests/configurations/in-memory/compression.ini" - key_pattern = unique_key + "test_writeback" + key_pattern = unique_key + "test_compression" data = "this must be compressed" with cshelve.open(compressed_configuration) as db: @@ -22,3 +25,19 @@ def test_compression(): assert ( pickle.loads(zlib.decompress(db.dict.db.db[key_pattern.encode()])) == data ) + + +def test_encryption(): + """ + Ensure the data is encrypted. + """ + encryption_configuration = "tests/configurations/in-memory/encryption.ini" + key_pattern = unique_key + "test_encryption" + data = "this must be encrypted" + + with cshelve.open(encryption_configuration) as db: + db[key_pattern] = data + + with pytest.raises(Exception): + # Can't unpickled an encrypted data. + pickle.loads(db.dict.db.db[key_pattern.encode()]) diff --git a/tests/units/test_encryption.py b/tests/units/test_encryption.py index 147193a..ca1aecc 100644 --- a/tests/units/test_encryption.py +++ b/tests/units/test_encryption.py @@ -1,66 +1,66 @@ -# """ -# Test the encryption module. -# """ -# from unittest.mock import Mock +""" +Test the encryption module. 
+""" +from unittest.mock import Mock -# import pytest +import pytest -# from cshelve import UnknownEncryptionAlgorithmError -# from cshelve._encryption import configure -# from cshelve._data_processing import DataProcessing +from cshelve import UnknownEncryptionAlgorithmError +from cshelve._encryption import configure +from cshelve._data_processing import DataProcessing -# @pytest.fixture -# def data_processing(): -# return DataProcessing() +@pytest.fixture +def data_processing(): + return DataProcessing() -# def test_no_encryption(data_processing): -# """ -# Ensure nothing si configure when the config is empty. -# """ -# logger = Mock() -# config = {} +def test_no_encryption(data_processing): + """ + Ensure nothing si configure when the config is empty. + """ + logger = Mock() + config = {} -# configure(logger, data_processing, config) + configure(logger, data_processing, config) -# assert len(data_processing.post_processing) == 0 -# assert len(data_processing.pre_processing) == 0 + assert len(data_processing.post_processing) == 0 + assert len(data_processing.pre_processing) == 0 -# def test_default_aes256_config(data_processing): -# """ -# Ensure AES256 is configured when defined. -# """ -# logger = Mock() -# config = {"algorithm": "aes256"} +def test_default_aes256_config(data_processing): + """ + Ensure AES256 is configured when defined. 
+ """ + logger = Mock() + config = {"algorithm": "aes256"} -# configure(logger, data_processing, config) + configure(logger, data_processing, config) -# assert len(data_processing.post_processing) == 1 -# assert len(data_processing.pre_processing) == 1 -# assert data_processing.pre_processing[0].func == None -# assert data_processing.post_processing[0].func == None + assert len(data_processing.post_processing) == 1 + assert len(data_processing.pre_processing) == 1 + assert data_processing.pre_processing[0].func == None + assert data_processing.post_processing[0].func == None -# first_pre_processing_applied = id(data_processing.pre_processing[0]) -# first_post_processing_applied = id(data_processing.post_processing[0]) + first_pre_processing_applied = id(data_processing.pre_processing[0]) + first_post_processing_applied = id(data_processing.post_processing[0]) -# # Ensure the same behaviours and order if configured twice. -# configure(logger, data_processing, config) + # Ensure the same behaviours and order if configured twice. + configure(logger, data_processing, config) -# assert len(data_processing.post_processing) == 2 -# assert len(data_processing.pre_processing) == 2 -# # Ensure the order is respected. -# assert first_pre_processing_applied == id(data_processing.pre_processing[0]) -# assert first_post_processing_applied == id(data_processing.post_processing[0]) + assert len(data_processing.post_processing) == 2 + assert len(data_processing.pre_processing) == 2 + # Ensure the order is respected. + assert first_pre_processing_applied == id(data_processing.pre_processing[0]) + assert first_post_processing_applied == id(data_processing.post_processing[0]) -# def test_unknowned_algorithm(data_processing): -# """ -# Ensure an exception is raised when an unknowed algorithm is provided. 
-# """ -# logger = Mock() -# config = {"algorithm": "unknow"} +def test_unknowned_algorithm(data_processing): + """ + Ensure an exception is raised when an unknowed algorithm is provided. + """ + logger = Mock() + config = {"algorithm": "unknow"} -# with pytest.raises(UnknownEncryptionAlgorithmError): -# configure(logger, data_processing, config) + with pytest.raises(UnknownEncryptionAlgorithmError): + configure(logger, data_processing, config) From 522b467c012d76ca7cd61dfd39b1d2bc17ff5d1e Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 Dec 2024 08:18:18 +0000 Subject: [PATCH 03/11] Remove useless exception --- cshelve/__init__.py | 2 - cshelve/_encryption.py | 9 ++-- cshelve/exceptions.py | 8 ---- tests/configurations/in-memory/encryption.ini | 2 +- ...ing.py => test_buildin_data_processing.py} | 0 tests/units/test_azure_blob_storage.py | 1 - tests/units/test_cloud_shelf.py | 8 ++-- tests/units/test_encryption.py | 44 ++++++++++++++++--- tests/units/test_open.py | 3 +- tests/units/test_protocol_interface.py | 2 +- 10 files changed, 52 insertions(+), 27 deletions(-) rename tests/end-to-end/{test_data_processing.py => test_buildin_data_processing.py} (100%) diff --git a/cshelve/__init__.py b/cshelve/__init__.py index 120c96c..53c60b3 100644 --- a/cshelve/__init__.py +++ b/cshelve/__init__.py @@ -24,7 +24,6 @@ ConfigurationError, DataCorruptionError, DBDoesNotExistsError, - EncryptionKeyNotDefinedError, KeyNotFoundError, NoEncryptionKeyError, ReadOnlyError, @@ -41,7 +40,6 @@ "ConfigurationError", "DataCorruptionError", "DBDoesNotExistsError", - "EncryptionKeyNotDefinedError", "KeyNotFoundError", "NoEncryptionKeyError", "open", diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index d317d97..03f89e7 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -12,7 +12,6 @@ from .exceptions import ( UnknownEncryptionAlgorithmError, NoEncryptionKeyError, - EncryptionKeyNotDefinedError, DataCorruptionError, ) @@ -76,7 +75,7 @@ def 
_get_key(logger, config) -> bytes: logger.error( f"Encryption key is configured to use use environment variable but environment variable '{ENVIRONMENT_KEY}' doesn't not exists." ) - raise EncryptionKeyNotDefinedError( + raise NoEncryptionKeyError( f"Environment variable '{ENVIRONMENT_KEY}' not found." ) @@ -116,7 +115,11 @@ def _crypt(signature, AES, key: bytes, data: bytes) -> bytes: ) return struct.pack( - f" Date: Sun, 22 Dec 2024 08:45:52 +0000 Subject: [PATCH 04/11] fix tests --- .env.example | 1 + .github/workflows/quality.yml | 1 + .github/workflows/tests.yml | 2 ++ cshelve/_encryption.py | 8 +++++--- tests/configurations/azure-blob/encryption.ini | 9 +++++++++ tests/end-to-end/test_api.py | 2 ++ tests/end-to-end/test_large.py | 2 ++ tests/end-to-end/test_writeback.py | 2 ++ 8 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 tests/configurations/azure-blob/encryption.ini diff --git a/.env.example b/.env.example index 8d41dd4..ab5d5ec 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,3 @@ AZURE_STORAGE_ACCESS_KEY="" AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite:10000/devstoreaccount1;" +ENCRYPTION_KEY="Sixteen byte key" diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 9a13a8b..3ba41d0 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -167,3 +167,4 @@ jobs: env: # Local connection string for Azurite. 
AZURE_STORAGE_CONNECTION_STRING: "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + ENCRYPTION_KEY: "Sixteen byte key" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e18cb81..ca7ee96 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -57,6 +57,7 @@ jobs: run: uv run --python ${{ matrix.python-version }} pytest -m "azure" tests/end-to-end env: AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }} + ENCRYPTION_KEY: "Sixteen byte key" AZURE_STORAGE_ACCESS_KEY: ${{ secrets.AZURE_STORAGE_ACCESS_KEY }} - name: Run end-to-end tests supporting Azurite @@ -67,3 +68,4 @@ jobs: uv run --python ${{ matrix.python-version }} pytest -m "not azure" tests/end-to-end env: AZURE_STORAGE_CONNECTION_STRING: "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + ENCRYPTION_KEY: "Sixteen byte key" diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index 03f89e7..dcb36f2 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -124,15 +124,17 @@ def _crypt(signature, AES, key: bytes, data: bytes) -> bytes: def _decrypt(signature, AES, key: bytes, data: bytes) -> bytes: - message_len = len(bytes) - 8 * 3 # 3 bytes (b) - info = MessageInformation._make(struct.unpack(f" Date: Sun, 22 Dec 2024 16:00:42 +0000 Subject: [PATCH 05/11] update doc --- doc/Makefile | 2 +- doc/source/encryption.rst | 95 +++++++++++++++++++++------------------ doc/source/index.rst | 1 + 3 files changed, 54 insertions(+), 44 deletions(-) diff --git a/doc/Makefile b/doc/Makefile index 521e4f9..facf254 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -20,4 +20,4 @@ help: @$(SPHINXBUILD) -M $@ 
"$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) livehtml: - @sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) --host 0.0.0.0 diff --git a/doc/source/encryption.rst b/doc/source/encryption.rst index 8de449c..31d5e29 100644 --- a/doc/source/encryption.rst +++ b/doc/source/encryption.rst @@ -1,86 +1,95 @@ Encryption Configuration -======================== +========================= -*cshelve* supports data encryption before sending data to the provider. -This is particularly useful when user want to ensure the non modification of the data and reduce potential attacks via pickles. +The `cshelve` module supports encrypting data before it is sent to the storage provider. +This feature is particularly useful for ensuring data integrity and mitigating potential security risks associated with pickles. -**Note:** Only the value (the pickle) is encrypted, not the key. +.. note:: + Only the values (pickled data) are encrypted, not the keys. + +.. caution:: + Encryption is CPU-intensive and may impact performance. Installation ############ -The encryption functionality is not natively installed, to install it, run: +Encryption functionality is not included by default. To enable encryption, install the additional dependencies by running: .. code-block:: console - $ pip install cshelve[encryption] - + $ pip install cshelve[encryption] Configuration File ################## -The encryption settings are specified in an INI file. -Below is an example configuration file named `config.ini`: +Encryption settings are defined in an INI file. Below is an example configuration file named `config.ini`: .. code-block:: ini - [default] - provider = in-memory - persist-key = compression - exists = true + [default] + provider = in-memory + persist-key = compression + exists = true - [encryption] - algorithm = aes256 - # Development configuration putting the key in the config file. 
- key = "my encryption key" + [encryption] + algorithm = aes256 + # Development configuration: encryption key stored directly in the file. + key = Sixteen byte key -In this example, the `algorithm` is set to `aes256`, and the `encryption key` is set to `my encryption key`. +In this example, the encryption algorithm is set to `aes256`, and the encryption key is defined as `Sixteen byte key`. -For security purpose its better to not put the key in the config file and use environment variable to provide it. -Here the same example using an environment variable named `ENCRYPTION_KEY`: +Using Environment Variables for Keys +#################################### -.. code-block:: ini +For improved security, it is recommended to avoid storing encryption keys directly in configuration files. Instead, use an environment variable to supply the key. +Here's an updated example using an environment variable named `ENCRYPTION_KEY`: - [default] - provider = in-memory - persist-key = compression - exists = true +.. code-block:: ini - [encryption] - algorithm = aes256 - # Here the environment variable containing the encryption key is named ENCRYPTION_KEY. - environment_key = ENCRYPTION_KEY + [default] + provider = in-memory + persist-key = compression + exists = true + [encryption] + algorithm = aes256 + # The encryption key is retrieved from the environment variable `ENCRYPTION_KEY`. + environment_key = ENCRYPTION_KEY Supported Algorithms ##################### -Currently, *cshelve* supports the following encryption algorithms: +Currently, `cshelve` supports the following encryption algorithm: -- `aes256`: A widely-used symetric encryption library. +- **`aes256`**: A widely-used symmetric encryption standard. Using Encryption ################# -Once encryption is configured as previously in the `config.ini` file, it will automatically crypt data before storing it and decrypt data when retrieving it.
-The application code doesn't need to be updated: +Once encryption is configured in the `config.ini` file, data will automatically be encrypted before storage and decrypted upon retrieval. No changes are required in the application code. For example: .. code-block:: python - import cshelve + import cshelve - with cshelve.open('config.ini') as db: - # The message will be encrypted but not the key 'data'. - db['data'] = 'This is some data that will be encrypt.' + # Writing encrypted data + with cshelve.open('config.ini') as db: + db['data'] = 'This is some data that will be encrypted.' - with cshelve.open('config.ini') as db: - data = db['data'] - print(data) # Output: This is some data that will be encrypt. + # Reading encrypted data + with cshelve.open('config.ini') as db: + data = db['data'] + print(data) # Output: This is some data that will be encrypted. -In this example, the data is encrypt before being stored and decrypt when retrieved, thanks to the configuration. +In this example, the data is transparently encrypted when stored and decrypted when retrieved, as specified in the configuration. Error Handling ############## -If an unsupported compression algorithm is specified, *cshelve* will raise an `UnknownEncryptionAlgorithmError`. -Ensure that the algorithm specified in the configuration file is supported. +If an unsupported encryption algorithm is specified in the configuration file, cshelve will raise an `UnknownEncryptionAlgorithmError`. Additionally, the following errors may occur: + +- `NoEncryptionKeyError`: Raised when no encryption key is provided for encryption. + +- `DataCorruptionError`: Raised when the encrypted data is found to be corrupted during decryption. + +Ensure that the algorithm listed in the config.ini file matches one of the supported options and that the encryption key is correctly provided. 
diff --git a/doc/source/index.rst b/doc/source/index.rst index e58fb55..0882d9e 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -19,6 +19,7 @@ Table of contents azure-blob compression + encryption in-memory introduction logging From 15e5b6af6ca329eabc9aec23006c0881d5285b8b Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 Dec 2024 16:01:24 +0000 Subject: [PATCH 06/11] update encryption doc --- cshelve/_encryption.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index dcb36f2..079be53 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -18,18 +18,18 @@ # Key that can be defined in the INI file. ALGORITHMS_NAME_KEY = "algorithm" -## User can provide the key via the ini file or env variable. +# User can provide the key via the INI file or environment variable. KEY_KEY = "key" ENVIRONMENT_KEY = "environment_key" -# Normally the 'tag' is using 16 bytes and the 'nonce' 12 bytes. -# But, by security and for the futur, we keep their length in a this dedicated data structure. -# With also keep the algorithm as a unsigned char. +# Normally the 'tag' uses 16 bytes and the 'nonce' 12 bytes. +# But, for security and future-proofing, we keep their lengths in this dedicated data structure. +# We also keep the algorithm as an unsigned char. MessageInformation = namedtuple( "MessageInformation", ["algorithm", "len_tag", "len_nonce", "message"] ) -# Hold the encrypted message. +# Holds the encrypted message. Message = namedtuple("Message", ["tag", "nonce", "encrypted_data"]) @@ -73,7 +73,7 @@ def _get_key(logger, config) -> bytes: if key := os.environ.get(env_key): return key.encode() logger.error( - f"Encryption key is configured to use use environment variable but environment variable '{ENVIRONMENT_KEY}' doesn't not exists." + f"Encryption key is configured to use the environment variable but the environment variable '{ENVIRONMENT_KEY}' does not exist." 
) raise NoEncryptionKeyError( f"Environment variable '{ENVIRONMENT_KEY}' not found." @@ -85,8 +85,8 @@ def _get_key(logger, config) -> bytes: ) return key.encode() - logger.error("Encryption is specified without key.") - raise NoEncryptionKeyError("Encryption is specified without key.") + logger.error("Encryption is specified without a key.") + raise NoEncryptionKeyError("Encryption is specified without a key.") def _aes256(signature, config: Dict[str, str], key: bytes): From 0e4e9c15e84cf078b2d1a273aa889b523c94d657 Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 Dec 2024 16:04:15 +0000 Subject: [PATCH 07/11] renaming --- cshelve/__init__.py | 8 ++++---- cshelve/_encryption.py | 14 ++++++++------ cshelve/exceptions.py | 4 ++-- doc/source/encryption.rst | 4 ++-- tests/units/test_encryption.py | 6 +++--- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/cshelve/__init__.py b/cshelve/__init__.py index 53c60b3..b82f288 100644 --- a/cshelve/__init__.py +++ b/cshelve/__init__.py @@ -22,10 +22,10 @@ AuthTypeError, CanNotCreateDBError, ConfigurationError, - DataCorruptionError, + EncryptedDataCorruptionError, DBDoesNotExistsError, KeyNotFoundError, - NoEncryptionKeyError, + MissingEncryptionKeyError, ReadOnlyError, UnknownCompressionAlgorithmError, UnknownEncryptionAlgorithmError, @@ -38,10 +38,10 @@ "AuthTypeError", "CanNotCreateDBError", "ConfigurationError", - "DataCorruptionError", + "EncryptedDataCorruptionError", "DBDoesNotExistsError", "KeyNotFoundError", - "NoEncryptionKeyError", + "MissingEncryptionKeyError", "open", "ReadOnlyError", "ResourceNotFoundError", diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index 079be53..75cd537 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -11,8 +11,8 @@ from ._data_processing import DataProcessing from .exceptions import ( UnknownEncryptionAlgorithmError, - NoEncryptionKeyError, - DataCorruptionError, + MissingEncryptionKeyError, + EncryptedDataCorruptionError, ) @@ 
-75,7 +75,7 @@ def _get_key(logger, config) -> bytes: logger.error( f"Encryption key is configured to use the environment variable but the environment variable '{ENVIRONMENT_KEY}' does not exist." ) - raise NoEncryptionKeyError( + raise MissingEncryptionKeyError( f"Environment variable '{ENVIRONMENT_KEY}' not found." ) @@ -86,7 +86,7 @@ def _get_key(logger, config) -> bytes: return key.encode() logger.error("Encryption is specified without a key.") - raise NoEncryptionKeyError("Encryption is specified without a key.") + raise MissingEncryptionKeyError("Encryption is specified without a key.") def _aes256(signature, config: Dict[str, str], key: bytes): @@ -128,7 +128,9 @@ def _decrypt(signature, AES, key: bytes, data: bytes) -> bytes: info = MessageInformation._make(struct.unpack(f" bytes: try: cipher.verify(message.tag) except ValueError: - raise DataCorruptionError("The encrypted data was modified.") + raise EncryptedDataCorruptionError("The encrypted data was modified.") return plaintext diff --git a/cshelve/exceptions.py b/cshelve/exceptions.py index 0e2bd0f..cd1053a 100644 --- a/cshelve/exceptions.py +++ b/cshelve/exceptions.py @@ -30,7 +30,7 @@ class UnknownEncryptionAlgorithmError(RuntimeError): pass -class NoEncryptionKeyError(RuntimeError): +class MissingEncryptionKeyError(RuntimeError): """ Raised when there is no encryption key provided. """ @@ -38,7 +38,7 @@ class NoEncryptionKeyError(RuntimeError): pass -class DataCorruptionError(RuntimeError): +class EncryptedDataCorruptionError(RuntimeError): """ Raised when a data is not accessible due to a corruption. """ diff --git a/doc/source/encryption.rst b/doc/source/encryption.rst index 31d5e29..d200569 100644 --- a/doc/source/encryption.rst +++ b/doc/source/encryption.rst @@ -88,8 +88,8 @@ Error Handling If an unsupported encryption algorithm is specified in the configuration file, cshelve will raise an `UnknownEncryptionAlgorithmError`. 
Additionally, the following errors may occur: -- `NoEncryptionKeyError`: Raised when no encryption key is provided for encryption. +- `MissingEncryptionKeyError`: Raised when no encryption key is provided for encryption. -- `DataCorruptionError`: Raised when the encrypted data is found to be corrupted during decryption. +- `EncryptedDataCorruptionError`: Raised when the encrypted data is found to be corrupted during decryption. Ensure that the algorithm listed in the config.ini file matches one of the supported options and that the encryption key is correctly provided. diff --git a/tests/units/test_encryption.py b/tests/units/test_encryption.py index 86fa777..136bdd9 100644 --- a/tests/units/test_encryption.py +++ b/tests/units/test_encryption.py @@ -5,7 +5,7 @@ import pytest -from cshelve import UnknownEncryptionAlgorithmError, NoEncryptionKeyError +from cshelve import UnknownEncryptionAlgorithmError, MissingEncryptionKeyError from cshelve._encryption import configure from cshelve._data_processing import DataProcessing @@ -71,7 +71,7 @@ def test_no_key_provided(data_processing): logger = Mock() config = {"algorithm": "aes256"} - with pytest.raises(NoEncryptionKeyError): + with pytest.raises(MissingEncryptionKeyError): configure(logger, data_processing, config) @@ -94,5 +94,5 @@ def test_key_not_in_env_var(data_processing): logger = Mock() config = {"algorithm": "aes256", "environment_key": "KEY_IN_MISSING_ENV"} - with pytest.raises(NoEncryptionKeyError): + with pytest.raises(MissingEncryptionKeyError): configure(logger, data_processing, config) From d4113a6630c7337776aa471417f42a9ee1c488af Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 Dec 2024 16:06:36 +0000 Subject: [PATCH 08/11] file renaming --- ...uildin_data_processing.py => test_building_data_processing.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/end-to-end/{test_buildin_data_processing.py => test_building_data_processing.py} (100%) diff --git 
a/tests/end-to-end/test_buildin_data_processing.py b/tests/end-to-end/test_building_data_processing.py similarity index 100% rename from tests/end-to-end/test_buildin_data_processing.py rename to tests/end-to-end/test_building_data_processing.py From 1d5d371eb1c59fafae4e5e78ebc21e4b88a2742c Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 Dec 2024 16:25:58 +0000 Subject: [PATCH 09/11] refacto --- cshelve/_encryption.py | 85 ++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 28 deletions(-) diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index 75cd537..0419663 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -26,11 +26,12 @@ # Normally the 'tag' uses 16 bytes and the 'nonce' 12 bytes. # But, for security and future-proofing, we keep their lengths in this dedicated data structure. # We also keep the algorithm as an unsigned char. -MessageInformation = namedtuple( - "MessageInformation", ["algorithm", "len_tag", "len_nonce", "message"] +EncryptedMessageInformation = namedtuple( + "EncryptedMessageInformation", + ["algorithm", "len_tag", "len_nonce", "encrypted_message"], ) # Holds the encrypted message. 
-Message = namedtuple("Message", ["tag", "nonce", "encrypted_data"]) +EncryptedMessage = namedtuple("EncryptedMessage", ["tag", "nonce", "encrypted_data"]) def configure( @@ -55,8 +56,8 @@ def configure( "aes256": (_aes256, 1), } - if algorithm := supported_algorithms.get(algorithm): - fct, algo_signature = algorithm + if algorithm in supported_algorithms: + fct, algo_signature = supported_algorithms[algorithm] logger.debug(f"Configuring encryption algorithm: {algorithm}") crypt_fct, decrypt_fct = fct(algo_signature, config, key) data_processing.add_pre_processing(crypt_fct) @@ -73,7 +74,7 @@ def _get_key(logger, config) -> bytes: if key := os.environ.get(env_key): return key.encode() logger.error( - f"Encryption key is configured to use the environment variable but the environment variable '{ENVIRONMENT_KEY}' does not exist." + f"Encryption key is configured to use the environment variable but the environment variable '{env_key}' does not exist." ) raise MissingEncryptionKeyError( f"Environment variable '{ENVIRONMENT_KEY}' not found." 
@@ -105,47 +106,75 @@ def _crypt(signature, AES, key: bytes, data: bytes) -> bytes: cipher = AES.new(key, AES.MODE_EAX) encrypted_data, tag = cipher.encrypt_and_digest(data) - message = Message(tag=tag, nonce=cipher.nonce, encrypted_data=encrypted_data) + encrypted_message = EncryptedMessage( + tag=tag, nonce=cipher.nonce, encrypted_data=encrypted_data + ) - info = MessageInformation( + encrypted_message_information = EncryptedMessageInformation( algorithm=signature, len_tag=len(tag), len_nonce=len(cipher.nonce), - message=message.tag + message.nonce + message.encrypted_data, + encrypted_message=encrypted_message.tag + + encrypted_message.nonce + + encrypted_message.encrypted_data, ) return struct.pack( - f" bytes: - message_len = len(data) - 3 # 3 bytes (b) - info = MessageInformation._make(struct.unpack(f" EncryptedMessageInformation: + message_len = len(data) - 3 # 3 bytes for the MessageInformation structure (b) + + if message_len > 1: + info = EncryptedMessageInformation._make( + struct.unpack(f" EncryptedMessage: + encrypted_data_len = len(info.encrypted_message) - info.len_tag - info.len_nonce + + if encrypted_data_len > 1: + encrypted_message = EncryptedMessage._make( + struct.unpack( + f"<{info.len_tag}s{info.len_nonce}s{encrypted_data_len}s", + info.encrypted_message, + ) ) - ) + return encrypted_message - cipher = AES.new(key, AES.MODE_EAX, nonce=message.nonce) + raise EncryptedDataCorruptionError("The encrypted data is corrupted.") + +def _decrypt_data(AES, key: bytes, message: EncryptedMessage) -> bytes: + cipher = AES.new(key, AES.MODE_EAX, nonce=message.nonce) plaintext = cipher.decrypt(message.encrypted_data) try: cipher.verify(message.tag) + return plaintext except ValueError: - raise EncryptedDataCorruptionError("The encrypted data was modified.") - - return plaintext + raise EncryptedDataCorruptionError("The encrypted data is corrupted.") From 6780602a253866daac598171036f9eaf7c3236ea Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 
Dec 2024 16:41:09 +0000 Subject: [PATCH 10/11] refactoring --- cshelve/_encryption.py | 68 +++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/cshelve/_encryption.py b/cshelve/_encryption.py index 0419663..a883bae 100644 --- a/cshelve/_encryption.py +++ b/cshelve/_encryption.py @@ -26,12 +26,12 @@ # Normally the 'tag' uses 16 bytes and the 'nonce' 12 bytes. # But, for security and future-proofing, we keep their lengths in this dedicated data structure. # We also keep the algorithm as an unsigned char. -EncryptedMessageInformation = namedtuple( - "EncryptedMessageInformation", - ["algorithm", "len_tag", "len_nonce", "encrypted_message"], +MessageDetails = namedtuple( + "MessageDetails", + ["algorithm", "len_tag", "len_nonce", "ciphered_message"], ) # Holds the encrypted message. -EncryptedMessage = namedtuple("EncryptedMessage", ["tag", "nonce", "encrypted_data"]) +CipheredMessage = namedtuple("CipheredMessage", ["tag", "nonce", "encrypted_data"]) def configure( @@ -106,75 +106,67 @@ def _crypt(signature, AES, key: bytes, data: bytes) -> bytes: cipher = AES.new(key, AES.MODE_EAX) encrypted_data, tag = cipher.encrypt_and_digest(data) - encrypted_message = EncryptedMessage( - tag=tag, nonce=cipher.nonce, encrypted_data=encrypted_data - ) + cipher = CipheredMessage(tag=tag, nonce=cipher.nonce, encrypted_data=encrypted_data) - encrypted_message_information = EncryptedMessageInformation( + md = MessageDetails( algorithm=signature, len_tag=len(tag), len_nonce=len(cipher.nonce), - encrypted_message=encrypted_message.tag - + encrypted_message.nonce - + encrypted_message.encrypted_data, + ciphered_message=cipher.tag + cipher.nonce + cipher.encrypted_data, ) return struct.pack( - f" bytes: - message_information = _extract_encrypted_message_information(signature, data) - message = _extract_encrypted_message(message_information) - return _decrypt_data(AES, key, message) + md = _extract_message_details(signature, data) + cm = 
_extract_ciphered_message(md) + return _decrypt_data(AES, key, cm) -def _extract_encrypted_message_information( - signature, data: bytes -) -> EncryptedMessageInformation: +def _extract_message_details(signature, data: bytes) -> MessageDetails: message_len = len(data) - 3 # 3 bytes for the MessageInformation structure (b) if message_len > 1: - info = EncryptedMessageInformation._make( - struct.unpack(f" EncryptedMessage: - encrypted_data_len = len(info.encrypted_message) - info.len_tag - info.len_nonce +def _extract_ciphered_message(md: MessageDetails) -> CipheredMessage: + data_len = len(md.ciphered_message) - md.len_tag - md.len_nonce - if encrypted_data_len > 1: - encrypted_message = EncryptedMessage._make( + if data_len > 1: + cm = CipheredMessage._make( struct.unpack( - f"<{info.len_tag}s{info.len_nonce}s{encrypted_data_len}s", - info.encrypted_message, + f"<{md.len_tag}s{md.len_nonce}s{data_len}s", + md.ciphered_message, ) ) - return encrypted_message + return cm raise EncryptedDataCorruptionError("The encrypted data is corrupted.") -def _decrypt_data(AES, key: bytes, message: EncryptedMessage) -> bytes: - cipher = AES.new(key, AES.MODE_EAX, nonce=message.nonce) - plaintext = cipher.decrypt(message.encrypted_data) +def _decrypt_data(AES, key: bytes, cm: CipheredMessage) -> bytes: + cipher = AES.new(key, AES.MODE_EAX, nonce=cm.nonce) + plaintext = cipher.decrypt(cm.encrypted_data) try: - cipher.verify(message.tag) + cipher.verify(cm.tag) return plaintext except ValueError: raise EncryptedDataCorruptionError("The encrypted data is corrupted.") From 5f48293f446d5e82ff89c03c8a32998f290ab9c1 Mon Sep 17 00:00:00 2001 From: florian-vuillemot Date: Sun, 22 Dec 2024 16:43:36 +0000 Subject: [PATCH 11/11] prepare release --- .github/workflows/quality.yml | 2 +- .github/workflows/release.yml | 2 +- CHANGELOG.md | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 
3ba41d0..2917a69 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -20,7 +20,7 @@ permissions: contents: read env: - WHEEL: cshelve-0.8.0-py3-none-any.whl + WHEEL: cshelve-0.9.0-py3-none-any.whl jobs: build: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 14cc194..397901d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,7 @@ permissions: contents: read env: - WHEEL: cshelve-0.8.0-py3-none-any.whl + WHEEL: cshelve-0.9.0-py3-none-any.whl jobs: build: diff --git a/CHANGELOG.md b/CHANGELOG.md index a86c895..81227d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## [0.9.0] - 2024- +## [0.9.0] - 2024-12-22 ### Added - Allow data encryption diff --git a/pyproject.toml b/pyproject.toml index 847f432..0761071 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "cshelve" -version = "0.8.0" +version = "0.9.0" description = "Propulsing the shelve module to the cloud" readme = "README.md" requires-python = ">=3.9"