Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Encryption #80

Merged
merged 11 commits into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash
USER vscode

# Copy the project files then build the virtual environment
COPY --chown=vscode:vscode pyproject.toml uv.lock /build/package
COPY --chown=vscode:vscode . /build/package
RUN cd /build/package && uv venv --python 3.9 && uv sync --dev

# Copy the project files to the workspace
Expand Down
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
AZURE_STORAGE_ACCESS_KEY=""
AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite:10000/devstoreaccount1;"
ENCRYPTION_KEY="Sixteen byte key"
3 changes: 2 additions & 1 deletion .github/workflows/quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ permissions:
contents: read

env:
WHEEL: cshelve-0.8.0-py3-none-any.whl
WHEEL: cshelve-0.9.0-py3-none-any.whl

jobs:
build:
Expand Down Expand Up @@ -167,3 +167,4 @@ jobs:
env:
# Local connection string for Azurite.
AZURE_STORAGE_CONNECTION_STRING: "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
ENCRYPTION_KEY: "Sixteen byte key"
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ permissions:
contents: read

env:
WHEEL: cshelve-0.8.0-py3-none-any.whl
WHEEL: cshelve-0.9.0-py3-none-any.whl

jobs:
build:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
run: uv run --python ${{ matrix.python-version }} pytest -m "azure" tests/end-to-end
env:
AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
ENCRYPTION_KEY: "Sixteen byte key"
AZURE_STORAGE_ACCESS_KEY: ${{ secrets.AZURE_STORAGE_ACCESS_KEY }}

- name: Run end-to-end tests supporting Azurite
Expand All @@ -67,3 +68,4 @@ jobs:
uv run --python ${{ matrix.python-version }} pytest -m "not azure" tests/end-to-end
env:
AZURE_STORAGE_CONNECTION_STRING: "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
ENCRYPTION_KEY: "Sixteen byte key"
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [0.9.0] - 2024-12-22
### Added
- Allow data encryption

## [0.8.0] - 2024-12-17
### Added
- Allow data compression
Expand Down
8 changes: 7 additions & 1 deletion cshelve/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ._data_processing import DataProcessing
from ._database import _Database
from ._compression import configure as _configure_compression
from ._encryption import configure as _configure_encryption
from ._factory import factory as _factory
from ._parser import load as _config_loader
from ._parser import use_local_shelf
Expand All @@ -21,8 +22,10 @@
AuthTypeError,
CanNotCreateDBError,
ConfigurationError,
EncryptedDataCorruptionError,
DBDoesNotExistsError,
KeyNotFoundError,
MissingEncryptionKeyError,
ReadOnlyError,
UnknownCompressionAlgorithmError,
UnknownEncryptionAlgorithmError,
Expand All @@ -35,13 +38,15 @@
"AuthTypeError",
"CanNotCreateDBError",
"ConfigurationError",
"DataProcessing",
"EncryptedDataCorruptionError",
"DBDoesNotExistsError",
"KeyNotFoundError",
"MissingEncryptionKeyError",
"open",
"ReadOnlyError",
"ResourceNotFoundError",
"UnknownCompressionAlgorithmError",
"UnknownEncryptionAlgorithmError",
"UnknownProviderError",
]

Expand Down Expand Up @@ -76,6 +81,7 @@ def __init__(
# Data processing object used to apply pre and post processing to the data.
data_processing = DataProcessing()
_configure_compression(logger, data_processing, config.compression)
_configure_encryption(logger, data_processing, config.encryption)

# The CloudDatabase object is the class that interacts with the cloud storage backend.
# This class doesn't perform or respect the shelve.Shelf logic and interface so we need to wrap it.
Expand Down
135 changes: 125 additions & 10 deletions cshelve/_encryption.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
"""
Encryption module for cshelve.
"""
import os
import struct
from collections import namedtuple
from functools import partial
from logging import Logger
from typing import Dict

from ._data_processing import DataProcessing
from .exceptions import UnknownEncryptionAlgorithmError
from .exceptions import (
UnknownEncryptionAlgorithmError,
MissingEncryptionKeyError,
EncryptedDataCorruptionError,
)


# Key that can be defined in the INI file.
ALGORITHMS_NAME_KEY = "algorithm"
COMPRESSION_LEVEL_KEY = "level"
# User can provide the key via the INI file or environment variable.
KEY_KEY = "key"
ENVIRONMENT_KEY = "environment_key"


# Normally the 'tag' uses 16 bytes and the 'nonce' 12 bytes.
# But, for safety and future-proofing, their lengths are stored in this dedicated data structure.
# The algorithm identifier is stored as well; each of these three fields is packed as a signed char (struct format 'b').
MessageDetails = namedtuple(
"MessageDetails",
["algorithm", "len_tag", "len_nonce", "ciphered_message"],
)
# Holds the encrypted message.
CipheredMessage = namedtuple("CipheredMessage", ["tag", "nonce", "encrypted_data"])


def configure(
Expand All @@ -24,18 +45,21 @@ def configure(
return

if ALGORITHMS_NAME_KEY not in config:
logger.info("No compression algorithm specified.")
logger.info("No encryption algorithm specified.")
return

algorithm = config[ALGORITHMS_NAME_KEY]

key = _get_key(logger, config)

supported_algorithms = {
"aes256": _aes256,
"aes256": (_aes256, 1),
}

if encryption := supported_algorithms.get(algorithm):
if algorithm in supported_algorithms:
fct, algo_signature = supported_algorithms[algorithm]
logger.debug(f"Configuring encryption algorithm: {algorithm}")
crypt_fct, decrypt_fct = encryption(config)
crypt_fct, decrypt_fct = fct(algo_signature, config, key)
data_processing.add_pre_processing(crypt_fct)
data_processing.add_post_processing(decrypt_fct)
logger.debug(f"Encryption algorithm {algorithm} configured.")
Expand All @@ -45,13 +69,104 @@ def configure(
)


def _aes256(config: Dict[str, str]):
def _get_key(logger, config) -> bytes:
    """
    Retrieve the encryption key from the encryption configuration.

    The environment variable named by the 'environment_key' option takes
    precedence over the 'key' option written directly in the configuration.

    Parameters:
        logger: logger used to report where the key comes from, or why it is missing.
        config: mapping holding the encryption options.

    Returns:
        The key encoded as bytes.

    Raises:
        MissingEncryptionKeyError: if the configured environment variable is
            unset or empty, or if no key is configured at all.
    """
    if env_key := config.get(ENVIRONMENT_KEY):
        # An empty environment variable is treated the same way as a missing one.
        if key := os.environ.get(env_key):
            return key.encode()
        logger.error(
            f"Encryption key is configured to use the environment variable but the environment variable '{env_key}' does not exist."
        )
        # Report the name of the variable that was looked up, not the name of
        # the configuration option that points to it.
        raise MissingEncryptionKeyError(
            f"Environment variable '{env_key}' not found."
        )

    if key := config.get(KEY_KEY):
        logger.info(
            "Encryption is based on a key defined in the config file and not an environment variable."
        )
        return key.encode()

    logger.error("Encryption is specified without a key.")
    raise MissingEncryptionKeyError("Encryption is specified without a key.")


def _aes256(signature, config: Dict[str, str], key: bytes):
    """
    Configure aes256 encryption.

    Parameters:
        signature: identifier of the algorithm, bound to both returned functions.
        config: encryption options (not used by this function).
        key: encryption key handed to the AES cipher.

    Returns:
        A tuple (crypt, decrypt) of callables, each taking the data as its
        only remaining argument.
    """
    # Imported here rather than at module level; presumably so the crypto
    # dependency is only required when encryption is actually configured.
    from Crypto.Cipher import AES

    # NOTE(review): the AES variant appears to depend on the length of `key`
    # rather than on this function's name -- confirm that 32-byte keys are expected.
    crypt = partial(_crypt, signature, AES, key)
    decrypt = partial(_decrypt, signature, AES, key)

    return crypt, decrypt


def _crypt(signature, AES, key: bytes, data: bytes) -> bytes:
    """
    Encrypt `data` and serialise everything needed to decrypt it later.

    Layout of the returned bytes (little-endian, no padding):
        1 byte   algorithm signature
        1 byte   length of the tag
        1 byte   length of the nonce
        n bytes  tag, then nonce, then the encrypted data
    """
    # No nonce is passed to the cipher, so the one it exposes afterwards is
    # stored with the data to make decryption possible.
    aes_cipher = AES.new(key, AES.MODE_EAX)
    encrypted_data, tag = aes_cipher.encrypt_and_digest(data)

    ciphered = CipheredMessage(
        tag=tag, nonce=aes_cipher.nonce, encrypted_data=encrypted_data
    )
    details = MessageDetails(
        algorithm=signature,
        len_tag=len(ciphered.tag),
        len_nonce=len(ciphered.nonce),
        ciphered_message=ciphered.tag + ciphered.nonce + ciphered.encrypted_data,
    )

    layout = f"<bbb{len(details.ciphered_message)}s"
    return struct.pack(layout, *details)


def _decrypt(signature, AES, key: bytes, data: bytes) -> bytes:
    """
    Decrypt a message produced by `_crypt`.

    The header is parsed first, then the tag, nonce and encrypted data are
    separated, and finally the data is decrypted and verified.
    """
    details = _extract_message_details(signature, data)
    return _decrypt_data(AES, key, _extract_ciphered_message(details))


def _extract_message_details(signature, data: bytes) -> MessageDetails:
    """
    Parse the header of an encrypted message.

    Raises EncryptedDataCorruptionError when the message is too short or when
    its algorithm byte differs from `signature`.
    """
    # The header is made of three one-byte fields: algorithm, tag length, nonce length.
    header_size = 3
    payload_len = len(data) - header_size

    if not payload_len > 1:
        raise EncryptedDataCorruptionError("The encrypted data is corrupted.")

    details = MessageDetails(*struct.unpack(f"<bbb{payload_len}s", data))

    if details.algorithm == signature:
        return details

    raise EncryptedDataCorruptionError(
        "Algorithm used for the encryption is not the expected one."
    )


def _extract_ciphered_message(md: MessageDetails) -> CipheredMessage:
    """
    Split the ciphered message into its tag, nonce and encrypted data.

    Parameters:
        md: message details holding the tag length, the nonce length and the
            concatenated tag + nonce + encrypted data.

    Returns:
        The CipheredMessage made of the three separated parts.

    Raises:
        EncryptedDataCorruptionError: if the stored lengths are negative or
            exceed the size of the ciphered message.
    """
    # The lengths are read as signed bytes, so corrupted data can yield
    # negative values that would otherwise produce an invalid struct format.
    if md.len_tag < 0 or md.len_nonce < 0:
        raise EncryptedDataCorruptionError("The encrypted data is corrupted.")

    data_len = len(md.ciphered_message) - md.len_tag - md.len_nonce

    # An encrypted payload of zero or one byte is legitimate (the ciphertext is
    # as long as the plaintext); only a negative remainder proves corruption.
    # Truncated messages that slip through are still rejected by the tag check.
    if data_len < 0:
        raise EncryptedDataCorruptionError("The encrypted data is corrupted.")

    return CipheredMessage._make(
        struct.unpack(
            f"<{md.len_tag}s{md.len_nonce}s{data_len}s",
            md.ciphered_message,
        )
    )


def _decrypt_data(AES, key: bytes, cm: CipheredMessage) -> bytes:
    """
    Decrypt the encrypted data and check its authentication tag.

    Raises EncryptedDataCorruptionError when the tag verification fails.
    """
    aes_cipher = AES.new(key, AES.MODE_EAX, nonce=cm.nonce)
    decrypted = aes_cipher.decrypt(cm.encrypted_data)

    # The decrypted bytes are only handed back once the tag has been verified.
    try:
        aes_cipher.verify(cm.tag)
    except ValueError:
        raise EncryptedDataCorruptionError("The encrypted data is corrupted.")

    return decrypted
10 changes: 9 additions & 1 deletion cshelve/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@
LOGGING_KEY_STORE = "logging"
# Compression configuration section.
COMPRESSION_KEY_STORE = "compression"
# Encryption configuration section.
ENCRYPTION_KEY_STORE = "encryption"


# Tuple containing the provider name and its configuration.
Config = namedtuple("Config", ["provider", "default", "logging", "compression"])
Config = namedtuple(
"Config", ["provider", "default", "logging", "compression", "encryption"]
)


def use_local_shelf(filename: Path) -> bool:
Expand All @@ -47,11 +51,15 @@ def load(logger: Logger, filename: Path) -> Tuple[str, Dict[str, str]]:
compression_config = (
config[COMPRESSION_KEY_STORE] if COMPRESSION_KEY_STORE in config else {}
)
encryption_config = (
config[ENCRYPTION_KEY_STORE] if ENCRYPTION_KEY_STORE in config else {}
)

logger.debug(f"Configuration file '{filename}' loaded.")
return Config(
provider=c[PROVIDER_KEY],
default=c,
logging=logging_config,
compression=compression_config,
encryption=encryption_config,
)
16 changes: 16 additions & 0 deletions cshelve/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,22 @@ class UnknownEncryptionAlgorithmError(RuntimeError):
pass


class MissingEncryptionKeyError(RuntimeError):
    """
    Raised when no encryption key is provided.
    """


class EncryptedDataCorruptionError(RuntimeError):
    """
    Raised when data cannot be accessed because it is corrupted.
    """


class KeyNotFoundError(KeyError):
"""
Raised when a resource is not found.
Expand Down
2 changes: 1 addition & 1 deletion doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ help:
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

livehtml:
@sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) --host 0.0.0.0
Loading
Loading