Skip to content

Commit

Permalink
chore: improve save/load methods for encrypted data-frames
Browse files Browse the repository at this point in the history
  • Loading branch information
RomanBredehoft committed Apr 3, 2024
1 parent 4dcd616 commit c3982ca
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 25 deletions.
10 changes: 5 additions & 5 deletions deps_licenses/licenses_mac_silicon_user.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ Name, Version, License
GitPython, 3.1.41, BSD License
PyYAML, 6.0.1, MIT License
anyio, 3.7.1, MIT License
boto3, 1.34.72, Apache Software License
botocore, 1.34.72, Apache Software License
boto3, 1.34.75, Apache Software License
botocore, 1.34.75, Apache Software License
brevitas, 0.8.0, UNKNOWN
certifi, 2023.7.22, Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer, 3.3.2, MIT License
click, 8.1.7, BSD License
coloredlogs, 15.0.1, MIT License
concrete-python, 2024.3.27, BSD-3-Clause
concrete-python, 2.6.0rc1, BSD-3-Clause
dependencies, 2.0.1, BSD License
dill, 0.3.8, BSD License
exceptiongroup, 1.2.0, MIT License
Expand All @@ -19,7 +19,7 @@ flatbuffers, 24.3.25, Apache Software License
fsspec, 2024.3.1, BSD License
gitdb, 4.0.11, BSD License
h11, 0.14.0, MIT License
huggingface-hub, 0.22.1, Apache Software License
huggingface-hub, 0.22.2, Apache Software License
humanfriendly, 10.0, MIT License
hummingbird-ml, 0.4.8, MIT License
idna, 3.6, BSD License
Expand Down Expand Up @@ -67,7 +67,7 @@ tokenizers, 0.15.2, Apache Software License
tomli, 2.0.1, MIT License
torch, 1.13.1, BSD License
tqdm, 4.66.2, MIT License; Mozilla Public License 2.0 (MPL 2.0)
transformers, 4.39.1, Apache Software License
transformers, 4.39.3, Apache Software License
typing_extensions, 4.5.0, Python Software Foundation License
tzdata, 2024.1, Apache Software License
urllib3, 2.2.1, MIT License
Expand Down
2 changes: 1 addition & 1 deletion deps_licenses/licenses_mac_silicon_user.txt.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8de2e8c13fe9a1fe80d9cce43dee7493
74a229e0dccc68a1f77c7ca59dbf7614
12 changes: 6 additions & 6 deletions src/concrete/ml/pandas/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,28 +133,28 @@ def deserialize_elementwise(array: numpy.ndarray) -> numpy.ndarray:
return numpy.vectorize(deserialize_value)(array)


def serialize_evaluation_keys(evaluation_keys: fhe.EvaluationKeys) -> str:
def serialize_evaluation_keys(evaluation_keys: fhe.EvaluationKeys) -> bytes:
"""Serialize the evaluation keys into bytes.
Args:
evaluation_keys (fhe.EvaluationKeys): The evaluation keys to serialize.
Returns:
str: The serialized evaluation keys as a string of hexadecimal numbers.
bytes: The serialized evaluation keys.
"""
return serialize_value(evaluation_keys)
return evaluation_keys.serialize()


def deserialize_evaluation_keys(serialized_evaluation_keys: str) -> fhe.EvaluationKeys:
def deserialize_evaluation_keys(serialized_evaluation_keys: bytes) -> fhe.EvaluationKeys:
"""Deserialize the evaluation keys.
Args:
serialized_evaluation_keys (str): The evaluation keys to deserialize.
serialized_evaluation_keys (bytes): The evaluation keys to deserialize.
Returns:
fhe.EvaluationKeys: The deserialized evaluation keys.
"""
return fhe.EvaluationKeys.deserialize(bytes.fromhex(serialized_evaluation_keys))
return fhe.EvaluationKeys.deserialize(serialized_evaluation_keys)


def slice_hex_str(hex_str: str, n: int = 10) -> str:
Expand Down
43 changes: 30 additions & 13 deletions src/concrete/ml/pandas/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Define the encrypted data-frame framework."""
import json
import zipfile
from pathlib import Path
from typing import Dict, Hashable, List, Optional, Sequence, Tuple, Union

Expand Down Expand Up @@ -254,11 +255,12 @@ def merge(

return joined_df

def _to_dict(self) -> Dict:
"""Serialize the encrypted data-frame as a dictionary.
def _to_dict_and_eval_keys(self) -> Tuple[Dict, fhe.EvaluationKeys]:
"""Serialize the encrypted data-frame as a dictionary and evaluation keys.
Returns:
Dict: The serialized data-frame.
fhe.EvaluationKeys: The serialized evaluation keys.
"""
# Serialize encrypted values element-wise
encrypted_values = serialize_elementwise(self._encrypted_values)
Expand All @@ -273,20 +275,20 @@ def _to_dict(self) -> Dict:
output_dict = {
"encrypted_values": encrypted_values.tolist(),
"encrypted_nan": encrypted_nan,
"evaluation_keys": evaluation_keys,
"column_names": self._column_names,
"dtype_mappings": self._dtype_mappings,
"api_version": self._api_version,
}

return output_dict
return output_dict, evaluation_keys

@classmethod
def _from_dict(cls, dict_to_load: Dict):
"""Load a serialized encrypted data-frame from a dictionary.
def _from_dict_and_eval_keys(cls, dict_to_load: Dict, evaluation_keys: fhe.EvaluationKeys):
"""Load a serialized encrypted data-frame from a dictionary and evaluation keys.
Args:
dict_to_load (Dict): The serialized encrypted data-frame.
evaluation_keys (fhe.EvaluationKeys): The serialized evaluation keys.
Returns:
EncryptedDataFrame: The loaded encrypted data-frame.
Expand All @@ -295,7 +297,7 @@ def _from_dict(cls, dict_to_load: Dict):
encrypted_values = deserialize_elementwise(dict_to_load["encrypted_values"])
encrypted_nan = deserialize_value(dict_to_load["encrypted_nan"])

evaluation_keys = deserialize_evaluation_keys(dict_to_load["evaluation_keys"])
evaluation_keys = deserialize_evaluation_keys(evaluation_keys)

column_names = dict_to_load["column_names"]
dtype_mappings = dict_to_load["dtype_mappings"]
Expand All @@ -318,9 +320,16 @@ def save(self, path: Union[Path, str]):
"""
path = Path(path)

encrypted_df_dict = self._to_dict()
with path.open("w", encoding="utf-8") as file:
json.dump(encrypted_df_dict, file)
if path.suffix != ".zip":
path = path.with_suffix(".zip")

encrypted_df_dict, evaluation_keys = self._to_dict_and_eval_keys()

encrypted_df_json_bytes = json.dumps(encrypted_df_dict).encode(encoding="utf-8")

with zipfile.ZipFile(path, "w") as zip_file:
zip_file.writestr("encrypted_dataframe.json", encrypted_df_json_bytes)
zip_file.writestr("evaluation_keys", evaluation_keys)

@classmethod
def load(cls, path: Union[Path, str]):
Expand All @@ -334,7 +343,15 @@ def load(cls, path: Union[Path, str]):
"""
path = Path(path)

with path.open("r", encoding="utf-8") as file:
encrypted_df_dict = json.load(file)
if path.suffix != ".zip":
path = path.with_suffix(".zip")

with zipfile.ZipFile(path, "r") as zip_file:
with zip_file.open("encrypted_dataframe.json") as encrypted_df_json_file:
encrypted_df_json_bytes = encrypted_df_json_file.read()
encrypted_df_dict = json.loads(encrypted_df_json_bytes)

with zip_file.open("evaluation_keys") as evaluation_keys_file:
evaluation_keys = evaluation_keys_file.read()

return cls._from_dict(encrypted_df_dict)
return cls._from_dict_and_eval_keys(encrypted_df_dict, evaluation_keys)

0 comments on commit c3982ca

Please sign in to comment.