Merge branch 'master' into feature/spellchecker
sarthakpati authored Oct 16, 2024
2 parents b98d76d + 301c188 commit fa6ae76
Showing 41 changed files with 1,658 additions and 74 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/python-test.yml
@@ -106,10 +106,17 @@ jobs:
       run: |
         pytest --cov=. --cov-report=xml --cov-append -k "update_version"
-      - name: Upload coverage
+      - name: Upload coverage to CodeCov
        if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change
        uses: codecov/codecov-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          file: ./coverage.xml
-         flags: unittests
+         flags: unittests
+
+      - name: Upload coverage to Codacy
+        if: github.ref == 'refs/heads/master' # only run when on master
+        uses: codacy/[email protected]
+        with:
+          project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
+          coverage-reports: ./coverage.xml
142 changes: 142 additions & 0 deletions GANDLF/cli/huggingface_hub_handler.py
@@ -0,0 +1,142 @@
from huggingface_hub import HfApi, snapshot_download, ModelCardData, ModelCard
from typing import List, Union
from GANDLF import version
from pathlib import Path
from GANDLF.utils import get_git_hash
import re


def validate_model_card(file_path: str):
    """
    Validate that the required fields in the model card are not null, empty, or set to 'REQUIRED_FOR_GANDLF'.
    The fields must contain valid alphabetic or alphanumeric values.

    Args:
        file_path (str): The path to the Markdown file to validate.

    Raises:
        AssertionError: If any required field is missing, empty, null, or contains 'REQUIRED_FOR_GANDLF'.
    """
    # Read the Markdown file
    path = Path(file_path)
    with path.open("r") as file:
        template_str = file.read()

    # Define required fields and their regex patterns to capture the values
    patterns = {
        "Developed by": re.compile(
            r'\*\*Developed by:\*\*\s*\{\{\s*developers\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
        "License": re.compile(
            r'\*\*License:\*\*\s*\{\{\s*license\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
        "Primary Organization": re.compile(
            r'\*\*Primary Organization:\*\*\s*\{\{\s*primary_organization\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
        "Commercial use policy": re.compile(
            r'\*\*Commercial use policy:\*\*\s*\{\{\s*commercial_use\s*\|\s*default\("(.+?)",\s*true\)\s*\}\}',
            re.MULTILINE,
        ),
    }

    # Iterate through the required fields and validate
    for field, pattern in patterns.items():
        match = pattern.search(template_str)

        # Ensure the field is present
        assert match, f"Field '{field}' is missing or not found in the file."

        extract_value = match.group(1)

        # Get the bracketed field value, e.g. the inner text of '[REQUIRED_FOR_GANDLF]'
        value = (
            re.search(r"\[([^\]]+)\]", extract_value).group(1)
            if re.search(r"\[([^\]]+)\]", extract_value)
            else None
        )

        # Ensure the field is not set to 'REQUIRED_FOR_GANDLF' or empty
        assert (
            value != "REQUIRED_FOR_GANDLF"
        ), f"The value for '{field}' is set to the default placeholder '[REQUIRED_FOR_GANDLF]'. It must be a valid value."
        assert value, f"The value for '{field}' is empty or null."

        # Ensure the value contains only alphabetic or alphanumeric characters
        assert re.match(
            r"^[a-zA-Z0-9]+$", value
        ), f"The value for '{field}' must be alphabetic or alphanumeric, but got: '{value}'"

    print(
        "All required fields are valid, non-empty, properly filled, and do not contain '[REQUIRED_FOR_GANDLF]'."
    )

    # Return the validated template string so it can be rendered into a model card
    return template_str


def push_to_model_hub(
    repo_id: str,
    folder_path: str,
    hf_template: str,
    path_in_repo: Union[str, None] = None,
    commit_message: Union[str, None] = None,
    commit_description: Union[str, None] = None,
    token: Union[str, None] = None,
    repo_type: Union[str, None] = None,
    revision: Union[str, None] = None,
    allow_patterns: Union[List[str], str, None] = None,
    ignore_patterns: Union[List[str], str, None] = None,
    delete_patterns: Union[List[str], str, None] = None,
):
    api = HfApi(token=token)

    try:
        repo_id = api.create_repo(repo_id).repo_id
    except Exception as e:
        print(f"Error: {e}")

    tags = ["v" + version]

    git_hash = get_git_hash()

    if not git_hash == "None":
        tags += [git_hash]

    readme_template = validate_model_card(hf_template)

    card_data = ModelCardData(library_name="GaNDLF", tags=tags)
    card = ModelCard.from_template(card_data, template_str=readme_template)

    card.save(Path(folder_path, "README.md"))

    api.upload_folder(
        repo_id=repo_id,
        folder_path=folder_path,
        repo_type="model",
        revision=revision,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        delete_patterns=delete_patterns,
    )
    print("Model successfully uploaded")


def download_from_hub(
    repo_id: str,
    revision: Union[str, None] = None,
    cache_dir: Union[str, None] = None,
    local_dir: Union[str, None] = None,
    force_download: bool = False,
    token: Union[str, None] = None,
):
    snapshot_download(
        repo_id=repo_id,
        revision=revision,
        cache_dir=cache_dir,
        local_dir=local_dir,
        force_download=force_download,
        token=token,
    )
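
Taken together, these helpers give GaNDLF a thin wrapper around the huggingface_hub client: validate_model_card gates uploads on a completed model card, push_to_model_hub renders the card and uploads the model folder, and download_from_hub pulls a snapshot. A minimal usage sketch follows; the repository id, paths, and token are hypothetical placeholders:

from GANDLF.cli.huggingface_hub_handler import push_to_model_hub, download_from_hub

# All values below are hypothetical placeholders.
push_to_model_hub(
    repo_id="my-org/my-gandlf-model",  # target repository on the Hub
    folder_path="./model_dir",  # trained model directory to upload
    hf_template="./hf_template.md",  # model card template to validate
    token="hf_xxx",  # a write-scoped Hugging Face token
)

download_from_hub(repo_id="my-org/my-gandlf-model", local_dir="./downloaded_model")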
11 changes: 10 additions & 1 deletion GANDLF/compute/inference_loop.py
@@ -89,7 +89,16 @@ def inference_loop(
         assert file_to_load != None, "The 'best_file' was not found"

         main_dict = torch.load(file_to_load, map_location=parameters["device"])
-        model.load_state_dict(main_dict["model_state_dict"])
+        state_dict = main_dict["model_state_dict"]
+        if parameters.get("differential_privacy"):
+            # this is required for torch==1.11 and for DP inference
+            new_state_dict = {}
+            for key, val in state_dict.items():
+                new_key = key.replace("_module.", "")
+                new_state_dict[new_key] = val  # strip the `_module.` prefix added by Opacus
+            state_dict = new_state_dict
+
+        model.load_state_dict(state_dict)
         parameters["previous_parameters"] = main_dict.get("parameters", None)
         model.eval()
     elif parameters["model"]["type"].lower() == "openvino":
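
For context, Opacus wraps the network in a GradSampleModule, so checkpoints saved during differentially private training carry a `_module.` prefix on every parameter name; the new branch above strips that prefix so the plain model can load the weights. A self-contained sketch of the same renaming on toy keys:

from collections import OrderedDict

# Toy checkpoint keys in the form Opacus saves them (illustrative only).
dp_state_dict = OrderedDict(
    [("_module.conv1.weight", 0.1), ("_module.conv1.bias", 0.2)]
)

plain_state_dict = {k.replace("_module.", ""): v for k, v in dp_state_dict.items()}
assert list(plain_state_dict) == ["conv1.weight", "conv1.bias"]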
68 changes: 68 additions & 0 deletions GANDLF/compute/training_loop.py
@@ -31,6 +31,10 @@
 from .forward_pass import validate_network
 from .generic import create_pytorch_objects

+from GANDLF.privacy.opacus.model_handling import empty_collate
+from GANDLF.privacy.opacus import handle_dynamic_batch_size, prep_for_opacus_training
+from opacus.utils.batch_memory_manager import wrap_data_loader
+
 # hides torchio citation request, see https://github.com/fepegar/torchio/issues/235
 os.environ["TORCHIO_HIDE_CITATION_PROMPT"] = "1"
@@ -91,6 +95,14 @@ def train_network(
     for batch_idx, (subject) in enumerate(
         tqdm(train_dataloader, desc="Looping over training data")
     ):
+        if params.get("differential_privacy"):
+            subject, params["batch_size"] = handle_dynamic_batch_size(
+                subject=subject, params=params
+            )
+            assert not isinstance(
+                model, torch.nn.DataParallel
+            ), "Differential privacy is not supported with DataParallel or DistributedDataParallel. Please use a single GPU or DDP with Opacus."
+
         optimizer.zero_grad()
         image = (  # 5D tensor: (B, C, H, W, D)
             torch.cat(
@@ -212,6 +224,23 @@ def train_network(
     return average_epoch_train_loss, average_epoch_train_metric


+def train_network_wrapper(model, train_dataloader, optimizer, params):
+    """
+    Wrapper function that handles the train_dataloader for the benign and DP cases and passes it on to train the network for a single epoch.
+    """
+
+    if params.get("differential_privacy"):
+        with train_dataloader as memory_safe_data_loader:
+            epoch_train_loss, epoch_train_metric = train_network(
+                model, memory_safe_data_loader, optimizer, params
+            )
+    else:
+        epoch_train_loss, epoch_train_metric = train_network(
+            model, train_dataloader, optimizer, params
+        )
+    return epoch_train_loss, epoch_train_metric
+
+
 def training_loop(
     training_data: pd.DataFrame,
     validation_data: pd.DataFrame,
@@ -368,6 +397,7 @@ def training_loop(
         logger_csv_filename=os.path.join(output_dir, "logs_validation.csv"),
         metrics=metrics_log,
         mode="valid",
+        add_epsilon=bool(params.get("differential_privacy")),
     )
     if testingDataDefined:
         test_logger = Logger(
@@ -392,6 +422,36 @@

     print("Using device:", device, flush=True)

+    if params.get("differential_privacy"):
+        print(
+            "Using Opacus to make training differentially private with respect to the training data."
+        )
+
+        model, optimizer, train_dataloader, privacy_engine = prep_for_opacus_training(
+            model=model,
+            optimizer=optimizer,
+            train_dataloader=train_dataloader,
+            params=params,
+        )
+
+        train_dataloader.collate_fn = empty_collate(train_dataloader.dataset[0])
+
+        # train_dataloader = BatchMemoryManager(
+        #     data_loader=train_dataloader,
+        #     max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
+        #     optimizer=optimizer,
+        # )
+        batch_size = params["batch_size"]
+        max_physical_batch_size = params["differential_privacy"].get(
+            "physical_batch_size"
+        )
+        if max_physical_batch_size and max_physical_batch_size != batch_size:
+            train_dataloader = wrap_data_loader(
+                data_loader=train_dataloader,
+                max_batch_size=max_physical_batch_size,
+                optimizer=optimizer,
+            )
+
     # Iterate for number of epochs
     for epoch in range(start_epoch, epochs):
         if params["track_memory_usage"]:
@@ -453,6 +513,14 @@

         patience += 1

+        # if training with differential privacy, print privacy epsilon
+        if params.get("differential_privacy"):
+            delta = params["differential_privacy"]["delta"]
+            this_epsilon = privacy_engine.get_epsilon(delta)
+            print(f"  Epoch Final Privacy: (ε = {this_epsilon:.2f}, δ = {delta})")
+            # save for logging
+            epoch_valid_metric["epsilon"] = this_epsilon
+
         # Write the losses to a logger
         train_logger.write(epoch, epoch_train_loss, epoch_train_metric)
         valid_logger.write(epoch, epoch_valid_loss, epoch_valid_metric)
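
The DP plumbing above has two moving parts: wrap_data_loader caps the physical batch size so a large logical batch does not exhaust memory (the optimizer still steps once per logical batch), and privacy_engine.get_epsilon(delta) reports the accumulated privacy budget each epoch. A minimal self-contained sketch of that pattern against the stock Opacus 1.x API; the model, data, and hyperparameters are illustrative, and prep_for_opacus_training is assumed to do the equivalent of make_private:

import torch
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import wrap_data_loader
from torch.utils.data import DataLoader, TensorDataset

# Illustrative model and data; all sizes are arbitrary.
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
dataset = TensorDataset(torch.randn(64, 4), torch.randint(0, 2, (64,)))
train_loader = DataLoader(dataset, batch_size=32)

privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=1.0,
    max_grad_norm=1.0,
)

# Forward/backward on at most 8 samples at a time; the optimizer only steps
# once a full logical batch has been accumulated, so the privacy accounting
# still reflects batch_size=32.
safe_loader = wrap_data_loader(
    data_loader=train_loader, max_batch_size=8, optimizer=optimizer
)
with safe_loader as loader:
    for x, y in loader:
        optimizer.zero_grad()
        torch.nn.functional.cross_entropy(model(x), y).backward()
        optimizer.step()

print("ε =", privacy_engine.get_epsilon(delta=1e-5))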
5 changes: 5 additions & 0 deletions GANDLF/config_manager.py
@@ -7,6 +7,7 @@

 from .utils import version_check
 from GANDLF.data.post_process import postprocessing_after_reverse_one_hot_encoding
+from GANDLF.privacy.opacus import parse_opacus_params

 from GANDLF.metrics import surface_distance_ids
 from importlib.metadata import version
@@ -710,6 +711,10 @@ def _parseConfig(
         temp_dict["type"] = params["optimizer"]
         params["optimizer"] = temp_dict

+    # initialize defaults for DP
+    if params.get("differential_privacy"):
+        params = parse_opacus_params(params, initialize_key)
+
     # initialize defaults for inference mechanism
     inference_mechanism = {"grid_aggregator_overlap": "crop", "patch_overlap": 0}
     initialize_inference_mechanism = False
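
For reference, the DP keys read explicitly in the diffs above are delta (privacy accounting) and physical_batch_size (memory cap); parse_opacus_params presumably fills in the remaining Opacus defaults, which this diff does not show. A hypothetical configuration fragment, expressed as the parsed params dict:

# Hypothetical values; only these two keys are read explicitly above.
params["differential_privacy"] = {
    "delta": 1e-5,  # target δ in the (ε, δ) privacy guarantee
    "physical_batch_size": 8,  # per-step cap; the logical batch_size is unchanged
}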

1 comment on commit fa6ae76

@github-actions
Contributor

@check-spelling-bot Report

🔴 Please review

See the 📜 action log or 📝 job summary for details.

Unrecognized words (663)
Abhishek
Abousamra
acdfbac
acsconv
adadelta
adagrad
adamax
adamw
addcdiv
addcmul
addgroup
addoption
ademamix
agc
agni
Aimilia
aimiliag
albumentations
allclose
allcontributors
allsigned
amsgrad
Anirban
anomymizer
anonymization
applyaugs
apptainer
Aqubvel
arange
archs
arcname
argmax
argwhere
Arnout
arxiv
asarray
astype
atleast
augs
auroc
autobuild
autocast
autodetermined
auxilary
avgs
awaa
Babak
bacf
backprop
backpropagate
backpropagation
Baheti
Baid
Bakas
Bashyam
batchnorm
bdfc
beggining
bgr
Bhalerao
bibtex
biggeest
bincount
biomedicalimaging
Bjoern
blabla
brahim
brainage
Brox
btw
Buildx
BVd
BVpye
capsys
cbica
cbig
cca
CCE
cdc
cdir
cel
cencoder
centercrop
cff
Chitalia
christos
Chunrui
Chv
cla
classif
classitk
codacy
codecov
CODEOWNERS
codeql
codereview
codespace
Colab
colorconv
colorjitter
colorlog
commandline
configfile
configgenerator
convs
cooldown
cosineannealing
cosineannealinglr
cosinesimilarity
croporpad
cropzero
ctc
CUBLAS
cudnn
cycliclr
datacenter
dataframe
dataprep
datestampnow
Davatzikos
dcce
dclog
dcm
dcmtk
deac
deadsnakes
DEBIAN
deconv
deepresunet
Deepthi
deepunet
denseblock
denselayer
densenet
depthconv
Despina
despinak
devcontainer
dfu
dicelog
dicom
dicomanonymizer
digestpath
disaggregating
discussioncomment
distilbert
DLF
DNN
dockerfiles
dockerized
dockertag
doi
Dokania
dotnet
downsamp
downsample
downsampling
doxygen
dpn
dqbm
dropna
dtype
dynunet
edac
edt
edu
eep
efc
efficientnet
efficientnetb
effiency
EIuqemz
elementwise
embeddings
Emre
ener
entrypoints
Ethem
excepthook
exctype
exponentiallr
fcn
Fdocker
fepegar
figsize
filenaming
filereader
fillna
finetuned
flaim
fnull
frgfm
fromarray
fromiter
Fsegmentation
Fulcio
Fworkflows
gandlf
Gastounioti
gbp
gcam
gcampp
GDCM
gdown
gdp
gelu
geometricanirban
Getka
getoption
getsizeof
ggcam
ghc
gle
glx
Gonz
Gotkowski
gpu
Grenko
gridaggregator
gridsampler
groundtruth
Guley
gumbel
Haghighi
Hamamc
Hamamci
hausdorff
healper
heatmaps
hexops
hft
histo
histopath
holocron
hookwrapper
HOUNSFIELD
hpc
hstack
HTR
huggingface
hyperparameters
idxs
ihc
iloc
imagenet
imbalanced
imread
imsave
imshow
imwrite
indeces
inlinehilite
inp
inputdata
instancenorm
interp
ISBI
issn
itcr
iterrows
itk
itkimage
itksnap
jaccard
JAX
JBHI
JDu
JSTARS
Junwen
jupyter
Jupyterlab
kaiming
kakumarabhishek
Karargyris
Karkada
keepdim
keleshev
kenshohara
KFold
kickstart
kld
Kontos
ksel
kspace
Kullback
Kurc
labelsample
labelsampler
lambd
layerchange
Lbtnaq
ldir
leakyrelu
Leibler
levelname
levelno
libgl
libjpeg
libpython
libsm
libvips
libxext
lightresunet
lightunet
linalg
linenums
lineplot
linspace
linting
lly
logit
logpt
logsoftmax
logvar
longreprtext
lps
lrelu
LROn
lstsq
lucidrains
macenko
mainrun
makereport
mathews
matplotlib
matthews
maxpool
mbergman
mcc
mcclog
MCD
mcr
MCT
mde
mdmc
medatory
medcam
medmnist
medperf
medpy
Megh
mencoder
menze
metr
miccai
missingprediction
misspled
mkdocs
mlco
mlcommons
mlcube
mlcubedir
mlp
modeified
modelbase
modelcard
modeldir
modelio
monai
Mouchtaris
moveaxis
mpp
mps
mri
msa
mscadocs
msdnet
mse
msle
Mukhopadhyay
multiclass
multidim
multilabel
mytagname
nadam
nans
naveenk
ncbi
ncc
ndarray
ndexbio
ndim
ndimage
ndlf
nesterov
neuroimage
nfnets
nibabel
nicl
NIf
nifti
nih
nii
nlabel
nmae
nnf
nonroot
normtype
notsigned
novograd
nsd
nuitka
numel
numlay
nvidia
octicons
offis
OFWCPDRE
ohif
onefile
onlatest
onnx
openfl
openslide
opensource
openvino
opm
opset
Orhun
ossar
outconv
outputdir
outputfile
palletsprojects
Panchumarthy
pathmnist
pati
pbar
pchs
Pdocker
pearson
Phenomics
pkl
plt
pmwiki
pnas
Prashant
prcomment
predics
predmask
preds
probs
Prunner
prv
psnr
psutil
pth
PTk
pubmed
purelib
pwadry
pydantic
pydicom
pyinstaller
pymdownx
pypa
pyplot
pytorch
pyversion
qsub
qubvel
radam
Radeon
radiomics
radxtools
ramework
randomaffine
randomanisotropy
randombiasfield
randomblur
randomelasticdeformation
randomflip
randommotion
randomnoise
randomswap
rdp
reco
recoverconfig
reducelronplateau
reduceonplateau
reencoded
refering
refernce
Rekor
relativized
relu
rensen
Reparameterization
reparameterize
rescaler
residualunet
resnet
resunet
rgbatorgb
rgbtorgba
rigourous
Ritesh
rmsprop
rocm
rocmdocs
Ronneberger
rowvar
ruifrok
runnning
runtest
Saltz
samplewise
Sarthak
sarthakpati
savefig
sbakas
sbia
scikit
scipy
screenshots
scse
sdata
sdnet
seaborn
Seac
sebastianffx
securefederatedai
segmap
segmask
segmentor
Sens
sessionstart
setbiasranges
setcutoffrange
setsigmaranges
Sezgin
sge
Shahira
shubham
siddhesh
sigstore
silu
Simonyan
simpleitk
sitk
skimage
sklearn
slurm
smi
socio
Soham
Sotirios
sparseadam
spellchecker
spellckecker
Sprop
Spyridon
ssim
stackexchange
stainextract
stainlib
steplr
stepsize
sterr
subjectid
subommands
Sucessfully
sume
superfences
sustainability
swapaxes
Tahsin
tcia
tempconvs
tensorboard
tgz
thresholded
thresholding
Thu
tiatoolbox
tiffslide
timepoints
timm
tio
tioq
tiosd
TLDR
tmi
TOOLSDIRECTORY
torchaudio
torchinfo
torchio
torchmetrics
torchvision
towardsdatascience
TPAMI
tqdm
traininginference
transunet
triaged
tryfirst
tsaftaris
TUDA
tversky
uanced
uinc
Ujjwal
Umeton
unet
unetr
uniformsample
uniformsampler
unittests
unitwise
unsqueeze
upenn
Uploaing
Uploded
upsample
upsampled
upsampling
utm
uzh
vahadane
validing
valuetopredict
vgg
Vinayak
vios
visualstudiomagazine
vmem
voxel
VRAM
vtk
vvv
WACV
warmupcosineschedule
Wauplin
wcs
weightedsample
weightedsampler
whl
WORKDIR
wsi
wsl
xavier
xdim
XDl
XEI
xkq
xlabel
xlim
xnat
XResolution
XTools
yamlchecker
yamlvalidator
ydim
ylabel
YResolution
Yrv
Yuemeng
zarr
Zeroplanes
zicat
znorm
ZNormalization
Zou
Some files were automatically ignored 🙈

These sample patterns would exclude them:

^\Q__init__.py\E$
^\QGANDLF/data/patch_miner/__init__.py\E$
^\QGANDLF/data/patch_miner/opm/__init__.py\E$
^\QGANDLF/grad_clipping/__init__.py\E$
^\QGANDLF/models/seg_modules/__init__.py\E$
^\QGANDLF/privacy/__init__.py\E$
^\Qtesting/__init__.py\E$

You should consider adding them to:

.github/actions/spelling/excludes.txt

File matching is via Perl regular expressions.

To check these files, more of their words need to be in the dictionary than not. You can use patterns.txt to exclude portions, add items to the dictionary (e.g. by adding them to allow.txt), or fix typos.

To accept these unrecognized words as correct and update file exclusions, you could run the following commands

... in a clone of the [email protected]:mlcommons/GaNDLF.git repository
on the feature/spellchecker branch (ℹ️ how do I use this?):

curl -s -S -L 'https://raw.githubusercontent.com/check-spelling/check-spelling/main/apply.pl' |
perl - 'https://github.com/mlcommons/GaNDLF/actions/runs/11370631110/attempts/1'
Available 📚 dictionaries could cover words not in the 📘 dictionary
Dictionary                       Entries  Covers  Uniquely
cspell:java/src/java-terms.txt   920      1       1

Consider adding them (in .github/workflows/spellchecker.yml) for uses: check-spelling/check-spelling@main in its with:

      with:
        extra_dictionaries:
          cspell:java/src/java-terms.txt

To stop checking additional dictionaries, add (in .github/workflows/spellchecker.yml) for uses: check-spelling/check-spelling@main in its with:

check_extra_dictionaries: ''
Pattern suggestions ✂️ (31)

You could add these patterns to .github/actions/spelling/patterns.txt:

# Automatically suggested patterns
# hit-count: 711 file-count: 63
# machine learning (?)
\b(?i)ml(?=[a-z]{2,})

# hit-count: 570 file-count: 105
# https/http/file urls
(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/*%?=~_|!:,.;]+[-A-Za-z0-9+&@#/*%=~_|]

# hit-count: 146 file-count: 52
# GitHub SHAs (markdown)
(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|)

# hit-count: 130 file-count: 53
# python
\b(?i)py(?!gments|gmy|lon|ramid|ro|th)(?=[a-z]{2,})

# hit-count: 41 file-count: 29
# scala imports
^import (?:[\w.]|\{\w*?(?:,\s*(?:\w*|\*))+\})+

# hit-count: 26 file-count: 8
# libraries
(?:\b|_)lib(?:re(?=office)|)(?!era[lt]|ero|erty|rar(?:i(?:an|es)|y))(?=[a-z])

# hit-count: 24 file-count: 15
# Python string prefix / binary prefix
# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings
(?<!['"])\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)['"](?=[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})

# hit-count: 20 file-count: 11
# container images
image: [-\w./:@]+

# hit-count: 17 file-count: 12
# Compiler flags (Unix, Java/Scala)
# Use if you have things like `-Pdocker` and want to treat them as `docker`
(?:^|[\t ,>"'`=(])-(?:(?:J-|)[DPWXY]|[Llf])(?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})

# hit-count: 16 file-count: 8
# version suffix <word>v#
(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))

# hit-count: 15 file-count: 9
# GitHub actions
\buses:\s+[-\w.]+/[-\w./]+@[-\w.]+

# hit-count: 13 file-count: 11
# Compiler flags (Windows / PowerShell)
# This is a subset of the more general compiler flags pattern.
# It avoids matching `-Path` to prevent it from being treated as `ath`
(?:^|[\t ,"'`=(])-(?:[DPL](?=[A-Z]{2,})|[WXYlf](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,}))

# hit-count: 12 file-count: 1
# tar arguments
\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+

# hit-count: 10 file-count: 7
# hex runs
\b[0-9a-fA-F]{16,}\b

# hit-count: 6 file-count: 6
# mailto urls
mailto:[-a-zA-Z=;:/?%&0-9+@._]{3,}

# hit-count: 5 file-count: 3
# URL escaped characters
%[0-9A-F][A-F](?=[A-Za-z])

# hit-count: 4 file-count: 4
# Docker images
^\s*FROM\s+\S+:\S+(?:\s+AS\s+\S+|)

# hit-count: 4 file-count: 1
# shields.io
\bshields\.io/[-\w/%?=&.:+;,]*

# hit-count: 1 file-count: 1
# Google Forms
\bforms\.gle/\w+

# hit-count: 1 file-count: 1
# gist github
\bgist\.github\.com/[^/\s"]+/[0-9a-f]+

# hit-count: 1 file-count: 1
# git.io
\bgit\.io/[0-9a-zA-Z]+

# hit-count: 1 file-count: 1
# Contributor
\[[^\]]+\]\(https://github\.com/[^/\s"]+/?\)

# hit-count: 1 file-count: 1
# medium
\bmedium\.com/@?[^/\s"]+/[-\w]+

# hit-count: 1 file-count: 1
# vs devops
\bvisualstudio.com(?::443|)/[-\w/?=%&.]*

# hit-count: 1 file-count: 1
# stackexchange -- https://stackexchange.com/feeds/sites
\b(?:askubuntu|serverfault|stack(?:exchange|overflow)|superuser).com/(?:questions/\w+/[-\w]+|a/)

# hit-count: 1 file-count: 1
# Wikipedia
\ben\.wikipedia\.org/wiki/[-\w%.#]+

# hit-count: 1 file-count: 1
# sha-... -- uses a fancy capture
(\\?['"]|&quot;)[0-9a-f]{40,}\g{-1}

# hit-count: 1 file-count: 1
# uuid:
\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b

# hit-count: 1 file-count: 1
# Non-English
[a-zA-Z]*[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]*|[a-zA-Z]{3,}[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]|[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3,}

# hit-count: 1 file-count: 1
# in [email protected]+, printf markers aren't automatically consumed
# printf markers
(?<!\\)\\[nrt](?=[a-z]{2,})

# hit-count: 1 file-count: 1
# alternate printf markers if you run into latex and friends
(?<!\\)\\[nrt](?=[a-z]{2,})(?=.*['"`])

Errors (4)

See the 📜 action log or 📝 job summary for details.

Errors                 Count
ℹ️ binary-file         7
ℹ️ candidate-pattern   63
❌ check-file-path     310
❌ forbidden-pattern   13

See ❌ Event descriptions for more information.
