Log study info in anonymisation (#587)

* Log study info in anonymisation, to enable easier tracking
* Make dicom helpers public
SAFEHR-data · Jan 2, 2025 · 208e74c · 208e74c
1 parent 1d62bf7
commit 208e74c
Show file tree

Hide file tree

Showing 8 changed files with 59 additions and 50 deletions.
diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py
@@ -42,6 +42,7 @@
 from pydicom import dcmread
 
 import orthanc
+from pixl_dcmd.dicom_helpers import get_study_info
 from pixl_dcmd.main import (
     anonymise_dicom_and_update_db,
     parse_validation_results,
@@ -53,6 +54,8 @@
 
     from core.project_config.pixl_config_model import PixlConfig
 
+    from pixl_dcmd.dicom_helpers import StudyInfo
+
 ORTHANC_USERNAME = config("ORTHANC_USERNAME")
 ORTHANC_PASSWORD = config("ORTHANC_PASSWORD")
 ORTHANC_URL = "http://localhost:8042"
@@ -234,7 +237,6 @@ def _import_studies_from_raw(
 
     Args:
         study_resource_ids: Resource IDs of the study in Orthanc Raw
-        study_uids: Corresponding StudyInstanceUIDs
         project_name: Name of the project
 
     - Pull studies from Orthanc Raw based on its resource ID
@@ -246,7 +248,8 @@ def _import_studies_from_raw(
     anonymised_study_uids = []
 
     for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False):
-        anonymised_uid = _anonymise_study_and_upload(study_resource_id, study_uid, project_name)
+        logger.debug("Processing project '{}', study '{}' ", project_name, study_uid)
+        anonymised_uid = _anonymise_study_and_upload(study_resource_id, project_name)
         if anonymised_uid:
             anonymised_study_uids.append(anonymised_uid)
 
@@ -270,27 +273,26 @@ def _import_studies_from_raw(
         send_study(study_id=resource_id, project_name=project_name)
 
 
-def _anonymise_study_and_upload(
-    study_resource_id: str, study_uid: str, project_name: str
-) -> str | None:
+def _anonymise_study_and_upload(study_resource_id: str, project_name: str) -> str | None:
     zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id)
 
+    study_info = _get_study_info_from_first_file(zipped_study_bytes)
+    logger.info("Processing project '{}', {}", project_name, study_info)
+
     with ZipFile(zipped_study_bytes) as zipped_study:
         try:
             anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances(
                 zipped_study=zipped_study,
-                study_uid=study_uid,
+                study_info=study_info,
                 project_name=project_name,
             )
         except PixlDiscardError as discard:
             logger.warning(
-                "Failed to anonymize project: '{}', study: {}: {}", project_name, study_uid, discard
+                "Failed to anonymize project: '{}', {}: {}", project_name, study_info, discard
             )
             return None
         except Exception:  # noqa: BLE001
-            logger.exception(
-                "Failed to anonymize project: '{}', study: {}", project_name, study_uid
-            )
+            logger.exception("Failed to anonymize project: '{}', {}", project_name, study_info)
             return None
 
     _upload_instances(anonymised_instances_bytes)
@@ -310,36 +312,16 @@ def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO:
     return BytesIO(response.content)
 
 
-def _get_study_resource_id(study_uid: str) -> str:
-    """
-    Get the resource ID for an existing study based on its StudyInstanceUID.
-
-    Returns None if there are no resources with the given StudyInstanceUID.
-    Returns the resource ID if there is a single resource with the given StudyInstanceUID.
-    Returns None if there are multiple resources with the given StudyInstanceUID and deletes
-    the studies.
-    """
-    data = json.dumps(
-        {
-            "Level": "Study",
-            "Query": {
-                "StudyInstanceUID": study_uid,
-            },
-        }
-    )
-    study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data))
-    if not study_resource_ids:
-        message = f"No study found with StudyInstanceUID {study_uid}"
-        raise ValueError(message)
-    if len(study_resource_ids) > 1:
-        message = f"Multiple studies found with StudyInstanceUID {study_uid}"
-        raise ValueError(message)
-
-    return study_resource_ids[0]
+def _get_study_info_from_first_file(zipped_study_bytes) -> StudyInfo:
+    with ZipFile(zipped_study_bytes) as zipped_study:
+        file_info = zipped_study.infolist()[0]
+        with zipped_study.open(file_info) as file:
+            dataset = dcmread(file)
+            return get_study_info(dataset)
 
 
 def _anonymise_study_instances(
-    zipped_study: ZipFile, study_uid: str, project_name: str
+    zipped_study: ZipFile, study_info: StudyInfo, project_name: str
 ) -> tuple[list[bytes], str]:
     """
     Iterate over all instances and anonymise them.
@@ -350,7 +332,6 @@ def _anonymise_study_instances(
     """
     config = load_project_config(project_name)
     anonymised_instances_bytes = []
-    logger.info("Processing project '{}', study: {}", project_name, study_uid)
     skipped_instance_counts = defaultdict(int)
     dicom_validation_errors = {}
 
@@ -364,9 +345,9 @@ def _anonymise_study_instances(
                 )
             except PixlSkipInstanceError as e:
                 logger.debug(
-                    "Skipping instance {} for study {}: {}",
+                    "Skipping instance {} for {}: {}",
                     dataset[0x0008, 0x0018].value,
-                    study_uid,
+                    study_info,
                     e,
                 )
                 skipped_instance_counts[str(e)] += 1
@@ -380,9 +361,9 @@ def _anonymise_study_instances(
         raise PixlDiscardError(message)
 
     logger.debug(
-        "Project '{}' Study {}, skipped instances: {}",
+        "Project '{}' {}, skipped instances: {}",
         project_name,
-        study_uid,
+        study_info,
         dict(skipped_instance_counts),
     )
 
@@ -391,7 +372,7 @@ def _anonymise_study_instances(
             "The anonymisation introduced the following validation errors:\n{}",
             parse_validation_results(dicom_validation_errors),
         )
-    logger.success("Finished anonymising project: '{}', study: {}", project_name, study_uid)
+    logger.success("Finished anonymising project: '{}', {}", project_name, study_info)
     return anonymised_instances_bytes, anonymised_study_uid
 
 
@@ -419,6 +400,34 @@ def _upload_instances(instances_bytes: list[bytes]) -> None:
     upload_response.raise_for_status()
 
 
+def _get_study_resource_id(study_uid: str) -> str:
+    """
+    Get the resource ID for an existing study based on its StudyInstanceUID.
+
+    Raises ValueError if there are no resources with the given StudyInstanceUID.
+    Returns the resource ID if there is a single resource with the given StudyInstanceUID.
+    Raises ValueError if there are multiple resources with the given StudyInstanceUID
+    (the studies are left untouched).
+    """
+    data = json.dumps(
+        {
+            "Level": "Study",
+            "Query": {
+                "StudyInstanceUID": study_uid,
+            },
+        }
+    )
+    study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data))
+    if not study_resource_ids:
+        message = f"No study found with StudyInstanceUID {study_uid}"
+        raise ValueError(message)
+    if len(study_resource_ids) > 1:
+        message = f"Multiple studies found with StudyInstanceUID {study_uid}"
+        raise ValueError(message)
+
+    return study_resource_ids[0]
+
+
 def send_study(study_id: str, project_name: str) -> None:
     """
     Send the resource to the appropriate destination.

diff --git a/pixl_dcmd/src/pixl_dcmd/_database.py b/pixl_dcmd/src/pixl_dcmd/_database.py
@@ -24,7 +24,7 @@
 from sqlalchemy import URL, create_engine, exists
 from sqlalchemy.orm import sessionmaker, exc
 
-from pixl_dcmd._dicom_helpers import StudyInfo
+from pixl_dcmd.dicom_helpers import StudyInfo
 
 url = URL.create(
     drivername="postgresql+psycopg2",

diff --git a/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py → pixl_dcmd/src/pixl_dcmd/dicom_helpers.py b/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py → pixl_dcmd/src/pixl_dcmd/dicom_helpers.py
diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py
@@ -33,14 +33,14 @@
     get_uniq_pseudo_study_uid_and_update_db,
     get_pseudo_patient_id_and_update_db,
 )
-from pixl_dcmd._dicom_helpers import (
+from pixl_dcmd.dicom_helpers import (
     DicomValidator,
     get_study_info,
 )
 from pixl_dcmd._tag_schemes import _scheme_list_to_dict, merge_tag_schemes
 
 if typing.TYPE_CHECKING:
-    from pixl_dcmd._dicom_helpers import StudyInfo
+    from pixl_dcmd.dicom_helpers import StudyInfo
 
 
 def write_dataset_to_bytes(dataset: Dataset) -> bytes:

diff --git a/pixl_dcmd/tests/conftest.py b/pixl_dcmd/tests/conftest.py
@@ -23,7 +23,7 @@
 from collections.abc import Generator
 from typing import Optional
 
-from pixl_dcmd._dicom_helpers import get_study_info
+from pixl_dcmd.dicom_helpers import get_study_info
 from core.project_config import load_project_config
 import pytest
 import pytest_pixl.dicom

diff --git a/pixl_dcmd/tests/test_database.py b/pixl_dcmd/tests/test_database.py
@@ -22,7 +22,7 @@
     get_uniq_pseudo_study_uid_and_update_db,
     get_pseudo_patient_id_and_update_db,
 )
-from pixl_dcmd._dicom_helpers import StudyInfo
+from pixl_dcmd.dicom_helpers import StudyInfo
 from sqlalchemy.orm import Session
 
 STUDY_DATE = datetime.date.fromisoformat("2023-01-01")

diff --git a/pixl_dcmd/tests/test_dicom_validator.py b/pixl_dcmd/tests/test_dicom_validator.py
@@ -15,7 +15,7 @@
 from __future__ import annotations
 
 import pytest
-from pixl_dcmd._dicom_helpers import DicomValidator
+from pixl_dcmd.dicom_helpers import DicomValidator
 from pixl_dcmd.main import anonymise_dicom
 from pydicom import Dataset
 

diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py
@@ -35,7 +35,7 @@
 from core.project_config.pixl_config_model import load_config_and_validate
 from decouple import config
 
-from pixl_dcmd._dicom_helpers import get_study_info
+from pixl_dcmd.dicom_helpers import get_study_info
 from pixl_dcmd.main import (
     anonymise_dicom_and_update_db,
     _anonymise_dicom_from_scheme,