Skip to content

Commit

Permalink
Log study info in anonymisation (#587)
Browse files Browse the repository at this point in the history
* Log study info in anonymisation

To enable easier tracking

* Make dicom helpers public
  • Loading branch information
stefpiatek authored Jan 2, 2025
1 parent 1d62bf7 commit 208e74c
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 50 deletions.
95 changes: 52 additions & 43 deletions orthanc/orthanc-anon/plugin/pixl.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from pydicom import dcmread

import orthanc
from pixl_dcmd.dicom_helpers import get_study_info
from pixl_dcmd.main import (
anonymise_dicom_and_update_db,
parse_validation_results,
Expand All @@ -53,6 +54,8 @@

from core.project_config.pixl_config_model import PixlConfig

from pixl_dcmd.dicom_helpers import StudyInfo

ORTHANC_USERNAME = config("ORTHANC_USERNAME")
ORTHANC_PASSWORD = config("ORTHANC_PASSWORD")
ORTHANC_URL = "http://localhost:8042"
Expand Down Expand Up @@ -234,7 +237,6 @@ def _import_studies_from_raw(
Args:
study_resource_ids: Resource IDs of the study in Orthanc Raw
study_uids: Corresponding StudyInstanceUIDs
project_name: Name of the project
- Pull studies from Orthanc Raw based on its resource ID
Expand All @@ -246,7 +248,8 @@ def _import_studies_from_raw(
anonymised_study_uids = []

for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False):
anonymised_uid = _anonymise_study_and_upload(study_resource_id, study_uid, project_name)
logger.debug("Processing project '{}', study '{}' ", project_name, study_uid)
anonymised_uid = _anonymise_study_and_upload(study_resource_id, project_name)
if anonymised_uid:
anonymised_study_uids.append(anonymised_uid)

Expand All @@ -270,27 +273,26 @@ def _import_studies_from_raw(
send_study(study_id=resource_id, project_name=project_name)


def _anonymise_study_and_upload(
study_resource_id: str, study_uid: str, project_name: str
) -> str | None:
def _anonymise_study_and_upload(study_resource_id: str, project_name: str) -> str | None:
zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id)

study_info = _get_study_info_from_first_file(zipped_study_bytes)
logger.info("Processing project '{}', {}", project_name, study_info)

with ZipFile(zipped_study_bytes) as zipped_study:
try:
anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances(
zipped_study=zipped_study,
study_uid=study_uid,
study_info=study_info,
project_name=project_name,
)
except PixlDiscardError as discard:
logger.warning(
"Failed to anonymize project: '{}', study: {}: {}", project_name, study_uid, discard
"Failed to anonymize project: '{}', {}: {}", project_name, study_info, discard
)
return None
except Exception: # noqa: BLE001
logger.exception(
"Failed to anonymize project: '{}', study: {}", project_name, study_uid
)
logger.exception("Failed to anonymize project: '{}', {}", project_name, study_info)
return None

_upload_instances(anonymised_instances_bytes)
Expand All @@ -310,36 +312,16 @@ def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO:
return BytesIO(response.content)


def _get_study_resource_id(study_uid: str) -> str:
    """
    Get the resource ID for an existing study based on its StudyInstanceUID.

    Posts a study-level query for the StudyInstanceUID to Orthanc's
    `/tools/find` endpoint and expects exactly one match.

    Args:
        study_uid: StudyInstanceUID of the study to look up

    Returns:
        The resource ID of the single study matching the StudyInstanceUID

    Raises:
        ValueError: If no study, or more than one study, matches the StudyInstanceUID
    """
    data = json.dumps(
        {
            "Level": "Study",
            "Query": {
                "StudyInstanceUID": study_uid,
            },
        }
    )
    study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data))
    if not study_resource_ids:
        message = f"No study found with StudyInstanceUID {study_uid}"
        raise ValueError(message)
    if len(study_resource_ids) > 1:
        # Ambiguous match: refuse to pick one arbitrarily
        message = f"Multiple studies found with StudyInstanceUID {study_uid}"
        raise ValueError(message)

    return study_resource_ids[0]
def _get_study_info_from_first_file(zipped_study_bytes: BytesIO) -> StudyInfo:
    """
    Read the study information from the first file of a zipped study.

    Only the first entry of the archive is opened; it is parsed with `dcmread`
    and the resulting dataset is passed to `get_study_info`.

    Args:
        zipped_study_bytes: Zip archive of the study

    Returns:
        Study information extracted from the first file's dataset

    Raises:
        ValueError: If the archive contains no entries
    """
    with ZipFile(zipped_study_bytes) as zipped_study:
        archive_entries = zipped_study.infolist()
        if not archive_entries:
            # Fail with an explicit message rather than a bare IndexError
            message = "Zipped study contains no files, cannot determine study info"
            raise ValueError(message)
        with zipped_study.open(archive_entries[0]) as file:
            dataset = dcmread(file)
    return get_study_info(dataset)


def _anonymise_study_instances(
zipped_study: ZipFile, study_uid: str, project_name: str
zipped_study: ZipFile, study_info: StudyInfo, project_name: str
) -> tuple[list[bytes], str]:
"""
Iterate over all instances and anonymise them.
Expand All @@ -350,7 +332,6 @@ def _anonymise_study_instances(
"""
config = load_project_config(project_name)
anonymised_instances_bytes = []
logger.info("Processing project '{}', study: {}", project_name, study_uid)
skipped_instance_counts = defaultdict(int)
dicom_validation_errors = {}

Expand All @@ -364,9 +345,9 @@ def _anonymise_study_instances(
)
except PixlSkipInstanceError as e:
logger.debug(
"Skipping instance {} for study {}: {}",
"Skipping instance {} for {}: {}",
dataset[0x0008, 0x0018].value,
study_uid,
study_info,
e,
)
skipped_instance_counts[str(e)] += 1
Expand All @@ -380,9 +361,9 @@ def _anonymise_study_instances(
raise PixlDiscardError(message)

logger.debug(
"Project '{}' Study {}, skipped instances: {}",
"Project '{}' {}, skipped instances: {}",
project_name,
study_uid,
study_info,
dict(skipped_instance_counts),
)

Expand All @@ -391,7 +372,7 @@ def _anonymise_study_instances(
"The anonymisation introduced the following validation errors:\n{}",
parse_validation_results(dicom_validation_errors),
)
logger.success("Finished anonymising project: '{}', study: {}", project_name, study_uid)
logger.success("Finished anonymising project: '{}', {}", project_name, study_info)
return anonymised_instances_bytes, anonymised_study_uid


Expand Down Expand Up @@ -419,6 +400,34 @@ def _upload_instances(instances_bytes: list[bytes]) -> None:
upload_response.raise_for_status()


def _get_study_resource_id(study_uid: str) -> str:
    """
    Get the resource ID for an existing study based on its StudyInstanceUID.

    Posts a study-level query for the StudyInstanceUID to Orthanc's
    `/tools/find` endpoint and expects exactly one match.

    Args:
        study_uid: StudyInstanceUID of the study to look up

    Returns:
        The resource ID of the single study matching the StudyInstanceUID

    Raises:
        ValueError: If no study, or more than one study, matches the StudyInstanceUID
    """
    data = json.dumps(
        {
            "Level": "Study",
            "Query": {
                "StudyInstanceUID": study_uid,
            },
        }
    )
    study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data))
    if not study_resource_ids:
        message = f"No study found with StudyInstanceUID {study_uid}"
        raise ValueError(message)
    if len(study_resource_ids) > 1:
        # Ambiguous match: refuse to pick one arbitrarily
        message = f"Multiple studies found with StudyInstanceUID {study_uid}"
        raise ValueError(message)

    return study_resource_ids[0]


def send_study(study_id: str, project_name: str) -> None:
"""
Send the resource to the appropriate destination.
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/src/pixl_dcmd/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sqlalchemy import URL, create_engine, exists
from sqlalchemy.orm import sessionmaker, exc

from pixl_dcmd._dicom_helpers import StudyInfo
from pixl_dcmd.dicom_helpers import StudyInfo

url = URL.create(
drivername="postgresql+psycopg2",
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions pixl_dcmd/src/pixl_dcmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@
get_uniq_pseudo_study_uid_and_update_db,
get_pseudo_patient_id_and_update_db,
)
from pixl_dcmd._dicom_helpers import (
from pixl_dcmd.dicom_helpers import (
DicomValidator,
get_study_info,
)
from pixl_dcmd._tag_schemes import _scheme_list_to_dict, merge_tag_schemes

if typing.TYPE_CHECKING:
from pixl_dcmd._dicom_helpers import StudyInfo
from pixl_dcmd.dicom_helpers import StudyInfo


def write_dataset_to_bytes(dataset: Dataset) -> bytes:
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from collections.abc import Generator
from typing import Optional

from pixl_dcmd._dicom_helpers import get_study_info
from pixl_dcmd.dicom_helpers import get_study_info
from core.project_config import load_project_config
import pytest
import pytest_pixl.dicom
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
get_uniq_pseudo_study_uid_and_update_db,
get_pseudo_patient_id_and_update_db,
)
from pixl_dcmd._dicom_helpers import StudyInfo
from pixl_dcmd.dicom_helpers import StudyInfo
from sqlalchemy.orm import Session

STUDY_DATE = datetime.date.fromisoformat("2023-01-01")
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/test_dicom_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import annotations

import pytest
from pixl_dcmd._dicom_helpers import DicomValidator
from pixl_dcmd.dicom_helpers import DicomValidator
from pixl_dcmd.main import anonymise_dicom
from pydicom import Dataset

Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from core.project_config.pixl_config_model import load_config_and_validate
from decouple import config

from pixl_dcmd._dicom_helpers import get_study_info
from pixl_dcmd.dicom_helpers import get_study_info
from pixl_dcmd.main import (
anonymise_dicom_and_update_db,
_anonymise_dicom_from_scheme,
Expand Down

0 comments on commit 208e74c

Please sign in to comment.