diff --git a/.env.sample b/.env.sample index 975096bba..dd127d74a 100644 --- a/.env.sample +++ b/.env.sample @@ -54,7 +54,6 @@ ORTHANC_ANON_URL=http://orthanc-anon:8042 ORTHANC_ANON_USERNAME= ORTHANC_ANON_PASSWORD= ORTHANC_ANON_AE_TITLE= -ORTHANC_ANON_HTTP_TIMEOUT=60 ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT=false ENABLE_DICOM_WEB=true STUDY_TIME_OFFSET= diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a15a66f6e..f95ad02ef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -28,11 +28,11 @@ concurrency: jobs: lint: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 5 steps: - name: Checkout - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Run pre-commit uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 @@ -46,7 +46,7 @@ jobs: docker compose config --quiet test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 30 strategy: fail-fast: false # run all tests if even if one fails @@ -54,10 +54,10 @@ jobs: package_dir: [pixl_core, hasher, pixl_dcmd, cli, pixl_export, pixl_imaging, pytest-pixl] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Init Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 with: python-version: "3.11" cache: "pip" @@ -78,7 +78,7 @@ jobs: AZURE_KEY_VAULT_SECRET_NAME: test - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673 # v4.5.0 + uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1 with: directory: ${{ matrix.package_dir }} env: @@ -86,13 +86,13 @@ jobs: system-test: if: ${{ ! 
github.event.pull_request.draft || contains(github.event.pull_request.title, '[force-system-test]') }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 30 steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - - uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3 - name: Init Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 with: python-version: "3.11" cache: "pip" @@ -179,7 +179,7 @@ jobs: - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673 # v4.5.0 + uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1 with: directory: test env: diff --git a/.renovaterc.json5 b/.renovaterc.json5 index 50625e30a..ed970c527 100644 --- a/.renovaterc.json5 +++ b/.renovaterc.json5 @@ -2,6 +2,36 @@ $schema: "https://docs.renovatebot.com/renovate-schema.json", extends: [ "github>UCL-ARC/.github//renovate/default-config.json", - "schedule:monthly", + ":assignAndReview(team:arc-dev)", + "group:allNonMajor" ], -} \ No newline at end of file + customDatasources: { + dicomSpec: { + defaultRegistryUrlTemplate: "https://dicom.nema.org/medical/dicom", + format: "html", + }, + }, + customManagers: [ + { + customType: "regex", + description: "Update DICOM Spec edition used for validation", + fileMatch: [ + "orthanc/orthanc-anon/plugin/download_dicom_spec.py", + "pixl_dcmd/src/pixl_dcmd/main.py", + ], + matchStrings: [ + 'edition\\s?=\\s?"(?.*?)"\n', + '.*\\(edition\\s?=\\s?"(?.*?)"\\)\n', + ], + depNameTemplate: "dicomSpec", + datasourceTemplate: "custom.dicomSpec", + }, + ], + packageRules: [ + { + matchDatasources: ["custom.dicomSpec"], + extractVersion: "/medical/dicom/(?\\d{4}[a-z])/", + versioning: "loose", + } + ] +} diff --git a/bin/README.md b/bin/README.md index eedf89797..13c2e1543 100644 --- a/bin/README.md +++ b/bin/README.md @@ -1,6 +1,24 @@ ## 'PIXL/bin' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[linters](./linters/README.md)
+
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+README.md
+
+</details>
+
diff --git a/bin/linters/README.md b/bin/linters/README.md
index a03874a75..84698fd12 100644
--- a/bin/linters/README.md
+++ b/bin/linters/README.md
@@ -1,6 +1,16 @@
+This directory contains the files used for linting.
+
 ## 'PIXL/bin/linters' Directory Contents
 
-### Files
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| check_headers_exist.sh | README.md |
-check_headers_exist.sh
+
+</details>
diff --git a/cli/README.md b/cli/README.md index bbbddeef6..4a1888c48 100644 --- a/cli/README.md +++ b/cli/README.md @@ -145,15 +145,29 @@ pytest -vs tests/test_docker_commands.py #e.g., for particular tests ## 'PIXL/cli' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[src](./src/README.md)

[tests](./tests/README.md)

-### Files
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
+
+</details>
-pyproject.toml
-README.md

diff --git a/cli/pyproject.toml b/cli/pyproject.toml
index 4603683a2..4b7888a5d 100644
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@@ -9,7 +9,7 @@ classifiers = ["Programming Language :: Python :: 3"]
 dependencies = [
     "core==0.2.0rc0",
     "click==8.1.7",
-    "tqdm==4.66.4",
+    "tqdm==4.67.1",
 ]
 
 [project.optional-dependencies]

diff --git a/cli/src/README.md b/cli/src/README.md
index 807a572b5..c1c1c9029 100644
--- a/cli/src/README.md
+++ b/cli/src/README.md
@@ -1,6 +1,26 @@
+This directory contains the source files for the PIXL command line interface.
+
 ## 'PIXL/cli/src' Directory Contents
 
-### Subdirectories
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[pixl_cli](./pixl_cli/README.md)
+
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+README.md
+
+</details>
+
diff --git a/cli/src/pixl_cli/README.md b/cli/src/pixl_cli/README.md
index 11f286869..685a139c5 100644
--- a/cli/src/pixl_cli/README.md
+++ b/cli/src/pixl_cli/README.md
@@ -1,18 +1,22 @@
-## 'PIXL/cli/src/pixl_cli' Directory Contents
-
-### Files
-
-main.py
+This directory contains the implementation of the PIXL command line interface.
 
-_config.py
-
-_database.py
+## 'PIXL/cli/src/pixl_cli' Directory Contents
 
-_docker_commands.py
+<details>
+<summary>
+
+### Files
+
+</summary>
-_io.py
+
-_message_processing.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| main.py | README.md |
+| _config.py | |
+| _database.py | |
+| _docker_commands.py | |
+| _io.py | |
+| _message_processing.py | |
+| __init__.py | |
-__init__.py
+
+</details>
diff --git a/cli/src/pixl_cli/_database.py b/cli/src/pixl_cli/_database.py index 4c9c6e197..1772e15d8 100644 --- a/cli/src/pixl_cli/_database.py +++ b/cli/src/pixl_cli/_database.py @@ -89,8 +89,8 @@ def _filter_existing_images( ) -> pd.DataFrame: # DataFrame indices must batch when using df.isin (or df.index.isin) # So we re-index the DataFrames to match on the columns we want to compare - messages_df_reindexed = messages_df.set_index(["accession_number", "mrn", "study_date"]) - images_df_reindexed = images_df.set_index(["accession_number", "mrn", "study_date"]) + messages_df_reindexed = messages_df.set_index(["accession_number", "mrn", "study_uid"]) + images_df_reindexed = images_df.set_index(["accession_number", "mrn", "study_uid"]) keep_indices = ~messages_df_reindexed.index.isin(images_df_reindexed.index) return messages_df[keep_indices] @@ -101,7 +101,7 @@ def _filter_exported_messages( ) -> pd.DataFrame: merged = messages_df.merge( images_df, - on=["accession_number", "mrn", "study_date"], + on=["accession_number", "mrn", "study_uid"], how="left", validate="one_to_one", suffixes=(None, None), @@ -131,7 +131,7 @@ def all_images_for_project(project_slug: str) -> pd.DataFrame: PixlSession = sessionmaker(engine) query = ( - select(Image.accession_number, Image.study_date, Image.mrn, Image.exported_at) + select(Image.accession_number, Image.study_uid, Image.mrn, Image.exported_at) .join(Extract) .where(Extract.slug == project_slug) ) diff --git a/cli/src/pixl_cli/_io.py b/cli/src/pixl_cli/_io.py index 39bff743b..a551b4dff 100644 --- a/cli/src/pixl_cli/_io.py +++ b/cli/src/pixl_cli/_io.py @@ -64,19 +64,25 @@ def read_patient_info(resources_path: Path) -> pd.DataFrame: messages_df = _load_csv(resources_path) else: messages_df = _load_parquet(resources_path) + # Tidy up dataframe in case of whitespace or no way to identify images + unique_columns = ["project_name", "mrn", "accession_number", "study_uid"] + filtered_df = messages_df.dropna(subset=["accession_number", "study_uid"], how="all") + for column in unique_columns: + filtered_df[column] = filtered_df[column].str.strip() + filtered_df = filtered_df[ + ~(filtered_df["accession_number"].eq("") & filtered_df["study_uid"].eq("")) + ] - messages_df = messages_df.sort_values(by=["project_name", "study_date"]) - messages_df = messages_df.drop_duplicates( - subset=["project_name", "mrn", "accession_number", "study_date"] - ) + filtered_df = filtered_df.sort_values(by=["project_name", "study_date"]) + filtered_df = filtered_df.drop_duplicates(subset=unique_columns) - if len(messages_df) == 0: + if len(filtered_df) == 0: msg = f"Failed to find any messages in {resources_path}" raise ValueError(msg) - logger.info("Created {} messages from {}", len(messages_df), resources_path) + logger.info("Created {} messages from {}", len(filtered_df), resources_path) - return messages_df + return filtered_df def _load_csv(filepath: Path) -> pd.DataFrame: @@ -168,7 +174,6 @@ class DF_COLUMNS(StrEnum): # noqa: N801 "participant_id": "pseudo_patient_id", } - MAP_PARQUET_TO_MESSAGE_KEYS = { "PrimaryMrn": "mrn", "AccessionNumber": "accession_number", diff --git a/cli/tests/README.md b/cli/tests/README.md index 85977d651..2d25bff75 100644 --- a/cli/tests/README.md +++ b/cli/tests/README.md @@ -1,5 +1,10 @@ +# PIXL cli tests -Remove the db container and associated data +This directory contains the code for the tests of the PIXL command line interface. 
+
+
+
+In order to remove the db container and associated data after the tests have been run, use the following command:
 
```bash
docker container rm pixl-test-db -v -f
```

@@ -7,23 +12,22 @@ docker container rm pixl-test-db -v -f
 
 ## 'PIXL/cli/tests' Directory Contents
 
-### Files
-
-conftest.py
-
-README.md
-
-test_check_env.py
-
-test_database.py
-
-test_docker_commands.py
-
-test_io.py
+<details>
+<summary>
+
+### Files
+
+</summary>
-test_messages_from_files.py
+
-test_message_processing.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| conftest.py | README.md |
+| test_check_env.py | |
+| test_database.py | |
+| test_docker_commands.py | |
+| test_io.py | |
+| test_messages_from_files.py | |
+| test_message_processing.py | |
+| test_populate.py | |
-test_populate.py
+
+</details>
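For reference, the tidy-up that `read_patient_info` in `cli/src/pixl_cli/_io.py` now performs (see the diff above) boils down to the following pandas pattern. This is a standalone sketch with made-up data, not the PIXL function itself:

```python
"""Standalone sketch of the cleaning added to read_patient_info: strip stray
whitespace, drop rows with no image identifier, then de-duplicate."""
import pandas as pd

unique_columns = ["project_name", "mrn", "accession_number", "study_uid"]
df = pd.DataFrame(
    {
        "project_name": ["proj ", "proj", "proj"],
        "mrn": ["123 ", "123", "456"],
        "accession_number": ["A1", "A1", ""],
        "study_uid": ["1.2.3", "1.2.3", ""],
        "study_date": ["2022-01-01"] * 3,
    }
)

# Drop rows where *both* image identifiers are missing, then strip whitespace
filtered = df.dropna(subset=["accession_number", "study_uid"], how="all").copy()
for column in unique_columns:
    filtered[column] = filtered[column].str.strip()
filtered = filtered[~(filtered["accession_number"].eq("") & filtered["study_uid"].eq(""))]

# One message per study: de-duplicate on the identifying columns
filtered = filtered.drop_duplicates(subset=unique_columns)
print(filtered)  # one row: proj, 123, A1, 1.2.3, 2022-01-01
```

The third row is dropped because it has neither an accession number nor a study UID, and the first two collapse into one message once the whitespace is stripped, which is exactly what `test_whitespace_and_na_processing` below asserts.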
diff --git a/cli/tests/test_database.py b/cli/tests/test_database_cli_interaction.py similarity index 100% rename from cli/tests/test_database.py rename to cli/tests/test_database_cli_interaction.py diff --git a/cli/tests/test_messages_from_files.py b/cli/tests/test_messages_from_files.py index ca6c7339a..9c8e5dbb4 100644 --- a/cli/tests/test_messages_from_files.py +++ b/cli/tests/test_messages_from_files.py @@ -55,6 +55,32 @@ def test_messages_from_csv(omop_resources: Path) -> None: assert messages == expected_messages +def test_whitespace_and_na_processing(omop_resources: Path) -> None: + """ + GIVEN a csv with leading and trailing whitespace, a duplicate entry + and ones with no image identifiers (empty and whitespaces). + WHEN the messages are generated from the directory + THEN one message should be generated, with no leading or trailing whitespace + """ + # Arrange + test_csv = omop_resources / "test_whitespace_and_na_processing.csv" + messages_df = read_patient_info(test_csv) + # Act + messages = messages_from_df(messages_df) + # Assert + assert messages == [ + Message( + procedure_occurrence_id=0, + mrn="patient_identifier", + accession_number="123456789", + study_uid="1.2.3.4.5.6.7.8", + project_name="ms-pinpoint-test", + extract_generated_timestamp=datetime.datetime.fromisoformat("2023-01-01T00:01:00Z"), + study_date=datetime.date.fromisoformat("2022-01-01"), + ), + ] + + def test_messages_from_csv_multiple_projects( omop_resources: Path, rows_in_session, mock_publisher ) -> None: diff --git a/docker-compose.yml b/docker-compose.yml index 321c30a14..0f4c753b3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,8 +74,10 @@ services: hasher-api: build: context: . - dockerfile: ./docker/hasher-api/Dockerfile + dockerfile: ./docker/pixl-python/Dockerfile + target: hasher_api args: + PIXL_PACKAGE_DIR: hasher <<: *build-args-common environment: <<: [*proxy-common, *pixl-common-env] @@ -93,7 +95,6 @@ services: networks: - pixl-net healthcheck: - test: ["CMD", "curl", "-f", "http://hasher-api:8000/heart-beat"] interval: 10s timeout: 30s retries: 5 @@ -102,9 +103,11 @@ services: orthanc-anon: build: context: . - dockerfile: ./docker/orthanc-anon/Dockerfile + dockerfile: ./docker/orthanc/Dockerfile + target: pixl_orthanc_anon args: <<: *build-args-common + ORTHANC_DIR: orthanc-anon ORTHANC_CONCURRENT_JOBS: ${ORTHANC_CONCURRENT_JOBS} platform: linux/amd64 command: /run/secrets @@ -164,16 +167,18 @@ services: "/probes/test-aliveness.py --user=$ORTHANC_USERNAME --pwd=$ORTHANC_PASSWORD", ] start_period: 10s - retries: 2 + retries: 10 interval: 3s timeout: 2s orthanc-raw: build: context: . - dockerfile: ./docker/orthanc-raw/Dockerfile + dockerfile: ./docker/orthanc/Dockerfile + target: pixl_orthanc_raw args: <<: *build-args-common + ORTHANC_DIR: orthanc-raw ORTHANC_RAW_MAXIMUM_STORAGE_SIZE: ${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE} ORTHANC_RAW_JOB_HISTORY_SIZE: ${ORTHANC_RAW_JOB_HISTORY_SIZE} ORTHANC_CONCURRENT_JOBS: ${ORTHANC_CONCURRENT_JOBS} @@ -226,7 +231,7 @@ services: restart: "always" queue: - image: rabbitmq:3.13.6-management@sha256:54f115b7c9c619e59b703e326ebfc7fc762f8c7321eee34f6fc0646457fffe20 + image: rabbitmq:3.13.7-management@sha256:1c32767bb8f7afb93fe99b890c05a250936bc2836fa3fd0154058f3953207095 hostname: queue-host environment: RABBITMQ_DEFAULT_USER: ${RABBITMQ_USERNAME} @@ -249,8 +254,10 @@ services: export-api: build: context: . 
- dockerfile: ./docker/export-api/Dockerfile + dockerfile: ./docker/pixl-python/Dockerfile + target: export_api args: + PIXL_PACKAGE_DIR: pixl_export <<: *build-args-common environment: <<: @@ -297,8 +304,10 @@ services: imaging-api: build: context: . - dockerfile: ./docker/imaging-api/Dockerfile + dockerfile: ./docker/pixl-python/Dockerfile + target: imaging_api args: + PIXL_PACKAGE_DIR: pixl_imaging <<: *build-args-common depends_on: queue: @@ -308,7 +317,6 @@ services: orthanc-anon: condition: service_healthy healthcheck: - test: curl -f http://0.0.0.0:8000/heart-beat interval: 10s timeout: 30s retries: 5 diff --git a/docker/README.md b/docker/README.md index d423d8cb6..083e669bc 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,6 +1,10 @@ ## 'PIXL/docker' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[export-api](./export-api/README.md)

@@ -14,9 +18,17 @@

[postgres](./postgres/README.md)

-### Files
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
-.dockerignore
+| **Configuration** | **User docs** | **Housekeeping** |
+| :--- | :--- | :--- |
+| common.env | README.md | .dockerignore |
-common.env
+
+</details>
diff --git a/docker/export-api/README.md b/docker/export-api/README.md deleted file mode 100644 index f12d22846..000000000 --- a/docker/export-api/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## 'PIXL/docker/export-api' Directory Contents - -### Files - -Dockerfile - diff --git a/docker/hasher-api/Dockerfile b/docker/hasher-api/Dockerfile deleted file mode 100644 index 0481ea95b..000000000 --- a/docker/hasher-api/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -FROM python:3.12.4-slim-bullseye@sha256:26ce493641ad3b1c8a6202117c31340c7bbb2dc126f1aeee8ea3972730a81dc6 -SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] - -ARG TEST="false" - -# OS setup -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update && \ - apt-get install --yes --no-install-recommends procps ca-certificates \ - iproute2 git curl libpq-dev curl gnupg g++ locales tzdata -RUN sed -i '/en_GB.UTF-8/s/^# //g' /etc/locale.gen && locale-gen - -# clean up -RUN apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Install requirements before copying modules -COPY ./pixl_core/pyproject.toml ./pixl_core/pyproject.toml -COPY ./hasher/pyproject.toml . -RUN --mount=type=cache,target=/root/.cache \ - pip3 install --no-cache-dir pixl_core/ \ - && pip3 install --no-cache-dir /app/ - -COPY ./pixl_core/ pixl_core/ -COPY ./hasher/ . -RUN --mount=type=cache,target=/root/.cache \ - pip install --no-cache-dir --force-reinstall --no-deps pixl_core/ . && \ - if [ "$TEST" = "true" ]; \ - then pip install --no-cache-dir --force-reinstall --no-deps pixl_core/[test] \ - --no-cache-dir --force-reinstall --no-deps .[test]; fi - -ENTRYPOINT ["uvicorn", "hasher.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/docker/hasher-api/README.md b/docker/hasher-api/README.md index f54e14972..7500b7a27 100644 --- a/docker/hasher-api/README.md +++ b/docker/hasher-api/README.md @@ -1,6 +1,14 @@ ## 'PIXL/docker/hasher-api' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-Dockerfile
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| Dockerfile | README.md |
+
+</details>
diff --git a/docker/imaging-api/Dockerfile b/docker/imaging-api/Dockerfile deleted file mode 100644 index 73fa0728b..000000000 --- a/docker/imaging-api/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -FROM python:3.12.4-slim-bullseye@sha256:26ce493641ad3b1c8a6202117c31340c7bbb2dc126f1aeee8ea3972730a81dc6 -SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] - -ARG TEST="false" - -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update && \ - apt-get install --yes --no-install-recommends \ - procps \ - ca-certificates \ - iproute2 \ - libpq-dev \ - curl \ - gnupg \ - locales \ - tzdata -RUN sed -i '/en_GB.UTF-8/s/^# //g' /etc/locale.gen && locale-gen -RUN apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -# Install requirements before copying modules -COPY ./pixl_core/pyproject.toml ./pixl_core/pyproject.toml -COPY ./pixl_imaging/pyproject.toml ./pixl_imaging/pyproject.toml -RUN --mount=type=cache,target=/root/.cache \ - pip3 install --no-cache-dir pixl_core/ \ - && pip3 install --no-cache-dir pixl_imaging/ - -COPY ./pixl_core/ pixl_core/ -COPY ./pixl_imaging/ . -RUN --mount=type=cache,target=/root/.cache \ - pip install --no-cache-dir --force-reinstall --no-deps pixl_core/ . && \ - if [ "$TEST" = "true" ]; \ - then pip install --no-cache-dir --force-reinstall --no-deps pixl_core/[test] \ - --no-cache-dir --force-reinstall --no-deps .[test]; fi - -ENTRYPOINT ["/app/scripts/migrate_and_run.sh"] diff --git a/docker/imaging-api/README.md b/docker/imaging-api/README.md index 400bee965..72ff585b6 100644 --- a/docker/imaging-api/README.md +++ b/docker/imaging-api/README.md @@ -1,6 +1,14 @@ ## 'PIXL/docker/imaging-api' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-Dockerfile
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| Dockerfile | README.md |
+
+</details>
diff --git a/docker/orthanc-anon/Dockerfile b/docker/orthanc-anon/Dockerfile deleted file mode 100644 index 7415d0eac..000000000 --- a/docker/orthanc-anon/Dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -FROM orthancteam/orthanc:24.7.3@sha256:57a3d037729897331027ddc00c12695b50f1effbbf805f855396f3d0248d2d5f -SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] - -# Create a virtual environment, recommended since python 3.11 and Debian bookworm based images -# where you get a warning when installing system-wide packages. -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update && \ - apt-get install --yes --no-install-recommends python3-venv tzdata -RUN python3 -m venv /.venv - -# Install curl, used in system tests -RUN apt-get --assume-yes install curl - -# Install requirements before copying modules -COPY ./pixl_core/pyproject.toml /pixl_core/pyproject.toml -COPY ./pixl_dcmd/pyproject.toml /pixl_dcmd/pyproject.toml - -RUN --mount=type=cache,target=/root/.cache \ - /.venv/bin/pip install pixl_core/ \ - && /.venv/bin/pip install pixl_dcmd/ - -ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/ -COPY ./orthanc/orthanc-anon/plugin/download_dicom_spec.py /etc/orthanc/download_dicom_spec.py -RUN --mount=type=cache,target=/root/.cache \ - python3 /etc/orthanc/download_dicom_spec.py - -COPY ./orthanc/orthanc-anon/plugin/pixl.py /etc/orthanc/pixl.py - -COPY ./pixl_core/ /pixl_core -RUN --mount=type=cache,target=/root/.cache \ - /.venv/bin/pip install --no-cache-dir --force-reinstall --no-deps ./pixl_core - -COPY ./pixl_dcmd/ /pixl_dcmd -RUN --mount=type=cache,target=/root/.cache \ - /.venv/bin/pip install --no-cache-dir --force-reinstall --no-deps ./pixl_dcmd - -ARG ORTHANC_CONCURRENT_JOBS -COPY ./orthanc/orthanc-anon/config /run/secrets - -RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json diff --git a/docker/orthanc-anon/README.md b/docker/orthanc-anon/README.md index b1f2a60bc..d2b913295 100644 --- a/docker/orthanc-anon/README.md +++ b/docker/orthanc-anon/README.md @@ -1,6 +1,14 @@ ## 'PIXL/docker/orthanc-anon' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-Dockerfile
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| Dockerfile | README.md |
+
+</details>
diff --git a/docker/orthanc-raw/README.md b/docker/orthanc-raw/README.md deleted file mode 100644 index a137fb98a..000000000 --- a/docker/orthanc-raw/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## 'PIXL/docker/orthanc-raw' Directory Contents - -### Files - -Dockerfile - diff --git a/docker/orthanc-raw/Dockerfile b/docker/orthanc/Dockerfile similarity index 68% rename from docker/orthanc-raw/Dockerfile rename to docker/orthanc/Dockerfile index d0e6800cf..e1d3b2482 100644 --- a/docker/orthanc-raw/Dockerfile +++ b/docker/orthanc/Dockerfile @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -FROM orthancteam/orthanc:24.7.3@sha256:57a3d037729897331027ddc00c12695b50f1effbbf805f855396f3d0248d2d5f +FROM orthancteam/orthanc:24.7.3@sha256:57a3d037729897331027ddc00c12695b50f1effbbf805f855396f3d0248d2d5f AS pixl_orthanc_apt SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] # Create a virtual environment, recommended since python 3.11 and Debian bookworm based images @@ -20,6 +20,24 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ apt-get install --yes --no-install-recommends python3-venv tzdata RUN python3 -m venv /.venv +ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/ + +# Install curl for now, but try to remove this dependency +RUN apt-get --assume-yes install curl + +FROM pixl_orthanc_apt AS pixl_orthanc_with_spec +# This part changes rarely, so do it nice and early to avoid redoing it every time we change our code. +# It does have a dependency though, which would normally be fulfilled by our project files, so install that +# manually. +# Do it in dead end build stage to discard this environment afterwards, +# and because the spec is only needed in orthanc-anon. 
+RUN /.venv/bin/pip install dicom-validator
+COPY ./orthanc/orthanc-anon/plugin/download_dicom_spec.py /etc/orthanc/download_dicom_spec.py
+RUN --mount=type=cache,target=/root/.cache \
+    python3 /etc/orthanc/download_dicom_spec.py
+
+
+FROM pixl_orthanc_apt AS pixl_orthanc_base
 
 # Install requirements before copying modules
 COPY ./pixl_core/pyproject.toml /pixl_core/pyproject.toml
@@ -37,17 +55,22 @@ COPY ./pixl_dcmd/ /pixl_dcmd
 RUN --mount=type=cache,target=/root/.cache \
     /.venv/bin/pip install --no-cache-dir --force-reinstall --no-deps ./pixl_dcmd
 
-COPY ./orthanc/orthanc-raw/plugin/pixl.py /etc/orthanc/pixl.py
+ARG ORTHANC_DIR
+COPY ./orthanc/${ORTHANC_DIR}/plugin/pixl.py /etc/orthanc/pixl.py
+COPY ./orthanc/${ORTHANC_DIR}/config /run/secrets
 
 # Orthanc can't substitute environment veriables as integers so copy and replace before running
+ARG ORTHANC_CONCURRENT_JOBS
+RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json
+
+FROM pixl_orthanc_base AS pixl_orthanc_raw
+
 ARG ORTHANC_RAW_MAXIMUM_STORAGE_SIZE
 ARG ORTHANC_RAW_JOB_HISTORY_SIZE
-ARG ORTHANC_CONCURRENT_JOBS
 ARG PIXL_DICOM_TRANSFER_TIMEOUT
-COPY ./orthanc/orthanc-raw/config /run/secrets
 RUN sed -i "s/\${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE}/${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE:-0}/g" /run/secrets/orthanc.json
 RUN sed -i "s/\${ORTHANC_RAW_JOB_HISTORY_SIZE}/${ORTHANC_RAW_JOB_HISTORY_SIZE:-100}/g" /run/secrets/orthanc.json
-RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json
 RUN sed -i "s/\${ORTHANC_RAW_STABLE_SECONDS}/${PIXL_DICOM_TRANSFER_TIMEOUT:-600}/g" /run/secrets/orthanc.json
 
-ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/
+FROM pixl_orthanc_base AS pixl_orthanc_anon
+COPY --from=pixl_orthanc_with_spec /root/dicom-validator /root/dicom-validator

diff --git a/docker/orthanc/README.md b/docker/orthanc/README.md
new file mode 100644
index 000000000..e949c46ad
--- /dev/null
+++ b/docker/orthanc/README.md
@@ -0,0 +1,14 @@
+## 'PIXL/docker/orthanc' Directory Contents
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| Dockerfile | README.md |
+
+</details>
+

diff --git a/docker/export-api/Dockerfile b/docker/pixl-python/Dockerfile
similarity index 74%
rename from docker/export-api/Dockerfile
rename to docker/pixl-python/Dockerfile
index 48f633148..84f188c57 100644
--- a/docker/export-api/Dockerfile
+++ b/docker/pixl-python/Dockerfile
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-FROM python:3.12.4-slim-bullseye@sha256:26ce493641ad3b1c8a6202117c31340c7bbb2dc126f1aeee8ea3972730a81dc6
+FROM python:3.12.4-slim-bullseye@sha256:26ce493641ad3b1c8a6202117c31340c7bbb2dc126f1aeee8ea3972730a81dc6 AS pixl_python_base
 SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]
 
 ARG TEST="false"
@@ -31,20 +31,32 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 RUN sed -i '/en_GB.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
 RUN apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*
 
+HEALTHCHECK CMD /usr/bin/curl -f http://0.0.0.0:8000/heart-beat || exit 1
+
 WORKDIR /app
 
+# specify what we're installing using build time arg
+ARG PIXL_PACKAGE_DIR
 # Install requirements before copying modules
 COPY ./pixl_core/pyproject.toml ./pixl_core/pyproject.toml
-COPY ./pixl_export/pyproject.toml ./pixl_export/pyproject.toml
+COPY ./$PIXL_PACKAGE_DIR/pyproject.toml ./$PIXL_PACKAGE_DIR/pyproject.toml
 RUN pip3 install --no-cache-dir pixl_core/ \
-    && pip3 install --no-cache-dir pixl_export/
+    && pip3 install --no-cache-dir $PIXL_PACKAGE_DIR/
 
+# Install our code
 COPY ./pixl_core/ pixl_core/
-COPY ./pixl_export/ .
+COPY ./$PIXL_PACKAGE_DIR/ .
 RUN pip install --no-cache-dir --force-reinstall --no-deps pixl_core/ \
     --no-cache-dir --force-reinstall --no-deps . && \
     if [ "$TEST" = "true" ]; then pip install --no-cache-dir pixl_core/[test] .[test]; fi
 
-HEALTHCHECK CMD /usr/bin/curl -f http://0.0.0.0:8000/heart-beat || exit 1
+# Each container should be run with a different entry point
+FROM pixl_python_base AS export_api
 ENTRYPOINT ["uvicorn", "pixl_export.main:app", "--host", "0.0.0.0", "--port", "8000"]
+
+FROM pixl_python_base AS hasher_api
+ENTRYPOINT ["uvicorn", "hasher.main:app", "--host", "0.0.0.0", "--port", "8000"]
+
+FROM pixl_python_base AS imaging_api
+ENTRYPOINT ["/app/scripts/migrate_and_run.sh"]

diff --git a/docker/pixl-python/README.md b/docker/pixl-python/README.md
new file mode 100644
index 000000000..5890f1ee2
--- /dev/null
+++ b/docker/pixl-python/README.md
@@ -0,0 +1,14 @@
+## 'PIXL/docker/pixl-python' Directory Contents
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| Dockerfile | README.md |
+
+</details>
+ diff --git a/docker/postgres/Dockerfile b/docker/postgres/Dockerfile index 29e030a42..bf615f6e8 100644 --- a/docker/postgres/Dockerfile +++ b/docker/postgres/Dockerfile @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -FROM postgres:16-bookworm@sha256:d0f363f8366fbc3f52d172c6e76bc27151c3d643b870e1062b4e8bfe65baf609 +FROM postgres:16-bookworm@sha256:5620f242bbc0e17478556102327e7efcf60ab48de3607c9e0ea98800841785ec # OS setup RUN export DEBIAN_FRONTEND=noninteractive && \ diff --git a/docker/postgres/README.md b/docker/postgres/README.md index f43084e01..7135418ac 100644 --- a/docker/postgres/README.md +++ b/docker/postgres/README.md @@ -1,6 +1,14 @@ ## 'PIXL/docker/postgres' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-Dockerfile
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| Dockerfile | README.md |
+
+</details>
diff --git a/docs/design/decision-record/0001-multiservice-architecture.md b/docs/design/decision-record/0001-multiservice-architecture.md
new file mode 100644
index 000000000..c879f8153
--- /dev/null
+++ b/docs/design/decision-record/0001-multiservice-architecture.md
@@ -0,0 +1,28 @@
+# Multi-service Architecture
+
+* Status: accepted
+* Deciders: Original PIXL team
+* Date: 2023-11-01 (retrospectively)
+
+## Context and Problem Statement
+
+We want a software solution made up of distinct pieces of functionality. How do we structure those services?
+
+## Decision Drivers
+
+* Will need to allow for concurrent processing of tasks
+* The team's lingua franca is Python
+
+## Considered Options
+
+* Multi-service architecture
+* Monolith architecture
+
+## Decision Outcome
+
+Chosen option: "Multi-service architecture", because it allows us to:
+
+- Break up our code into logical packages and services
+- Get around a single Python process being blocked by the global interpreter lock, as each service runs in its own process
+- Be deliberate about where code should go, as services cannot reach into each other's code
+- Work within UCLH's restriction to deploy services using docker and docker compose, while staying open to extending into kubernetes should we want that

diff --git a/docs/design/decision-record/0002-message-processing.md b/docs/design/decision-record/0002-message-processing.md
new file mode 100644
index 000000000..03601aaa5
--- /dev/null
+++ b/docs/design/decision-record/0002-message-processing.md
@@ -0,0 +1,47 @@
+# Message-based Processing of Images
+
+* Status: accepted
+* Deciders: Original PIXL team, most recent changes: Stef Piatek & Paul Smith
+* Date: 2024-12-12
+
+## Context and Problem Statement
+
+- We need a way to buffer the messages awaiting processing.
+  We expect hundreds to tens of thousands of imaging studies to be requested per project,
+  and want to find each study in the source systems individually.
+
+## Decision Drivers
+
+- Be able to process multiple research projects at the same time
+- Should be persistent if services are taken down
+- Allow for a secondary DICOM source to be used if a study isn't found in the primary
+- Limit the total number of images that are being processed for a given source system
+- Studies that have already been successfully exported for a project should not get processed again
+
+## Considered Options
+
+* Use of the database alone to schedule a run, with the CLI driving a long-running job to process all studies in a research project
+* Use of queues to buffer requests that the `imaging-api` processes, with the database tracking the status of exported studies
+
+## Decision Outcome
+
+Chosen option: `Queue for buffering requests, database to track status of export`,
+because it fulfills all requirements and allows us to invest in the use of generic technologies.
+
+## Pros and Cons of the Options
+
+### Database alone
+
+* Good, simple to set up and use. Single solution for buffering requests and tracking status
+* Bad, bespoke solution where we could use a technology that is transferable to other situations
+* Bad, complex setup to limit total queries per source system
+
+### Queue with database for status
+
+* Good, fulfills all requirements fairly easily, creating a queue for primary and another for secondary imaging sources
+* Good, because we have previously invested in RabbitMQ as a technology
+* Bad, extra services to manage and extra development
+* Bad, because the original implementation was broken and required effort to fix, though we learned more about the libraries we're using along the way.
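To make ADR-0002's chosen option concrete, here is a minimal sketch of the two halves, with RabbitMQ buffering the requests and the database acting as the export ledger. It assumes `pika` and SQLAlchemy; the table, column, and queue names are illustrative rather than PIXL's actual schema:

```python
"""Sketch only: queue buffers requests, database tracks export status."""
import json

import pika
from sqlalchemy import create_engine, text

engine = create_engine("postgresql+psycopg2://pixl:pixl@localhost/pixl")


def publish_pending(messages: list[dict], queue: str = "imaging-primary") -> None:
    # Skip studies the database already marks as exported
    with engine.connect() as conn:
        rows = conn.execute(
            text("SELECT accession_number FROM image WHERE exported_at IS NOT NULL")
        )
        exported = {row[0] for row in rows}
    todo = [m for m in messages if m["accession_number"] not in exported]

    # Buffer the remaining requests on a durable queue so they survive restarts
    connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
    channel = connection.channel()
    channel.queue_declare(queue=queue, durable=True)
    for message in todo:
        channel.basic_publish(exchange="", routing_key=queue, body=json.dumps(message))
    connection.close()
```

A durable queue plus an `exported_at` column is what lets requests survive a restart while keeping already-exported studies from being re-queued.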
diff --git a/docs/design/decision-record/0003-dicom-processing.md b/docs/design/decision-record/0003-dicom-processing.md
new file mode 100644
index 000000000..b70bcd71c
--- /dev/null
+++ b/docs/design/decision-record/0003-dicom-processing.md
@@ -0,0 +1,44 @@
+# DICOM server and processing
+
+* Status: accepted
+* Deciders: Original PIXL team
+* Date: 2023-11-01 (retrospectively)
+
+## Context and Problem Statement
+
+We need a DICOM server to query DICOM images, store them, anonymise them and export them.
+
+## Decision Drivers
+
+* Will need a robust DICOM server that has seen real-world use
+* Keep original studies in a local cache, to reduce use of the clinical imaging systems when anonymisation or export fails
+* The team's lingua franca is Python
+* Per-project anonymisation profiles and custom hashing of fields will require plugins to be written for anonymisation
+* UCLH infrastructure allows for running docker, but we don't have admin accounts and cannot install software directly onto the machines.
+
+## Considered Options
+
+* XNAT server
+* Orthanc server
+
+## Decision Outcome
+
+Chosen option: `Orthanc`,
+because it's relatively lightweight, actively developed, and allows for Python-based extensions to be written.
+
+## Pros and Cons of the Options
+
+### XNAT
+
+* Good, ARC has a history of using this in the medical imaging subgroup.
+* Good, widely regarded
+* Bad, heavyweight and has many more features than we need to use. May take longer to learn and deploy
+* Bad, does not allow for Python-based plugins to be used for anonymisation without getting into running docker in docker
+
+### Orthanc
+
+* Good, has been battle tested
+* Good, has a DICOMWeb plugin to allow for export via that modality
+* Good, allows for Python-based plugins and running in docker
+* Bad, no previous usage within ARC. There will be teething problems

diff --git a/docs/design/decision-record/0004-multiple-project-configuration.md b/docs/design/decision-record/0004-multiple-project-configuration.md
new file mode 100644
index 000000000..5b7510e7a
--- /dev/null
+++ b/docs/design/decision-record/0004-multiple-project-configuration.md
@@ -0,0 +1,68 @@
+# Multiple-project configuration
+
+* Status: accepted
+* Deciders: Milan Malfait, Peter Tsrunchev, Jeremy Stein, Stef Piatek
+* Date: 2024-03-05
+
+Technical Story: [PIXL can take multiple projects](https://github.com/SAFEHR-data/PIXL/issues/330)
+
+## Context and Problem Statement
+
+Each project should be able to define its own anonymisation profile and export destinations.
+How can we store the secrets?
+
+![pixl-multi-project-config.png](../diagrams/pixl-multi-project-config.png)
+
+## Decision Drivers
+
+* When hashing a given field for an imaging study for two different projects, each project's hash should be different.
+  This is to avoid inappropriate linkage of data between research projects.
+* Secure storage of secrets, especially for connection to destinations and per-project hashing salts
+
+## Considered Options
+
+* file-based: env files for docker or structured files for secrets
+* self-hosted secret storage service
+* azure keyvault
+
+## Decision Outcome
+
+Chosen option: "azure keyvault", because it's low hassle and gives us good control of secret access.
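As a rough illustration of the chosen option, a minimal sketch of fetching a per-project hashing salt, using the `azure-identity` and `azure-keyvault` packages that the `hasher/pyproject.toml` diff further down pins. The vault URL and the secret-naming convention are made up for the example:

```python
"""Sketch only: read a per-project hashing salt from Azure Key Vault.

Credentials are resolved from the environment (e.g. a service principal)
by DefaultAzureCredential; the vault URL and secret name are illustrative.
"""
from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient

credential = DefaultAzureCredential()
client = SecretClient(
    vault_url="https://example-pixl-vault.vault.azure.net",
    credential=credential,
)

# One salt per project, so the same field hashes differently across projects
salt = client.get_secret("salt-ms-pinpoint-test").value
```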
+
+### Positive Consequences
+
+* Single place for secrets, which multiple deployments can access
+* Can have secrets which expire, so even if compromised we limit the amount of secret leakage possible
+* Per-project salts stored in a separate keyvault from the export endpoints
+* Only need to define the destination type, with the keyvault defining all the connection details
+
+### Negative Consequences
+
+* Requires other implementations to set up their own azure storage accounts or develop new secret management
+* Developers also have to update a `.env` file for running the system test
+* Slight increase in cost, which can be slightly offset by caching credentials
+
+## Pros and Cons of the Options
+
+### File-based
+
+* Good, simple to do
+* Bad, will keep on expanding as time goes on, which can be a pain to maintain
+* Bad, no access control beyond unix permissions
+
+### Self-hosted secret storage
+
+* Good, fine-grained access control possible
+* Good, free in terms of upfront cost
+* Bad, another service to maintain, with residual costs
+
+### Azure keyvault
+
+* Good, fine-grained access control possible
+* Bad, slight increase in cost
+
+## Links
+
+* Routing of studies based on projects in [ADR-0005](0005-project-based-study-routing.md)

diff --git a/docs/design/decision-record/0005-project-based-study-routing.md b/docs/design/decision-record/0005-project-based-study-routing.md
new file mode 100644
index 000000000..3b2686ead
--- /dev/null
+++ b/docs/design/decision-record/0005-project-based-study-routing.md
@@ -0,0 +1,50 @@
+# Project-based study routing
+
+* Status: accepted
+* Deciders: Stef Piatek, Paul Smith
+* Date: 2024-11-27
+
+Technical Story: [PIXL can take multiple projects](https://github.com/SAFEHR-data/PIXL/issues/330)
+
+## Context and Problem Statement
+
+Each study sent to `orthanc-anon` needs to be de-identified using the project-specific configuration.
+We need a way to pass this information along with the DICOM file.
+
+## Considered Options
+
+* DICOM tag: adding a custom DICOM tag for the project name
+* custom REST API endpoint: creating a custom REST API endpoint for `orthanc-anon` to pull the data from `orthanc-raw`
+
+## Decision Outcome
+
+Chosen option: "custom REST API endpoint", because the project tag updating was causing `orthanc-raw` to crash,
+and we were no longer using study stability to control export.
+
+## Pros and Cons of the Options
+
+### DICOM tag
+
+Add a private creator group to instances as they arrive, and a dummy value in the custom tag.
+Once the study has been pulled from the DICOM source, update the tag with the filename stem of the project config.
+
+* Good, because you can see which study was most recently exported
+* Bad, `orthanc-raw` started crashing when updating the DICOM tag via the orthanc API on large studies
+* Bad, because we were having to update the DICOM tag without changing the instance UIDs,
+  so that we could still check for missing instances for studies which already exist in `orthanc-raw`
+* Bad, we saw studies whose instances didn't have their custom DICOM tag updated
+* Bad, could have a race condition where the same study is being exported by two projects at once
+
+### Custom REST API endpoint
+
+Once the study has been pulled from the DICOM source, send a REST request to `orthanc-anon` with the study UID and project.
+`orthanc-anon` then adds this job to a threadpool and returns a 200 status to the client.
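For orientation, a minimal sketch of this callback shape, mirroring the `ImportStudiesFromRaw` handler that appears in the `orthanc/orthanc-anon/plugin/pixl.py` diff further down. The route name and worker count are illustrative, and the import body is stubbed out:

```python
"""Sketch only: an Orthanc Python-plugin REST endpoint that queues slow work."""
import json
from concurrent.futures import ThreadPoolExecutor

import orthanc  # provided by the Orthanc Python plugin at runtime

executor = ThreadPoolExecutor(max_workers=4)


def _import_studies_from_raw(resource_ids, study_uids, project_name) -> None:
    """Pull studies from orthanc-raw and de-identify with the project's config."""


def on_import_request(output, uri, **request) -> None:  # noqa: ARG001
    payload = json.loads(request["body"])
    # Hand the slow de-identification work to the threadpool...
    executor.submit(
        _import_studies_from_raw,
        payload["ResourceIDs"],
        payload["StudyInstanceUIDs"],
        payload["ProjectName"],
    )
    # ...and answer the client immediately with a 200
    output.AnswerBuffer(json.dumps({"Message": "Ok"}), "application/json")


orthanc.RegisterRestCallback("/import-from-raw", on_import_request)
```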
+
+* Good, keeps the original instances unaltered
+* Good, thread-pooling allows for faster de-identification
+* Good, simpler flow
+* Bad, we can't alter the queue of de-identification jobs short of taking down `orthanc-anon`
+
+## Links
+
+* Related to multiple project configuration [ADR-0004](0004-multiple-project-configuration.md)

diff --git a/docs/design/decision-record/0006-data-export.md b/docs/design/decision-record/0006-data-export.md
new file mode 100644
index 000000000..127714c01
--- /dev/null
+++ b/docs/design/decision-record/0006-data-export.md
@@ -0,0 +1,45 @@
+# Export of parquet files and DICOM data
+
+* Status: accepted
+* Deciders: Haroon Chughtai, Jeremy Stein, Milan Malfait, Ruaridh Gollifer, Stef Piatek
+* Date: 2024-02-26
+
+## Context and Problem Statement
+
+The pipeline needs to be able to export DICOM images and structured data files to different endpoints.
+
+## Decision Drivers
+
+* We expect that some projects will have more data than we can store locally. Will need a rolling export of images
+* We will need to be able to export images and structured data via FTPS in an automated fashion
+* We will need to be able to export images via DICOMWeb
+
+## Considered Options
+
+* Shared python library for exporting of data, used in `orthanc-anon` and the `pixl` CLI.
+* `export-api` service, which can export both DICOM and structured data files.
+
+## Decision Outcome
+
+Chosen option: "`export-api` service", for clear separation of responsibilities.
+
+## Pros and Cons of the Options
+
+### Shared python library
+
+Both `orthanc-anon` and the `pixl` CLI would import a shared export library and run their exports in-process.
+
+* Good, one less service to maintain
+* Good, export via DICOMWeb is using the orthanc API already
+* Bad, duplication of implementation for export
+* Bad, duplication of areas where secrets are used
+
+### `export-api` service
+
+Instead of a shared library, the export code would live in this service alone.
+
+* Good, single service that will access all secrets and orchestrate exports
+* Good, allows caching of export secrets in a long-running service
+* Bad, would require extra code for interacting with the service from the CLI for parquet export

diff --git a/docs/design/decision-record/index.md b/docs/design/decision-record/index.md
new file mode 100644
index 000000000..ef2f7031f
--- /dev/null
+++ b/docs/design/decision-record/index.md
@@ -0,0 +1,19 @@
+# Architectural Decision Log
+
+This log lists the architectural decisions for PIXL.
+
+Using the [Markdown Architectural Decision Records](https://adr.github.io/madr/)
+
+Regenerate the content by using `adr-log -i` from this directory.
+You can install it via `npm install -g adr-log`, removing the template + + + +* [ADR-0001](0001-multiservice-architecture.md) - Multi-service Architecture +* [ADR-0002](0002-message-processing.md) - Message-based Processing of Images +* [ADR-0003](0003-dicom-processing.md) - DICOM server and processing +* [ADR-0004](0004-multiple-project-configuration.md) - Multiple-project configuration +* [ADR-0005](0005-project-based-study-routing.md) - Project-based study routing +* [ADR-0006](0006-data-export.md) - Export of parquet files and DICOM data + + diff --git a/docs/design/decision-record/template.md b/docs/design/decision-record/template.md new file mode 100644 index 000000000..b779044d8 --- /dev/null +++ b/docs/design/decision-record/template.md @@ -0,0 +1,72 @@ +# [Template: short title of solved problem and solution] + +* Status: [proposed | rejected | accepted | deprecated | … | superseded by [ADR-0105](0105-example.md)] +* Deciders: [list everyone involved in the decision] +* Date: [YYYY-MM-DD when the decision was last updated] + +Technical Story: [description | ticket/issue URL] + +## Context and Problem Statement + +[Describe the context and problem statement, e.g., in free form using two to three sentences. You may want to articulate the problem in form of a question.] + +## Decision Drivers + +* [driver 1, e.g., a force, facing concern, …] +* [driver 2, e.g., a force, facing concern, …] +* … + +## Considered Options + +* [option 1] +* [option 2] +* [option 3] +* … + +## Decision Outcome + +Chosen option: "[option 1]", because [justification. e.g., only option, which meets k.o. criterion decision driver | which resolves force force | … | comes out best (see below)]. + +### Positive Consequences + +* [e.g., improvement of quality attribute satisfaction, follow-up decisions required, …] +* … + +### Negative Consequences + +* [e.g., compromising quality attribute, follow-up decisions required, …] +* … + +## Pros and Cons of the Options + +### [option 1] + +[example | description | pointer to more information | …] + +* Good, because [argument a] +* Good, because [argument b] +* Bad, because [argument c] +* … + +### [option 2] + +[example | description | pointer to more information | …] + +* Good, because [argument a] +* Good, because [argument b] +* Bad, because [argument c] +* … + +### [option 3] + +[example | description | pointer to more information | …] + +* Good, because [argument a] +* Good, because [argument b] +* Bad, because [argument c] +* … + +## Links + +* [Link type] [Link to ADR] +* … diff --git a/hasher/README.md b/hasher/README.md index b9055be2c..faea710d2 100644 --- a/hasher/README.md +++ b/hasher/README.md @@ -61,6 +61,8 @@ See the [Azure Key vault setup](../docs/setup/azure-keyvault.md) documentation f Save the credentials in `.secrets.env` and a LastPass `Hasher API secrets` note. +SK QUESTION: is the reference to Last Pass something that is specific to us or is it a dependency somebody else would need. actually I assume the whole Azure thing is rather how we have chosen to do that rather than a necessity for somebody i.e. they might use a different system for storing their hashes + ``` HASHER_API_AZ_CLIENT_ID= HASHER_API_AZ_CLIENT_PASSWORD= @@ -70,15 +72,29 @@ HASHER_API_AZ_KEY_VAULT_NAME= ## 'PIXL/hasher' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[src](./src/README.md)

[tests](./tests/README.md)

-### Files
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
+
+</details>
-pyproject.toml -README.md diff --git a/hasher/pyproject.toml b/hasher/pyproject.toml index 3f6eaaf5a..51c5626e4 100644 --- a/hasher/pyproject.toml +++ b/hasher/pyproject.toml @@ -8,12 +8,12 @@ requires-python = ">=3.10" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ "core==0.2.0rc0", - "azure-identity==1.16.1", + "azure-identity==1.19.0", "azure-keyvault==4.2.0", - "fastapi==0.112.0", - "hypothesis==6.109.0", - "requests==2.32.2", - "uvicorn==0.30.4", + "fastapi==0.115.6", + "hypothesis==6.122.3", + "requests==2.32.3", + "uvicorn==0.32.1", ] [project.optional-dependencies] diff --git a/hasher/src/README.md b/hasher/src/README.md index 99a56a701..0de380702 100644 --- a/hasher/src/README.md +++ b/hasher/src/README.md @@ -1,6 +1,24 @@ ## 'PIXL/hasher/src' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[hasher](./hasher/README.md)
+
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/hasher/src/hasher/README.md b/hasher/src/hasher/README.md index 40b064a8d..736088286 100644 --- a/hasher/src/hasher/README.md +++ b/hasher/src/hasher/README.md @@ -1,12 +1,17 @@ ## 'PIXL/hasher/src/hasher' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-endpoints.py
+
-hashing.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| endpoints.py | README.md |
+| hashing.py | |
+| main.py | |
+| __init__.py | |
-main.py
-
-__init__.py
+
+</details>
diff --git a/hasher/tests/README.md b/hasher/tests/README.md index 1a161940e..9f492bde3 100644 --- a/hasher/tests/README.md +++ b/hasher/tests/README.md @@ -1,12 +1,17 @@ ## 'PIXL/hasher/tests' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-conftest.py
+
-test_endpoints.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| conftest.py | README.md |
+| test_endpoints.py | |
+| test_hashing.py | |
+| __init__.py | |
-test_hashing.py
-
-__init__.py
+
+</details>
diff --git a/index.md b/index.md new file mode 100644 index 000000000..badac46d3 --- /dev/null +++ b/index.md @@ -0,0 +1,43 @@ + + + + +* [ADR-1](cli/README.md) - PIXL Driver + Command line interface +* [ADR-2](cli/tests/README.md) - +* [ADR-3](CODE_OF_CONDUCT.md) - Contributor Covenant Code of Conduct +* [ADR-4](CONTRIBUTING.md) - Contributing to `PIXL`. +* [ADR-5](docs/design/bigger_picture.md) - The bigger picture +* [ADR-0001](docs/design/decision-record/0001-multiservice-architecture.md) - Multi-service Architecture +* [ADR-0002](docs/design/decision-record/0002-message-processing.md) - Message-based Processing of Images +* [ADR-0003](docs/design/decision-record/0003-dicom-processing.md) - DICOM server and processing +* [ADR-0004](docs/design/decision-record/0004-secrets-storage.md) - Secrets Storage +* [ADR-6](docs/design/decision-record/index.md) - Architectural Decision Log +* [ADR-7](docs/design/decision-record/template.md) - [short title of solved problem and solution] +* [ADR-8](docs/file_types/parquet_files.md) - Parquet files you might encounter throughout PIXL +* [ADR-9](docs/services/ftp-server.md) - FTPS server +* [ADR-10](docs/services/pixl_database.md) - The PIXL database +* [ADR-11](docs/setup/azure-keyvault.md) - Azure Keyvault setup +* [ADR-12](docs/setup/developer.md) - Developer setup +* [ADR-13](docs/setup/uclh-infrastructure-setup.md) - UCLH Infrastructure setup instructions +* [ADR-14](hasher/README.md) - Hasher API +* [ADR-15](orthanc/orthanc-anon/docs/DicomServiceViaAAD.md) - Retrieving an access token for the DICOM service using and Azure AD application +* [ADR-16](orthanc/orthanc-anon/README.md) - Orthanc Anon +* [ADR-17](orthanc/orthanc-raw/README.md) - Orthanc Raw +* [ADR-18](orthanc/README.md) - ORTHANC instances +* [ADR-19](pixl_core/README.md) - Core +* [ADR-20](pixl_dcmd/README.md) - PIXL DICOM de-identifier +* [ADR-21](pixl_export/README.md) - PIXL Export API +* [ADR-22](pixl_imaging/alembic/README.md) - Alembic configuration +* [ADR-23](pixl_imaging/README.md) - PIXL Imaging API +* [ADR-24](postgres/README.md) - PIXL pipeline database +* [ADR-25](pytest-pixl/README.md) - PIXL pytest plugin +* [ADR-26](README.md) - [pixl-ci](https://github.com/SAFEHR-data/PIXL/actions/workflows/main.yml/badge.svg)](https://github.com/SAFEHR-data/PIXL/actions/workflows/main.yml) +* [ADR-27](test/README.md) - PIXL System Tests + + + + + + + + diff --git a/orthanc/README.md b/orthanc/README.md index 674d7d272..d6ea61701 100644 --- a/orthanc/README.md +++ b/orthanc/README.md @@ -12,7 +12,11 @@ functionality . ## 'PIXL/orthanc' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
[assets](./assets/README.md)

@@ -20,7 +24,18 @@

[orthanc-raw](./orthanc-raw/README.md)

-### Files
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
-README.md diff --git a/orthanc/assets/README.md b/orthanc/assets/README.md index e6468fad6..4ce46f006 100644 --- a/orthanc/assets/README.md +++ b/orthanc/assets/README.md @@ -1,10 +1,16 @@ ## 'PIXL/orthanc/assets' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-orthanc-anon-az-dicom.png
+
-orthanc-anon-web.png
+| **Images** | **User docs** |
+| :--- | :--- |
+| orthanc-anon-az-dicom.png | README.md |
+| orthanc-anon-web.png | |
+| orthanc-raw-web.png | |
-orthanc-raw-web.png
+
+</details>
diff --git a/orthanc/orthanc-anon/README.md b/orthanc/orthanc-anon/README.md
index 29417db44..d43f9ffbe 100644
--- a/orthanc/orthanc-anon/README.md
+++ b/orthanc/orthanc-anon/README.md
@@ -1,7 +1,7 @@
 # Orthanc Anon
 
 _The Orthanc instance responsible for anonymising DICOM data from PACS/VNA and forwarding the images
-to their final destination (currently the UCL Data Safe Haven)._
+to their final destination (at UCL/UCLH this is currently the UCL Data Safe Haven)._
 
 ## Setup
@@ -24,8 +24,8 @@ The following assumptions are made:
 ### Configuration
 
-- The Docker image is based on `orthancteam/orthanc`.
-- Configuration is driven through customised JSON config. files stored in the [config](./config/)
+- The Docker image is based on `orthancteam/orthanc`. <--- TODO: clarify where this is defined
+- Configuration is driven through customised JSON config. files stored in the [orthanc-anon/config](./config/)
   directory.
 - The files are populated with values from environment variables and injected into the container
   as secrets. Orthanc interprets all `.json` files in the `/run/secrets` mount as config. files.
@@ -50,7 +50,6 @@ Save credentials `.env` for 'Orthanc anon' and the Azure DICOM Service.
 ORTHANC_ANON_USERNAME=
 ORTHANC_ANON_PASSWORD=
 ORTHANC_ANON_AE_TITLE=
-ORTHANC_ANON_HTTP_TIMEOUT=60
 ENABLE_DICOM_WEB=true
 
 # DICOMweb endpoint
@@ -74,7 +73,7 @@ Start the instance via Docker compose.
 ### Step 3
 
-If you have chosen to expose the portas, you should now be able to navigate the web interface at `http://localhost:`, supply the chosen credentials and will be presented with the Orthanc web interface:
+If you have chosen to expose the ports, you should now be able to navigate to the web interface at `http://localhost:`, supply the chosen credentials and be presented with the Orthanc web interface:
 ![Orthanc Raw Web interface](../assets/orthanc-anon-web.png)
 
 ### Step 4
@@ -104,19 +103,36 @@ start.
 ## References
 
-- [Cheat sheet of the REST API](https://book.orthanc-server.com/users/rest-cheatsheet.html)
+- [Cheat sheet for the REST API](https://book.orthanc-server.com/users/rest-cheatsheet.html)
 
-## 'orthanc' Directory Contents
-
-### Subdirectories
-
-[assets](./assets/README.md)
+## 'PIXL/orthanc/orthanc-anon' Directory Contents
+<details>
+<summary>
+
+### Subdirectories with links to the relevant README
+
+</summary>
+
-[orthanc-anon](./orthanc-anon/README.md)
+
-[orthanc-raw](./orthanc-raw/README.md)
+[config](./config/README.md)
+
+[docs](./docs/README.md)
+
+[plugin](./plugin/README.md)
+
+</details>
+
+<details>
+<summary>
+
+### Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
-### Files -README.md diff --git a/orthanc/orthanc-anon/config/README.md b/orthanc/orthanc-anon/config/README.md index ef5de4dce..02fdbac03 100644 --- a/orthanc/orthanc-anon/config/README.md +++ b/orthanc/orthanc-anon/config/README.md @@ -1,8 +1,15 @@ ## 'PIXL/orthanc/orthanc-anon/config' Directory Contents -### Files +
+<details>
+<summary>
+
+### Files
+
+</summary>
-dicom.json
+
-orthanc.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| dicom.json | README.md |
+| orthanc.json | |
+
+</details>
diff --git a/orthanc/orthanc-anon/config/orthanc.json b/orthanc/orthanc-anon/config/orthanc.json index 6f0d26ff7..e848006db 100644 --- a/orthanc/orthanc-anon/config/orthanc.json +++ b/orthanc/orthanc-anon/config/orthanc.json @@ -1,7 +1,4 @@ { - "Dictionary": { - "000d,1001": ["LO", "UCLHPIXLProjectName", 1, 1, "UCLH PIXL"] - }, "Name" : "${ORTHANC_NAME}", "RemoteAccessAllowed" : true, "RegisteredUsers": { diff --git a/orthanc/orthanc-anon/docs/README.md b/orthanc/orthanc-anon/docs/README.md index 8a6fa25bf..594e155de 100644 --- a/orthanc/orthanc-anon/docs/README.md +++ b/orthanc/orthanc-anon/docs/README.md @@ -1,10 +1,25 @@ ## 'PIXL/orthanc/orthanc-anon/docs' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[assets](./assets/README.md)

-### Files
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
+| **User docs** |
+| :--- |
+| DicomServiceViaAAD.md |
+| README.md |
-DicomServiceViaAAD.md
+
+</details>
diff --git a/orthanc/orthanc-anon/docs/assets/README.md b/orthanc/orthanc-anon/docs/assets/README.md index 484b4f3e3..9dc439e65 100644 --- a/orthanc/orthanc-anon/docs/assets/README.md +++ b/orthanc/orthanc-anon/docs/assets/README.md @@ -1,6 +1,14 @@ ## 'PIXL/orthanc/orthanc-anon/docs/assets' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-DicomServiceAuthority.png
+
+| **Images** | **User docs** |
+| :--- | :--- |
+| DicomServiceAuthority.png | README.md |
+
+</details>
diff --git a/orthanc/orthanc-anon/plugin/README.md b/orthanc/orthanc-anon/plugin/README.md index 6f5c0eba0..51a17ee9e 100644 --- a/orthanc/orthanc-anon/plugin/README.md +++ b/orthanc/orthanc-anon/plugin/README.md @@ -1,8 +1,15 @@ ## 'PIXL/orthanc/orthanc-anon/plugin' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-download_dicom_spec.py
-pixl.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| download_dicom_spec.py | README.md |
+| pixl.py | |
+
+</details>
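The next two hunks pin the DICOM specification edition used for validation and rework the orthanc-anon plugin. As orientation, here is a minimal sketch of how `dicom-validator` resolves a pinned edition, mirroring the calls in `download_dicom_spec.py` below:

```python
# Minimal sketch mirroring download_dicom_spec.py below: resolve a pinned
# DICOM standard edition with dicom-validator so validation is reproducible.
from pathlib import Path

from dicom_validator.spec_reader.edition_reader import EditionReader

edition = "2024e"  # pinned edition; Renovate's custom manager can bump this
download_path = str(Path.home() / "dicom-validator")
edition_reader = EditionReader(download_path)
# Downloads the spec if needed and returns the local revision directory
destination = edition_reader.get_revision(edition, recreate_json=False)
print(destination)
```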
diff --git a/orthanc/orthanc-anon/plugin/download_dicom_spec.py b/orthanc/orthanc-anon/plugin/download_dicom_spec.py index 5bde1b1e3..04e4f3f4d 100644 --- a/orthanc/orthanc-anon/plugin/download_dicom_spec.py +++ b/orthanc/orthanc-anon/plugin/download_dicom_spec.py @@ -17,7 +17,7 @@ from dicom_validator.spec_reader.edition_reader import EditionReader -edition = "current" +edition = "2024e" download_path = str(Path.home() / "dicom-validator") edition_reader = EditionReader(download_path) destination = edition_reader.get_revision(edition, recreate_json=False) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index b58490b7e..e879bc168 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -26,6 +26,7 @@ import sys import threading import traceback +from collections import defaultdict from concurrent.futures import ThreadPoolExecutor from io import BytesIO from time import sleep @@ -35,19 +36,26 @@ import pydicom import requests from core.exceptions import PixlDiscardError, PixlSkipInstanceError +from core.project_config.pixl_config_model import load_project_config from decouple import config from loguru import logger from pydicom import dcmread import orthanc +from pixl_dcmd.dicom_helpers import get_study_info from pixl_dcmd.main import ( - anonymise_and_validate_dicom, + anonymise_dicom_and_update_db, + parse_validation_results, write_dataset_to_bytes, ) if TYPE_CHECKING: from typing import Any + from core.project_config.pixl_config_model import PixlConfig + + from pixl_dcmd.dicom_helpers import StudyInfo + ORTHANC_USERNAME = config("ORTHANC_USERNAME") ORTHANC_PASSWORD = config("ORTHANC_PASSWORD") ORTHANC_URL = "http://localhost:8042" @@ -165,28 +173,6 @@ def AzureDICOMTokenRefresh() -> None: return None -def Send(study_id: str) -> None: - """ - Send the resource to the appropriate destination. - Throws an exception if the image has already been exported. - """ - msg = f"Sending {study_id}" - logger.debug(msg) - notify_export_api_of_readiness(study_id) - - -def notify_export_api_of_readiness(study_id: str): - """ - Tell export-api that our data is ready and it should download it from us and upload - as appropriate - """ - url = EXPORT_API_URL + "/export-dicom-from-orthanc" - payload = {"study_id": study_id} - timeout: float = config("PIXL_DICOM_TRANSFER_TIMEOUT", default=180, cast=float) - response = requests.post(url, json=payload, timeout=timeout) - response.raise_for_status() - - def should_export() -> bool: """ Checks whether ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT environment variable is @@ -235,97 +221,108 @@ def ImportStudiesFromRaw(output, uri, **request): # noqa: ARG001 payload = json.loads(request["body"]) study_resource_ids = payload["ResourceIDs"] study_uids = payload["StudyInstanceUIDs"] + project_name = payload["ProjectName"] - for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): - executor.submit(_import_study_from_raw, study_resource_id, study_uid) + executor.submit(_import_studies_from_raw, study_resource_ids, study_uids, project_name) response = json.dumps({"Message": "Ok"}) output.AnswerBuffer(response, "application/json") -def _import_study_from_raw(study_resource_id: str, study_uid: str) -> None: +def _import_studies_from_raw( + study_resource_ids: list[str], study_uids: list[str], project_name: str +) -> None: """ - Import a study from Orthanc Raw. + Import studies from Orthanc Raw. 
Args: - study_resource_id: Resource ID of the study in Orthanc Raw - study_uid: Corresponding StudyInstanceUID + study_resource_ids: Resource IDs of the study in Orthanc Raw + project_name: Name of the project - - Pull a study from Orthanc Raw based on its resource ID + - Pull studies from Orthanc Raw based on its resource ID - Iterate over instances and anonymise them - - Re-upload the study via the dicom-web api - - Notify the PIXL export-api to send the study the to relevant endpoint + - Upload the studies to orthanc-anon + - Notify the PIXL export-api to send the studies to the relevant endpoint for the project """ - zipped_study_bytes = get_study_zip_archive_from_raw(resourceId=study_resource_id) + anonymised_study_uids = [] + + for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): + logger.debug("Processing project '{}', study '{}' ", project_name, study_uid) + anonymised_uid = _anonymise_study_and_upload(study_resource_id, project_name) + if anonymised_uid: + anonymised_study_uids.append(anonymised_uid) + + if not should_export(): + logger.debug("Not exporting study {} as auto-routing is disabled", anonymised_study_uids) + return + + # ensure we only have unique resource ids by using a set + resource_ids = { + _get_study_resource_id(anonymised_study_uid) + for anonymised_study_uid in anonymised_study_uids + } + + logger.debug( + "Notify export API to retrieve study resources. Original UID {} Anon UID: {}", + study_resource_ids, + resource_ids, + ) + + for resource_id in resource_ids: + send_study(study_id=resource_id, project_name=project_name) + + +def _anonymise_study_and_upload(study_resource_id: str, project_name: str) -> str | None: + zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id) + + study_info = _get_study_info_from_first_file(zipped_study_bytes) + logger.info("Processing project '{}', {}", project_name, study_info) with ZipFile(zipped_study_bytes) as zipped_study: try: anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances( zipped_study=zipped_study, - study_uid=study_uid, + study_info=study_info, + project_name=project_name, ) + except PixlDiscardError as discard: + logger.warning( + "Failed to anonymize project: '{}', {}: {}", project_name, study_info, discard + ) + return None except Exception: # noqa: BLE001 - logger.exception("Failed to anonymize study: {} ", study_uid) - return + logger.exception("Failed to anonymize project: '{}', {}", project_name, study_info) + return None _upload_instances(anonymised_instances_bytes) + return anonymised_study_uid - if not should_export(): - logger.debug("Not exporting study {} as auto-routing is disabled", anonymised_study_uid) - return - - anonymised_study_resource_id = _get_study_resource_id(anonymised_study_uid) - logger.debug( - "Notify export API to retrieve study resource. 
Original UID {} Anon UID: {}", - study_uid, - anonymised_study_uid, - ) - Send(study_id=anonymised_study_resource_id) - -def get_study_zip_archive_from_raw(resourceId: str) -> BytesIO: +def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO: """Download zip archive of study resource from Orthanc Raw.""" - query = f"{ORTHANC_RAW_URL}/studies/{resourceId}/archive" + query = f"{ORTHANC_RAW_URL}/studies/{resource_id}/archive" response = requests.get( query, auth=(config("ORTHANC_RAW_USERNAME"), config("ORTHANC_RAW_PASSWORD")), timeout=config("PIXL_DICOM_TRANSFER_TIMEOUT", default=180, cast=int), ) response.raise_for_status() - logger.debug("Downloaded data for resource {} from Orthanc Raw", resourceId) + logger.debug("Downloaded data for resource {} from Orthanc Raw", resource_id) return BytesIO(response.content) -def _get_study_resource_id(study_uid: str) -> str: - """ - Get the resource ID for an existing study based on its StudyInstanceUID. - - Returns None if there are no resources with the given StudyInstanceUID. - Returns the resource ID if there is a single resource with the given StudyInstanceUID. - Returns None if there are multiple resources with the given StudyInstanceUID and deletes - the studies. - """ - data = json.dumps( - { - "Level": "Study", - "Query": { - "StudyInstanceUID": study_uid, - }, - } - ) - study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data)) - if not study_resource_ids: - message = f"No study found with StudyInstanceUID {study_uid}" - raise ValueError(message) - if len(study_resource_ids) > 1: - message = f"Multiple studies found with StudyInstanceUID {study_uid}" - raise ValueError(message) - - return study_resource_ids[0] +def _get_study_info_from_first_file(zipped_study_bytes) -> StudyInfo: + with ZipFile(zipped_study_bytes) as zipped_study: + file_info = zipped_study.infolist()[0] + with zipped_study.open(file_info) as file: + dataset = dcmread(file) + return get_study_info(dataset) -def _anonymise_study_instances(zipped_study: ZipFile, study_uid: str) -> tuple[list[bytes], str]: +def _anonymise_study_instances( + zipped_study: ZipFile, study_info: StudyInfo, project_name: str +) -> tuple[list[bytes], str]: """ Iterate over all instances and anonymise them. @@ -333,37 +330,56 @@ def _anonymise_study_instances(zipped_study: ZipFile, study_uid: str) -> tuple[l Return a list of the bytes of anonymised instances, and the anonymised StudyInstanceUID. 
""" + config = load_project_config(project_name) anonymised_instances_bytes = [] - logger.debug("Zipped study infolist: {}", zipped_study.infolist()) + skipped_instance_counts = defaultdict(int) + dicom_validation_errors = {} + for file_info in zipped_study.infolist(): with zipped_study.open(file_info) as file: logger.debug("Reading file {}", file) dataset = dcmread(file) - - logger.info("Anonymising file: {} for study: {}", file, study_uid) try: - anonymised_instances_bytes.append(_anonymise_dicom_instance(dataset)) + anonymised_instance, instance_validation_errors = _anonymise_dicom_instance( + dataset, config + ) except PixlSkipInstanceError as e: - logger.warning( - "Skipping instance {} for study {}: {}", + logger.debug( + "Skipping instance {} for {}: {}", dataset[0x0008, 0x0018].value, - study_uid, + study_info, e, ) + skipped_instance_counts[str(e)] += 1 else: + anonymised_instances_bytes.append(anonymised_instance) anonymised_study_uid = dataset[0x0020, 0x000D].value + dicom_validation_errors |= instance_validation_errors if not anonymised_instances_bytes: - message = f"All instances have been skipped for study {study_uid}" + message = f"All instances have been skipped for study: {dict(skipped_instance_counts)}" raise PixlDiscardError(message) + logger.debug( + "Project '{}' {}, skipped instances: {}", + project_name, + study_info, + dict(skipped_instance_counts), + ) + + if dicom_validation_errors: + logger.warning( + "The anonymisation introduced the following validation errors:\n{}", + parse_validation_results(dicom_validation_errors), + ) + logger.success("Finished anonymising project: '{}', {}", project_name, study_info) return anonymised_instances_bytes, anonymised_study_uid -def _anonymise_dicom_instance(dataset: pydicom.Dataset) -> bytes: +def _anonymise_dicom_instance(dataset: pydicom.Dataset, config: PixlConfig) -> tuple[bytes, dict]: """Anonymise a DICOM instance.""" - anonymise_and_validate_dicom(dataset, config_path=None, synchronise_pixl_db=True) - return write_dataset_to_bytes(dataset) + validation_errors = anonymise_dicom_and_update_db(dataset, config=config) + return write_dataset_to_bytes(dataset), validation_errors def _upload_instances(instances_bytes: list[bytes]) -> None: @@ -384,6 +400,56 @@ def _upload_instances(instances_bytes: list[bytes]) -> None: upload_response.raise_for_status() +def _get_study_resource_id(study_uid: str) -> str: + """ + Get the resource ID for an existing study based on its StudyInstanceUID. + + Returns None if there are no resources with the given StudyInstanceUID. + Returns the resource ID if there is a single resource with the given StudyInstanceUID. + Returns None if there are multiple resources with the given StudyInstanceUID and deletes + the studies. + """ + data = json.dumps( + { + "Level": "Study", + "Query": { + "StudyInstanceUID": study_uid, + }, + } + ) + study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data)) + if not study_resource_ids: + message = f"No study found with StudyInstanceUID {study_uid}" + raise ValueError(message) + if len(study_resource_ids) > 1: + message = f"Multiple studies found with StudyInstanceUID {study_uid}" + raise ValueError(message) + + return study_resource_ids[0] + + +def send_study(study_id: str, project_name: str) -> None: + """ + Send the resource to the appropriate destination. + Throws an exception if the image has already been exported. 
+ """ + msg = f"Sending {study_id}" + logger.debug(msg) + notify_export_api_of_readiness(study_id, project_name) + + +def notify_export_api_of_readiness(study_id: str, project_name: str) -> None: + """ + Tell export-api that our data is ready and it should download it from us and upload + as appropriate + """ + url = EXPORT_API_URL + "/export-dicom-from-orthanc" + payload = {"study_id": study_id, "project_name": project_name} + timeout: float = config("HTTP_TIMEOUT", default=30, cast=float) + response = requests.post(url, json=payload, timeout=timeout) + response.raise_for_status() + + orthanc.RegisterOnChangeCallback(OnChange) orthanc.RegisterRestCallback("/heart-beat", OnHeartBeat) orthanc.RegisterRestCallback("/import-from-raw", ImportStudiesFromRaw) diff --git a/orthanc/orthanc-raw/README.md b/orthanc/orthanc-raw/README.md index 2e695cc3a..e95ea819d 100644 --- a/orthanc/orthanc-raw/README.md +++ b/orthanc/orthanc-raw/README.md @@ -22,7 +22,7 @@ available shortly when the service is started). ### Configuration -- The Docker image is a deployment of `orthancteam/orthanc` with some extra configuration +- The Docker image is a deployment of `orthancteam/orthanc` with some extra configuration <--- is orthancteam/orthanc supposed to point to somewhere in the tree - `ORTHANC_RAW_MAXIMUM_STORAGE_SIZE` to limit the storage size - `ORTHANC_RAW_JOB_HISTORY_SIZE` has been increased so that while there is concurrent processing, the job should always exist for being able to query its status @@ -109,17 +109,31 @@ team. ## References - - [Cheat sheet of the REST API](https://book.orthanc-server.com/users/rest-cheatsheet.html) + - [Cheat sheet for the REST API](https://book.orthanc-server.com/users/rest-cheatsheet.html) ## 'PIXL/orthanc/orthanc-raw' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[config](./config/README.md)

[plugin](./plugin/README.md)

-### Files
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
-README.md
+
+</details>
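With the project-name private tag removed, the orthanc-anon plugin hunks above take the project explicitly in the `/import-from-raw` payload. A hypothetical caller might look like this (resource IDs, UID, project name and credentials are placeholders):

```python
# Hypothetical caller of orthanc-anon's /import-from-raw endpoint (see the
# pixl.py hunks above); all values below are illustrative placeholders.
import requests

payload = {
    "ResourceIDs": ["some-orthanc-raw-resource-id"],
    "StudyInstanceUIDs": ["1.2.3.4.5"],
    "ProjectName": "my-project",  # replaces the old private DICOM tag
}
response = requests.post(
    "http://orthanc-anon:8042/import-from-raw",
    json=payload,
    auth=("orthanc", "orthanc"),
    timeout=30,
)
response.raise_for_status()
```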
diff --git a/orthanc/orthanc-raw/config/README.md b/orthanc/orthanc-raw/config/README.md index 6d345e63e..2933d2b62 100644 --- a/orthanc/orthanc-raw/config/README.md +++ b/orthanc/orthanc-raw/config/README.md @@ -1,10 +1,17 @@ ## 'PIXL/orthanc/orthanc-raw/config' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-dicom.json
-orthanc.json
-postgres.json
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| dicom.json | README.md |
+| orthanc.json | |
+| postgres.json | |
+
+</details>
diff --git a/orthanc/orthanc-raw/config/orthanc.json b/orthanc/orthanc-raw/config/orthanc.json index a18c70697..b53374e30 100644 --- a/orthanc/orthanc-raw/config/orthanc.json +++ b/orthanc/orthanc-raw/config/orthanc.json @@ -1,6 +1,5 @@ { "Dictionary": { - "000d,1001": ["LO", "UCLHPIXLProjectName", 1, 1, "UCLH PIXL"] }, "DefaultPrivateCreator" : "UCLH PIXL", "Name" : "${ORTHANC_NAME}", @@ -52,8 +51,7 @@ // Sequences tags are not supported. "ExtraMainDicomTags" : { "Instance" : [ - "SOPClassUID", - "000d,1001" + "SOPClassUID" ], "Series" : [], "Study": [], diff --git a/orthanc/orthanc-raw/plugin/README.md b/orthanc/orthanc-raw/plugin/README.md index bfebf0781..13b57c61e 100644 --- a/orthanc/orthanc-raw/plugin/README.md +++ b/orthanc/orthanc-raw/plugin/README.md @@ -1,6 +1,14 @@ ## 'PIXL/orthanc/orthanc-raw/plugin' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-pixl.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| pixl.py | README.md |
+
+</details>
diff --git a/orthanc/orthanc-raw/plugin/pixl.py b/orthanc/orthanc-raw/plugin/pixl.py index 79c010086..22f8265ff 100644 --- a/orthanc/orthanc-raw/plugin/pixl.py +++ b/orthanc/orthanc-raw/plugin/pixl.py @@ -22,16 +22,12 @@ import os import sys -from io import BytesIO from typing import TYPE_CHECKING -from core.dicom_tags import DICOM_TAG_PROJECT_NAME, add_private_tag from decouple import config from loguru import logger -from pydicom import dcmread import orthanc -from pixl_dcmd.main import write_dataset_to_bytes from pixl_dcmd.tagrecording import record_dicom_headers if TYPE_CHECKING: @@ -53,11 +49,11 @@ def OnHeartBeat(output, uri, **request): # noqa: ARG001 output.AnswerBuffer("OK\n", "text/plain") -def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: +def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: # noqa: ARG001 """Optionally record headers from the received DICOM instance.""" if should_record_headers(): record_dicom_headers(receivedDicom) - return modify_dicom_tags(receivedDicom, origin) + return orthanc.ReceivedInstanceAction.KEEP_AS_IS, None def should_record_headers() -> bool: @@ -68,30 +64,5 @@ def should_record_headers() -> bool: return os.environ.get("ORTHANC_RAW_RECORD_HEADERS", "false").lower() == "true" -def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any: - """ - A new incoming DICOM file needs to have the project name private tag added here, so - that the API will later allow us to edit it. - However, we don't know its correct value at this point, so just create it with an obvious - placeholder value. - """ - if origin != orthanc.InstanceOrigin.DICOM_PROTOCOL: - # don't keep resetting the tag values if this was triggered by an API call! - logger.trace("doing nothing as change triggered by API") - return orthanc.ReceivedInstanceAction.KEEP_AS_IS, None - dataset = dcmread(BytesIO(receivedDicom)) - # See the orthanc.json config file for where this tag is given a nickname - # The private block is the first free block >= 0x10. - # We can't directly control it, but the orthanc config requires it to be - # hardcoded to 0x10 - # https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_7.8.html - - # Add project name as private tag, at this point, the value is unknown - private_block = add_private_tag(dataset, DICOM_TAG_PROJECT_NAME) - - logger.debug("added new private block starting at 0x{:04x}", private_block.block_start) - return orthanc.ReceivedInstanceAction.MODIFY, write_dataset_to_bytes(dataset) - - orthanc.RegisterReceivedInstanceCallback(ReceivedInstanceCallback) orthanc.RegisterRestCallback("/heart-beat", OnHeartBeat) diff --git a/pixl_core/README.md b/pixl_core/README.md index 5b9f784fb..9d5c943a8 100644 --- a/pixl_core/README.md +++ b/pixl_core/README.md @@ -15,7 +15,7 @@ Specifically, it defines: - Pydantic models for [project configuration](./src/core/project_config/pixl_config_model.py) - [Secrets management](./src/core/project_config/secrets.py) via an Azure Key Vault. - Handling of [uploads over FTPS](./src/core/uploader/_ftps.py), used to transfer images and parquet files - to the DSH (Data Safe Haven) + to the relevant FTPS server (at UCL this is the DSH (Data Safe Haven)) - [Uploading DICOM files to a DICOMWeb server](./src/core/uploader/_dicomweb.py) - [Uploading DICOM files to XNAT](./src/core/uploader/_xnat.py) @@ -69,7 +69,7 @@ and export of stable DICOM to orthanc-anon while still pulling from the VNA. 
### OMOP ES files -Public parquet exports from OMOP ES that should be transferred outside the hospital are copied to +Public parquet exports from OMOP ES that may be transferred outside the hospital are copied to the `exports` directory at the repository base. Within this directory each project has a directory, with all extracts stored in `all_extracts` and @@ -99,7 +99,7 @@ for convenience `latest` is a symlink to the most recent extract. ## Project configuration The `project_config` module provides the functionality to handle -[project configurations](../README.md#configure-a-new-project). +[project configurations](../README.md#configure-a-new-project). <== SK comment I'm not sure this goes to exactly the right place OR the name in the # is misleading ### Design @@ -115,7 +115,7 @@ such as the `FTPSUploader` implement the actual upload functionality. The creden uploading are queried from an **Azure Keyvault** instance (implemented in `core.project_config.secrets`), for which the setup instructions are in the [top-level README](../README.md#project-secrets) -When an extract is ready to be published to the DSH, the PIXL pipeline will upload the **Public** +When an extract is ready to be published to the relevant FTPS server (UCL DSH), the PIXL pipeline will upload the **Public** and **Radiology** [_parquet_ files](../docs/file_types/parquet_files.md) to the `` directory where the DICOM datasets are stored (see the directory structure below). The uploading is controlled by `upload_parquet_files` in [`upload.py`](./src/core/upload.py) which takes a `ParquetExport` @@ -124,7 +124,7 @@ by the `export-patient-data` API endpoint defined in the [Export API](../pixl_export/src/pixl_export/main.py), which in turn is called by the `export_patient_data` command in the [PIXL CLI](../cli/README.md). -Once the parquet files have been uploaded to the DSH, the directory structure will look like this: +Once the parquet files have been uploaded, the directory structure will look like this: ```sh @@ -220,7 +220,7 @@ with existing session and series data: ### XNAT testing setup -For unit testing, we use [`xnat4tests`](https://github.com/Australian-Imaging-Service/xnat4tests) to spin up an XNAT +For unit testing, we use the publicly available [`xnat4tests`](https://github.com/Australian-Imaging-Service/xnat4tests) to spin up an XNAT instance in a Docker container. Secrets are not used for these unit testing. Instead, the following environment variables are used to configure XNAT for testing: @@ -237,15 +237,28 @@ in the `XNAT_USER_NAME` and `XNAT_PASSWORD` environment variables. ## 'PIXL/pixl_core' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[src](./src/README.md)

[tests](./tests/README.md)

-### Files
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
-pyproject.toml
+
+</details>
-README.md diff --git a/pixl_core/pyproject.toml b/pixl_core/pyproject.toml index b3251a9bc..e57aa6881 100644 --- a/pixl_core/pyproject.toml +++ b/pixl_core/pyproject.toml @@ -7,35 +7,35 @@ readme = "README.md" requires-python = ">=3.9" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "aio_pika==9.4.2", - "azure-identity==1.16.1", + "aio_pika==9.5.3", + "azure-identity==1.19.0", "azure-keyvault==4.2.0", - "fastapi==0.112.0", - "jsonpickle==3.2.2", - "loguru==0.7.2", - "pandas==2.2.2", + "fastapi==0.115.6", + "jsonpickle==4.0.0", + "loguru==0.7.3", + "pandas==2.2.3", "pika==1.3.2", - "psycopg2-binary==2.9.9", - "pyarrow==17.0.0", - "pydantic==2.8.2", + "psycopg2-binary==2.9.10", + "pyarrow==18.1.0", + "pydantic==2.10.3", "python-decouple==3.8", "python-slugify==8.0.4", - "PyYAML==6.0.1", - "requests==2.32.2", - "sqlalchemy==2.0.32", + "PyYAML==6.0.2", + "requests==2.32.3", + "sqlalchemy==2.0.36", "token-bucket==0.3.0", "xnat==0.6.2", ] [project.optional-dependencies] test = [ - "httpx==0.27.*", - "pytest==8.3.2", - "pytest-asyncio==0.23.8", - "pytest-check==2.3.1", - "pytest-cov==5.0.0", + "httpx==0.28.*", + "pytest==8.3.4", + "pytest-asyncio==0.24.0", + "pytest-check==2.4.1", + "pytest-cov==6.0.0", "pytest-pixl==0.2.0rc0", - "xnat4tests==0.3.11", + "xnat4tests==0.3.12", ] dev = [ "mypy", diff --git a/pixl_core/src/README.md b/pixl_core/src/README.md index 56763ccf2..849af740e 100644 --- a/pixl_core/src/README.md +++ b/pixl_core/src/README.md @@ -1,6 +1,24 @@ ## 'PIXL/pixl_core/src' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[core](./core/README.md)
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/pixl_core/src/core/README.md b/pixl_core/src/core/README.md index c450b32c7..a16c0b96d 100644 --- a/pixl_core/src/core/README.md +++ b/pixl_core/src/core/README.md @@ -1,6 +1,10 @@ ## 'PIXL/pixl_core/src/core' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[db](./db/README.md)

@@ -14,13 +18,20 @@
[uploader](./uploader/README.md)

-### Files
+
+</details>
+
-dicom_tags.py
+<details>
+<summary>
+Files
+</summary>
+
-exceptions.py
-exports.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| dicom_tags.py | README.md |
+| exceptions.py | |
+| exports.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_core/src/core/db/README.md b/pixl_core/src/core/db/README.md index 2b3e187dd..a59fe00ef 100644 --- a/pixl_core/src/core/db/README.md +++ b/pixl_core/src/core/db/README.md @@ -1,10 +1,16 @@ ## 'PIXL/pixl_core/src/core/db' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-models.py
-queries.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| models.py | README.md |
+| queries.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_core/src/core/dicom_tags.py b/pixl_core/src/core/dicom_tags.py index 134f1ac92..684085429 100644 --- a/pixl_core/src/core/dicom_tags.py +++ b/pixl_core/src/core/dicom_tags.py @@ -16,7 +16,6 @@ This information is currently duplicated in - pixl_imaging/tests/orthanc_raw_config/orthanc.json - orthanc/orthanc-raw/config/orthanc.json - - projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml For now you will have to manually keep these in step. """ @@ -68,17 +67,6 @@ def acceptable_private_block(self, actual_private_block: int) -> bool: return self.required_private_block == actual_private_block -DICOM_TAG_PROJECT_NAME = PrivateDicomTag( - group_id=0x000D, - required_private_block=0x10, - offset_id=0x01, - creator_string="UCLH PIXL", - tag_nickname="UCLHPIXLProjectName", - vr="LO", # LO = Long string max 64 - unknown_value="__pixl_unknown_value__", -) - - def add_private_tag( dataset: Dataset, private_tag: PrivateDicomTag, value: Optional[str | bytes] = None ) -> PrivateBlock: diff --git a/pixl_core/src/core/patient_queue/README.md b/pixl_core/src/core/patient_queue/README.md index f53cead1f..3ac2b3449 100644 --- a/pixl_core/src/core/patient_queue/README.md +++ b/pixl_core/src/core/patient_queue/README.md @@ -1,14 +1,18 @@ ## 'PIXL/pixl_core/src/core/patient_queue' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-message.py
-producer.py
-subscriber.py
-_base.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| message.py | README.md |
+| producer.py | |
+| subscriber.py | |
+| _base.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_core/src/core/project_config/README.md b/pixl_core/src/core/project_config/README.md index 4a8232810..529758a4f 100644 --- a/pixl_core/src/core/project_config/README.md +++ b/pixl_core/src/core/project_config/README.md @@ -1,12 +1,17 @@ ## 'PIXL/pixl_core/src/core/project_config' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-pixl_config_model.py
-secrets.py
-tag_operations.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| pixl_config_model.py | README.md |
+| secrets.py | |
+| tag_operations.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_core/src/core/rest_api/README.md b/pixl_core/src/core/rest_api/README.md index 2d481757b..8dd17e3ad 100644 --- a/pixl_core/src/core/rest_api/README.md +++ b/pixl_core/src/core/rest_api/README.md @@ -1,8 +1,15 @@ ## 'PIXL/pixl_core/src/core/rest_api' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-router.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| router.py | README.md |
+| __init__.py | |
+
+</details>
diff --git a/pixl_core/src/core/token_buffer/README.md b/pixl_core/src/core/token_buffer/README.md index d25fd7b64..8b5d341b2 100644 --- a/pixl_core/src/core/token_buffer/README.md +++ b/pixl_core/src/core/token_buffer/README.md @@ -1,10 +1,16 @@ ## 'PIXL/pixl_core/src/core/token_buffer' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-models.py
-tokens.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| models.py | README.md |
+| tokens.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_core/src/core/uploader/README.md b/pixl_core/src/core/uploader/README.md index 1a0e6a444..09a39793d 100644 --- a/pixl_core/src/core/uploader/README.md +++ b/pixl_core/src/core/uploader/README.md @@ -1,16 +1,19 @@ ## 'PIXL/pixl_core/src/core/uploader' Directory Contents -### Files - -base.py - -_dicomweb.py - -_ftps.py - -_orthanc.py - -_xnat.py - -__init__.py +
+<details>
+<summary>
+Files
+</summary>
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| base.py | README.md |
+| _dicomweb.py | |
+| _ftps.py | |
+| _orthanc.py | |
+| _xnat.py | |
+| __init__.py | |
+
+</details>
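The `_dicomweb.py` hunk below adds a `Timeout` key when declaring the DICOMweb server on Orthanc. A sketch of the resulting server definition, assuming the DICOMweb plugin's `/dicom-web/servers/<name>` endpoint; the URL and credentials are placeholders:

```python
# Sketch of declaring a DICOMweb server on Orthanc with the new "Timeout" key
# (see the _dicomweb.py hunk below). Endpoint, URL and credentials are
# illustrative placeholders, not the uploader's real configuration.
import json

import requests

server_config = {
    "Url": "https://dicomweb.example.com/",
    "Username": "user",
    "Password": "password",
    "HasDelete": True,
    "Timeout": 120,  # seconds; previously the plugin default was used
}
response = requests.put(
    "http://localhost:8042/dicom-web/servers/my-server",
    data=json.dumps(server_config),
    headers={"content-type": "application/json"},
    auth=("orthanc", "orthanc"),
    timeout=30,
)
response.raise_for_status()
```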
diff --git a/pixl_core/src/core/uploader/_dicomweb.py b/pixl_core/src/core/uploader/_dicomweb.py index 90603395d..173b4b7c8 100644 --- a/pixl_core/src/core/uploader/_dicomweb.py +++ b/pixl_core/src/core/uploader/_dicomweb.py @@ -123,6 +123,7 @@ def _setup_dicomweb_credentials(self) -> None: "Username": self.endpoint_user, "Password": self.endpoint_password, "HasDelete": True, + "Timeout": self.http_timeout, } headers = {"content-type": "application/json"} diff --git a/pixl_core/src/core/uploader/_ftps.py b/pixl_core/src/core/uploader/_ftps.py index 4f5035049..a25ad7b99 100644 --- a/pixl_core/src/core/uploader/_ftps.py +++ b/pixl_core/src/core/uploader/_ftps.py @@ -87,7 +87,7 @@ def _upload_dicom_image( self.send_via_ftps( zip_content, study_tags.pseudo_anon_image_id, - remote_directory=study_tags.project_slug, + remote_directory=self.project_slug, ) def send_via_ftps( @@ -195,10 +195,10 @@ def _create_and_set_as_cwd_multi_path(ftp: FTP_TLS, remote_multi_dir: Path) -> N def _create_and_set_as_cwd(ftp: FTP_TLS, project_dir: str) -> None: try: - ftp.cwd(project_dir) - logger.debug("'{}' exists on remote ftp, so moving into it", project_dir) - except ftplib.error_perm: - logger.info("creating '{}' on remote ftp and moving into it", project_dir) - # Directory doesn't exist, so create it ftp.mkd(project_dir) - ftp.cwd(project_dir) + except ftplib.error_perm: + logger.debug("'{}' exists on remote ftp, so moving into it", project_dir) + else: + logger.info("created '{}' on remote ftp and moving into it", project_dir) + + ftp.cwd(project_dir) diff --git a/pixl_core/src/core/uploader/_orthanc.py b/pixl_core/src/core/uploader/_orthanc.py index 6eee8f241..e00606e3e 100644 --- a/pixl_core/src/core/uploader/_orthanc.py +++ b/pixl_core/src/core/uploader/_orthanc.py @@ -40,14 +40,13 @@ class StudyTags: """Tags for a study.""" pseudo_anon_image_id: str - project_slug: str patient_id: str def get_tags_by_study(study_id: str) -> StudyTags: """ Queries the Orthanc server at the study level, returning the - Study Instance UID, UCLHPIXLProjectName, and PatientID DICOM tags. + Study Instance UID and PatientID DICOM tags. BEWARE: post-anonymisation, the Study Instance UID is NOT the Study Instance UID, it's the pseudo-anonymised ID generated randomly. 
""" @@ -58,7 +57,6 @@ def get_tags_by_study(study_id: str) -> StudyTags: json_response = json.loads(response_study.content.decode()) return StudyTags( pseudo_anon_image_id=json_response["StudyInstanceUID"], - project_slug=json_response["UCLHPIXLProjectName"], patient_id=json_response["PatientID"], ) diff --git a/pixl_core/src/core/uploader/_xnat.py b/pixl_core/src/core/uploader/_xnat.py index 6f4bfff56..a8c345605 100644 --- a/pixl_core/src/core/uploader/_xnat.py +++ b/pixl_core/src/core/uploader/_xnat.py @@ -91,7 +91,7 @@ def upload_to_xnat( data=zip_content, overwrite=self.overwrite, destination=self.destination, - project=study_tags.project_slug, + project=self.project_slug, subject=study_tags.patient_id, experiment=study_tags.pseudo_anon_image_id, content_type="application/zip", diff --git a/pixl_core/src/core/uploader/base.py b/pixl_core/src/core/uploader/base.py index 58b2fcdb2..887776513 100644 --- a/pixl_core/src/core/uploader/base.py +++ b/pixl_core/src/core/uploader/base.py @@ -68,7 +68,7 @@ def upload_dicom_and_update_database(self, study_id: str) -> None: "Starting {} upload of '{}' for {}", self.__class__.__name__.removesuffix("Uploader"), study_tags.pseudo_anon_image_id, - study_tags.project_slug, + self.project_slug, ) self._upload_dicom_image(study_id, study_tags) logger.success( diff --git a/pixl_core/tests/README.md b/pixl_core/tests/README.md index 621d1b3c7..94ae3ea10 100644 --- a/pixl_core/tests/README.md +++ b/pixl_core/tests/README.md @@ -1,6 +1,10 @@ ## 'PIXL/pixl_core/tests' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[data](./data/README.md)

@@ -12,11 +16,19 @@
[uploader](./uploader/README.md)

-### Files
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
-conftest.py
-docker-compose.yml
-test_exports.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| conftest.py | README.md |
+| docker-compose.yml | |
+| test_exports.py | |
+
+</details>
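The `_ftps.py` hunk above inverts the directory-creation logic: instead of `cwd` first and `mkd` on failure, it now attempts `mkd` and treats a permission error as meaning the directory already exists. As a self-contained sketch:

```python
# Self-contained sketch of the EAFP pattern from the _ftps.py hunk above:
# try to create the remote directory first; if it already exists, move on.
import ftplib
from ftplib import FTP_TLS

from loguru import logger


def create_and_set_as_cwd(ftp: FTP_TLS, project_dir: str) -> None:
    try:
        ftp.mkd(project_dir)
    except ftplib.error_perm:
        logger.debug("'{}' exists on remote ftp, so moving into it", project_dir)
    else:
        logger.info("created '{}' on remote ftp and moving into it", project_dir)
    ftp.cwd(project_dir)
```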
diff --git a/pixl_core/tests/data/README.md b/pixl_core/tests/data/README.md index 065c917e7..8c3111207 100644 --- a/pixl_core/tests/data/README.md +++ b/pixl_core/tests/data/README.md @@ -1,10 +1,16 @@ ## 'PIXL/pixl_core/tests/data' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-dicom_series.zip
-public.zip
-xnat_project.xml
+
+| **Data** | **User docs** |
+| :--- | :--- |
+| dicom_series.zip | README.md |
+| public.zip | |
+| xnat_project.xml | |
+
+</details>
diff --git a/pixl_core/tests/patient_queue/README.md b/pixl_core/tests/patient_queue/README.md index 895a4d9fc..02a555e07 100644 --- a/pixl_core/tests/patient_queue/README.md +++ b/pixl_core/tests/patient_queue/README.md @@ -1,10 +1,16 @@ ## 'PIXL/pixl_core/tests/patient_queue' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-test_message.py
-test_producer.py
-test_subscriber.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| test_message.py | README.md |
+| test_producer.py | |
+| test_subscriber.py | |
+
+</details>
diff --git a/pixl_core/tests/project_config/README.md b/pixl_core/tests/project_config/README.md index eb889f4e8..c5751ce88 100644 --- a/pixl_core/tests/project_config/README.md +++ b/pixl_core/tests/project_config/README.md @@ -1,8 +1,15 @@ ## 'PIXL/pixl_core/tests/project_config' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-test_project_config.py
-test_secrets.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| test_project_config.py | README.md |
+| test_secrets.py | |
+
+</details>
diff --git a/pixl_core/tests/project_config/test_project_config.py b/pixl_core/tests/project_config/test_project_config.py index 3a09ae6d5..56ae5964d 100644 --- a/pixl_core/tests/project_config/test_project_config.py +++ b/pixl_core/tests/project_config/test_project_config.py @@ -41,7 +41,7 @@ def base_yaml_data(): return { "project": {"name": "myproject", "modalities": ["DX", "CR"]}, "tag_operation_files": { - "base": ["test-extract-uclh-omop-cdm.yaml"], + "base": ["base.yaml"], "manufacturer_overrides": ["mri-diffusion.yaml"], }, "destination": {"dicom": "ftps", "parquet": "ftps"}, diff --git a/pixl_core/tests/token_buffer/README.md b/pixl_core/tests/token_buffer/README.md index 259ef6888..58e597e77 100644 --- a/pixl_core/tests/token_buffer/README.md +++ b/pixl_core/tests/token_buffer/README.md @@ -1,6 +1,14 @@ ## 'PIXL/pixl_core/tests/token_buffer' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-test_tokens.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| test_tokens.py | README.md |
+
+</details>
diff --git a/pixl_core/tests/uploader/README.md b/pixl_core/tests/uploader/README.md index e960568d5..a20250f05 100644 --- a/pixl_core/tests/uploader/README.md +++ b/pixl_core/tests/uploader/README.md @@ -1,12 +1,17 @@ ## 'PIXL/pixl_core/tests/uploader' Directory Contents -### Files +
+<details>
+<summary>
+Files
+</summary>
+
-test_base.py
-test_dicomweb.py
-test_ftps.py
-test_xnat.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| test_base.py | README.md |
+| test_dicomweb.py | |
+| test_ftps.py | |
+| test_xnat.py | |
+
+</details>
diff --git a/pixl_core/tests/uploader/test_base.py b/pixl_core/tests/uploader/test_base.py index 2f66d8413..e1806fe3a 100644 --- a/pixl_core/tests/uploader/test_base.py +++ b/pixl_core/tests/uploader/test_base.py @@ -32,11 +32,12 @@ class DumbUploader(Uploader): def __init__(self, pseudo_study_uid) -> None: """Initialise the mock uploader with hardcoded values for FTPS config.""" + self.project_slug = "project_slug" self.pseudo_study_uid = pseudo_study_uid def _get_tags_by_study(self, study_id: str) -> StudyTags: logger.info("Mocked getting tags for: {} to return {}", study_id, self.pseudo_study_uid) - return StudyTags(self.pseudo_study_uid, "project_slug", "patient-id") + return StudyTags(self.pseudo_study_uid, "patient-id") def _upload_dicom_image( self, @@ -47,7 +48,7 @@ def _upload_dicom_image( "Mocked uploader with no upload functionality for {}, {}, {}", study_id, study_tags.pseudo_anon_image_id, - study_tags.project_slug, + self.project_slug, study_tags.patient_id, ) diff --git a/pixl_core/tests/uploader/test_dicomweb.py b/pixl_core/tests/uploader/test_dicomweb.py index 2506c7c3e..63342a2b5 100644 --- a/pixl_core/tests/uploader/test_dicomweb.py +++ b/pixl_core/tests/uploader/test_dicomweb.py @@ -96,7 +96,6 @@ def test_upload_dicom_image( """Tests that DICOM image can be uploaded to a DICOMWeb server""" study_tags = StudyTags( pseudo_anon_image_id=not_yet_exported_dicom_image.pseudo_study_uid, - project_slug="project", patient_id="patient", ) dicomweb_uploader._upload_dicom_image( # noqa: SLF001 diff --git a/pixl_core/tests/uploader/test_xnat.py b/pixl_core/tests/uploader/test_xnat.py index 447539cf0..2205a0c11 100644 --- a/pixl_core/tests/uploader/test_xnat.py +++ b/pixl_core/tests/uploader/test_xnat.py @@ -30,7 +30,7 @@ class MockXNATUploader(XNATUploader): """Mock XNATUploader for testing.""" - def __init__(self) -> None: + def __init__(self, project_slug: str) -> None: """Initialise the mock uploader with hardcoded values for FTPS config.""" self.host = os.environ["XNAT_HOST"] self.user = os.environ["XNAT_USER_NAME"] @@ -39,12 +39,19 @@ def __init__(self) -> None: self.url = f"http://{self.host}:{self.port}" self.destination = os.environ["XNAT_DESTINATION"] self.overwrite = os.environ["XNAT_OVERWRITE"] + self.project_slug = project_slug + + +@pytest.fixture(scope="session") +def xnat_project_slug() -> str: + """Name of the XNAT project""" + return "some-project-slug" @pytest.fixture() -def xnat_uploader() -> MockXNATUploader: +def xnat_uploader(xnat_project_slug) -> MockXNATUploader: """Return a MockXNATUploader object.""" - return MockXNATUploader() + return MockXNATUploader(project_slug=xnat_project_slug) @pytest.fixture() @@ -74,13 +81,12 @@ def xnat_study_tags() -> StudyTags: """Return a StudyTags object for the study to be uploaded to XNAT.""" return StudyTags( pseudo_anon_image_id="1.3.6.1.4.1.14519.5.2.1.99.1071.12985477682660597455732044031486", - project_slug="some-project-slug", patient_id="987654321", ) @pytest.fixture(scope="session") -def xnat_server(xnat_study_tags) -> Generator: +def xnat_server(xnat_project_slug) -> Generator: """ Start the XNAT server. 
@@ -134,7 +140,7 @@ def xnat_server(xnat_study_tags) -> Generator: accepted_status=[200, 409], ) session.put( - path=f"/data/projects/{xnat_study_tags.project_slug}/users/Owners/pixl", + path=f"/data/projects/{xnat_project_slug}/users/Owners/pixl", accepted_status=[200], ) @@ -154,7 +160,7 @@ def xnat_server(xnat_study_tags) -> Generator: user="admin", password="admin", # noqa: S106 ) as session: - project = session.projects[xnat_study_tags.project_slug] + project = session.projects[xnat_project_slug] for subject in project.subjects.values(): session.delete( path=f"/data/projects/{project.id}/subjects/{subject.label}", @@ -175,8 +181,8 @@ def test_upload_to_xnat(zip_dicoms, xnat_uploader, xnat_study_tags) -> None: user=xnat_uploader.user, password=xnat_uploader.password, ) as session: - assert xnat_study_tags.project_slug in session.projects - project = session.projects[xnat_study_tags.project_slug] + assert xnat_uploader.project_slug in session.projects + project = session.projects[xnat_uploader.project_slug] assert xnat_study_tags.patient_id in project.subjects subject = project.subjects[xnat_study_tags.patient_id] diff --git a/pixl_dcmd/README.md b/pixl_dcmd/README.md index a614e865d..c10eb3c7c 100644 --- a/pixl_dcmd/README.md +++ b/pixl_dcmd/README.md @@ -8,33 +8,37 @@ For external users, the `pixl_dcmd` package provides the following functionality - `anonymise_dicom()`: Applies the [anonymisation operations](#tag-scheme-anonymisation) for the appropriate tag scheme using [Kitware Dicom Anonymizer](https://github.com/KitwareMedical/dicom-anonymizer) and deletes any tags not mentioned in the tag scheme. The dataset is updated in place. - - There is also an option to synchronise to the PIXL database, external users can avoid this - to just run the allow-list and applying the tag scheme. - - Will throw a `PixlSkipInstanceError` for any series based on the project config file. Specifically, an error + - Will throw a `PixlSkipInstanceError` for any series based on the project config file. {SK: this sentence doesn't quite make sense to me} Specifically, an error will be thrown if: - the series description matches any series in `series_filters` (usually to remove localiser series) - the modality of the DICOM is not in `modalities` - `anonymise_and_validate_dicom()`: Compares DICOM validation issues before and after calling `anonymise_dicom` - and returns a dictionary of the new issues. 
Can also avoid synchronising with PIXL database + and returns a dictionary of the new issues ```python +import os import pathlib import pydicom +from core.project_config.pixl_config_model import load_config_and_validate from pixl_dcmd import anonymise_and_validate_dicom +config_dir = pathlib.Path().cwd().parents[2] / "projects" / "configs" +config_path = config_dir / "test-external-user.yaml" +os.environ["PROJECT_CONFIGS_DIR"] = config_dir.as_posix() # needed to validate config +config = load_config_and_validate(config_path) + dataset_path = pydicom.data.get_testdata_file( - "MR-SIEMENS-DICOM-WithOverlays.dcm", download=True + "MR-SIEMENS-DICOM-WithOverlays.dcm", download=True, ) -config_path = pathlib.Path(__file__).parents[2] / "projects/configs/test-extract-uclh-omop-cdm.yaml" -# updated inplace dataset = pydicom.dcmread(dataset_path) -validation_issues = anonymise_and_validate_dicom(dataset, config_path=config_path, synchronise_pixl_db=False) + +# the dataset is updated inplace +validation_issues = anonymise_and_validate_dicom(dataset, config=config) assert validation_issues == {} assert dataset != pydicom.dcmread(dataset_path) ``` - ## Installation Install the Python dependencies from the `pixl_dcmd` directory: @@ -58,18 +62,31 @@ and optionally a `manufacturer_overrides`. If a `manufacturer_overrides` is defined, it will be used to override the `base` tags, if the manufacturer of the DICOM file matches the manufacturer in the `manufacturer_overrides`. Any tags in the `manufacturer_overrides` that are not in the `base` will be added to the scheme as well. +[SK: Between this and the previous read me I'm not sure this is totally clear, is it possible to have a full example of the yml file linked so that base and a manufacturer_overrides make a bit more sense ] ## 'PIXL/pixl_dcmd' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[src](./src/README.md)

[tests](./tests/README.md)

-### Files
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
-pyproject.toml
-README.md
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
+
+</details>
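Regarding the reviewer question above about `base` and `manufacturer_overrides`: the following is an illustrative sketch of the merge semantics described, with made-up tags rather than `pixl_dcmd`'s actual data structures. Overrides replace matching base entries, and tags only present in the override are appended:

```python
# Illustrative sketch of the tag-scheme merge described above; the tags and
# dict layout are made up, not pixl_dcmd's actual data structures.
base = [
    {"group": 0x0010, "element": 0x0010, "op": "replace"},  # PatientName
    {"group": 0x0008, "element": 0x0060, "op": "keep"},     # Modality
]
manufacturer_overrides = [
    {"group": 0x0010, "element": 0x0010, "op": "keep"},     # overrides base
    {"group": 0x0019, "element": 0x100C, "op": "keep"},     # added to the scheme
]

merged = {(t["group"], t["element"]): t for t in base}
merged |= {(t["group"], t["element"]): t for t in manufacturer_overrides}

assert merged[(0x0010, 0x0010)]["op"] == "keep"  # base entry overridden
assert (0x0019, 0x100C) in merged                # override-only tag added
```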
diff --git a/pixl_dcmd/pyproject.toml b/pixl_dcmd/pyproject.toml index 421976315..fa0168384 100644 --- a/pixl_dcmd/pyproject.toml +++ b/pixl_dcmd/pyproject.toml @@ -9,13 +9,13 @@ classifiers = ["Programming Language :: Python :: 3"] dependencies = [ "core==0.2.0rc0", "arrow==1.3.0", - "dicom-anonymizer==1.0.12", - "dicom-validator==0.6.2", + "dicom-anonymizer==1.0.13.post1", + "dicom-validator==0.6.3", "logger==1.4", "pydicom==2.4.4", "pydicom-data", "python-decouple==3.8", - "requests==2.32.2", + "requests==2.32.3", "types-requests~=2.28", ] @@ -23,7 +23,7 @@ dependencies = [ test = [ "core[test]==0.2.0rc0", "dcm2niix==1.0.20220715", - "nibabel==5.2.1", + "nibabel==5.3.2", ] dev = [ "core[dev]==0.2.0rc0", diff --git a/pixl_dcmd/src/README.md b/pixl_dcmd/src/README.md index 2ba0cd0e1..bd4b95bbb 100644 --- a/pixl_dcmd/src/README.md +++ b/pixl_dcmd/src/README.md @@ -1,6 +1,24 @@ ## 'PIXL/pixl_dcmd/src' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
[pixl_dcmd](./pixl_dcmd/README.md)
+
+</details>
+
+<details>
+<summary>
+Files
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/pixl_dcmd/src/pixl_dcmd/README.md b/pixl_dcmd/src/pixl_dcmd/README.md index e94fbfd07..312dfdca5 100644 --- a/pixl_dcmd/src/pixl_dcmd/README.md +++ b/pixl_dcmd/src/pixl_dcmd/README.md @@ -1,22 +1,30 @@ ## 'PIXL/pixl_dcmd/src/pixl_dcmd' Directory Contents -### Subdirectories +
+<details>
+<summary>
+Subdirectories with links to the relevant README
+</summary>
+
-[resources](./resources/README.md)
-
-### Files
+[resources](./resources/README.md)
+
+</details>
+
-main.py
-tagrecording.py
-_database.py
-_dicom_helpers.py
-_tag_schemes.py
-_version.py
-__init__.py
+<details>
+<summary>
+Files
+</summary>
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| main.py | README.md |
+| tagrecording.py | |
+| _database.py | |
+| _dicom_helpers.py | |
+| _tag_schemes.py | |
+| _version.py | |
+| __init__.py | |
+
+</details>
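The `dicom_helpers.py` hunk below introduces a context manager that captures `print` output from `dicom_validator` and re-emits it through `loguru` at debug level. A simplified, single-threaded sketch of the same pattern (the real version adds thread-local buffers):

```python
# Simplified, single-threaded sketch of the stdout-redirection pattern added
# in dicom_helpers.py below; the real version uses thread-local buffers.
from collections.abc import Generator
from contextlib import contextmanager, redirect_stdout
from io import StringIO

from loguru import logger


@contextmanager
def redirect_stdout_to_debug() -> Generator[None, None, None]:
    """Within the context, print() output becomes logger.debug lines."""
    buffer = StringIO()
    with redirect_stdout(buffer):
        yield
    buffer.seek(0)
    for line in buffer.readlines():
        logger.debug(line.strip())


with redirect_stdout_to_debug():
    print("chatty third-party output")  # emitted as a DEBUG log instead
```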
diff --git a/pixl_dcmd/src/pixl_dcmd/_database.py b/pixl_dcmd/src/pixl_dcmd/_database.py index d8f691de5..5ad1756d5 100644 --- a/pixl_dcmd/src/pixl_dcmd/_database.py +++ b/pixl_dcmd/src/pixl_dcmd/_database.py @@ -24,7 +24,7 @@ from sqlalchemy import URL, create_engine, exists from sqlalchemy.orm import sessionmaker, exc -from pixl_dcmd._dicom_helpers import StudyInfo +from pixl_dcmd.dicom_helpers import StudyInfo url = URL.create( drivername="postgresql+psycopg2", @@ -39,7 +39,7 @@ def get_uniq_pseudo_study_uid_and_update_db( - project_slug: str, study_info: StudyInfo + project_slug: str, original_study_info: StudyInfo ) -> UID: """ Checks if record (by slug and study info) exists in the database, @@ -50,7 +50,7 @@ def get_uniq_pseudo_study_uid_and_update_db( with PixlSession() as pixl_session, pixl_session.begin(): existing_image = get_unexported_image( project_slug, - study_info, + original_study_info, pixl_session, ) if existing_image.pseudo_study_uid is None: @@ -64,8 +64,8 @@ def get_uniq_pseudo_study_uid_and_update_db( def get_pseudo_patient_id_and_update_db( - project_slug: str, study_info: StudyInfo, pseudo_patient_id: str -) -> None: + project_slug: str, original_study_info: StudyInfo, pseudo_patient_id: str +) -> str: """ Checks if record (by slug and study info) exists in the database, gets the pseudo_paitent_id if it is not None otherwise use the @@ -76,7 +76,7 @@ def get_pseudo_patient_id_and_update_db( with PixlSession() as pixl_session, pixl_session.begin(): existing_image = get_unexported_image( project_slug, - study_info, + original_study_info, pixl_session, ) if existing_image.pseudo_patient_id is None: @@ -125,7 +125,7 @@ def get_unexported_image( """ Get an existing, non-exported (for this project) image record from the database identified by the study UID. If no result is found, retry with querying on - MRN + accession number. If this fails as well, raise a PixlDiscardError. + MRN + accession number. If this fails as well, raise a NoResultFound. 
""" try: existing_image: Image = ( diff --git a/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py b/pixl_dcmd/src/pixl_dcmd/dicom_helpers.py similarity index 75% rename from pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py rename to pixl_dcmd/src/pixl_dcmd/dicom_helpers.py index 925fbdd75..8babad0d8 100644 --- a/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py +++ b/pixl_dcmd/src/pixl_dcmd/dicom_helpers.py @@ -15,31 +15,23 @@ from __future__ import annotations +import threading +import typing +from contextlib import contextmanager, redirect_stdout from dataclasses import dataclass import logging +from io import StringIO from pathlib import Path +from typing import Generator + +from loguru import logger -from core.dicom_tags import DICOM_TAG_PROJECT_NAME from dicom_validator.spec_reader.edition_reader import EditionReader from dicom_validator.validator.iod_validator import IODValidator -from loguru import logger from pydicom import Dataset - -def get_project_name_as_string(dataset: Dataset) -> str: - raw_slug = dataset.get_private_item( - DICOM_TAG_PROJECT_NAME.group_id, - DICOM_TAG_PROJECT_NAME.offset_id, - DICOM_TAG_PROJECT_NAME.creator_string, - ).value - # Get both strings and bytes, which is fun - if isinstance(raw_slug, bytes): - logger.debug(f"Bytes slug {raw_slug!r}") - slug = raw_slug.decode("utf-8").strip() - else: - logger.debug(f"String slug '{raw_slug}'") - slug = raw_slug - return slug +if typing.TYPE_CHECKING: + from loguru import Logger class DicomValidator: @@ -48,8 +40,9 @@ def __init__(self, edition: str = "current"): # Default from dicom_validator but defining here to be explicit standard_path = str(Path.home() / "dicom-validator") - edition_reader = EditionReader(standard_path) - destination = edition_reader.get_revision(self.edition, False) + with _redirect_stdout_to_debug(logger): + edition_reader = EditionReader(standard_path) + destination = edition_reader.get_revision(self.edition, False) json_path = Path(destination, "json") self.dicom_info = EditionReader.load_dicom_info(json_path) @@ -87,6 +80,26 @@ def validate_anonymised(self, dataset: Dataset) -> dict: return self.diff_errors +thread_local = threading.local() + + +@contextmanager +def _redirect_stdout_to_debug(_logger: Logger) -> Generator[None, None, None]: + """Within the context manager, redirect all print statements to debug statements.""" + + # sys.stdout is shared across all threads so use thread-local storage + if not hasattr(thread_local, "stdout"): + thread_local.stdout = StringIO() + + with redirect_stdout(thread_local.stdout): + yield + + thread_local.stdout.seek(0) + output = thread_local.stdout.readlines() + for line in output: + _logger.debug(line.strip()) + + @dataclass class StudyInfo: """Identifiers used for an imaging study""" diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index 1186d4a81..7d0015506 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -14,33 +14,33 @@ from __future__ import annotations import typing +from functools import lru_cache from io import BytesIO import requests from core.exceptions import PixlSkipInstanceError -from core.project_config import load_project_config, load_tag_operations +from core.project_config import load_tag_operations from decouple import config from dicomanonymizer.simpledicomanonymizer import ( - actions_map_name_functions, + ActionsMapNameFunctions, anonymize_dataset, ) from loguru import logger from pydicom import DataElement, Dataset, dcmwrite -from core.project_config.pixl_config_model import 
PixlConfig, load_config_and_validate +from core.project_config.pixl_config_model import PixlConfig from pixl_dcmd._database import ( get_uniq_pseudo_study_uid_and_update_db, get_pseudo_patient_id_and_update_db, ) -from pixl_dcmd._dicom_helpers import ( +from pixl_dcmd.dicom_helpers import ( DicomValidator, - get_project_name_as_string, get_study_info, ) from pixl_dcmd._tag_schemes import _scheme_list_to_dict, merge_tag_schemes if typing.TYPE_CHECKING: - from pathlib import Path + from pixl_dcmd.dicom_helpers import StudyInfo def write_dataset_to_bytes(dataset: Dataset) -> bytes: @@ -59,54 +59,54 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes: def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: series_description = dataset.get("SeriesDescription") if cfg.is_series_excluded(series_description): - logger.info("FILTERING OUT series description: {}", series_description) + logger.debug("FILTERING OUT series description: {}", series_description) return True return False +def anonymise_dicom_and_update_db( + dataset: Dataset, + *, + config: PixlConfig, +) -> dict: + """Anonymise and validate a DICOM dataset and update the PIXL database.""" + identifiable_study_info = get_study_info(dataset) + validation_errors = anonymise_and_validate_dicom(dataset, config=config) + _generate_pseudo_uids_and_synchronise_pixl_db( + dataset=dataset, + project_name=config.project.name, + identifiable_study_info=identifiable_study_info, + ) + return validation_errors + + def anonymise_and_validate_dicom( dataset: Dataset, *, - config_path: Path | None = None, - synchronise_pixl_db: bool = True, + config: PixlConfig, ) -> dict: """ Anonymise dataset using allow list and compare DICOM validation errors before and after anonymising. - If synchronise_pixl_db is True, then synchronise with the pixl database. - If the pixl database has a value for set for the pseudo identifier, then update the - DICOM data with the value, otherwise save the anonymised data from the DICOM dataset - to the pixl database. - - pseudo_study_uid -> DICOM study uid tag - - pseudo_patient_id -> DICOM patient identifier tag - :param dataset: DICOM dataset to be anonymised, updated in place - :param config_path: path to config, for external users. - if not set then this will be determined using the PIXL project tag in the DICOM - :param synchronise_pixl_db: synchronise the anonymisation with the pixl database + :param config: Project config to use for anonymisation :return: dictionary of validation errors """ # Set up Dicom validator and validate the original dataset - dicom_validator = DicomValidator(edition="current") + dicom_validator = DicomValidator(edition="2024e") dicom_validator.validate_original(dataset) - anonymise_dicom( - dataset, config_path=config_path, synchronise_pixl_db=synchronise_pixl_db - ) + anonymise_dicom(dataset, config=config) # Validate the anonymised dataset validation_errors = dicom_validator.validate_anonymised(dataset) - if validation_errors: - logger.warning( - "The anonymisation introduced the following validation errors:\n{}", - _parse_validation_results(validation_errors), - ) return validation_errors def anonymise_dicom( - dataset: Dataset, config_path: Path | None = None, synchronise_pixl_db: bool = True + dataset: Dataset, + config: PixlConfig, ) -> None: """ Anonymises a DICOM dataset as Received by Orthanc in place. 
@@ -115,61 +115,37 @@ def anonymise_dicom( - recursively applying tag operations based on the config file - deleting any tags not in the tag scheme recursively - If synchronise_pixl_db is True, then synchronise with the pixl database. - If the pixl database has a value for set for the pseudo identifier, then update the - DICOM data with the value, otherwise save the anonymised data from the DICOM dataset - to the pixl database. - - pseudo_study_uid -> DICOM study uid tag - - pseudo_patient_id -> DICOM patient identifier tag - :param dataset: DICOM dataset to be anonymised, updated in place - :param config_path: path to config, for external users. - if not set then this will be determined using the PIXL project tag in the DICOM - :param synchronise_pixl_db: synchronise the anonymisation with the pixl database + :param config: Project config to use for anonymisation """ study_info = get_study_info(dataset) - - if config_path: - project_config = load_config_and_validate(config_path) - project_name = project_config.project.name - else: - project_name = get_project_name_as_string(dataset) - project_config = load_project_config(project_name) - logger.debug(f"Processing instance for project {project_name}: {study_info}") + logger.debug( + f"Processing instance for project {config.project.name}: {study_info}" + ) # Do before anonymisation in case someone decides to delete the # Series Description tag as part of anonymisation. - if _should_exclude_series(dataset, project_config): + if _should_exclude_series(dataset, config): msg = "DICOM instance discarded due to its series description" raise PixlSkipInstanceError(msg) - if dataset.Modality not in project_config.project.modalities: + if dataset.Modality not in config.project.modalities: msg = f"Dropping DICOM Modality: {dataset.Modality}" raise PixlSkipInstanceError(msg) - logger.info("Anonymising received instance: {}", study_info) + logger.debug("Anonymising instance for: {}", study_info) # Merge tag schemes - tag_operations = load_tag_operations(project_config) + tag_operations = load_tag_operations(config) tag_scheme = merge_tag_schemes(tag_operations, manufacturer=dataset.Manufacturer) logger.debug( - f"Applying DICOM tag anonymisation according to {project_config.tag_operation_files}" + f"Applying DICOM tag anonymisation according to {config.tag_operation_files}" ) logger.trace(f"Tag scheme: {tag_scheme}") _enforce_allowlist(dataset, tag_scheme, recursive=True) - _anonymise_dicom_from_scheme(dataset, project_name, tag_scheme) - - if synchronise_pixl_db: - # Update the dataset with the new pseudo study ID - dataset[0x0020, 0x000D].value = get_uniq_pseudo_study_uid_and_update_db( - project_name, study_info - ) - anonymised_study_info = get_study_info(dataset) - dataset[0x0010, 0x0020].value = get_pseudo_patient_id_and_update_db( - project_name, study_info, anonymised_study_info.mrn - ) # type: ignore[func-returns-value] + _anonymise_dicom_from_scheme(dataset, config.project.name, tag_scheme) def _anonymise_dicom_from_scheme( @@ -216,7 +192,7 @@ def _convert_schema_to_actions( _dataset, project_slug, _tag ) continue - tag_actions[group_el] = actions_map_name_functions[tag["op"]] + tag_actions[group_el] = ActionsMapNameFunctions[tag["op"]].value.function return tag_actions @@ -246,6 +222,7 @@ def _secure_hash( dataset[grp, el].value = hashed_value +@lru_cache(maxsize=1000) def _hash_values(pat_value: str, project_slug: str, hash_len: int = 0) -> str: """ Utility function for hashing values using the hasher API. 
@@ -284,9 +261,37 @@ def _allowlist_tag(dataset: Dataset, de: DataElement, tag_scheme: list[dict]) -> del dataset[de.tag] -def _parse_validation_results(results: dict) -> str: +def parse_validation_results(results: dict) -> str: """Parse the validation results into a human-readable string.""" res_str = "" for key, value in results.items(): res_str += f"{key}: {value}\n" return res_str + + +def _generate_pseudo_uids_and_synchronise_pixl_db( + dataset: Dataset, + project_name: str, + identifiable_study_info: StudyInfo, +) -> None: + """ + Synchronise the anonymisation with the pixl database. + + If the pixl database has a value for set for the pseudo identifier, then update the + DICOM data with the value, otherwise save the anonymised data from the DICOM dataset + to the pixl database. + + - pseudo_study_uid -> DICOM study uid tag + - pseudo_patient_id -> DICOM patient identifier tag + """ + dataset[0x0020, 0x000D].value = get_uniq_pseudo_study_uid_and_update_db( + project_name, + identifiable_study_info, + ) + + anonymised_study_info = get_study_info(dataset) + dataset[0x0010, 0x0020].value = get_pseudo_patient_id_and_update_db( + project_name, + identifiable_study_info, + anonymised_study_info.mrn, + ) diff --git a/pixl_dcmd/src/pixl_dcmd/resources/README.md b/pixl_dcmd/src/pixl_dcmd/resources/README.md index b6d19ecc6..2a9a5e9df 100644 --- a/pixl_dcmd/src/pixl_dcmd/resources/README.md +++ b/pixl_dcmd/src/pixl_dcmd/resources/README.md @@ -1,6 +1,14 @@ ## 'PIXL/pixl_dcmd/src/pixl_dcmd/resources' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-recorded-headers.yml
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| recorded-headers.yml | README.md |
+
+</details>
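For downstream users of `pixl_dcmd`, the switch from `config_path`/`synchronise_pixl_db` to an explicit `config` object changes the calling convention. A minimal sketch of the new external-user flow, assuming a local checkout of a project config (the file paths are illustrative):

```python
from pathlib import Path

import pydicom
from core.project_config.pixl_config_model import load_config_and_validate
from pixl_dcmd.main import anonymise_and_validate_dicom

# Load and validate the project config up front; it is no longer resolved
# from a private DICOM tag inside the dataset.
config = load_config_and_validate(Path("projects/configs/test-external-user.yaml"))

dataset = pydicom.dcmread("instance_001.dcm")  # hypothetical input file

# Anonymise in place; the returned dict of validation errors is empty on success.
validation_errors = anonymise_and_validate_dicom(dataset, config=config)
assert validation_errors == {}
```

Callers running inside the PIXL pipeline would use `anonymise_dicom_and_update_db` instead, which additionally writes the pseudo study UID and pseudo patient ID back to the PIXL database.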
diff --git a/pixl_dcmd/tests/README.md b/pixl_dcmd/tests/README.md index 9f05da066..5447c3203 100644 --- a/pixl_dcmd/tests/README.md +++ b/pixl_dcmd/tests/README.md @@ -1,16 +1,19 @@ ## 'PIXL/pixl_dcmd/tests' Directory Contents -### Files - -conftest.py - -test_database.py - -test_dicom_validator.py - -test_main.py - -test_tagrecording.py - -test_tag_schemes.py +
<details>
+<summary><h3>Files</h3></summary>
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| conftest.py | README.md |
+| test_database.py | |
+| test_dicom_validator.py | |
+| test_main.py | |
+| test_tagrecording.py | |
+| test_tag_schemes.py | |
+
+</details>
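The `@lru_cache(maxsize=1000)` added to `_hash_values` memoises calls to the hasher API, so a value that recurs across instances of a study is only hashed over HTTP once per process. A toy illustration of the caching behaviour, with the HTTP round trip stubbed out:

```python
from functools import lru_cache

calls = 0

@lru_cache(maxsize=1000)
def hash_value(pat_value: str, project_slug: str, hash_len: int = 0) -> str:
    """Stand-in for _hash_values: pretend this is a request to the hasher API."""
    global calls
    calls += 1
    return f"hashed-{pat_value}-{hash_len}"

hash_value("mrn-123", "my-project", 64)
hash_value("mrn-123", "my-project", 64)  # identical arguments: served from the cache
assert calls == 1
assert hash_value.cache_info().hits == 1
```

The usual `lru_cache` caveats apply: all arguments must be hashable, and cached entries live for the lifetime of the process.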
diff --git a/pixl_dcmd/tests/conftest.py b/pixl_dcmd/tests/conftest.py index e7e7b324b..35234b899 100644 --- a/pixl_dcmd/tests/conftest.py +++ b/pixl_dcmd/tests/conftest.py @@ -19,19 +19,16 @@ import os import pathlib import tempfile +import typing from collections.abc import Generator from typing import Optional -from pixl_dcmd._dicom_helpers import get_study_info +from pixl_dcmd.dicom_helpers import get_study_info from core.project_config import load_project_config import pytest import pytest_pixl.dicom import requests from core.db.models import Base, Extract, Image -from core.dicom_tags import ( - DICOM_TAG_PROJECT_NAME, - add_private_tag, -) from pydicom import Dataset, dcmread from pytest_pixl.dicom import generate_dicom_dataset from sqlalchemy import Engine, create_engine @@ -47,6 +44,11 @@ from pathlib import Path from decouple import config + +if typing.TYPE_CHECKING: + from core.project_config.pixl_config_model import PixlConfig + + PROJECT_CONFIGS_DIR = Path(config("PROJECT_CONFIGS_DIR")) STUDY_DATE = datetime.date.fromisoformat("2023-01-01") @@ -124,67 +126,6 @@ def row_for_testing_image_with_pseudo_patient_id( return db_session -def ids_for_parameterised_test(val: pathlib.Path) -> str: - """Generate test ID for parameterised tests""" - return str(val.stem) - - -@pytest.fixture() -@pytest.mark.parametrize( - ("yaml_file"), PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test -) -def row_for_dicom_testing(db_session, yaml_file) -> Session: - """ - Insert a test row for the fake DICOM dataset generated by - pytest_pixl.dicom.generate_dicom_dataset. - """ - - config = load_project_config(yaml_file.stem) - modality = config.project.modalities[0] - - extract = Extract(slug=config.project.name) - ds = pytest_pixl.dicom.generate_dicom_dataset(Modality=modality) - study_info = get_study_info(ds) - - image_not_exported = Image( - mrn=study_info.mrn, - accession_number=study_info.accession_number, - study_uid=study_info.study_uid, - study_date=STUDY_DATE, - extract=extract, - ) - with db_session: - db_session.add_all([extract, image_not_exported]) - db_session.commit() - - return db_session - - -@pytest.fixture() -def row_for_single_dicom_testing(db_session) -> Session: - """ - Insert a test row for the fake DICOM dataset generated by - pytest_pixl.dicom.generate_dicom_dataset. - """ - - extract = Extract(slug=TEST_PROJECT_SLUG) - ds = pytest_pixl.dicom.generate_dicom_dataset() - study_info = get_study_info(ds) - - image_not_exported = Image( - mrn=study_info.mrn, - accession_number=study_info.accession_number, - study_uid=study_info.study_uid, - study_date=STUDY_DATE, - extract=extract, - ) - with db_session: - db_session.add_all([extract, image_not_exported]) - db_session.commit() - - return db_session - - @pytest.fixture() def directory_of_mri_dicoms() -> Generator[pathlib.Path, None, None]: """Directory containing MRI DICOMs suitable for testing.""" @@ -277,66 +218,15 @@ def mock_get(key, default) -> Optional[str]: @pytest.fixture() -def vanilla_dicom_image_DX(row_for_dicom_testing) -> Dataset: - """ - A DICOM image with diffusion data to test the anonymisation process. - Private tags were added to match the tag operations defined in the project config, so we can - test whether the anonymisation process works as expected when defining overrides. - The row_for_mri_dicom_testing dependency is to make sure the database is populated with the - project slug, which is used to anonymise the DICOM image. 
- """ - ds = generate_dicom_dataset(Modality="DX") - - # Make sure the project name tag is added for anonymisation to work - add_private_tag(ds, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = ds.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - ds[block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id)].value = TEST_PROJECT_SLUG - - return ds - - -@pytest.fixture() -def vanilla_single_dicom_image_DX(row_for_single_dicom_testing) -> Dataset: +def vanilla_dicom_image_DX() -> Dataset: """ A DICOM image with diffusion data to test the anonymisation process. Private tags were added to match the tag operations defined in the project config, so we can test whether the anonymisation process works as expected when defining overrides. - The row_for_single_dicom_testing dependency is to make sure the database is populated with the - project slug, which is used to anonymise the DICOM image. """ - ds = generate_dicom_dataset(Modality="DX") + return generate_dicom_dataset(Modality="DX") - # Make sure the project name tag is added for anonymisation to work - add_private_tag(ds, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = ds.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - ds[block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id)].value = TEST_PROJECT_SLUG - - return ds - -@pytest.fixture() -def vanilla_dicom_image_MR(row_for_dicom_testing) -> Dataset: - """ - A DICOM image with MX data to test the anonymisation process. - Private tags were added to match the tag operations defined in the project config, so we can - test whether the anonymisation process works as expected when defining overrides. - The row_for_mri_dicom_testing dependency is to make sure the database is populated with the - project slug, which is used to anonymise the DICOM image. 
- """ - ds = generate_dicom_dataset(Modality="MR") - - # Make sure the project name tag is added for anonymisation to work - add_private_tag(ds, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = ds.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - ds[block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id)].value = TEST_PROJECT_SLUG - - return ds +@pytest.fixture(scope="module") +def test_project_config() -> PixlConfig: + return load_project_config(TEST_PROJECT_SLUG) diff --git a/pixl_dcmd/tests/test_database.py b/pixl_dcmd/tests/test_database.py index 5c5a1b455..59d39f4df 100644 --- a/pixl_dcmd/tests/test_database.py +++ b/pixl_dcmd/tests/test_database.py @@ -22,7 +22,7 @@ get_uniq_pseudo_study_uid_and_update_db, get_pseudo_patient_id_and_update_db, ) -from pixl_dcmd._dicom_helpers import StudyInfo +from pixl_dcmd.dicom_helpers import StudyInfo from sqlalchemy.orm import Session STUDY_DATE = datetime.date.fromisoformat("2023-01-01") diff --git a/pixl_dcmd/tests/test_dicom_validator.py b/pixl_dcmd/tests/test_dicom_validator.py index b930e6d46..7fd1e0710 100644 --- a/pixl_dcmd/tests/test_dicom_validator.py +++ b/pixl_dcmd/tests/test_dicom_validator.py @@ -15,24 +15,25 @@ from __future__ import annotations import pytest -from pixl_dcmd._dicom_helpers import DicomValidator +from pixl_dcmd.dicom_helpers import DicomValidator from pixl_dcmd.main import anonymise_dicom from pydicom import Dataset -def test_validation_check_works(vanilla_single_dicom_image_DX: Dataset) -> None: +def test_validation_check_works(vanilla_dicom_image_DX: Dataset) -> None: """ GIVEN a DICOM dataset WHEN the dataset is validated against itself (withouth anonymisation) THEN no errors should be raised """ validator = DicomValidator() - validator.validate_original(vanilla_single_dicom_image_DX) - assert not validator.validate_anonymised(vanilla_single_dicom_image_DX) + validator.validate_original(vanilla_dicom_image_DX) + assert not validator.validate_anonymised(vanilla_dicom_image_DX) def test_validation_after_anonymisation_works( - vanilla_single_dicom_image_DX: Dataset, + vanilla_dicom_image_DX: Dataset, + test_project_config, ) -> None: """ GIVEN a DICOM dataset @@ -40,17 +41,17 @@ def test_validation_after_anonymisation_works( THEN no errors should be raised """ validator = DicomValidator() - validator.validate_original(vanilla_single_dicom_image_DX) - anonymise_dicom(vanilla_single_dicom_image_DX) + validator.validate_original(vanilla_dicom_image_DX) + anonymise_dicom(vanilla_dicom_image_DX, config=test_project_config) - assert not validator.validate_anonymised(vanilla_single_dicom_image_DX) + assert not validator.validate_anonymised(vanilla_dicom_image_DX) @pytest.fixture() -def non_compliant_dicom_image(vanilla_single_dicom_image_DX: Dataset) -> Dataset: +def non_compliant_dicom_image(vanilla_dicom_image_DX: Dataset) -> Dataset: """A DICOM dataset that is not compliant with the DICOM standard.""" - del vanilla_single_dicom_image_DX.PatientName - return vanilla_single_dicom_image_DX + del vanilla_dicom_image_DX.PatientName + return vanilla_dicom_image_DX def test_validation_passes_for_non_compliant_dicom(non_compliant_dicom_image) -> None: @@ -65,7 +66,7 @@ def test_validation_passes_for_non_compliant_dicom(non_compliant_dicom_image) -> def test_validation_fails_after_invalid_tag_modification( - vanilla_single_dicom_image_DX, + vanilla_dicom_image_DX, ) -> None: """ GIVEN a DICOM dataset @@ -73,9 +74,9 @@ def 
test_validation_fails_after_invalid_tag_modification( THEN validation should return a non-empty list of errors """ validator = DicomValidator() - validator.validate_original(vanilla_single_dicom_image_DX) - del vanilla_single_dicom_image_DX.PatientName - validation_result = validator.validate_anonymised(vanilla_single_dicom_image_DX) + validator.validate_original(vanilla_dicom_image_DX) + del vanilla_dicom_image_DX.PatientName + validation_result = validator.validate_anonymised(vanilla_dicom_image_DX) assert len(validation_result) == 1 assert "Patient" in validation_result.keys() diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 85c08db8b..78cb2f87d 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -17,24 +17,27 @@ import re from pathlib import Path import logging +import typing import nibabel import numpy as np import pydicom import pytest import sqlalchemy +from pytest_check import check from core.db.models import Image from core.dicom_tags import ( - DICOM_TAG_PROJECT_NAME, PrivateDicomTag, add_private_tag, create_private_tag, ) from core.project_config import load_project_config, load_tag_operations +from core.project_config.pixl_config_model import load_config_and_validate from decouple import config -from pixl_dcmd._dicom_helpers import get_study_info +from pixl_dcmd.dicom_helpers import get_study_info from pixl_dcmd.main import ( + anonymise_dicom_and_update_db, _anonymise_dicom_from_scheme, anonymise_and_validate_dicom, anonymise_dicom, @@ -43,28 +46,32 @@ ) from pytest_pixl.dicom import generate_dicom_dataset from pytest_pixl.helpers import run_subprocess -from conftest import ids_for_parameterised_test + +if typing.TYPE_CHECKING: + from core.project_config.pixl_config_model import PixlConfig PROJECT_CONFIGS_DIR = Path(config("PROJECT_CONFIGS_DIR")) TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" @pytest.fixture(scope="module") -def tag_scheme() -> list[dict]: +def tag_scheme(test_project_config: PixlConfig) -> list[dict]: """Base tag scheme for testing.""" - tag_ops = load_tag_operations(load_project_config(TEST_PROJECT_SLUG)) + tag_ops = load_tag_operations(test_project_config) return tag_ops.base[0] -def _mri_diffusion_tags(manufacturer: str = "Philips") -> list[PrivateDicomTag]: +def _get_mri_diffusion_tags( + config: PixlConfig, + manufacturer: str, +) -> list[PrivateDicomTag]: """ Private DICOM tags for testing the anonymisation process. These tags from `/projects/configs/tag-operations/manufacturer-overrides/mri-diffusion.yaml` so we can test whether the manufacturer overrides work during anonymisation """ - project_config = load_project_config(TEST_PROJECT_SLUG) - tag_ops = load_tag_operations(project_config) - mri_diffusion_overrides = tag_ops.manufacturer_overrides[0] + tag_ops = load_tag_operations(config) + mri_diffusion_overrides = tag_ops.manufacturer_overrides[1] manufacturer_overrides = [ override @@ -79,7 +86,7 @@ def _mri_diffusion_tags(manufacturer: str = "Philips") -> list[PrivateDicomTag]: @pytest.fixture() -def mri_diffusion_dicom_image() -> pydicom.Dataset: +def mri_diffusion_dicom_image(test_project_config: PixlConfig) -> pydicom.Dataset: """ A DICOM image with diffusion data to test the anonymisation process. 
Private tags were added to match the tag operations defined in the project config, so we can @@ -87,18 +94,12 @@ def mri_diffusion_dicom_image() -> pydicom.Dataset: """ manufacturer = "Philips" ds = generate_dicom_dataset(Manufacturer=manufacturer, Modality="DX") - tags = _mri_diffusion_tags(manufacturer) + tags = _get_mri_diffusion_tags( + config=test_project_config, manufacturer=manufacturer + ) for tag in tags: add_private_tag(ds, tag) - # Make sure the project name tag is added for anonymisation to work - add_private_tag(ds, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = ds.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - ds[block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id)].value = TEST_PROJECT_SLUG - return ds @@ -113,24 +114,28 @@ def test_enforce_allowlist_removes_overlay_plane() -> None: assert (0x6000, 0x3000) not in ds -def test_anonymisation(vanilla_single_dicom_image_DX: pydicom.Dataset) -> None: +def test_anonymisation( + vanilla_dicom_image_DX: pydicom.Dataset, + test_project_config: PixlConfig, +) -> None: """ Test whether anonymisation works as expected on a vanilla DICOM dataset """ - orig_patient_id = vanilla_single_dicom_image_DX.PatientID - orig_patient_name = vanilla_single_dicom_image_DX.PatientName - orig_study_date = vanilla_single_dicom_image_DX.StudyDate + orig_patient_id = vanilla_dicom_image_DX.PatientID + orig_patient_name = vanilla_dicom_image_DX.PatientName + orig_study_date = vanilla_dicom_image_DX.StudyDate - anonymise_dicom(vanilla_single_dicom_image_DX) + anonymise_dicom(vanilla_dicom_image_DX, config=test_project_config) - assert vanilla_single_dicom_image_DX.PatientID != orig_patient_id - assert vanilla_single_dicom_image_DX.PatientName != orig_patient_name - assert vanilla_single_dicom_image_DX.StudyDate != orig_study_date + assert vanilla_dicom_image_DX.PatientID != orig_patient_id + assert vanilla_dicom_image_DX.PatientName != orig_patient_name + assert vanilla_dicom_image_DX.StudyDate != orig_study_date def test_anonymise_unimplemented_tag( - vanilla_single_dicom_image_DX: pydicom.Dataset, + vanilla_dicom_image_DX: pydicom.Dataset, + test_project_config: PixlConfig, ) -> None: """ GIVEN DICOM data with an OB data type tag within a sequence @@ -145,20 +150,20 @@ def test_anonymise_unimplemented_tag( nested_block.add_new(0x0011, "OB", b"") # create private sequence tag with the nested dataset - block = vanilla_single_dicom_image_DX.private_block( - 0x0013, "VR OB CREATOR", create=True - ) + block = vanilla_dicom_image_DX.private_block(0x0013, "VR OB CREATOR", create=True) block.add_new(0x0010, "SQ", [nested_ds]) - anonymise_dicom(vanilla_single_dicom_image_DX) + anonymise_dicom(vanilla_dicom_image_DX, config=test_project_config) - assert (0x0013, 0x0010) in vanilla_single_dicom_image_DX - assert (0x0013, 0x1010) in vanilla_single_dicom_image_DX - sequence = vanilla_single_dicom_image_DX[(0x0013, 0x1010)] + assert (0x0013, 0x0010) in vanilla_dicom_image_DX + assert (0x0013, 0x1010) in vanilla_dicom_image_DX + sequence = vanilla_dicom_image_DX[(0x0013, 0x1010)] assert (0x0013, 0x1011) not in sequence[0] -def test_anonymise_and_validate_as_external_user() -> None: +def test_anonymise_and_validate_as_external_user( + test_project_config: PixlConfig, +) -> None: """ GIVEN an example MR dataset and configuration to anonymise this WHEN the anonymisation and validation is called not using PIXL infrastructure @@ -170,18 +175,24 @@ def test_anonymise_and_validate_as_external_user() 
-> None: dataset_path = pydicom.data.get_testdata_file( "MR-SIEMENS-DICOM-WithOverlays.dcm", download=True ) + dataset = pydicom.dcmread(dataset_path) + config_path = ( pathlib.Path(__file__).parents[2] / "projects/configs/test-external-user.yaml" ) - dataset = pydicom.dcmread(dataset_path) - validation_issues = anonymise_and_validate_dicom( - dataset, config_path=config_path, synchronise_pixl_db=False - ) + config = load_config_and_validate(config_path) + + validation_issues = anonymise_and_validate_dicom(dataset, config=config) assert validation_issues == {} assert dataset != pydicom.dcmread(dataset_path) +def ids_for_parameterised_test(val: pathlib.Path) -> str: + """Generate test ID for parameterised tests""" + return str(val.stem) + + @pytest.mark.parametrize( ("yaml_file"), PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test ) @@ -192,23 +203,24 @@ def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: WHEN the anonymisation and validation process is run THEN the dataset should be anonymised and validated without any warnings or errors """ - caplog.clear() caplog.set_level(logging.WARNING) config = load_project_config(yaml_file.stem) - modality = config.project.modalities[0] - dicom_image = request.getfixturevalue(f"vanilla_dicom_image_{modality}") - config_path = pathlib.Path(yaml_file) - - validation_errors = anonymise_and_validate_dicom( - dicom_image, config_path=config_path, synchronise_pixl_db=True - ) - - assert "WARNING" not in [record.levelname for record in caplog.records] - assert not validation_errors + for modality in config.project.modalities: + caplog.clear() + dicom_image = generate_dicom_dataset(Modality=modality) + validation_errors = anonymise_and_validate_dicom( + dicom_image, + config=config, + ) + with check: + assert "WARNING" not in [record.levelname for record in caplog.records] + assert not validation_errors +@pytest.mark.usefixtures() def test_anonymisation_with_overrides( - mri_diffusion_dicom_image: pydicom.Dataset, row_for_single_dicom_testing + mri_diffusion_dicom_image: pydicom.Dataset, + test_project_config: PixlConfig, ) -> None: """ Test that the anonymisation process works with manufacturer overrides. 
@@ -223,7 +235,7 @@ def test_anonymisation_with_overrides( original_patient_id = mri_diffusion_dicom_image.PatientID original_private_tag = mri_diffusion_dicom_image[(0x2001, 0x1003)] - anonymise_dicom(mri_diffusion_dicom_image) + anonymise_dicom(mri_diffusion_dicom_image, config=test_project_config) # Whitelisted tags should still be present assert (0x0010, 0x0020) in mri_diffusion_dicom_image @@ -232,27 +244,26 @@ def test_anonymisation_with_overrides( assert mri_diffusion_dicom_image[(0x2001, 0x1003)] == original_private_tag -def test_image_already_exported_throws(rows_in_session, exported_dicom_dataset): +@pytest.mark.usefixtures("rows_in_session") +def test_image_already_exported_throws(test_project_config, exported_dicom_dataset): """ GIVEN a dicom image which has no un-exported rows in the pipeline database WHEN the dicom tag scheme is applied THEN an exception will be thrown as """ - # Make sure the project name tag is added for anonymisation to work - add_private_tag(exported_dicom_dataset, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = exported_dicom_dataset.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - exported_dicom_dataset[ - block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id) - ].value = TEST_PROJECT_SLUG with pytest.raises(sqlalchemy.exc.NoResultFound): - anonymise_dicom(exported_dicom_dataset) + anonymise_dicom_and_update_db( + exported_dicom_dataset, + config=test_project_config, + ) def test_pseudo_identifier_processing( - rows_in_session, monkeypatch, exported_dicom_dataset, not_exported_dicom_dataset + rows_in_session, + monkeypatch, + exported_dicom_dataset, + not_exported_dicom_dataset, + test_project_config, ): """ GIVEN a dicom image that hasn't been exported in the pipeline db @@ -286,7 +297,11 @@ def fake_uid(cls): mrn = exported_study_info.mrn fake_hash = "-".join(list(mrn)) print("fake_hash = ", fake_hash) - anonymise_dicom(not_exported_dicom_dataset) + + anonymise_dicom_and_update_db( + not_exported_dicom_dataset, config=test_project_config + ) + image = ( rows_in_session.query(Image) .filter(Image.accession_number == not_exported_study_info.accession_number) @@ -299,7 +314,9 @@ def fake_uid(cls): def test_pseudo_patient_id_processing( - row_for_testing_image_with_pseudo_patient_id, not_exported_dicom_dataset + row_for_testing_image_with_pseudo_patient_id, + not_exported_dicom_dataset, + test_project_config, ): """ GIVEN an `Image` entity in the database which has a `pseudo_patient_id` set @@ -318,7 +335,9 @@ def test_pseudo_patient_id_processing( != original_image.pseudo_patient_id ) - anonymise_dicom(not_exported_dicom_dataset) + anonymise_dicom_and_update_db( + not_exported_dicom_dataset, config=test_project_config + ) anonymised_image: Image = ( row_for_testing_image_with_pseudo_patient_id.query(Image) @@ -333,7 +352,11 @@ def test_pseudo_patient_id_processing( ) -def test_no_pseudo_patient_id_processing(rows_in_session, not_exported_dicom_dataset): +def test_no_pseudo_patient_id_processing( + rows_in_session, + not_exported_dicom_dataset, + test_project_config, +): """ GIVEN an `Image` entity in the database which doesn't have a `pseudo_patient_id` set WHEN the matching DICOM data is anonymised @@ -342,7 +365,9 @@ def test_no_pseudo_patient_id_processing(rows_in_session, not_exported_dicom_dat """ study_info = get_study_info(not_exported_dicom_dataset) - anonymise_dicom(not_exported_dicom_dataset) + anonymise_dicom_and_update_db( + not_exported_dicom_dataset, 
config=test_project_config + ) anonymised_image: Image = ( rows_in_session.query(Image) @@ -382,9 +407,7 @@ def dicom_series_to_exclude() -> list[pydicom.Dataset]: def _make_dicom(series_description) -> pydicom.Dataset: - ds = generate_dicom_dataset(SeriesDescription=series_description) - add_private_tag(ds, DICOM_TAG_PROJECT_NAME, "test-extract-uclh-omop-cdm") - return ds + return generate_dicom_dataset(SeriesDescription=series_description) def test_should_exclude_series(dicom_series_to_exclude, dicom_series_to_keep): @@ -396,7 +419,9 @@ def test_should_exclude_series(dicom_series_to_exclude, dicom_series_to_keep): def test_can_nifti_convert_post_anonymisation( - row_for_single_dicom_testing, tmp_path, directory_of_mri_dicoms, tag_scheme + tmp_path, + directory_of_mri_dicoms, + tag_scheme, ): """Can a DICOM image that has passed through our tag processing be converted to NIFTI""" # Create a directory to store anonymised DICOM files diff --git a/pixl_dcmd/tests/test_tag_schemes.py b/pixl_dcmd/tests/test_tag_schemes.py index 108bb1e39..442bf6ec0 100644 --- a/pixl_dcmd/tests/test_tag_schemes.py +++ b/pixl_dcmd/tests/test_tag_schemes.py @@ -16,6 +16,7 @@ from pathlib import Path import pytest +import pytest_check from core.project_config import load_project_config from core.project_config.tag_operations import TagOperations, load_tag_operations from decouple import config @@ -39,7 +40,7 @@ def test_merge_base_only_tags(base_only_tag_scheme): THEN the result should be the same as the base file """ tags = merge_tag_schemes(base_only_tag_scheme) - expected = [*base_only_tag_scheme.base[0], *base_only_tag_scheme.base[1]] + expected = [tag for base in base_only_tag_scheme.base for tag in base] count_tags = dict() for tag in expected: key = f"{tag['group']:04x},{tag['element']:04x}" @@ -49,9 +50,9 @@ def test_merge_base_only_tags(base_only_tag_scheme): count_tags[key] = 1 for key, values in count_tags.items(): - assert ( - values == 1 - ), f"{key} is replicated please check config files to remove it" + pytest_check.equal( + values, 1, msg=f"{key} is replicated please check config files to remove it" + ) assert tags == expected diff --git a/pixl_export/README.md b/pixl_export/README.md index 89481df38..9c7bc85c5 100644 --- a/pixl_export/README.md +++ b/pixl_export/README.md @@ -3,7 +3,7 @@ The Export API provides HTTP endpoints to control the copying of EHR data from the OMOP extract to its destination (eg. FTPS). It also uploads DICOM data to its destination after it has been processed by the Imaging API and orthanc(s). -It no longer accepts messages from rabbitmq. +It no longer accepts messages from rabbitmq. ## Installation @@ -38,23 +38,36 @@ pytest ## Usage -Usage should be from the CLI driver, which calls the HTTP endpoints. +Usage should be from the [CLI driver](../cli/README.md), which calls the HTTP endpoints. ## Notes -- The height/weight/GCS value is extracted only within a 24 h time window +- The height/weight/GCS value is extracted only within a 24 h time window ## 'PIXL/pixl_export' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[src](./src/README.md) [tests](./tests/README.md) -### Files +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
+
+</details>

-pyproject.toml
+
-README.md diff --git a/pixl_export/pyproject.toml b/pixl_export/pyproject.toml index 08bd1726e..16a5b74a9 100644 --- a/pixl_export/pyproject.toml +++ b/pixl_export/pyproject.toml @@ -8,8 +8,8 @@ requires-python = ">=3.10" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ "core==0.2.0rc0", - "uvicorn==0.30.4", - "azure-storage-blob==12.21.0", + "uvicorn==0.32.1", + "azure-storage-blob==12.24.0", ] [project.optional-dependencies] diff --git a/pixl_export/src/README.md b/pixl_export/src/README.md index 06ab4efea..3e4e55566 100644 --- a/pixl_export/src/README.md +++ b/pixl_export/src/README.md @@ -1,6 +1,24 @@ ## 'PIXL/pixl_export/src' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[pixl_export](./pixl_export/README.md) +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/pixl_export/src/pixl_export/README.md b/pixl_export/src/pixl_export/README.md index c21da40db..a40140515 100644 --- a/pixl_export/src/pixl_export/README.md +++ b/pixl_export/src/pixl_export/README.md @@ -1,12 +1,17 @@ ## 'PIXL/pixl_export/src/pixl_export' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-main.py
-_databases.py
-_queries.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| main.py | README.md |
+| _databases.py | |
+| _queries.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_export/src/pixl_export/main.py b/pixl_export/src/pixl_export/main.py index ad98fc442..1333df0ac 100644 --- a/pixl_export/src/pixl_export/main.py +++ b/pixl_export/src/pixl_export/main.py @@ -21,13 +21,13 @@ datetime, # noqa: TCH003, always import datetime otherwise pydantic throws error ) from pathlib import Path +from typing import Annotated from core.exports import ParquetExport from core.rest_api.router import router from core.uploader import get_uploader -from core.uploader._orthanc import get_tags_by_study from decouple import config # type: ignore [import-untyped] -from fastapi import FastAPI, HTTPException +from fastapi import Body, FastAPI, HTTPException from fastapi.responses import JSONResponse from loguru import logger from pydantic import BaseModel @@ -61,12 +61,6 @@ class ExportPatientData(BaseModel): output_dir: Path = EXPORT_API_EXPORT_ROOT_DIR -class StudyData(BaseModel): - """Uniquely identify a study when talking to the API""" - - study_id: str - - @app.post( "/export-patient-data", summary="Copy all matching radiology reports in the PIXL DB to a parquet file \ @@ -97,16 +91,16 @@ def export_patient_data(export_params: ExportPatientData) -> None: "/export-dicom-from-orthanc", summary="Download a zipped up study from orthanc anon and upload it via the appropriate route", ) -def export_dicom_from_orthanc(study_data: StudyData) -> None: +def export_dicom_from_orthanc( + study_id: Annotated[str, Body()], + project_name: Annotated[str, Body()], +) -> None: """ Download zipped up study data from orthanc anon and route it appropriately. Intended only for orthanc-anon to call, as only it knows when its data is ready for download. Because we're post-anonymisation, the "StudyInstanceUID" tag returned is actually the Pseudo Study UID (a randomly selected, but consistent UID). """ - study_id = study_data.study_id - project_slug = get_tags_by_study(study_id).project_slug - - uploader = get_uploader(project_slug) + uploader = get_uploader(project_name) logger.debug("Sending {} via '{}'", study_id, type(uploader).__name__) uploader.upload_dicom_and_update_database(study_id) diff --git a/pixl_export/tests/README.md b/pixl_export/tests/README.md index 607a81078..e308b1232 100644 --- a/pixl_export/tests/README.md +++ b/pixl_export/tests/README.md @@ -1,10 +1,15 @@ ## 'PIXL/pixl_export/tests' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-conftest.py
-docker-compose.yml
-test_app.py
+
+| **Configuration** | **Code** | **User docs** |
+| :--- | :--- | :--- |
+| docker-compose.yml | conftest.py | README.md |
+| | test_app.py | |
+
+</details>
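With `StudyData` removed, `/export-dicom-from-orthanc` now takes two `Annotated[str, Body()]` parameters, so FastAPI expects a JSON object carrying both fields. A hedged sketch of the request orthanc-anon would now make (the host, port, and values are illustrative):

```python
import requests

response = requests.post(
    "http://export-api:8000/export-dicom-from-orthanc",  # illustrative address
    json={"study_id": "orthanc-study-id", "project_name": "my-project"},
    timeout=60,
)
response.raise_for_status()
```

Carrying `project_name` in the body is what lets the endpoint drop the `get_tags_by_study` lookup against orthanc-anon.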
diff --git a/pixl_imaging/README.md b/pixl_imaging/README.md index 26463b37c..0b0f6dbf9 100644 --- a/pixl_imaging/README.md +++ b/pixl_imaging/README.md @@ -20,14 +20,14 @@ If the study has be identified in VNA or PACS, a query to `orthanc-raw` is made exists locally. If it does exist locally, a check is made to ensure all instances exist locally and any missing instances are retrieved. If the study does not exist locally, the entire study is retrieved from the archive. -Once the study and all its instances are in `orthanc-raw`, the study is sent to `orthanc-anon` via a C-STORE +Once the study and all its instances are in `orthanc-raw`, the study is sent to `orthanc-anon` via a C-STORE operation. >[!NOTE] > When querying the archives, if we do not know the `StudyInstanceUID` we will query by MRN and Accession Number. > This may result in multiple studies being found in the archives. In this instance, all studies returned by the > query will be retrieved and sent to Orthanc Anon for anonymisation. In Orthanc Anon, the studies will be combined -> into a single study as they share the same MRN and Accession Number. +> into a single study as they share the same MRN and Accession Number. ## Installation @@ -63,7 +63,11 @@ The `SKIP_ALEMBIC` environmental variable is used to control whether migrations ## 'PIXL/pixl_imaging' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[alembic](./alembic/README.md) @@ -73,9 +77,19 @@ The `SKIP_ALEMBIC` environmental variable is used to control whether migrations [tests](./tests/README.md) -### Files +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
+
+</details>

-pyproject.toml
+
-README.md diff --git a/pixl_imaging/alembic/README.md b/pixl_imaging/alembic/README.md index d7054661f..4fefece03 100644 --- a/pixl_imaging/alembic/README.md +++ b/pixl_imaging/alembic/README.md @@ -10,9 +10,9 @@ as the entry point is [migrate_and_run.sh](../scripts/migrate_and_run.sh). ## Creating a new migration after editing the database model -For convenience, the [autogenerate-migration.sh](autogenerate-migration.sh) has been made. +For convenience, the [autogenerate-migration.sh](autogenerate-migration.sh) script has been created. -Which you can run giving a name for the migration like this: +It can be run giving a name for the migration like this: ```shell cd alembic @@ -22,7 +22,7 @@ cd alembic - This creates a postgres container - Runs the existing migrations - Checks for differences between the SQLAlchemy [models](../../pixl_core/src/core/db/models.py) - and creates a new migration in [versions](versions) + and creates a new migration in the [versions](versions) directory - Takes down the postgres container There's a couple of manual steps: @@ -32,21 +32,31 @@ There's a couple of manual steps: ## 'PIXL/pixl_imaging/alembic' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[versions](./versions/README.md) -### Files +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>

-alembic.ini
-autogenerate-migration.sh
-env.py
-README.md -script.py.mako diff --git a/pixl_imaging/alembic/versions/README.md b/pixl_imaging/alembic/versions/README.md index ea043622a..67da1fc1d 100644 --- a/pixl_imaging/alembic/versions/README.md +++ b/pixl_imaging/alembic/versions/README.md @@ -1,12 +1,17 @@ ## 'PIXL/pixl_imaging/alembic/versions' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-83dcb3812628_add_study_uid_column_to_image_table.py
-bcaef54e2bfe_create_extract_and_image_tables.py
-cb5ee12a6e20_replace_hashed_id_with_pseudo_study_uid.py
-d947cc715eb1_add_pseudo_patient_id_column_to_image_table.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| 83dcb3812628_add_study_uid_column_to_image_table.py | README.md |
+| bcaef54e2bfe_create_extract_and_image_tables.py | |
+| cb5ee12a6e20_replace_hashed_id_with_pseudo_study_uid.py | |
+| d947cc715eb1_add_pseudo_patient_id_column_to_image_table.py | |
+
+</details>
diff --git a/pixl_imaging/pyproject.toml b/pixl_imaging/pyproject.toml index d3cd0de33..55d6e3992 100644 --- a/pixl_imaging/pyproject.toml +++ b/pixl_imaging/pyproject.toml @@ -8,10 +8,10 @@ requires-python = ">=3.10" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ "core==0.2.0rc0", - "aiohttp==3.10.1", - "alembic==1.13.2", + "aiohttp==3.10.11", + "alembic==1.14.0", "pydicom==2.4.4", - "uvicorn==0.30.4", + "uvicorn==0.32.1", ] [project.optional-dependencies] diff --git a/pixl_imaging/scripts/README.md b/pixl_imaging/scripts/README.md index afd8a5659..ae1e0718a 100644 --- a/pixl_imaging/scripts/README.md +++ b/pixl_imaging/scripts/README.md @@ -1,6 +1,14 @@ ## 'PIXL/pixl_imaging/scripts' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-migrate_and_run.sh
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| migrate_and_run.sh | README.md |
+
+</details>
diff --git a/pixl_imaging/src/README.md b/pixl_imaging/src/README.md index a8f1b4ac5..d56a9acbc 100644 --- a/pixl_imaging/src/README.md +++ b/pixl_imaging/src/README.md @@ -1,6 +1,24 @@ ## 'PIXL/pixl_imaging/src' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[pixl_imaging](./pixl_imaging/README.md) +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/pixl_imaging/src/pixl_imaging/README.md b/pixl_imaging/src/pixl_imaging/README.md index 8250982db..dbe9a22e4 100644 --- a/pixl_imaging/src/pixl_imaging/README.md +++ b/pixl_imaging/src/pixl_imaging/README.md @@ -1,12 +1,17 @@ ## 'PIXL/pixl_imaging/src/pixl_imaging' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-main.py
-_orthanc.py
-_processing.py
-__init__.py
+
+| **Code** | **User docs** |
+| :--- | :--- |
+| main.py | README.md |
+| _orthanc.py | |
+| _processing.py | |
+| __init__.py | |
+
+</details>
diff --git a/pixl_imaging/src/pixl_imaging/_orthanc.py b/pixl_imaging/src/pixl_imaging/_orthanc.py index 079b0b152..08132036d 100644 --- a/pixl_imaging/src/pixl_imaging/_orthanc.py +++ b/pixl_imaging/src/pixl_imaging/_orthanc.py @@ -64,9 +64,16 @@ async def get_local_study(self, study_id: str) -> Any: """Query local Orthanc instance for study.""" return await self._get(f"/studies/{study_id}") + async def get_local_study_statistics(self, study_id: str) -> Any: + """Query local Orthanc instance for study statistics.""" + return await self._get(f"/studies/{study_id}/statistics") + async def get_local_study_instances(self, study_id: str) -> Any: """Get the instances of a study.""" - return await self._get(f"/studies/{study_id}/instances") + return await self._get( + f"/studies/{study_id}/instances?short=true", + timeout=self.dicom_timeout, # this API call can sometimes take several minutes + ) async def query_remote(self, data: dict, modality: str) -> Optional[str]: """Query a particular modality, available from this node""" @@ -100,32 +107,6 @@ async def get_remote_query_answer_instances(self, query_id: str, answer_id: str) ) return response["ID"] - async def modify_private_tags_by_study( - self, - *, - study_id: str, - private_creator: str, - tag_replacement: dict, - ) -> None: - # According to the docs, you can't modify tags for an instance using the instance API - # (the best you can do is download a modified version), so do it via the studies API. - # KeepSource=false needed to stop it making a copy - # https://orthanc.uclouvain.be/api/index.html#tag/Studies/paths/~1studies~1{id}~1modify/post - response = await self._post( - f"/studies/{study_id}/modify", - { - "PrivateCreator": private_creator, - "Permissive": False, - "Replace": tag_replacement, - "Asynchronous": True, - "Force": True, - "Keep": ["StudyInstanceUID", "SeriesInstanceUID", "SOPInstanceUID"], - }, - ) - logger.debug("Modify studies Job: {}", response) - job_id = str(response["ID"]) - await self.wait_for_job_success_or_raise(job_id, "modify", timeout=self.dicom_timeout) - async def retrieve_study_from_remote(self, query_id: str) -> str: response = await self._post( f"/queries/{query_id}/retrieve", @@ -180,13 +161,15 @@ async def job_state(self, job_id: str) -> Any: # See: https://book.orthanc-server.com/users/advanced-rest.html#jobs-monitoring return await self._get(f"/jobs/{job_id}") - async def _get(self, path: str) -> Any: + async def _get(self, path: str, timeout: int | None = None) -> Any: + # Optionally override default http timeout + http_timeout = timeout or self.http_timeout async with ( aiohttp.ClientSession() as session, session.get( f"{self._url}{path}", auth=self._auth, - timeout=self.http_timeout, + timeout=http_timeout, ) as response, ): return await _deserialise(response) @@ -288,7 +271,10 @@ def __init__(self) -> None: ) async def notify_anon_to_retrieve_study_resources( - self, orthanc_raw: PIXLRawOrthanc, resource_ids: list[str] + self, + orthanc_raw: PIXLRawOrthanc, + resource_ids: list[str], + project_name: str, ) -> Any: """Notify Orthanc Anon of study resources to retrieve from Orthanc Raw.""" resources_info = [ @@ -301,5 +287,9 @@ async def notify_anon_to_retrieve_study_resources( await self._post( path="/import-from-raw", - data={"ResourceIDs": resource_ids, "StudyInstanceUIDs": study_uids}, + data={ + "ResourceIDs": resource_ids, + "StudyInstanceUIDs": study_uids, + "ProjectName": project_name, + }, ) diff --git a/pixl_imaging/src/pixl_imaging/_processing.py 
b/pixl_imaging/src/pixl_imaging/_processing.py index 8a174cbd9..a1a92274d 100644 --- a/pixl_imaging/src/pixl_imaging/_processing.py +++ b/pixl_imaging/src/pixl_imaging/_processing.py @@ -19,7 +19,6 @@ from typing import TYPE_CHECKING, Any, Optional from zoneinfo import ZoneInfo -from core.dicom_tags import DICOM_TAG_PROJECT_NAME from core.exceptions import PixlDiscardError, PixlOutOfHoursError, PixlStudyNotInPrimaryArchiveError from decouple import config @@ -82,7 +81,6 @@ async def _process_message( - retrieve the study from the VNA / PACS Then: - - set the project name tag for the study if it's not already set - send the study to Orthanc Anon if ORTHANC_AUTOROUTE_RAW_TO_ANON is True - if the C-STORE operation to Orthanc Anon is successful, and ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT is True, send the study to the appropriate destination @@ -120,39 +118,28 @@ async def _process_message( modality=archive.value, ) - # Now that study has arrived in orthanc raw, we can set its project name tag via the API - logger.debug("Get existing study before setting project name") resources = await _get_study_resources( orthanc_raw=orthanc_raw, study=study, ) - for resource in resources: - if not await _project_name_is_correct( - project_name=study.message.project_name, - resource=resource, - ): - await _add_project_to_study( - project_name=study.message.project_name, - orthanc_raw=orthanc_raw, - study=resource["ID"], - ) - if not orthanc_raw.autoroute_to_anon: logger.debug("Auto-routing to Orthanc Anon is not enabled. Not sending study {}", resources) return await orthanc_anon.notify_anon_to_retrieve_study_resources( - orthanc_raw=orthanc_raw, resource_ids=[resource["ID"] for resource in resources] + orthanc_raw=orthanc_raw, + resource_ids=resources, + project_name=study.message.project_name, ) async def _get_study_resources( orthanc_raw: PIXLRawOrthanc, study: ImagingStudy, -) -> list[dict]: +) -> list[str]: """Get a list of existing resources for a study in Orthanc Raw.""" - existing_resources: list[dict] = await study.query_local(orthanc_raw, project_tag=True) + existing_resources: list[str] = await study.query_local(orthanc_raw) logger.debug( 'Found {} existing resources for study "{}"', @@ -163,40 +150,6 @@ async def _get_study_resources( return existing_resources -async def _project_name_is_correct( - project_name: str, - resource: dict, -) -> bool: - """ - Check if the project name is different from the project tags. - - Returns True if the project name is in the project tags, False otherwise. 
- """ - project_tags = ( - resource["RequestedTags"].get(DICOM_TAG_PROJECT_NAME.tag_nickname), - resource["RequestedTags"].get( - "Unknown Tag & Data" - ), # Fallback for testing where we're not using the entire plugin, remains undefined - ) - return project_name in project_tags - - -async def _add_project_to_study( - project_name: str, - orthanc_raw: PIXLRawOrthanc, - study: str, -) -> None: - logger.debug("Adding private tag to study ID {}", study) - await orthanc_raw.modify_private_tags_by_study( - study_id=study, - private_creator=DICOM_TAG_PROJECT_NAME.creator_string, - tag_replacement={ - # The tag here needs to be defined in orthanc's dictionary - DICOM_TAG_PROJECT_NAME.tag_nickname: project_name, - }, - ) - - async def _find_study_in_archive_or_raise( orthanc_raw: Orthanc, study: ImagingStudy, @@ -299,7 +252,7 @@ async def _retrieve_study(orthanc_raw: Orthanc, study_query_id: str) -> None: async def _retrieve_missing_instances( - resources: list[dict], + resources: list[str], orthanc_raw: Orthanc, study: ImagingStudy, study_query_id: str, @@ -323,22 +276,16 @@ async def _retrieve_missing_instances( async def _get_missing_instances( - orthanc_raw: Orthanc, study: ImagingStudy, resources: list[dict], study_query_id: str + orthanc_raw: Orthanc, study: ImagingStudy, resources: list[str], study_query_id: str ) -> list[dict[str, str]]: """ Check if any study instances are missing from Orthanc Raw. Return a list of missing instance UIDs (empty if none missing) """ - # First get all SOPInstanceUIDs for the study that are in Orthanc Raw - orthanc_raw_sop_instance_uids = [] - for resource in resources: - study_instances = await orthanc_raw.get_local_study_instances(study_id=resource["ID"]) - orthanc_raw_sop_instance_uids.extend( - [instance["MainDicomTags"]["SOPInstanceUID"] for instance in study_instances] - ) + missing_instances: list[dict[str, str]] = [] - # Now query the VNA / PACS for the study instances + # First query the VNA / PACS for the study instances study_query_answers = await orthanc_raw.get_remote_query_answers(study_query_id) instances_queries_and_answers = [] for answer_id in study_query_answers: @@ -349,13 +296,25 @@ async def _get_missing_instances( instances_queries_and_answers.extend( [(instances_query_id, answer) for answer in instances_query_answers] ) + num_remote_instances = len(instances_queries_and_answers) - missing_instances: list[dict[str, str]] = [] + num_local_instances = 0 + for resource in resources: + study_statistics = await orthanc_raw.get_local_study_statistics(study_id=resource) + num_local_instances += int(study_statistics["CountInstances"]) - if len(instances_queries_and_answers) == len(orthanc_raw_sop_instance_uids): + if num_remote_instances == num_local_instances: logger.debug("No missing instances for study {}", study.message.study_uid) return missing_instances + # Get all SOPInstanceUIDs for the study that are in Orthanc Raw + orthanc_raw_sop_instance_uids = [] + for resource in resources: + study_instances = await orthanc_raw.get_local_study_instances(study_id=resource) + orthanc_raw_sop_instance_uids.extend( + [instance["MainDicomTags"]["0008,0018"] for instance in study_instances] + ) + # If the SOPInstanceUID is not in the list of instances in Orthanc Raw # retrieve the instance from the VNA / PACS query_tags = ["0020,000d", "0020,000e", "0008,0018"] @@ -414,20 +373,10 @@ def orthanc_query_dict(self) -> dict: }, } - @property - def query_project_name(self) -> dict: - """Dictionary to query a study, returning the PIXL_PROJECT tags for each 
study.""" - return { - "RequestedTags": [DICOM_TAG_PROJECT_NAME.tag_nickname], - "Expand": True, - } - - async def query_local(self, node: Orthanc, *, project_tag: bool = False) -> Any: - """Does this study exist in an Orthanc instance/node, optionally query for project tag.""" + async def query_local(self, node: Orthanc) -> Any: + """Does this study exist in an Orthanc instance/node.""" if self.message.study_uid: uid_query = self.orthanc_uid_query_dict - if project_tag: - uid_query = uid_query | self.query_project_name query_response = await node.query_local(uid_query) if query_response: @@ -444,7 +393,5 @@ async def query_local(self, node: Orthanc, *, project_tag: bool = False) -> Any: ) mrn_accession_query = self.orthanc_query_dict - if project_tag: - mrn_accession_query = mrn_accession_query | self.query_project_name return await node.query_local(mrn_accession_query) diff --git a/pixl_imaging/tests/README.md b/pixl_imaging/tests/README.md index 8903dd81c..98447e8ec 100644 --- a/pixl_imaging/tests/README.md +++ b/pixl_imaging/tests/README.md @@ -1,14 +1,25 @@ ## 'PIXL/pixl_imaging/tests' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[orthanc_raw_config](./orthanc_raw_config/README.md) -### Files +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>

-conftest.py
-docker-compose.yml
-test_imaging_processing.py
+
+| **Configuration** | **Code** | **User docs** |
+| :--- | :--- | :--- |
+| docker-compose.yml | conftest.py | README.md |
+| | test_imaging_processing.py | |
+
+</details>
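The reworked `_get_missing_instances` avoids the expensive per-instance listing when it can: it first compares the number of instances the VNA/PACS reported against the counts from `/studies/{id}/statistics`, and only enumerates SOPInstanceUIDs when the totals disagree. A condensed sketch of that fast path (the function name and body here are illustrative; the calls follow the diff):

```python
async def counts_match(orthanc_raw, resources: list[str], num_remote_instances: int) -> bool:
    """Cheap pre-check: compare instance counts before fetching full instance lists."""
    num_local_instances = 0
    for resource in resources:
        stats = await orthanc_raw.get_local_study_statistics(study_id=resource)
        num_local_instances += int(stats["CountInstances"])
    # Equal counts are treated as "no missing instances"; only a mismatch
    # triggers the slow SOPInstanceUID-by-SOPInstanceUID comparison.
    return num_remote_instances == num_local_instances
```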
diff --git a/pixl_imaging/tests/docker-compose.yml b/pixl_imaging/tests/docker-compose.yml index 6d247a52d..a72c8b865 100644 --- a/pixl_imaging/tests/docker-compose.yml +++ b/pixl_imaging/tests/docker-compose.yml @@ -68,9 +68,11 @@ services: orthanc-raw: build: context: ../../ - dockerfile: ./docker/orthanc-raw/Dockerfile + dockerfile: ./docker/orthanc/Dockerfile + target: pixl_orthanc_raw args: PIXL_DICOM_TRANSFER_TIMEOUT: 30 + ORTHANC_DIR: orthanc-raw environment: ORTHANC_NAME: "PIXL: Raw" ORTHANC_USERNAME: "orthanc" diff --git a/pixl_imaging/tests/orthanc_raw_config/README.md b/pixl_imaging/tests/orthanc_raw_config/README.md index 3b95d29ca..3deb7691a 100644 --- a/pixl_imaging/tests/orthanc_raw_config/README.md +++ b/pixl_imaging/tests/orthanc_raw_config/README.md @@ -1,8 +1,15 @@ ## 'PIXL/pixl_imaging/tests/orthanc_raw_config' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-dicom.json
-orthanc.json
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| dicom.json | README.md |
+| orthanc.json | |
+
+</details>
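Relatedly, the instance listing now requests `?short=true`, which makes Orthanc key `MainDicomTags` by hexadecimal `group,element` pairs rather than by tag name; hence the lookup via `"0008,0018"` (SOPInstanceUID). A small sketch of the slow fallback path (the wrapper function is hypothetical):

```python
async def local_sop_instance_uids(orthanc_raw, resources: list[str]) -> list[str]:
    """Collect SOPInstanceUIDs across a study's resources in Orthanc Raw."""
    uids: list[str] = []
    for resource in resources:
        instances = await orthanc_raw.get_local_study_instances(study_id=resource)
        # short=true responses are keyed by "group,element", not tag name
        uids.extend(instance["MainDicomTags"]["0008,0018"] for instance in instances)
    return uids
```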
diff --git a/pixl_imaging/tests/orthanc_raw_config/orthanc.json b/pixl_imaging/tests/orthanc_raw_config/orthanc.json index 8fac67c5d..00cc51049 100644 --- a/pixl_imaging/tests/orthanc_raw_config/orthanc.json +++ b/pixl_imaging/tests/orthanc_raw_config/orthanc.json @@ -1,7 +1,4 @@ { - "Dictionary": { - "000d,1001": ["LO", "UCLHPIXLProjectName", 1, 1, "UCLH PIXL"] - }, "DefaultPrivateCreator" : "UCLH PIXL", "Name" : "${ORTHANC_NAME}", "RemoteAccessAllowed" : true, diff --git a/postgres/README.md b/postgres/README.md index e532b9469..f17a85177 100644 --- a/postgres/README.md +++ b/postgres/README.md @@ -8,11 +8,17 @@ See [/pixl_imaging/alembic](../pixl_imaging/alembic) for how these are defined ## 'PIXL/postgres' Directory Contents -### Files +
<details>
+<summary><h3>Files</h3></summary>

-pixl-db_init.sh
+
+| **Configuration** | **Code** | **User docs** |
+| :--- | :--- | :--- |
+| postgres.conf | pixl-db_init.sh | README.md |
+
+</details>
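Several of the changes above serve one goal: the project name now travels through the API rather than being stamped into each study as a private DICOM tag (hence the removal of `modify_private_tags_by_study` and of the `Dictionary` entry from `orthanc.json`). The handover to orthanc-anon reduces to a single POST, sketched here after `notify_anon_to_retrieve_study_resources`; the free-standing function is illustrative:

```python
async def notify_anon(orthanc_anon, resource_ids: list[str], study_uids: list[str], project_name: str) -> None:
    """Sketch: hand orthanc-anon the resources plus the project name in the body."""
    await orthanc_anon._post(
        path="/import-from-raw",
        data={
            "ResourceIDs": resource_ids,
            "StudyInstanceUIDs": study_uids,
            "ProjectName": project_name,
        },
    )
```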
-postgres.conf -README.md diff --git a/projects/README.md b/projects/README.md index dcfdcd654..f393a0d79 100644 --- a/projects/README.md +++ b/projects/README.md @@ -1,8 +1,26 @@ ## 'PIXL/projects' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>
+
[configs](./configs/README.md) [exports](./exports/README.md) +
</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/projects/configs/README.md b/projects/configs/README.md index 351680b9b..15420eb30 100644 --- a/projects/configs/README.md +++ b/projects/configs/README.md @@ -1,22 +1,30 @@ ## 'PIXL/projects/configs' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>

-[tag-operations](./tag-operations/README.md)
-
-### Files
-
-ms-pinpoint.yaml
-test-external-user.yaml
-test-extract-uclh-omop-cdm-dicomweb.yaml
-test-extract-uclh-omop-cdm-xnat.yaml
-test-extract-uclh-omop-cdm.yaml
-uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml
-uclh-prostate-mri-external-dataset.yaml
+
+[tag-operations](./tag-operations/README.md)
+
+</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| ms-pinpoint.yaml | README.md |
+| test-external-user.yaml | |
+| test-extract-uclh-omop-cdm-dicomweb.yaml | |
+| test-extract-uclh-omop-cdm-xnat.yaml | |
+| test-extract-uclh-omop-cdm.yaml | |
+| uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml | |
+| uclh-prostate-mri-external-dataset.yaml | |
+
+</details>
diff --git a/projects/configs/prognosis-ai.yaml b/projects/configs/prognosis-ai.yaml new file mode 100644 index 000000000..f1292856e --- /dev/null +++ b/projects/configs/prognosis-ai.yaml @@ -0,0 +1,34 @@ +# Copyright (c) 2024 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project: + name: "prognosis-ai" + modalities: ["MR"] + +tag_operation_files: + base: + - "base.yaml" #Expected base config file for any project + - "mri.yaml" + - "ion-neuro-db.yaml" + manufacturer_overrides: ["mri.yaml"] + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "none" + parquet: "none" diff --git a/projects/configs/tag-operations/README.md b/projects/configs/tag-operations/README.md index 83c53c681..d97782d7a 100644 --- a/projects/configs/tag-operations/README.md +++ b/projects/configs/tag-operations/README.md @@ -1,24 +1,31 @@ ## 'PIXL/projects/configs/tag-operations' Directory Contents -### Subdirectories +
<details>
+<summary><h3>Subdirectories with links to the relevant README</h3></summary>

-[manufacturer-overrides](./manufacturer-overrides/README.md)
-
-### Files
-
-base.yaml
-diffusion-weighted-mri.yaml
-mri.yaml
-ms-pinpoint.yaml
-test-external-user.yaml
-test-extract-uclh-omop-cdm.yaml
-uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml
-xray.yaml
+
+[manufacturer-overrides](./manufacturer-overrides/README.md)
+
+</details>
+
+<details>
+<summary><h3>Files</h3></summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| base.yaml | README.md |
+| diffusion-weighted-mri.yaml | |
+| mri.yaml | |
+| ms-pinpoint.yaml | |
+| test-external-user.yaml | |
+| test-extract-uclh-omop-cdm.yaml | |
+| uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml | |
+| xray.yaml | |
+
+</details>
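The `series_filters` in a project config (such as the new `prognosis-ai.yaml`) feed `_should_exclude_series` via `PixlConfig.is_series_excluded`. A hedged sketch of how a filter list plays out, assuming `PROJECT_CONFIGS_DIR` points at these configs and that matching is done against the `SeriesDescription`:

```python
from core.project_config import load_project_config

config = load_project_config("prognosis-ai")

for description in ("AX T1 mprage", "3-plane localizer"):
    if config.is_series_excluded(description):
        # Matching instances raise PixlSkipInstanceError during anonymisation
        print(f"series would be discarded: {description}")
```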
diff --git a/projects/configs/tag-operations/base.yaml b/projects/configs/tag-operations/base.yaml index 49fc7659a..8b7c3fc58 100644 --- a/projects/configs/tag-operations/base.yaml +++ b/projects/configs/tag-operations/base.yaml @@ -15,17 +15,6 @@ # Default `base.yaml` configuration file with commented links to their respective image modalities # `base.yaml` configuration file is expected to be part of all projects configuration files -#################################### 000d Group ################################### -# -# -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 - op: "keep" -- name: "UCLH Project Name" - group: 0x000d - element: 0x1001 - op: "keep" #################################### 0008 Group ################################### # # @@ -62,7 +51,7 @@ group: 0x0008 element: 0x0020 op: "replace" -- name: "General Study" +- name: "Study Time" group: 0x0008 element: 0x0030 op: "replace" @@ -94,7 +83,7 @@ group: 0x0008 element: 0x1090 op: "keep" -- name: "Referenced Series Sequence" +- name: "Referenced SOP Instance UID" group: 0x0008 element: 0x1155 op: "replace_UID" @@ -106,6 +95,14 @@ group: 0x0008 element: 0x9205 op: "keep" +- name: "Volumetric Properties" + group: 0x0008 + element: 0x9206 + op: "keep" +- name: "Volume Based Calculation Technique" + group: 0x0008 + element: 0x9207 + op: "keep" #################################### 0010 Group ################################### # # @@ -138,16 +135,28 @@ #################################### 0018 Group ################################### # # -- name: "Scan Options Attribute" +- name: "Scan Options" group: 0x0018 element: 0x0022 op: "replace" #CT, MR, X-Ray +- name: Slice Thickness + group: 0x0018 + element: 0x0050 + op: keep - name: "Software Version" group: 0x0018 element: 0x1020 op: "keep" +- name: "Field Of View Dimension" + group: 0x0018 + element: 0x1149 + op: "keep" #CT, MR, PET, US, X-Ray, and RT Images +- name: kVp + group: 0x0018 + element: 0x0060 + op: keep - name: "Focal Spot" group: 0x0018 element: 0x1190 @@ -363,6 +372,7 @@ element: 0x0140 op: "replace_UID" #CT, MR, PET, US, X-Ray, and RT Images + #################################### 3006 Group ################################### # # diff --git a/projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml b/projects/configs/tag-operations/ion-neuro-db.yaml similarity index 69% rename from projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml rename to projects/configs/tag-operations/ion-neuro-db.yaml index c90d6d5ec..ad15c76cb 100644 --- a/projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/tag-operations/ion-neuro-db.yaml @@ -12,17 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Configuration for extracting data from UCLH OMOP CDM DICOM files +# Specific tag operations for ion-neuro-db project -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 +- name: "Acquisition Date" + group: 0x0008 + element: 0x0022 op: "keep" -- name: "VR OB Creator" - group: 0x0013 - element: 0x0010 - op: "keep" -- name: "VR OB sequence" - group: 0x0013 - element: 0x1010 - op: "replace" + + diff --git a/projects/configs/tag-operations/manufacturer-overrides/README.md b/projects/configs/tag-operations/manufacturer-overrides/README.md index b77c531e3..d0c3127b0 100644 --- a/projects/configs/tag-operations/manufacturer-overrides/README.md +++ b/projects/configs/tag-operations/manufacturer-overrides/README.md @@ -1,8 +1,17 @@ -## 'manufacturer-overrides' Directory Contents +## 'PIXL/projects/configs/tag-operations/manufacturer-overrides' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| mri-diffusion.yaml | README.md |
+| mri.yaml | |
+
+</details>
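Each entry in these tag-operation files names a DICOM attribute by its `(group, element)` pair and assigns it an `op` (`keep`, `replace`, `replace_UID`, or `secure-hash` elsewhere in this diff); the project configs later in the diff then layer several such files under `tag_operation_files.base`. A minimal sketch of the idea, assuming a hypothetical `apply_tag_operations` helper (PIXL's real anonymisation logic lives in `pixl_dcmd` and orthanc-anon, not here):

```python
import yaml
from pydicom import Dataset
from pydicom.tag import Tag

def apply_tag_operations(ds: Dataset, operations: list[dict]) -> None:
    """Illustrative only: walk the scheme and act on each tag present."""
    for entry in operations:
        tag = Tag(entry["group"], entry["element"])
        if entry["op"] == "keep" or tag not in ds:
            continue  # "keep" whitelists the element unchanged
        if entry["op"] == "replace":
            ds[tag].value = ""  # blank the value but keep the element
        elif entry["op"] == "replace_UID":
            # a real implementation maps old UIDs to new ones consistently
            ds[tag].value = "2.25.0"

with open("projects/configs/tag-operations/base.yaml") as f:
    apply_tag_operations(Dataset(), yaml.safe_load(f))
```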
-mri-diffusion.yaml -mri.yaml diff --git a/projects/configs/tag-operations/mr-spectroscopy.yaml b/projects/configs/tag-operations/mr-spectroscopy.yaml new file mode 100644 index 000000000..5042d9979 --- /dev/null +++ b/projects/configs/tag-operations/mr-spectroscopy.yaml @@ -0,0 +1,190 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- name: Device Serial Number + group: 0x0018 + element: 0x1000 + op: keep +- name: Content Qualification + group: 0x0018 + element: 0x9004 + op: keep +- name: Pulse Sequence Name + group: 0x0018 + element: 0x9005 + op: keep +- name: Echo Pulse Sequence + group: 0x0018 + element: 0x9008 + op: keep +- name: Multi-planar Excitation + group: 0x0018 + element: 0x9012 + op: keep +- name: Steady State Pulse Sequence + group: 0x0018 + element: 0x9017 + op: keep +- name: Echo Planar Pulse Sequence + group: 0x0018 + element: 0x9018 + op: keep +- name: Spectrally Selected Suppression + group: 0x0018 + element: 0x9025 + op: keep +- name: Geometry of k-Space Traversal + group: 0x0018 + element: 0x9032 + op: keep +- name: Segmented k-Space Traversal + group: 0x0018 + element: 0x9033 + op: keep +- name: Spectral Width + group: 0x0018 + element: 0x9052 + op: keep +- name: Chemical Shift Reference + group: 0x0018 + element: 0x9053 + op: keep +- name: Volume Localization Technique + group: 0x0018 + element: 0x9054 + op: keep +- name: De-coupling + group: 0x0018 + element: 0x9059 + op: keep +- name: k-space Filtering + group: 0x0018 + element: 0x9064 + op: keep +- name: Time Domain Filtering + group: 0x0018 + element: 0x9065 + op: keep +- name: Number of Zero Fills + group: 0x0018 + element: 0x9066 + op: keep +- name: Baseline Correction + group: 0x0018 + element: 0x9067 + op: keep +- name: Acquisition Duration + group: 0x0018 + element: 0x9073 + op: keep +- name: Number of k-Space Trajectories + group: 0x0018 + element: 0x9093 + op: keep +- name: Transmitter Frequency + group: 0x0018 + element: 0x9098 + op: keep +- name: Resonant Nucleus + group: 0x0018 + element: 0x9100 + op: keep +- name: Frequency Correction + group: 0x0018 + element: 0x9101 + op: keep +- name: Slab Thickness + group: 0x0018 + element: 0x9104 + op: keep +- name: Slab Orientation + group: 0x0018 + element: 0x9105 + op: keep +- name: Mid Slab Position + group: 0x0018 + element: 0x9106 + op: keep +- name: Multiple Spin Echo + group: 0x0018 + element: 0x9011 + op: keep +- name: Volume Localization Sequence + group: 0x0018 + element: 0x9126 + op: keep +- name: Applicable Safety Standard Agency + group: 0x0018 + element: 0x9174 + op: keep +- name: First Order Phase Correction + group: 0x0018 + element: 0x9198 + op: keep +- name: Water Referenced Phase Correction + group: 0x0018 + element: 0x9199 + op: keep +- name: MR Spectroscopy Acquisition Type + group: 0x0018 + element: 0x9200 + op: keep +- name: Dimension Organization UID + group: 0x0020 + element: 0x9164 + op: keep +- name: Dimension Index Pointer + group: 0x0020 + element: 0x9165 + 
op: keep +- name: Dimension Organization Sequence + group: 0x0020 + element: 0x9221 + op: keep +- name: Dimension Index Sequence + group: 0x0020 + element: 0x9222 + op: keep +- name: Number of Frames + group: 0x0028 + element: 0x0008 + op: keep +- name: Data Point Rows + group: 0x0028 + element: 0x9001 + op: keep +- name: Data Point Columns + group: 0x0028 + element: 0x9002 + op: keep +- name: Signal Domain Columns + group: 0x0028 + element: 0x9003 + op: keep +- name: Data Representation + group: 0x0028 + element: 0x9108 + op: keep +- name: Acquisition Context Sequence + group: 0x0040 + element: 0x0555 + op: keep +- name: Shared Functional Groups Sequence + group: 0x5200 + element: 0x9229 + op: keep +- name: Spectroscopy Data + group: 0x5600 + element: 0x0020 + op: keep diff --git a/projects/configs/tag-operations/mri.yaml b/projects/configs/tag-operations/mri.yaml index 16f6e5000..cf8f5a722 100644 --- a/projects/configs/tag-operations/mri.yaml +++ b/projects/configs/tag-operations/mri.yaml @@ -20,14 +20,18 @@ ################################### 0008 Group ################################### # # -- name: "Volumetric Properties" +- name: Acquisition DateTime group: 0x0008 - element: 0x9206 - op: "keep" -- name: "Volume Based Calculation Technique" + element: 0x002A + op: replace +- name: Content Date group: 0x0008 - element: 0x9207 - op: "keep" + element: 0x0023 + op: replace +- name: Content Time + group: 0x0008 + element: 0x0033 + op: replace - name: "Complex Image Component" group: 0x0008 element: 0x9208 @@ -58,10 +62,6 @@ group: 0x0018 element: 0x0024 op: "keep" -- name: "Slice Thickness" - group: 0x0018 - element: 0x0050 - op: "keep" - name: "Repetition Time" group: 0x0018 element: 0x0080 @@ -112,22 +112,6 @@ group: 0x0018 element: 0x0094 op: "keep" -#https://dicom.innolitics.com/ciods/mr-image/mr-image/00180094 -#https://dicom.innolitics.com/ciods/mr-spectroscopy/mr-spectroscopy-multi-frame-functional-groups/52009229/00189103/00180094 -#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009229/00189125/00180094 -#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189125/00180094 -- name: "Field Of View Dimension" - group: 0x0018 - element: 0x1149 - op: "keep" -- name: "Imager Pixel Spacing" - group: 0x0018 - element: 0x1164 - op: "keep" -- name: "Grid" - group: 0x0018 - element: 0x1166 - op: "keep" - name: "Receive Coil Name" group: 0x0018 element: 0x1250 diff --git a/projects/configs/tag-operations/rt-dose.yaml b/projects/configs/tag-operations/rt-dose.yaml new file mode 100644 index 000000000..7d68eca0a --- /dev/null +++ b/projects/configs/tag-operations/rt-dose.yaml @@ -0,0 +1,54 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Referenced SOP Class UID + group: 0x0008 + element: 0x1150 + op: keep +- name: Referenced SOP Instance UID + group: 0x0008 + element: 0x1155 + op: replace_UID +- name: Number of Frames + group: 0x0028 + element: 0x0008 + op: keep +- name: Frame Increment Pointer + group: 0x0028 + element: 0x0009 + op: keep +- name: Dose Units + group: 0x3004 + element: 0x0002 + op: keep +- name: Dose Type + group: 0x3004 + element: 0x0004 + op: keep +- name: Dose Summation Type + group: 0x3004 + element: 0x000a + op: keep +- name: Grid Frame Offset Vector + group: 0x3004 + element: 0x000c + op: keep +- name: Dose Grid Scaling + group: 0x3004 + element: 0x000e + op: keep +- name: Referenced RT Plan Sequence + group: 0x300c + element: 0x0002 + op: keep diff --git a/projects/configs/tag-operations/rt-plan.yaml b/projects/configs/tag-operations/rt-plan.yaml new file mode 100644 index 000000000..ce91f3a12 --- /dev/null +++ b/projects/configs/tag-operations/rt-plan.yaml @@ -0,0 +1,46 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- name: Operators Name + group: 0x0008 + element: 0x1070 + op: replace +- name: Referenced SOP Class UID + group: 0x0008 + element: 0x1150 + op: keep +- name: Referenced SOP Instance UID + group: 0x0008 + element: 0x1155 + op: replace_UID +- name: RT Plan Label + group: 0x300a + element: 0x0002 + op: keep +- name: RT Plan Date + group: 0x300a + element: 0x0006 + op: replace +- name: RT Plan Time + group: 0x300a + element: 0x0007 + op: replace +- name: RT Plan Geometry + group: 0x300a + element: 0x000c + op: keep +- name: Referenced Structure Set Sequence + group: 0x300c + element: 0x0060 + op: keep diff --git a/projects/configs/tag-operations/rt-struct.yaml b/projects/configs/tag-operations/rt-struct.yaml new file mode 100644 index 000000000..33434a880 --- /dev/null +++ b/projects/configs/tag-operations/rt-struct.yaml @@ -0,0 +1,114 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Referenced SOP Class UID + group: 0x0008 + element: 0x1150 + op: keep +- name: Referenced SOP Instance UID + group: 0x0008 + element: 0x1155 + op: replace_UID +- name: Structure Set Label + group: 0x3006 + element: 0x0002 + op: keep +- name: Structure Set Date + group: 0x3006 + element: 0x0008 + op: replace +- name: Structure Set Time + group: 0x3006 + element: 0x0009 + op: replace +- name: Contour Image Sequence + group: 0x3006 + element: 0x0016 + op: keep +- name: Structure Set ROI Sequence + group: 0x3006 + element: 0x0020 + op: keep +- name: ROI Number + group: 0x3006 + element: 0x0022 + op: keep +- name: Referenced Frame of Reference Sequence + group: 0x3006 + element: 0x0010 + op: keep +- name: RT Referenced Study Sequence + group: 0x3006 + element: 0x0012 + op: keep +- name: RT Referenced Series Sequence + group: 0x3006 + element: 0x0014 + op: keep +- name: Referenced Frame of Reference UID + group: 0x3006 + element: 0x0024 + op: replace_UID +- name: ROI Name + group: 0x3006 + element: 0x0026 + op: keep +- name: ROI Display Color + group: 0x3006 + element: 0x002a + op: keep +- name: ROI Generation Algorithm + group: 0x3006 + element: 0x0036 + op: keep +- name: ROI Contour Sequence + group: 0x3006 + element: 0x0039 + op: keep +- name: Contour Sequence + group: 0x3006 + element: 0x0040 + op: keep +- name: Contour Geometric Type + group: 0x3006 + element: 0x0042 + op: keep +- name: Number of Contour Points + group: 0x3006 + element: 0x0046 + op: keep +- name: Contour Data + group: 0x3006 + element: 0x0050 + op: keep +- name: RT ROI Observations Sequence + group: 0x3006 + element: 0x0080 + op: keep +- name: Observation Number + group: 0x3006 + element: 0x0082 + op: keep +- name: Referenced ROI Number + group: 0x3006 + element: 0x0084 + op: keep +- name: RT ROI Interpreted Type + group: 0x3006 + element: 0x00A4 + op: keep +- name: ROI Interpreter + group: 0x3006 + element: 0x00A6 + op: keep diff --git a/projects/configs/tag-operations/test-external-user.yaml b/projects/configs/tag-operations/test-external-user.yaml index f316d40fe..b979a95f8 100644 --- a/projects/configs/tag-operations/test-external-user.yaml +++ b/projects/configs/tag-operations/test-external-user.yaml @@ -14,14 +14,6 @@ # Configuration of Standard MR Image for external user -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 - op: "keep" -- name: "UCLH Project Name" - group: 0x000d - element: 0x1001 - op: "keep" - name: "Specific Character Set" group: 0x0008 element: 0x0005 diff --git a/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml b/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml deleted file mode 100644 index d313d1647..000000000 --- a/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Configuration for extracting data from UCLH Nasogastric Tube Project DICOM files - -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 - op: "keep" -- name: "UCLH Project Name" - group: 0x000d - element: 0x1001 - op: "keep" -- name: "Specific Character Set" - group: 0x0008 - element: 0x0005 - op: "keep" -- name: "Image Type" - group: 0x0008 - element: 0x0008 - op: "keep" -- name: "SOP Class UID" - group: 0x0008 - element: 0x0016 - op: "keep" -- name: "SOP Instance UID" - group: 0x0008 - element: 0x0018 - op: "replace_UID" -- name: "Study Date" - group: 0x0008 - element: 0x0020 - op: "replace" -- name: "Instance Creator UID" - group: 0x0008 - element: 0x0014 - op: "replace_UID" -- name: "Study Time" - group: 0x0008 - element: 0x0030 - op: "replace" -- name: "Accession Number" - group: 0x0008 - element: 0x0050 - op: "replace" -- name: "Modality" - group: 0x0008 - element: 0x0060 - op: "keep" -- name: "Modalities In Study" - group: 0x0008 - element: 0x0061 - op: "keep" -- name: "Manufacturer" - group: 0x0008 - element: 0x0070 - op: "keep" -- name: "Referring Physicians Name" - group: 0x0008 - element: 0x0090 - op: "replace" -- name: "Study Description" - group: 0x0008 - element: 0x1030 - op: "keep" -- name: "Series Description" - group: 0x0008 - element: 0x103e - op: "keep" -- name: "Manufacturers Model Name" - group: 0x0008 - element: 0x1090 - op: "keep" -- name: "Referenced SOP Instance UID" - group: 0x0008 - element: 0x1155 - op: "replace_UID" -- name: "Patients Name" - group: 0x0010 - element: 0x0010 - op: "replace" -- name: "Patient ID" - group: 0x0010 - element: 0x0020 - op: "secure-hash" -- name: "Patients Birth Date" - group: 0x0010 - element: 0x0030 - op: "replace" -- name: "Patients Sex" - group: 0x0010 - element: 0x0040 - op: "keep" -- name: "Patients Size" - group: 0x0010 - element: 0x1020 - op: "keep" -- name: "Patients Weight" - group: 0x0010 - element: 0x1030 - op: "keep" -- name: "Body Part Examined" - group: 0x0018 - element: 0x0015 - op: "keep" -- name: "Scanning Sequence" - group: 0x0018 - element: 0x0020 - op: "keep" -- name: "Scanning Variant" - group: 0x0018 - element: 0x0021 - op: "keep" -- name: "Scan Options" - group: 0x0018 - element: 0x0022 - op: "keep" -- name: "MR Acquisition Type" - group: 0x0018 - element: 0x0023 - op: "keep" -- name: "Slice Thickness" - group: 0x0018 - element: 0x0050 - op: "keep" -- name: "kVp" - group: 0x0018 - element: 0x0060 - op: "keep" -- name: "Echo Time" - group: 0x0018 - element: 0x0081 - op: "keep" -- name: "Echo Train Length" - group: 0x0018 - element: 0x0091 - op: "keep" -- name: "Software Version" - group: 0x0018 - element: 0x1020 - op: "keep" -- name: "Field Of View Dimension" - group: 0x0018 - element: 0x1149 - op: "keep" -- name: "Exposure Time" - group: 0x0018 - element: 0x1150 - op: "keep" -- name: "X Ray Tube Current" - group: 0x0018 - element: 0x1151 - op: "keep" -- name: "Exposure" - group: 0x0018 - element: 0x1152 - op: "keep" -- name: "Exposure In Uas" - group: 0x0018 - element: 0x1153 - op: "keep" -- name: "Image Area Dose Product" - group: 0x0018 - element: 0x115e - op: "keep" -- name: "Imager Pixel Spacing" - group: 0x0018 - element: 0x1164 - op: "keep" -- name: "Grid" - group: 0x0018 - element: 0x1166 - op: "keep" -- name: "Focal Spot" - group: 0x0018 - element: 0x1190 - op: "keep" -- name: "Acquisition Device Processing Description" - group: 0x0018 - element: 0x1400 - op: "keep" -- name: "Exposure Index" - group: 0x0018 - element: 0x1411 - op: "keep" -- name: "Target Exposure Index" - 
group: 0x0018 - element: 0x1412 - op: "keep" -- name: "Deviation Index" - group: 0x0018 - element: 0x1413 - op: "keep" -- name: "Positioner Type" - group: 0x0018 - element: 0x1508 - op: "keep" -- name: "Collemator Shape" - group: 0x0018 - element: 0x1700 - op: "keep" -- name: "Vertices Of The Polygonal Collimator" - group: 0x0018 - element: 0x1720 - op: "keep" -- name: "Patient Position" - group: 0x0018 - element: 0x5100 - op: "keep" -- name: "View Position" - group: 0x0018 - element: 0x5101 - op: "keep" -- name: "Sensitivity" - group: 0x0018 - element: 0x6000 - op: "keep" -- name: "Detector Temperature" - group: 0x0018 - element: 0x7001 - op: "keep" -- name: "Detector Type" - group: 0x0018 - element: 0x7004 - op: "keep" -- name: "Detector Configuration" - group: 0x0018 - element: 0x7005 - op: "keep" -- name: "Detector ID" - group: 0x0018 - element: 0x700a - op: "keep" -- name: "Detector Binning" - group: 0x0018 - element: 0x701a - op: "keep" -- name: "Detector Element Physical Size" - group: 0x0018 - element: 0x7020 - op: "keep" -- name: "Detector Element Spacing" - group: 0x0018 - element: 0x7022 - op: "keep" -- name: "Detector Active Shape" - group: 0x0018 - element: 0x7024 - op: "keep" -- name: "Detector Active Dimensions" - group: 0x0018 - element: 0x7026 - op: "keep" -- name: "Field Of View Origin" - group: 0x0018 - element: 0x7030 - op: "keep" -- name: "Field Of View Rotation" - group: 0x0018 - element: 0x7032 - op: "keep" -- name: "Field Of View Horizontal Flip" - group: 0x0018 - element: 0x7034 - op: "keep" -- name: "Grid Focal Distance" - group: 0x0018 - element: 0x704c - op: "keep" -- name: "Exposure Control Mode" - group: 0x0018 - element: 0x7060 - op: "keep" -- name: "Study Instance UID" - group: 0x0020 - element: 0x000d - op: "replace_UID" -- name: "Series Instance UID" - group: 0x0020 - element: 0x000e - op: "replace_UID" -- name: "Study ID" - group: 0x0020 - element: 0x0010 - op: "replace" -- name: "Series Number" - group: 0x0020 - element: 0x0011 - op: "keep" -- name: "Image Number" - group: 0x0020 - element: 0x0013 - op: "keep" -- name: "Patient Orientation" - group: 0x0020 - element: 0x0020 - op: "keep" -- name: "Image Position (Patient)" - group: 0x0020 - element: 0x0032 - op: "keep" -- name: "Image Orientation (Patient)" - group: 0x0020 - element: 0x0037 - op: "keep" -- name: "Position Reference Indicator" - group: 0x0020 - element: 0x1040 - op: "keep" -- name: "Patient Orientation Code Sequence" - group: 0x0054 - element: 0x0410 - op: "keep" -- name: "Image Laterality" - group: 0x0020 - element: 0x0062 - op: "keep" -- name: "Samples Per Pixel" - group: 0x0028 - element: 0x0002 - op: "keep" -- name: "Photometric Interpretation" - group: 0x0028 - element: 0x0004 - op: "keep" -- name: "Rows" - group: 0x0028 - element: 0x0010 - op: "keep" -- name: "Columns" - group: 0x0028 - element: 0x0011 - op: "keep" -- name: "Pixel Spacing" - group: 0x0028 - element: 0x0030 - op: "keep" -- name: "Bits Allocated" - group: 0x0028 - element: 0x0100 - op: "keep" -- name: "Bits Stored" - group: 0x0028 - element: 0x0101 - op: "keep" -- name: "High Bit" - group: 0x0028 - element: 0x0102 - op: "keep" -- name: "Pixel Representation" - group: 0x0028 - element: 0x0103 - op: "keep" -- name: "Quality Control Image" - group: 0x0028 - element: 0x0300 - op: "keep" -- name: "Burned In Annotation" - group: 0x0028 - element: 0x0301 - op: "keep" -- name: "Pixel Spacing Calibration Type" - group: 0x0028 - element: 0x0a02 - op: "keep" -- name: "Pixel Spacing Calibration Description" - group: 0x0028 - 
element: 0x0a04 - op: "keep" -- name: "Pixel Intensity Relationship" - group: 0x0028 - element: 0x1040 - op: "keep" -- name: "Pixel Intensity Relationship Sign" - group: 0x0028 - element: 0x1041 - op: "keep" -- name: "Window Center" - group: 0x0028 - element: 0x1050 - op: "keep" -- name: "Window Width" - group: 0x0028 - element: 0x1051 - op: "keep" -- name: "Rescale Intercept" - group: 0x0028 - element: 0x1052 - op: "keep" -- name: "Rescale Slope" - group: 0x0028 - element: 0x1053 - op: "keep" -- name: "Rescale Type" - group: 0x0028 - element: 0x1054 - op: "keep" -- name: "Window Center And Width Explanation" - group: 0x0028 - element: 0x1055 - op: "keep" -- name: "Lossy Image Compression" - group: 0x0028 - element: 0x2110 - op: "keep" -- name: "VOI LUT Sequence" - group: 0x0028 - element: 0x3010 - op: "keep" -- name: "View Code Sequence" - group: 0x0054 - element: 0x0220 - op: "keep" -- name: "Frame of Reference UID" - group: 0x0020 - element: 0x0052 - op: "replace_UID" -- name: "Synchronization Frame of Reference UID" - group: 0x0020 - element: 0x0200 - op: "replace_UID" -- name: "Storage Media File-set UID" - group: 0x0088 - element: 0x0140 - op: "replace_UID" -- name: "UID" - group: 0x0040 - element: 0xA124 - op: "replace_UID" -- name: "Referenced Frame of Reference UID" - group: 0x3006 - element: 0x0024 - op: "replace_UID" -- name: "Related Frame of Reference UID" - group: 0x3006 - element: 0x00C2 - op: "replace_UID" -- name: "Pixel Data" - group: 0x7fe0 - element: 0x0010 - op: "keep" diff --git a/projects/configs/tag-operations/xray.yaml b/projects/configs/tag-operations/xray.yaml index 947b0879a..e7952c767 100644 --- a/projects/configs/tag-operations/xray.yaml +++ b/projects/configs/tag-operations/xray.yaml @@ -12,78 +12,167 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Default configuration to extend base for x-rays - -- name: "Volumetric Properties" - group: 0x0008 - element: 0x9206 - op: "keep" -- name: "Volume Based Calculation Technique" - group: 0x0008 - element: 0x9207 - op: "keep" -- name: "Field Of View Dimension" - group: 0x0018 - element: 0x1149 - op: "keep" -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/dx-detector/00181149 -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/dx-detector/00181149 -#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-acquisition/00181149 -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-acquisition/00181149 -#https://dicom.innolitics.com/ciods/digital-x-ray-image/dx-detector/00181149 -#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/dx-detector/00181149 -#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/dx-detector/00181149 -- name: "Imager Pixel Spacing" +- name: Patients Size + group: 0x0010 + element: 0x1020 + op: keep +- name: Patients Weight + group: 0x0010 + element: 0x1030 + op: keep +- name: Body Part Examined + group: 0x0018 + element: 0x0015 + op: keep +- name: Exposure Time + group: 0x0018 + element: 0x1150 + op: keep +- name: X Ray Tube Current + group: 0x0018 + element: 0x1151 + op: keep +- name: Exposure + group: 0x0018 + element: 0x1152 + op: keep +- name: Exposure In Uas + group: 0x0018 + element: 0x1153 + op: keep +- name: Image Area Dose Product + group: 0x0018 + element: 0x115e + op: keep +- name: Imager Pixel Spacing group: 0x0018 element: 0x1164 - op: "keep" -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/dx-detector/00181164 -#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-acquisition/00181164 -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-acquisition/00181164 -#https://dicom.innolitics.com/ciods/digital-x-ray-image/dx-detector/00181164 -#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/dx-detector/00181164 -#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/dx-detector/00181164 -- name: "Grid" + op: keep +- name: Grid group: 0x0018 element: 0x1166 - op: "keep" -#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-acquisition/00181166 -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-acquisition/00181166 -#https://dicom.innolitics.com/ciods/digital-x-ray-image/x-ray-grid/00181166 -#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/x-ray-grid/00181166 -#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/x-ray-grid/00181166 -#https://dicom.innolitics.com/ciods/x-ray-3d-craniofacial-image/x-ray-3d-craniofacial-acquisition/00189507/00181166 -#https://dicom.innolitics.com/ciods/x-ray-3d-angiographic-image/x-ray-3d-angiographic-acquisition/00189507/00181166 -#https://dicom.innolitics.com/ciods/breast-projection-x-ray-image/breast-projection-x-ray-image-multi-frame-functional-groups/52009229/00189555/00181166 -- name: "Acquisition Device Processing Description" + op: keep +- name: Acquisition Device Processing Description group: 0x0018 element: 0x1400 - op: "keep" -#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-image/00181400 -#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-image/00181400 -#https://dicom.innolitics.com/ciods/digital-x-ray-image/dx-image/00181400 -#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/dx-image/00181400 
-#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/dx-image/00181400 -#https://dicom.innolitics.com/ciods/x-ray-3d-craniofacial-image/x-ray-3d-craniofacial-image-contributing-sources/00189506/00181400 -#https://dicom.innolitics.com/ciods/x-ray-3d-angiographic-image/x-ray-3d-angiographic-image-contributing-sources/00189506/00181400 -#https://dicom.innolitics.com/ciods/breast-projection-x-ray-image/breast-projection-x-ray-image-multi-frame-functional-groups/52009229/00189412/00181400 -- name: "Pixel Spacing Calibration Type" + op: keep +- name: Exposure Index + group: 0x0018 + element: 0x1411 + op: keep +- name: Target Exposure Index + group: 0x0018 + element: 0x1412 + op: keep +- name: Deviation Index + group: 0x0018 + element: 0x1413 + op: keep +- name: Positioner Type + group: 0x0018 + element: 0x1508 + op: keep +- name: Collemator Shape + group: 0x0018 + element: 0x1700 + op: keep +- name: Vertices Of The Polygonal Collimator + group: 0x0018 + element: 0x1720 + op: keep +- name: View Position + group: 0x0018 + element: 0x5101 + op: keep +- name: Sensitivity + group: 0x0018 + element: 0x6000 + op: keep +- name: Detector Temperature + group: 0x0018 + element: 0x7001 + op: keep +- name: Detector Type + group: 0x0018 + element: 0x7004 + op: keep +- name: Detector Configuration + group: 0x0018 + element: 0x7005 + op: keep +- name: Detector ID + group: 0x0018 + element: 0x700a + op: keep +- name: Detector Binning + group: 0x0018 + element: 0x701a + op: keep +- name: Detector Element Physical Size + group: 0x0018 + element: 0x7020 + op: keep +- name: Detector Element Spacing + group: 0x0018 + element: 0x7022 + op: keep +- name: Detector Active Shape + group: 0x0018 + element: 0x7024 + op: keep +- name: Detector Active Dimensions + group: 0x0018 + element: 0x7026 + op: keep +- name: Field Of View Origin + group: 0x0018 + element: 0x7030 + op: keep +- name: Field Of View Rotation + group: 0x0018 + element: 0x7032 + op: keep +- name: Field Of View Horizontal Flip + group: 0x0018 + element: 0x7034 + op: keep +- name: Grid Focal Distance + group: 0x0018 + element: 0x704c + op: keep +- name: Exposure Control Mode + group: 0x0018 + element: 0x7060 + op: keep +- name: Quality Control Image + group: 0x0028 + element: 0x0300 + op: keep +- name: Pixel Spacing Calibration Type group: 0x0028 element: 0x0a02 - op: "keep" -#RT, X-Ray -- name: "Pixel Spacing Calibration Description" + op: keep +- name: Pixel Spacing Calibration Description group: 0x0028 element: 0x0a04 - op: "keep" -#X-Ray -- name: "Pixel Intensity Relationship" + op: keep +- name: Pixel Intensity Relationship group: 0x0028 element: 0x1040 - op: "keep" -#X-Ray -- name: "Pixel Intensity Relationship Sign" + op: keep +- name: Pixel Intensity Relationship Sign group: 0x0028 element: 0x1041 - op: "keep" -#X-Ray + op: keep +- name: View Code Sequence + group: 0x0054 + element: 0x1050 + op: keep +- name: Patient Orientation Code Sequence + group: 0x0054 + element: 0x0410 + op: keep +- name: Presentation LUT Shape + group: 0x2050 + element: 0x0020 + op: keep diff --git a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml index bec9c9bc3..a2463fb51 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml @@ -15,12 +15,13 @@ project: name: "test-extract-uclh-omop-cdm-dicomweb" azure_kv_alias: "test" - modalities: ["DX", "CR"] + modalities: ["DX", "CR", "MR"] tag_operation_files: base: - 
"base.yaml" #Expected base config file for any project - "mri.yaml" + - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] series_filters: diff --git a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml index 9db5648d5..2ed67c450 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml @@ -21,6 +21,7 @@ tag_operation_files: base: - "base.yaml" #Expected base config file for any project - "mri.yaml" + - "xray.yaml" manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] series_filters: diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index 7ff106a1f..52199ef1e 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -21,8 +21,8 @@ tag_operation_files: base: - "base.yaml" #Expected base config file for any project - "mri.yaml" - - "test-extract-uclh-omop-cdm.yaml" - manufacturer_overrides: ["mri-diffusion.yaml"] + - "xray.yaml" + manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] series_filters: - "localizer" diff --git a/projects/configs/test-mr-spectroscopy.yaml b/projects/configs/test-mr-spectroscopy.yaml new file mode 100644 index 000000000..6571deb1e --- /dev/null +++ b/projects/configs/test-mr-spectroscopy.yaml @@ -0,0 +1,35 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project: + name: "test-mr-spectroscopy" + modalities: ["MR"] + +tag_operation_files: + base: + - "base.yaml" + - "mri.yaml" + - "mr-spectroscopy.yaml" + manufacturer_overrides: + - "mri.yaml" + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "none" + parquet: "none" diff --git a/projects/configs/test-radiotherapy.yaml b/projects/configs/test-radiotherapy.yaml new file mode 100644 index 000000000..029610699 --- /dev/null +++ b/projects/configs/test-radiotherapy.yaml @@ -0,0 +1,39 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +project: + name: "test-radiotherapy" + modalities: + - "CT" + - "RTDOSE" + - "RTPLAN" + - "RTSTRUCT" + +tag_operation_files: + base: + - "base.yaml" + - "rt-dose.yaml" + - "rt-plan.yaml" + - "rt-struct.yaml" + manufacturer_overrides: null + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "none" + parquet: "none" diff --git a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml index 6b49a09e5..c54eac399 100644 --- a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml +++ b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml @@ -20,7 +20,7 @@ project: tag_operation_files: base: - "base.yaml" #Expected base config file for any project - - "uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml" + - "xray.yaml" manufacturer_overrides: null destination: dicom: "ftps" diff --git a/pytest-pixl/README.md b/pytest-pixl/README.md index 30c8c4c21..2a17fcff4 100644 --- a/pytest-pixl/README.md +++ b/pytest-pixl/README.md @@ -27,6 +27,8 @@ the following environment variables: ## Available testing utilities +Functions from 'PIXL/pytest-pixl/src/pytest_pixl': + - `dicom.write_volume`: write a volume of MRI DICOMs for testing - `dicom.generate_dicom_dataset`: generate a DICOM dataset for testing @@ -46,7 +48,7 @@ Currently we only handle the following dictionary keys - `window_width` - `pixel_data` -This is useful for example when generating DICOM datasets for a volume of slices. +This is useful when generating DICOM datasets for a volume of slices. See for example `dicom.write_volume()`. In addition to the tags dictionary, `generate_dicom_dataset()` has a `**kwargs` parameter that @@ -64,7 +66,7 @@ and `private_tags` is a list of `tuple`s with the following format: [(tag_id, VR, value), ...] ``` -where `tag` can be a `str`, `int` or `Tuple[int, int]`, `VR` is a `str` and `value` is a `str`. +where `tag_id` can be a `str`, `int` or `Tuple[int, int]`, `VR` is a `str` and `value` is a `str`. Note that this requires the [VR](https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_6.2.html) of the tag to be known. @@ -104,7 +106,11 @@ with the details implemented in [`pytest_pixl.dicom._create_default_json`](./src ## 'PIXL/pytest-pixl' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [scripts](./scripts/README.md)
 
 [src](./src/README.md)
 
 [tests](./tests/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| pyproject.toml | README.md |
+
+</details>
-pyproject.toml -README.md diff --git a/pytest-pixl/pyproject.toml b/pytest-pixl/pyproject.toml index 839d8382a..2f7e1c232 100644 --- a/pytest-pixl/pyproject.toml +++ b/pytest-pixl/pyproject.toml @@ -10,17 +10,17 @@ classifiers = [ ] dependencies = [ "pydicom==2.4.4", - "pyftpdlib==1.5.10", - "pyOpenSSL==24.2.1", + "pyftpdlib==2.0.1", + "pyOpenSSL==24.3.0", "python-decouple==3.8", "pytest-loguru==0.4.0", - "requests==2.32.2", + "requests==2.32.3", ] [project.optional-dependencies] test = [ "core==0.2.0rc0", - "pytest==8.3.2", + "pytest==8.3.4", ] dev = [ "core[dev]==0.2.0rc0", diff --git a/pytest-pixl/scripts/README.md b/pytest-pixl/scripts/README.md index 65085443d..c9259c4e5 100644 --- a/pytest-pixl/scripts/README.md +++ b/pytest-pixl/scripts/README.md @@ -1,6 +1,14 @@ ## 'PIXL/pytest-pixl/scripts' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-create_default_dicom_tags_json.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| create_default_dicom_tags_json.py | README.md |
+
+</details>
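The `create_default_dicom_tags_json.py` script listed above regenerates the `default_dicom_tags.json` defaults that `pytest_pixl.dicom.generate_dicom_dataset()` starts from. A short sketch of the assumed usage described in the pytest-pixl README earlier in this diff (the tag values and the private tag below are illustrative, not fixtures of the package):

```python
from pytest_pixl.dicom import generate_dicom_dataset

# keyword arguments must be valid DICOM keywords; anything else raises ValueError
ds = generate_dicom_dataset(Modality="MR", SeriesDescription="test series")

# the private-tag format quoted in the README: (tag_id, VR, value), where
# tag_id may be a str, an int, or a (group, element) tuple
private_tags = [((0x000B, 0x0010), "LO", "Illustrative Creator")]
```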
diff --git a/pytest-pixl/src/README.md b/pytest-pixl/src/README.md index 138dffa26..9a38c50e5 100644 --- a/pytest-pixl/src/README.md +++ b/pytest-pixl/src/README.md @@ -1,8 +1,26 @@ ## 'PIXL/pytest-pixl/src' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [pytest_pixl](./pytest_pixl/README.md)
 
 [resources](./resources/README.md)
 
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/pytest-pixl/src/pytest_pixl/README.md b/pytest-pixl/src/pytest_pixl/README.md index 98744d18c..0e5186840 100644 --- a/pytest-pixl/src/pytest_pixl/README.md +++ b/pytest-pixl/src/pytest_pixl/README.md @@ -1,18 +1,28 @@ ## 'PIXL/pytest-pixl/src/pytest_pixl' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [data](./data/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-dicom.py
-ftpserver.py
-helpers.py
-plugin.py
-__init__.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| dicom.py | README.md |
+| ftpserver.py | |
+| helpers.py | |
+| plugin.py | |
+| __init__.py | |
+
+</details>
diff --git a/pytest-pixl/src/pytest_pixl/data/README.md b/pytest-pixl/src/pytest_pixl/data/README.md index cfa58b0bb..09b8b2a13 100644 --- a/pytest-pixl/src/pytest_pixl/data/README.md +++ b/pytest-pixl/src/pytest_pixl/data/README.md @@ -1,12 +1,25 @@ ## 'PIXL/pytest-pixl/src/pytest_pixl/data' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [omop-resources](./omop-resources/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-default_dicom_tags.json
-volume_dicom_variables.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| default_dicom_tags.json | README.md |
+| volume_dicom_variables.json | |
+
+</details>
diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/README.md b/pytest-pixl/src/pytest_pixl/data/omop-resources/README.md index 7a457c566..780ec047f 100644 --- a/pytest-pixl/src/pytest_pixl/data/omop-resources/README.md +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/README.md @@ -1,18 +1,28 @@ -## 'omop-resources' Directory Contents +## 'PIXL/pytest-pixl/src/pytest_pixl/data/omop-resources' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [omop](./omop/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-batch_input.csv
-duplicate_input.csv
-multiple_projects.csv
-participant_id.csv
-test.csv
+| **Data** | **User docs** |
+| :--- | :--- |
+| batch_input.csv | README.md |
+| duplicate_input.csv | |
+| multiple_projects.csv | |
+| participant_id.csv | |
+| test.csv | |
+
+</details>
diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/README.md b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/README.md index e5d45c492..328a0fc7c 100644 --- a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/README.md +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/README.md @@ -1,12 +1,26 @@ -## 'omop' Directory Contents +## 'PIXL/pytest-pixl/src/pytest_pixl/data/omop-resources/omop' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [private](./private/README.md)
 
 [public](./public/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-extract_summary.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| extract_summary.json | README.md |
+
+</details>
diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/README.md b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/README.md index 2e47873ed..81c4746bc 100644 --- a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/README.md +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/README.md @@ -1,8 +1,14 @@ -## 'private' Directory Contents +## 'PIXL/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-PERSON_LINKS.parquet
-PROCEDURE_OCCURRENCE_LINKS.parquet
+| **Data** | **User docs** |
+| :--- | :--- |
+| PERSON_LINKS.parquet | README.md |
+| PROCEDURE_OCCURRENCE_LINKS.parquet | |
+
+</details>
diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/public/README.md b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/public/README.md index d35e10141..d446f6902 100644 --- a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/public/README.md +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/public/README.md @@ -1,6 +1,14 @@ -## 'public' Directory Contents +## 'PIXL/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/public' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-PROCEDURE_OCCURRENCE.parquet
+| **Data** | **User docs** |
+| :--- | :--- |
+| PROCEDURE_OCCURRENCE.parquet | README.md |
+
+</details>
diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/test_whitespace_and_na_processing.csv b/pytest-pixl/src/pytest_pixl/data/omop-resources/test_whitespace_and_na_processing.csv new file mode 100644 index 000000000..8f0669be7 --- /dev/null +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/test_whitespace_and_na_processing.csv @@ -0,0 +1,5 @@ +procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date,study_uid,participant_id +0, patient_identifier , 123456789 , ms-pinpoint-test ,2023-01-01T00:01:00Z,2022-01-01, 1.2.3.4.5.6.7.8 , +1, patient_identifier , 123456789 , ms-pinpoint-test ,2023-01-01T00:01:00Z,2022-01-01, 1.2.3.4.5.6.7.8 , +2, whitespace_idenfifiers , ,ms-pinpoint-test,2023-01-01T00:01:00Z,2022-01-01, , +3, NA_idenfifiers ,,ms-pinpoint-test,2023-01-01T00:01:00Z,2022-01-01,, diff --git a/pytest-pixl/src/pytest_pixl/dicom.py b/pytest-pixl/src/pytest_pixl/dicom.py index 87735239e..89a2f8d3e 100644 --- a/pytest-pixl/src/pytest_pixl/dicom.py +++ b/pytest-pixl/src/pytest_pixl/dicom.py @@ -62,6 +62,18 @@ def write_volume(filename_pattern: str) -> None: "pixel_data": None, } +# Mapping based on: https://dicom.nema.org/medical/dicom/current/output/chtml/part04/sect_b.5.html +MODALITY_TO_CLASS_UID = { + "CR": "1.2.840.10008.5.1.4.1.1.1", + "CT": "1.2.840.10008.5.1.4.1.1.2", + "DX": "1.2.840.10008.5.1.4.1.1.1.1", + "MR": "1.2.840.10008.5.1.4.1.1.4", + "PT": "1.2.840.10008.5.1.4.1.1.128", + "RTDOSE": "1.2.840.10008.5.1.4.1.1.481.2", + "RTSTRUCT": "1.2.840.10008.5.1.4.1.1.481.3", + "RTPLAN": "1.2.840.10008.5.1.4.1.1.481.5", +} + def generate_dicom_dataset(tag_values: dict = TAGS_DICT, **kwargs: Any) -> Dataset: """ @@ -111,6 +123,9 @@ def generate_dicom_dataset(tag_values: dict = TAGS_DICT, **kwargs: Any) -> Datas msg = f"Tag {key} is not a valid DICOM tag" raise ValueError(msg) + if "Modality" in kwargs and "SOPClassUID" not in kwargs: + ds.SOPClassUID = MODALITY_TO_CLASS_UID[kwargs["Modality"]] + return ds diff --git a/pytest-pixl/src/resources/README.md b/pytest-pixl/src/resources/README.md index 8c118dc40..591013d8e 100644 --- a/pytest-pixl/src/resources/README.md +++ b/pytest-pixl/src/resources/README.md @@ -1,6 +1,24 @@ ## 'PIXL/pytest-pixl/src/resources' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [ssl](./ssl/README.md)
 
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
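The `pytest_pixl/dicom.py` hunk earlier in this diff adds a `MODALITY_TO_CLASS_UID` mapping so that passing only a `Modality` keyword yields a dataset whose SOP Class UID matches that modality. A small sketch of the behaviour (assumed usage, following the code in the hunk):

```python
from pytest_pixl.dicom import generate_dicom_dataset

# Modality alone now selects the matching storage SOP Class...
ds = generate_dicom_dataset(Modality="RTPLAN")
assert ds.SOPClassUID == "1.2.840.10008.5.1.4.1.1.481.5"  # RT Plan Storage

# ...while an explicit SOPClassUID still takes precedence
ds = generate_dicom_dataset(Modality="MR", SOPClassUID="1.2.840.10008.5.1.4.1.1.4")
```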
+ diff --git a/pytest-pixl/src/resources/ssl/README.md b/pytest-pixl/src/resources/ssl/README.md index 20c619682..d6d2ec754 100644 --- a/pytest-pixl/src/resources/ssl/README.md +++ b/pytest-pixl/src/resources/ssl/README.md @@ -1,8 +1,15 @@ ## 'PIXL/pytest-pixl/src/resources/ssl' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-localhost.crt
-localhost.key
+| **Keys** | **User docs** |
+| :--- | :--- |
+| localhost.crt | README.md |
+| localhost.key | |
+
+</details>
diff --git a/pytest-pixl/tests/README.md b/pytest-pixl/tests/README.md index d7c2900b2..f4b67339b 100644 --- a/pytest-pixl/tests/README.md +++ b/pytest-pixl/tests/README.md @@ -1,12 +1,25 @@ ## 'PIXL/pytest-pixl/tests' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [samples_for_fixture_tests](./samples_for_fixture_tests/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-conftest.py
-test_ftpserver_fixture.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| conftest.py | README.md |
+| test_ftpserver_fixture.py | |
+
+</details>
diff --git a/pytest-pixl/tests/samples_for_fixture_tests/README.md b/pytest-pixl/tests/samples_for_fixture_tests/README.md index ee9d21902..7ff4031d4 100644 --- a/pytest-pixl/tests/samples_for_fixture_tests/README.md +++ b/pytest-pixl/tests/samples_for_fixture_tests/README.md @@ -1,6 +1,24 @@ ## 'PIXL/pytest-pixl/tests/samples_for_fixture_tests' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [test_ftpserver_fixture](./test_ftpserver_fixture/README.md)
 
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
+| **User docs** |
+| :--- |
+| README.md |
+
+</details>
+ diff --git a/pytest-pixl/tests/samples_for_fixture_tests/test_ftpserver_fixture/README.md b/pytest-pixl/tests/samples_for_fixture_tests/test_ftpserver_fixture/README.md index 06effc923..ab97db7b0 100644 --- a/pytest-pixl/tests/samples_for_fixture_tests/test_ftpserver_fixture/README.md +++ b/pytest-pixl/tests/samples_for_fixture_tests/test_ftpserver_fixture/README.md @@ -1,6 +1,14 @@ ## 'PIXL/pytest-pixl/tests/samples_for_fixture_tests/test_ftpserver_fixture' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-test_ftpserver_login.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| test_ftpserver_login.py | README.md |
+
+</details>
diff --git a/schemas/README.md b/schemas/README.md index 432a9cc7b..0e34f82ee 100644 --- a/schemas/README.md +++ b/schemas/README.md @@ -1,6 +1,14 @@ ## 'PIXL/schemas' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-github-issue-forms.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| github-issue-forms.json | README.md |
+
+</details>
diff --git a/scripts/README.md b/scripts/README.md index 26ff995a3..992fc993e 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,12 +1,17 @@ ## 'PIXL/scripts' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-cmove_all_studies.py
-delete_oldest_n_studies.py
-filter_cohort_for_those_present_in_raw.py
-list_newest_n_studies.py
+| **Code** | **User docs** |
+| :--- | :--- |
+| cmove_all_studies.py | README.md |
+| delete_oldest_n_studies.py | |
+| filter_cohort_for_those_present_in_raw.py | |
+| list_newest_n_studies.py | |
+
+</details>
diff --git a/test/.env b/test/.env index ad9b56d91..5b2f8a634 100644 --- a/test/.env +++ b/test/.env @@ -54,7 +54,6 @@ ORTHANC_ANON_URL=http://orthanc-anon:8042 ORTHANC_ANON_USERNAME=orthanc_anon_username ORTHANC_ANON_PASSWORD=orthanc_anon_password ORTHANC_ANON_AE_TITLE=ORTHANCANON -ORTHANC_ANON_HTTP_TIMEOUT=60 ENABLE_DICOM_WEB=true ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT=true STUDY_TIME_OFFSET=0 diff --git a/test/README.md b/test/README.md index b1c25278f..20495378b 100644 --- a/test/README.md +++ b/test/README.md @@ -39,9 +39,9 @@ For CI, there is also another subcommand to run pytest, reporting coverage ## The `pytest-pixl` plugin -We provide a [`pytest` plugin](../pytest-pixl/README.md) with shared functionality for PIXL system +We provide a [`pytest` plugin](../pytest-pixl/README.md) with shared functionality for the PIXL system and unit tests. This includes an `ftp_server` fixture to spin up a lightweight FTP server, -to mock the FTP server used by the Data Safe Haven. +to mock an FTP server being used to deposit the data. ## File organisation @@ -51,7 +51,7 @@ to mock the FTP server used by the Data Safe Haven. ### Scripts -`./scripts` contains bash and Python scripts to check the individual components of the system test. +`../scripts` contains bash and Python scripts to check the individual components of the system test. ### Resources @@ -72,7 +72,11 @@ but configured to upload to a [DICOMweb server](#dicomweb-config) ## 'PIXL/test' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [dicomweb_config](./dicomweb_config/README.md)
 
 [exports](./exports/README.md)
 
 [vna_config](./vna_config/README.md)
 
-### Files
+</details>
 
-.env
-.secrets.env.sample
-conftest.py
-docker-compose.yml
-README.md
-run-system-test.sh
-system_test.py
-test_parquet_exports.py
-utils.py
+<details>
+<summary>
+
+Files
+
+</summary>
+
+| **Configuration** | **Code** | **User docs** |
+| :--- | :--- | :--- |
+| .env | conftest.py | README.md |
+| .secrets.env.sample | run-system-test.sh | |
+| docker-compose.yml | system_test.py | |
+| | test_parquet_exports.py | |
+| | utils.py | |
+
+</details>
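The test/README.md change above describes the `ftp_server` fixture the pytest-pixl plugin provides for mocking an FTPS deposit endpoint. A hypothetical sketch of how a test might consume it (the fixture name comes from the README; everything inside the test body is an assumption, not the fixture's documented API):

```python
def test_export_reaches_ftp_server(ftp_server):
    # the plugin spins up a lightweight FTPS server for the test's lifetime;
    # a real test would trigger an upload and assert on the files received
    assert ftp_server is not None
```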
diff --git a/test/dicomweb_config/README.md b/test/dicomweb_config/README.md index 41729879a..23d069559 100644 --- a/test/dicomweb_config/README.md +++ b/test/dicomweb_config/README.md @@ -1,10 +1,15 @@ ## 'PIXL/test/dicomweb_config' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-.env
-dicom.json
-orthanc.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| dicom.json | README.md |
+| orthanc.json | |
+
+</details>
diff --git a/test/resources/README.md b/test/resources/README.md index 132dcb141..f633fe31e 100644 --- a/test/resources/README.md +++ b/test/resources/README.md @@ -1,14 +1,27 @@ ## 'PIXL/test/resources' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [omop](./omop/README.md)
 
 [omop-dicomweb](./omop-dicomweb/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-Dicom1.dcm
-Dicom2.dcm
+| **Dicom** | **User docs** |
+| :--- | :--- |
+| Dicom1.dcm | README.md |
+| Dicom2.dcm | |
+
+</details>
diff --git a/test/resources/omop-dicomweb/README.md b/test/resources/omop-dicomweb/README.md index 0847fb646..1d1788ac7 100644 --- a/test/resources/omop-dicomweb/README.md +++ b/test/resources/omop-dicomweb/README.md @@ -1,12 +1,26 @@ -## 'omop-dicomweb' Directory Contents +## 'PIXL/test/resources/omop-dicomweb' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [private](./private/README.md)
 
 [public](./public/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-extract_summary.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| extract_summary.json | README.md |
+
+</details>
diff --git a/test/resources/omop-dicomweb/private/README.md b/test/resources/omop-dicomweb/private/README.md index 2e47873ed..92299c635 100644 --- a/test/resources/omop-dicomweb/private/README.md +++ b/test/resources/omop-dicomweb/private/README.md @@ -1,8 +1,16 @@ -## 'private' Directory Contents +## 'PIXL/test/resources/omop-dicomweb/private' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-PERSON_LINKS.parquet
+| **Data** | **User docs** |
+| :--- | :--- |
+| PERSON_LINKS.parquet | README.md |
+| PROCEDURE_OCCURRENCE_LINKS.parquet | |
+
+</details>
-PROCEDURE_OCCURRENCE_LINKS.parquet diff --git a/test/resources/omop-dicomweb/public/README.md b/test/resources/omop-dicomweb/public/README.md index d35e10141..98de24d26 100644 --- a/test/resources/omop-dicomweb/public/README.md +++ b/test/resources/omop-dicomweb/public/README.md @@ -1,6 +1,14 @@ -## 'public' Directory Contents +## 'PIXL/test/resources/omop-dicomweb/public' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-PROCEDURE_OCCURRENCE.parquet
+| **Data** | **User docs** |
+| :--- | :--- |
+| PROCEDURE_OCCURRENCE.parquet | README.md |
+
+</details>
diff --git a/test/resources/omop/README.md b/test/resources/omop/README.md index f8f0010b4..228f6840a 100644 --- a/test/resources/omop/README.md +++ b/test/resources/omop/README.md @@ -1,12 +1,26 @@ ## 'PIXL/test/resources/omop' Directory Contents -### Subdirectories +
+<details>
+<summary>
+
+Subdirectories with links to the relevant README
+
+</summary>
+
 
 [private](./private/README.md)
 
 [public](./public/README.md)
 
-### Files
+</details>
+
+<details>
+<summary>
+
+Files
+
+</summary>
+
-extract_summary.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| extract_summary.json | README.md |
+
+</details>
diff --git a/test/resources/omop/private/README.md b/test/resources/omop/private/README.md index d0082ec64..969599330 100644 --- a/test/resources/omop/private/README.md +++ b/test/resources/omop/private/README.md @@ -1,8 +1,15 @@ ## 'PIXL/test/resources/omop/private' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-PERSON_LINKS.parquet
-PROCEDURE_OCCURRENCE_LINKS.parquet
+| **Data** | **User docs** |
+| :--- | :--- |
+| PERSON_LINKS.parquet | README.md |
+| PROCEDURE_OCCURRENCE_LINKS.parquet | |
+
+</details>
diff --git a/test/resources/omop/public/README.md b/test/resources/omop/public/README.md index be9075cd4..e10168fbd 100644 --- a/test/resources/omop/public/README.md +++ b/test/resources/omop/public/README.md @@ -1,6 +1,14 @@ ## 'PIXL/test/resources/omop/public' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-PROCEDURE_OCCURRENCE.parquet
+| **Data** | **User docs** |
+| :--- | :--- |
+| PROCEDURE_OCCURRENCE.parquet | README.md |
+
+</details>
diff --git a/test/system_test.py b/test/system_test.py index 00caa2f96..6fc53b0c0 100644 --- a/test/system_test.py +++ b/test/system_test.py @@ -19,7 +19,6 @@ import pydicom import pytest import requests -from core.dicom_tags import DICOM_TAG_PROJECT_NAME from loguru import logger from pydicom.uid import UID from pytest_check import check @@ -169,22 +168,6 @@ def _check_dcm_tags_from_zip( dcm.get("StudyInstanceUID") == zip_path.stem ) # StudyInstanceUID stores the pseudo study id post anon actual_instances.add((dcm.get("AccessionNumber"), dcm.get("SeriesDescription"))) - block = dcm.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - tag_offset = DICOM_TAG_PROJECT_NAME.offset_id - private_tag = block[tag_offset] - assert private_tag is not None - if isinstance(private_tag.value, bytes): - # Allow this for the time being, until it has been investigated - # See https://github.com/SAFEHR-data/PIXL/issues/363 - logger.error( - "TEMPORARILY IGNORE: tag value {} should be of type str, but is of type bytes", - private_tag.value, - ) - assert private_tag.value.decode() == TestFtpsUpload.project_slug - else: - assert private_tag.value == TestFtpsUpload.project_slug # check the basic info about the instances exactly matches with check: assert actual_instances == expected_study["instances"] diff --git a/test/vna_config/README.md b/test/vna_config/README.md index ee4cd9d44..d5f0491e9 100644 --- a/test/vna_config/README.md +++ b/test/vna_config/README.md @@ -1,8 +1,15 @@ ## 'PIXL/test/vna_config' Directory Contents -### Files +
+<details>
+<summary>
+
+Files
+
+</summary>
+
-dicom.json
-orthanc.json
+| **Configuration** | **User docs** |
+| :--- | :--- |
+| dicom.json | README.md |
+| orthanc.json | |
+
+</details>