Skip to content

Commit

Permalink
[BF] Auto extractors and merge regex expressions (#275)
Browse files Browse the repository at this point in the history
* fix extract_auto_entities, which was using every auto_extractor found, and fix the merging of auto_extractors with custom extractors

* fix dict and add docstring

* add a "random" key to the test config to exercise the fix for combining extractors and auto_extractors

* fix typo

---------

Co-authored-by: bora2502 <[email protected]>
  • Loading branch information
arnaudbore and bora2502 authored Oct 11, 2023
1 parent 6572021 commit cac5a25
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 9 deletions.
23 changes: 15 additions & 8 deletions dcm2bids/sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from dcm2bids.acquisition import Acquisition
from dcm2bids.utils.io import load_json
from dcm2bids.utils.utils import DEFAULT, convert_dir, splitext_
from dcm2bids.utils.utils import DEFAULT, convert_dir, combine_dict_extractors, splitext_

compare_float_keys = ["lt", "gt", "le", "ge", "btw", "btwe"]

Expand Down Expand Up @@ -428,7 +428,6 @@ def searchDcmTagEntity(self, sidecar, desc):
descWithTask = desc.copy()
concatenated_matches = {}
entities = []

if "custom_entities" in desc.keys() or self.auto_extract_entities:
if 'custom_entities' in desc.keys():
if isinstance(descWithTask["custom_entities"], str):
Expand All @@ -437,7 +436,8 @@ def searchDcmTagEntity(self, sidecar, desc):
descWithTask["custom_entities"] = []

if self.auto_extract_entities:
self.extractors.update(DEFAULT.auto_extractors)
self.extractors = combine_dict_extractors(self.extractors, DEFAULT.auto_extractors)


for dcmTag in self.extractors:
if dcmTag in sidecar.data.keys():
Expand All @@ -455,8 +455,14 @@ def searchDcmTagEntity(self, sidecar, desc):
compile_regex.search(curr_dcmInfo).groupdict())
break

# Keep entities asked in custom_entities
# If dir found in custom_entities and concatenated_matches.keys we keep it
if "custom_entities" in desc.keys():
entities = set(concatenated_matches.keys()).union(set(descWithTask["custom_entities"]))
entities = set(concatenated_matches.keys()).intersection(set(descWithTask["custom_entities"]))

# custom_entities not a key for extractor or auto_extract_entities
complete_entities = [ent for ent in descWithTask["custom_entities"] if '-' in ent]
entities = entities.union(set(complete_entities))

if self.auto_extract_entities:
auto_acq = '_'.join([descWithTask['datatype'], descWithTask["suffix"]])
Expand All @@ -467,10 +473,11 @@ def searchDcmTagEntity(self, sidecar, desc):
if left_auto_entities:
self.logger.warning(f"{left_auto_entities} have not been found for datatype '{descWithTask['datatype']}' "
f"and suffix '{descWithTask['suffix']}'.")
else:
entities = list(entities) + DEFAULT.auto_entities[auto_acq]
entities = list(set(entities))
descWithTask["custom_entities"] = entities

entities = list(entities) + list(auto_entities)
entities = list(set(entities))
descWithTask["custom_entities"] = entities


for curr_entity in entities:
if curr_entity in concatenated_matches.keys():
Expand Down
16 changes: 16 additions & 0 deletions dcm2bids/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,22 @@ def convert_dir(dir):
return DEFAULT.entity_dir[dir]


def combine_dict_extractors(d1, d2):
    """Merge two extractor dictionaries without dropping any pattern.

    Args:
        d1 (dict): maps a key to a list of patterns. A bare string value
            is treated as a single pattern.
        d2 (dict): same layout as d1.

    Returns:
        dict: one entry per key found in d1 or d2. Each value is a new
            list holding the patterns of d1 followed by those of d2,
            with duplicates removed. Neither input is modified.
    """
    combined = {}
    for source in (d1, d2):
        for key, patterns in source.items():
            if isinstance(patterns, str):
                # A bare string is one pattern, not a sequence of characters.
                patterns = [patterns]
            merged = combined.setdefault(key, [])
            for pattern in patterns:
                # Skip patterns already present so that merging the same
                # dictionary again does not grow the lists.
                if pattern not in merged:
                    merged.append(pattern)
    return combined


class TreePrinter:
"""
Generates and prints a tree representation of a given a directory.
Expand Down
3 changes: 2 additions & 1 deletion tests/data/config_test_auto_extract.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"search_method": "re",
"extractors": {"SeriesDescription": ["random-(?P<random>[a-zA-Z0-9]+)"]},
"descriptions": [
{
"datatype": "localizer",
Expand All @@ -12,7 +13,7 @@
"id": "func_task-rest",
"datatype": "func",
"suffix": "bold",
"custom_entities": ["acq-highres"],
"custom_entities": ["acq-highres", "random"],
"criteria": {
"SeriesDescription": ".*bold.*"
}
Expand Down

0 comments on commit cac5a25

Please sign in to comment.