Skip to content

Commit

Permalink
Merge pull request #398 from European-XFEL/aliases_location
Browse files Browse the repository at this point in the history
Add support for a default aliases file
  • Loading branch information
JamesWrigley authored May 5, 2023
2 parents 808afae + 871512b commit cfca21f
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 3 deletions.
38 changes: 35 additions & 3 deletions extra_data/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ def _load_aliases_from_file(self, aliases_path):
with open(aliases_path, 'r') as f:
data = json.load(f)

elif aliases_path.suffix == '.yaml':
elif aliases_path.suffix in ['.yaml', '.yml']:
import yaml

with open(aliases_path, 'r') as f:
Expand Down Expand Up @@ -1617,6 +1617,13 @@ def __getitem__(self, aliased_item):

raise TypeError('expected alias or (source alias, key) tuple')

def __contains__(self, aliased_item):
    """Report whether ``aliased_item`` can be looked up via ``self[...]``.

    The lookup itself serves as the membership test: a ``KeyError`` raised
    by indexing means the item is absent. Only ``KeyError`` is handled
    here, so any other exception raised by the lookup propagates to the
    caller.
    """
    try:
        self[aliased_item]
    except KeyError:
        return False
    else:
        return True

def _resolve_aliased_selection(self, selection):
if isinstance(selection, dict):
res = {self._resolve_source_alias(alias): keys
Expand Down Expand Up @@ -1786,10 +1793,12 @@ def RunDirectory(
# any code was already using it.
RunHandler = RunDirectory

DEFAULT_ALIASES_FILE = "{}/usr/extra-data-aliases.yml"

def open_run(
proposal, run, data='raw', include='*', file_filter=locality.lc_any, *,
inc_suspect_trains=True, parallelize=True, _use_voview=True,
inc_suspect_trains=True, parallelize=True, aliases=DEFAULT_ALIASES_FILE,
_use_voview=True,
):
"""Access EuXFEL data on the Maxwell cluster by proposal and run number.
Expand Down Expand Up @@ -1824,6 +1833,13 @@ def open_run(
Enable or disable opening files in parallel. Particularly useful if
creating child processes is not allowed (e.g. in a daemonized
:class:`multiprocessing.Process`).
aliases: str, Path
Path to an alias file for the run, see the documentation for
:meth:`DataCollection.with_aliases` for details. If the
argument is a string with a format argument like
``{}/path/to/aliases.yml``, then the format argument will be replaced with
the proposal directory path. By default it looks for a file named
``{}/usr/extra-data-aliases.yml``.
"""
if data == 'all':
common_args = dict(
Expand Down Expand Up @@ -1871,8 +1887,24 @@ def open_run(
run = index(run) # Allow integers, including numpy integers
run = 'r' + str(run).zfill(4)

return RunDirectory(
dc = RunDirectory(
osp.join(prop_dir, data, run), include=include, file_filter=file_filter,
inc_suspect_trains=inc_suspect_trains, parallelize=parallelize,
_use_voview=_use_voview,
)

# Normalize string arguments to a Path, filling any `{}` placeholder with the proposal directory
if isinstance(aliases, str):
aliases = Path(aliases.format(prop_dir))

# If we're using the default aliases file and it doesn't exist, ignore it
# without throwing any errors.
default_aliases = Path(DEFAULT_ALIASES_FILE.format(prop_dir))
if aliases == default_aliases and not default_aliases.is_file():
aliases = None

if aliases is not None:
dc = dc.with_aliases(aliases)
print(f"Loading {len(dc._aliases)} aliases from: {aliases}", flush=True, file=sys.stderr)

return dc
6 changes: 6 additions & 0 deletions extra_data/tests/test_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ def assert_equal_keydata(kd1, kd2):
# Test whether source alias yields identical SourceData.
assert run.alias['sa3-xgm'] is run['SA3_XTD10_XGM/XGM/DOOCS']

# Test __contains__()
assert "sa3-xgm" in run.alias
assert not "sa42-xgm" in run.alias
with pytest.raises(TypeError):
42 in run.alias

# Test whether source alias plus literal key yields equal KeyData.
assert_equal_keydata(
run.alias['sa3-xgm', 'pulseEnergy.wavelengthUsed'],
Expand Down
33 changes: 33 additions & 0 deletions extra_data/tests/test_reader_mockdata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from datetime import datetime, timedelta, timezone
from itertools import islice
from multiprocessing import Process
from pathlib import Path
from textwrap import dedent
from warnings import catch_warnings

import h5py
Expand All @@ -20,6 +22,7 @@
SourceNameError, PropertyNameError, DataCollection, open_run,
MultiRunError
)
from extra_data.reader import DEFAULT_ALIASES_FILE

def test_iterate_trains(mock_agipd_data, mock_control_data_with_empty_source):
with H5File(mock_agipd_data) as f:
Expand Down Expand Up @@ -850,6 +853,36 @@ def test_open_run(mock_spb_raw_and_proc_run):
open_run(proposal=2012, run=238, data='all')
assert len(w) == 1

# Helper function to write an alias file at a specific path
def write_aliases(path):
    """Write a minimal alias file to ``path``, creating parent directories.

    The file content is YAML mapping the alias ``xgm`` to the source
    ``SA1_XTD2_XGM/DOOCS/MAIN``.
    """
    # Operate on the ``path`` argument itself rather than on a variable from
    # the enclosing scope, so the file is written where the caller asked.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(dedent("""
        xgm: SA1_XTD2_XGM/DOOCS/MAIN
    """))

# To set the aliases, we should be able to use a string relative to the
# proposal directory.
aliases_path = Path(mock_data_root) / "SPB/201830/p002012/foo.yml"
write_aliases(aliases_path)
run = open_run(2012, 238, aliases="{}/foo.yml")
assert "xgm" in run.alias

# And a proper path
aliases_path = Path(mock_data_root) / "foo.yml"
write_aliases(aliases_path)
run = open_run(2012, 238, aliases=aliases_path)
assert "xgm" in run.alias

# And a plain string
run = open_run(2012, 238, aliases=str(aliases_path))
assert "xgm" in run.alias

# If the default file exists, it should be used automatically
aliases_path = Path(DEFAULT_ALIASES_FILE.format(mock_data_root + "/SPB/201830/p002012"))
write_aliases(aliases_path)
run = open_run(2012, 238)
assert "xgm" in run.alias

def test_open_file(mock_sa3_control_data):
f = H5File(mock_sa3_control_data)
file_access = f.files[0]
Expand Down

0 comments on commit cfca21f

Please sign in to comment.