From b10e82caa676bcf46735ebd8287fdeaa545e1e1f Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Mon, 24 Apr 2023 17:44:24 +0200
Subject: [PATCH 1/3] Allow either .yaml or .yml suffixes for alias files

---
 extra_data/reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extra_data/reader.py b/extra_data/reader.py
index 2ebd07b9..eec780c9 100644
--- a/extra_data/reader.py
+++ b/extra_data/reader.py
@@ -645,7 +645,7 @@ def _load_aliases_from_file(self, aliases_path):
             with open(aliases_path, 'r') as f:
                 data = json.load(f)
 
-        elif aliases_path.suffix == '.yaml':
+        elif aliases_path.suffix in ['.yaml', '.yml']:
             import yaml
 
             with open(aliases_path, 'r') as f:

From 9af3fc4d178d5fc7d1096dac8cd036a8137dbf2c Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Mon, 24 Apr 2023 17:58:14 +0200
Subject: [PATCH 2/3] Add __contains__ to AliasIndexer

Useful for checking if some alias exists, e.g. with `"foo" in run.alias`.
Only a KeyError from the lookup counts as "not present"; an item of an
unsupported type still raises TypeError, as it does for __getitem__.
---
 extra_data/reader.py             | 7 +++++++
 extra_data/tests/test_aliases.py | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/extra_data/reader.py b/extra_data/reader.py
index eec780c9..2feed539 100644
--- a/extra_data/reader.py
+++ b/extra_data/reader.py
@@ -1617,6 +1617,13 @@ def __getitem__(self, aliased_item):
 
         raise TypeError('expected alias or (source alias, key) tuple')
 
+    def __contains__(self, aliased_item):
+        try:
+            self[aliased_item]
+            return True
+        except KeyError:
+            return False
+
     def _resolve_aliased_selection(self, selection):
         if isinstance(selection, dict):
             res = {self._resolve_source_alias(alias): keys
diff --git a/extra_data/tests/test_aliases.py b/extra_data/tests/test_aliases.py
index 750fb509..502334ea 100644
--- a/extra_data/tests/test_aliases.py
+++ b/extra_data/tests/test_aliases.py
@@ -23,6 +23,12 @@ def assert_equal_keydata(kd1, kd2):
     # Test whether source alias yields identical SourceData.
     assert run.alias['sa3-xgm'] is run['SA3_XTD10_XGM/XGM/DOOCS']
 
+    # Test __contains__()
+    assert "sa3-xgm" in run.alias
+    assert "sa42-xgm" not in run.alias
+    with pytest.raises(TypeError):
+        42 in run.alias
+
     # Test whether source alias plus literal key yields equal KeyData.
     assert_equal_keydata(
         run.alias['sa3-xgm', 'pulseEnergy.wavelengthUsed'],

From 871512b12edce1b0be8ed6cf8e894d41c27e1db3 Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Mon, 24 Apr 2023 18:12:38 +0200
Subject: [PATCH 3/3] Add support for a default file to get run aliases

---
 extra_data/reader.py                     | 29 +++++++++++++++++++--
 extra_data/tests/test_reader_mockdata.py | 33 ++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/extra_data/reader.py b/extra_data/reader.py
index 2feed539..b31f0a78 100644
--- a/extra_data/reader.py
+++ b/extra_data/reader.py
@@ -1793,10 +1793,12 @@ def RunDirectory(
 # any code was already using it.
 RunHandler = RunDirectory
 
+DEFAULT_ALIASES_FILE = "{}/usr/extra-data-aliases.yml"
 
 def open_run(
     proposal, run, data='raw', include='*', file_filter=locality.lc_any, *,
-    inc_suspect_trains=True, parallelize=True, _use_voview=True,
+    inc_suspect_trains=True, parallelize=True, aliases=DEFAULT_ALIASES_FILE,
+    _use_voview=True,
 ):
     """Access EuXFEL data on the Maxwell cluster by proposal and run number.
 
@@ -1831,6 +1833,13 @@ def open_run(
         Enable or disable opening files in parallel. Particularly useful if
         creating child processes is not allowed (e.g. in a daemonized
         :class:`multiprocessing.Process`).
+    aliases: str, Path
+        Path to an alias file for the run, see the documentation for
+        :meth:`DataCollection.with_aliases` for details. If the argument
+        is a string containing a ``{}`` placeholder, like
+        ``{}/path/to/aliases.yml``, the placeholder is replaced with the
+        proposal directory path. The default, ``{}/usr/extra-data-aliases.yml``,
+        is skipped silently if that file does not exist.
     """
     if data == 'all':
         common_args = dict(
@@ -1878,8 +1887,24 @@ def open_run(
         run = index(run)  # Allow integers, including numpy integers
     run = 'r' + str(run).zfill(4)
 
-    return RunDirectory(
+    dc = RunDirectory(
         osp.join(prop_dir, data, run), include=include, file_filter=file_filter,
         inc_suspect_trains=inc_suspect_trains, parallelize=parallelize,
         _use_voview=_use_voview,
     )
+
+    # Convert string arguments to a Path, substituting the proposal directory
+    if isinstance(aliases, str):
+        aliases = Path(aliases.format(prop_dir))
+
+    # If we're using the default aliases file and it doesn't exist, ignore it
+    # without throwing any errors.
+    default_aliases = Path(DEFAULT_ALIASES_FILE.format(prop_dir))
+    if aliases == default_aliases and not default_aliases.is_file():
+        aliases = None
+
+    if aliases is not None:
+        dc = dc.with_aliases(aliases)
+        print(f"Loading {len(dc._aliases)} aliases from: {aliases}", flush=True, file=sys.stderr)
+
+    return dc
diff --git a/extra_data/tests/test_reader_mockdata.py b/extra_data/tests/test_reader_mockdata.py
index 5f6de8b6..23d61ed3 100644
--- a/extra_data/tests/test_reader_mockdata.py
+++ b/extra_data/tests/test_reader_mockdata.py
@@ -1,6 +1,8 @@
 from datetime import datetime, timedelta, timezone
 from itertools import islice
 from multiprocessing import Process
+from pathlib import Path
+from textwrap import dedent
 from warnings import catch_warnings
 
 import h5py
@@ -20,6 +22,7 @@
     SourceNameError, PropertyNameError, DataCollection, open_run,
     MultiRunError
 )
+from extra_data.reader import DEFAULT_ALIASES_FILE
 
 def test_iterate_trains(mock_agipd_data, mock_control_data_with_empty_source):
     with H5File(mock_agipd_data) as f:
@@ -850,6 +853,36 @@ def test_open_run(mock_spb_raw_and_proc_run):
         open_run(proposal=2012, run=238, data='all')
         assert len(w) == 1
 
+    # Helper function to write an alias file at a specific path
+    def write_aliases(path):
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(dedent("""
+        xgm: SA1_XTD2_XGM/DOOCS/MAIN
+        """))
+
+    # To set the aliases, we should be able to use a string relative to the
+    # proposal directory.
+    aliases_path = Path(mock_data_root) / "SPB/201830/p002012/foo.yml"
+    write_aliases(aliases_path)
+    run = open_run(2012, 238, aliases="{}/foo.yml")
+    assert "xgm" in run.alias
+
+    # And a proper path
+    aliases_path = Path(mock_data_root) / "foo.yml"
+    write_aliases(aliases_path)
+    run = open_run(2012, 238, aliases=aliases_path)
+    assert "xgm" in run.alias
+
+    # And a plain string
+    run = open_run(2012, 238, aliases=str(aliases_path))
+    assert "xgm" in run.alias
+
+    # If the default file exists, it should be used automatically
+    aliases_path = Path(DEFAULT_ALIASES_FILE.format(mock_data_root + "/SPB/201830/p002012"))
+    write_aliases(aliases_path)
+    run = open_run(2012, 238)
+    assert "xgm" in run.alias
+
 def test_open_file(mock_sa3_control_data):
     f = H5File(mock_sa3_control_data)
     file_access = f.files[0]