Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for a default aliases file #398

Merged
merged 3 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions extra_data/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ def _load_aliases_from_file(self, aliases_path):
with open(aliases_path, 'r') as f:
data = json.load(f)

elif aliases_path.suffix == '.yaml':
elif aliases_path.suffix in ['.yaml', '.yml']:
import yaml

with open(aliases_path, 'r') as f:
Expand Down Expand Up @@ -1617,6 +1617,13 @@ def __getitem__(self, aliased_item):

raise TypeError('expected alias or (source alias, key) tuple')

def __contains__(self, aliased_item):
    """Report whether *aliased_item* can be looked up via ``self[...]``.

    A ``KeyError`` from the lookup means the item is absent and yields
    ``False``; any other exception raised by the lookup propagates
    unchanged to the caller.
    """
    try:
        self[aliased_item]
    except KeyError:
        return False
    else:
        return True

def _resolve_aliased_selection(self, selection):
if isinstance(selection, dict):
res = {self._resolve_source_alias(alias): keys
Expand Down Expand Up @@ -1786,10 +1793,12 @@ def RunDirectory(
# any code was already using it.
RunHandler = RunDirectory

# Template for the default aliases file used by open_run(); the `{}`
# placeholder is filled in with the proposal directory via str.format().
DEFAULT_ALIASES_FILE = "{}/usr/extra-data-aliases.yml"

def open_run(
proposal, run, data='raw', include='*', file_filter=locality.lc_any, *,
inc_suspect_trains=True, parallelize=True, _use_voview=True,
inc_suspect_trains=True, parallelize=True, aliases=DEFAULT_ALIASES_FILE,
_use_voview=True,
):
"""Access EuXFEL data on the Maxwell cluster by proposal and run number.

Expand Down Expand Up @@ -1824,6 +1833,13 @@ def open_run(
Enable or disable opening files in parallel. Particularly useful if
creating child processes is not allowed (e.g. in a daemonized
:class:`multiprocessing.Process`).
aliases: str, Path
Path to an alias file for the run, see the documentation for
:meth:`DataCollection.with_aliases` for details. If the
argument is a string with a format argument like
``{}/path/to/aliases.yml``, then the format argument will be replaced with
the proposal directory path. By default it looks for a file named
``{}/usr/extra-data-aliases.yml``.
"""
if data == 'all':
common_args = dict(
Expand Down Expand Up @@ -1871,8 +1887,24 @@ def open_run(
run = index(run) # Allow integers, including numpy integers
run = 'r' + str(run).zfill(4)

return RunDirectory(
dc = RunDirectory(
osp.join(prop_dir, data, run), include=include, file_filter=file_filter,
inc_suspect_trains=inc_suspect_trains, parallelize=parallelize,
_use_voview=_use_voview,
)

# Normalize string arguments to a Path, substituting the proposal directory
# for any `{}` placeholder
if isinstance(aliases, str):
aliases = Path(aliases.format(prop_dir))

# If we're using the default aliases file and it doesn't exist, ignore it
# without throwing any errors.
default_aliases = Path(DEFAULT_ALIASES_FILE.format(prop_dir))
if aliases == default_aliases and not default_aliases.is_file():
aliases = None

if aliases is not None:
dc = dc.with_aliases(aliases)
print(f"Loading {len(dc._aliases)} aliases from: {aliases}", flush=True, file=sys.stderr)

return dc
6 changes: 6 additions & 0 deletions extra_data/tests/test_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ def assert_equal_keydata(kd1, kd2):
# Test whether source alias yields identical SourceData.
assert run.alias['sa3-xgm'] is run['SA3_XTD10_XGM/XGM/DOOCS']

# Test __contains__()
assert "sa3-xgm" in run.alias
assert not "sa42-xgm" in run.alias
with pytest.raises(TypeError):
42 in run.alias

# Test whether source alias plus literal key yields equal KeyData.
assert_equal_keydata(
run.alias['sa3-xgm', 'pulseEnergy.wavelengthUsed'],
Expand Down
33 changes: 33 additions & 0 deletions extra_data/tests/test_reader_mockdata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from datetime import datetime, timedelta, timezone
from itertools import islice
from multiprocessing import Process
from pathlib import Path
from textwrap import dedent
from warnings import catch_warnings

import h5py
Expand All @@ -20,6 +22,7 @@
SourceNameError, PropertyNameError, DataCollection, open_run,
MultiRunError
)
from extra_data.reader import DEFAULT_ALIASES_FILE

def test_iterate_trains(mock_agipd_data, mock_control_data_with_empty_source):
with H5File(mock_agipd_data) as f:
Expand Down Expand Up @@ -850,6 +853,36 @@ def test_open_run(mock_spb_raw_and_proc_run):
open_run(proposal=2012, run=238, data='all')
assert len(w) == 1

# Helper function to write an alias file at a specific path
def write_aliases(path):
    """Write a minimal YAML alias file at *path*.

    The file maps the alias ``xgm`` to ``SA1_XTD2_XGM/DOOCS/MAIN``. Missing
    parent directories of *path* are created first.
    """
    # Use the argument rather than a variable from the enclosing scope, so
    # the helper writes exactly where the caller asks it to.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(dedent("""
    xgm: SA1_XTD2_XGM/DOOCS/MAIN
    """))

# To set the aliases, we should be able to use a string relative to the
# proposal directory.
aliases_path = Path(mock_data_root) / "SPB/201830/p002012/foo.yml"
write_aliases(aliases_path)
run = open_run(2012, 238, aliases="{}/foo.yml")
assert "xgm" in run.alias

# And a proper path
aliases_path = Path(mock_data_root) / "foo.yml"
write_aliases(aliases_path)
run = open_run(2012, 238, aliases=aliases_path)
assert "xgm" in run.alias

# And a plain string
run = open_run(2012, 238, aliases=str(aliases_path))
assert "xgm" in run.alias

# If the default file exists, it should be used automatically
aliases_path = Path(DEFAULT_ALIASES_FILE.format(mock_data_root + "/SPB/201830/p002012"))
write_aliases(aliases_path)
run = open_run(2012, 238)
assert "xgm" in run.alias

def test_open_file(mock_sa3_control_data):
f = H5File(mock_sa3_control_data)
file_access = f.files[0]
Expand Down