Skip to content

Commit

Permalink
Only save combined data_type in only-combining mode (#878)
Browse files Browse the repository at this point in the history
* Only save combined `data_type` in only-combining mode

* Retrigger docs rebuild

* Retrigger docs rebuild

* Add more tests
  • Loading branch information
dachengx authored Aug 25, 2024
1 parent 41a21bc commit cb7e232
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 5 deletions.
12 changes: 8 additions & 4 deletions strax/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,11 @@ def concat_loader(*args, **kwargs):
# only save if we are not in a superrun or the plugin allows superruns
# otherwise we will see error at Chunk.concatenate
# but anyway the data should already have been made
for d_to_save in set(current_plugin_to_savers + list(target_plugin.provides)):
if not _combining_subruns:
data_type_to_save = set(current_plugin_to_savers + list(target_plugin.provides))
else:
data_type_to_save = set(current_plugin_to_savers)
for d_to_save in data_type_to_save:
key = self.key_for(run_id, d_to_save, chunk_number=chunk_number)
# Here we just check the availability of key,
# chunk_number for _get_partial_loader_for can be None
Expand Down Expand Up @@ -1333,7 +1337,7 @@ def concat_loader(*args, **kwargs):
targets=strax.to_str_tuple(final_plugin),
)

def get_datakey(self, run_id, target, lineage):
def get_data_key(self, run_id, target, lineage):
"""Get datakey for a given run_id, target and lineage.
If a superrun is detected, the subrun information is added to the key.
Expand Down Expand Up @@ -1366,7 +1370,7 @@ def _add_saver(
:return: Updated savers dictionary.
"""
key = self.get_datakey(run_id, d_to_save, target_plugin.lineage)
key = self.get_data_key(run_id, d_to_save, target_plugin.lineage)
for sf in self._sorted_storage:
if sf.readonly:
continue
Expand Down Expand Up @@ -2020,7 +2024,7 @@ def key_for(self, run_id, target, chunk_number=None):
plugins = self._get_plugins((target,), run_id, chunk_number=chunk_number)

lineage = plugins[target].lineage
return self.get_datakey(run_id, target, lineage)
return self.get_data_key(run_id, target, lineage)

def get_meta(self, run_id, target, chunk_number=None) -> dict:
"""Return metadata for target for run_id, or raise DataNotAvailable if data is not yet
Expand Down
2 changes: 1 addition & 1 deletion strax/run_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def keys_for_runs(
# Get the lineage once, for the context specifies that the
# defaults may not change!
p = self._get_plugins((target,), run_ids[0])[target]
return [self.get_datakey(r, target, p.lineage) for r in run_ids]
return [self.get_data_key(r, target, p.lineage) for r in run_ids]
else:
return []

Expand Down
15 changes: 15 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,20 @@ def test_per_chunk_storage():
register=[Records, Peaks],
use_per_run_defaults=True,
)

# If per-chunk storage is not for dependencies, DataKey will not be different.
key_1st = st.key_for(run_id, "records", chunk_number={"records": [0]})
key_2nd = st.key_for(run_id, "records", chunk_number={"records": [1]})
assert str(key_1st) == str(key_2nd)

# If per-chunk storage is for dependencies, savers will not be different.
components_1st = st.get_components(run_id, "peaks", chunk_number={"peaks": [0]})
components_2nd = st.get_components(run_id, "peaks", chunk_number={"peaks": [1]})
assert (
components_1st.savers["peaks"][0].dirname == components_2nd.savers["peaks"][0].dirname
)

# Test merge_per_chunk_storage
st.make(run_id, "records")
n_chunks = len(st.get_metadata(run_id, "records")["chunks"])
assert n_chunks > 2
Expand All @@ -462,6 +476,7 @@ def test_per_chunk_storage():
with pytest.raises(ValueError):
st.merge_per_chunk_storage(run_id, "peaks", "records")

# Per-chunk storage not allowed for some plugins
p = type("whatever", (strax.OverlapWindowPlugin,), dict(depends_on="records"))
st.register(p)
with pytest.raises(ValueError):
Expand Down
4 changes: 4 additions & 0 deletions tests/test_superruns.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ def test_loaders_and_savers(self):
self.superrun_name, "peak_classification", _combining_subruns=True
)
assert len(components.loaders) == 1
assert len(components.savers) == 1
assert "peak_classification" in components.loaders
assert "peak_classification" in components.savers

with self.assertRaises(ValueError):
self.context.get_components(
Expand Down Expand Up @@ -335,6 +337,8 @@ def test_only_combining_superruns(self):
The test also shows the difference between the two.
"""
self.context.tree
self.context.inversed_tree
self.context.check_superrun()
sum_super = self.context.get_array(self.superrun_name, "sum")
_sum_super = self.context.get_array(self.superrun_name, "sum", _combining_subruns=True)
Expand Down

0 comments on commit cb7e232

Please sign in to comment.