From a42496e8ec9384770159b1650020b798baca56e6 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 10 Oct 2024 15:42:22 +0200 Subject: [PATCH] Disable locking when opening h5 files, add missing with --- lib/galaxy/datatypes/binary.py | 32 ++++++++++----------- lib/galaxy/tool_util/verify/asserts/hdf5.py | 22 +++++++------- lib/galaxy/tools/recommendations.py | 18 ++++++------ test/unit/tool_util/verify/test_asserts.py | 2 +- 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index 4454344d6d48..559945b3facc 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -1299,7 +1299,7 @@ class Loom(H5): def sniff(self, filename: str) -> bool: if super().sniff(filename): - with h5py.File(filename, "r") as loom_file: + with h5py.File(filename, "r", locking=False) as loom_file: # Check the optional but distinctive LOOM_SPEC_VERSION attribute if bool(loom_file.attrs.get("LOOM_SPEC_VERSION")): return True @@ -1328,7 +1328,7 @@ def display_peek(self, dataset: DatasetProtocol) -> str: def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: super().set_meta(dataset, overwrite=overwrite, **kwd) try: - with h5py.File(dataset.get_file_name(), "r") as loom_file: + with h5py.File(dataset.get_file_name(), "r", locking=False) as loom_file: dataset.metadata.title = loom_file.attrs.get("title") dataset.metadata.description = loom_file.attrs.get("description") dataset.metadata.url = loom_file.attrs.get("url") @@ -1464,7 +1464,7 @@ class Anndata(H5): def sniff(self, filename: str) -> bool: if super().sniff(filename): try: - with h5py.File(filename, "r") as f: + with h5py.File(filename, "r", locking=False) as f: return all(attr in f for attr in ["X", "obs", "var"]) except Exception: return False @@ -1472,7 +1472,7 @@ def sniff(self, filename: str) -> bool: def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: super().set_meta(dataset, overwrite=overwrite, **kwd) - with h5py.File(dataset.get_file_name(), "r") as anndata_file: + with h5py.File(dataset.get_file_name(), "r", locking=False) as anndata_file: dataset.metadata.title = anndata_file.attrs.get("title") dataset.metadata.description = anndata_file.attrs.get("description") dataset.metadata.url = anndata_file.attrs.get("url") @@ -1822,7 +1822,7 @@ def sniff(self, filename: str) -> bool: False """ if super().sniff(filename): - with h5py.File(filename, "r") as f: + with h5py.File(filename, "r", locking=False) as f: required_fields = {"id", "format-url", "type", "generated-by", "creation-date", "nnz", "shape"} return required_fields.issubset(f.attrs.keys()) return False @@ -1830,7 +1830,7 @@ def sniff(self, filename: str) -> bool: def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: super().set_meta(dataset, overwrite=overwrite, **kwd) try: - with h5py.File(dataset.get_file_name(), "r") as f: + with h5py.File(dataset.get_file_name(), "r", locking=False) as f: attributes = f.attrs dataset.metadata.id = util.unicodify(attributes["id"]) @@ -1853,7 +1853,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: if not dataset.dataset.purged: lines = ["Biom2 (HDF5) file"] try: - with h5py.File(dataset.get_file_name()) as f: + with h5py.File(dataset.get_file_name(), locking=False) as f: for k, v in f.attrs.items(): lines.append(f"{k}: {util.unicodify(v)}") except Exception as e: @@ -1900,7 +1900,7 @@ def sniff(self, filename: str) -> bool: if super().sniff(filename): keys = ["chroms", "bins", "pixels", "indexes"] - with h5py.File(filename, "r") as handle: + with h5py.File(filename, "r", locking=False) as handle: fmt = util.unicodify(handle.attrs.get("format")) url = util.unicodify(handle.attrs.get("format-url")) if fmt == MAGIC or url == URL: @@ -1956,7 +1956,7 @@ def sniff(self, filename: str) -> bool: if super().sniff(filename): keys0 = ["resolutions"] - with h5py.File(filename, "r") as handle: + with h5py.File(filename, "r", locking=False) as handle: if not all(name in handle.keys() for name in keys0): return False res0 = next(iter(handle["resolutions"].keys())) @@ -2022,7 +2022,7 @@ def set_meta( params_file = dataset.metadata.spec[spec_key].param.new_file( dataset=dataset, metadata_tmp_files_dir=metadata_tmp_files_dir ) - with h5py.File(dataset.get_file_name(), "r") as handle: + with h5py.File(dataset.get_file_name(), "r", locking=False) as handle: hyper_params = handle[self.HYPERPARAMETER][()] hyper_params = json.loads(util.unicodify(hyper_params)) with open(params_file.get_file_name(), "w") as f: @@ -2036,7 +2036,7 @@ def set_meta( def sniff(self, filename: str) -> bool: if super().sniff(filename): keys = [self.CONFIG] - with h5py.File(filename, "r") as handle: + with h5py.File(filename, "r", locking=False) as handle: if not all(name in handle.keys() for name in keys): return False url = util.unicodify(handle.attrs.get(self.URL)) @@ -2046,7 +2046,7 @@ def sniff(self, filename: str) -> bool: def get_attribute(self, filename: str, attr_key: str) -> str: try: - with h5py.File(filename, "r") as handle: + with h5py.File(filename, "r", locking=False) as handle: attr = util.unicodify(handle.attrs.get(attr_key)) return attr except Exception as e: @@ -2069,7 +2069,7 @@ def get_html_repr(self, filename: str) -> str: def get_config_string(self, filename: str) -> str: try: - with h5py.File(filename, "r") as handle: + with h5py.File(filename, "r", locking=False) as handle: config = util.unicodify(handle[self.CONFIG][()]) return config except Exception as e: @@ -2109,7 +2109,7 @@ def display_data( out_dict: Dict = {} try: - with h5py.File(dataset.get_file_name(), "r") as handle: + with h5py.File(dataset.get_file_name(), "r", locking=False) as handle: out_dict["Attributes"] = {} attributes = handle.attrs for k in set(attributes.keys()) - {self.HTTP_REPR, self.REPR, self.URL}: @@ -2199,7 +2199,7 @@ class HexrdMaterials(H5): def sniff(self, filename: str) -> bool: if super().sniff(filename): req = {"AtomData", "Atomtypes", "CrystalSystem", "LatticeParameters"} - with h5py.File(filename, "r") as mat_file: + with h5py.File(filename, "r", locking=False) as mat_file: for k in mat_file.keys(): if isinstance(mat_file[k], h5py._hl.group.Group) and set(mat_file[k].keys()) >= req: return True @@ -2208,7 +2208,7 @@ def sniff(self, filename: str) -> bool: def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None: super().set_meta(dataset, overwrite=overwrite, **kwd) try: - with h5py.File(dataset.get_file_name(), "r") as mat_file: + with h5py.File(dataset.get_file_name(), "r", locking=False) as mat_file: dataset.metadata.materials = list(mat_file.keys()) sgn = {} lp = {} diff --git a/lib/galaxy/tool_util/verify/asserts/hdf5.py b/lib/galaxy/tool_util/verify/asserts/hdf5.py index e94c61c3af04..e2c3a3b7d501 100644 --- a/lib/galaxy/tool_util/verify/asserts/hdf5.py +++ b/lib/galaxy/tool_util/verify/asserts/hdf5.py @@ -18,10 +18,11 @@ def assert_has_h5_attribute(output_bytes: bytes, key: str, value: str) -> None: attribute""" _assert_h5py() output_temp = io.BytesIO(output_bytes) - local_attrs = h5py.File(output_temp, "r").attrs - assert ( - key in local_attrs and str(local_attrs[key]) == value - ), f"Not a HDF5 file or H5 attributes do not match:\n\t{list(local_attrs.items())}\n\n\t({key} : {value})" + with h5py.File(output_temp, "r", locking=False) as h5: + local_attrs = h5.attrs + assert ( + key in local_attrs and str(local_attrs[key]) == value + ), f"Not a HDF5 file or H5 attributes do not match:\n\t{list(local_attrs.items())}\n\n\t({key} : {value})" # TODO the function actually queries groups. so the function and argument name are misleading @@ -36,9 +37,10 @@ def append_keys(key): local_keys.append(key) return None - h5py.File(output_temp, "r").visit(append_keys) - missing = 0 - for key in h5_keys: - if key not in local_keys: - missing += 1 - assert missing == 0, f"Not a HDF5 file or H5 keys missing:\n\t{local_keys}\n\t{h5_keys}" + with h5py.File(output_temp, "r", locking=False) as f: + f.visit(append_keys) + missing = 0 + for key in h5_keys: + if key not in local_keys: + missing += 1 + assert missing == 0, f"Not a HDF5 file or H5 keys missing:\n\t{local_keys}\n\t{h5_keys}" diff --git a/lib/galaxy/tools/recommendations.py b/lib/galaxy/tools/recommendations.py index 6d6aa1e9e352..62df298fcfb4 100644 --- a/lib/galaxy/tools/recommendations.py +++ b/lib/galaxy/tools/recommendations.py @@ -131,16 +131,16 @@ def __set_model(self, trans, remote_model_url): Create model and associated dictionaries for recommendations """ self.tool_recommendation_model_path = self.__download_model(remote_model_url) - model_file = h5py.File(self.tool_recommendation_model_path, "r") - self.reverse_dictionary = json.loads(model_file["reverse_dict"][()].decode("utf-8")) - self.loaded_model = self.create_transformer_model(len(self.reverse_dictionary) + 1) - self.loaded_model.load_weights(self.tool_recommendation_model_path) + with h5py.File(self.tool_recommendation_model_path, "r", locking=False) as model_file: + self.reverse_dictionary = json.loads(model_file["reverse_dict"][()].decode("utf-8")) + self.loaded_model = self.create_transformer_model(len(self.reverse_dictionary) + 1) + self.loaded_model.load_weights(self.tool_recommendation_model_path) - self.model_data_dictionary = {v: k for k, v in self.reverse_dictionary.items()} - # set the list of compatible tools - self.compatible_tools = json.loads(model_file["compatible_tools"][()].decode("utf-8")) - tool_weights = json.loads(model_file["class_weights"][()].decode("utf-8")) - self.standard_connections = json.loads(model_file["standard_connections"][()].decode("utf-8")) + self.model_data_dictionary = {v: k for k, v in self.reverse_dictionary.items()} + # set the list of compatible tools + self.compatible_tools = json.loads(model_file["compatible_tools"][()].decode("utf-8")) + tool_weights = json.loads(model_file["class_weights"][()].decode("utf-8")) + self.standard_connections = json.loads(model_file["standard_connections"][()].decode("utf-8")) # sort the tools' usage dictionary tool_pos_sorted = [int(key) for key in tool_weights.keys()] for k in tool_pos_sorted: diff --git a/test/unit/tool_util/verify/test_asserts.py b/test/unit/tool_util/verify/test_asserts.py index a7f419fa4a32..6a7d40454f4e 100644 --- a/test/unit/tool_util/verify/test_asserts.py +++ b/test/unit/tool_util/verify/test_asserts.py @@ -1259,7 +1259,7 @@ def test_has_json_property_with_text_neg(): if h5py is not None: with tempfile.NamedTemporaryFile(delete=False) as tmp: h5name = tmp.name - with h5py.File(tmp.name, "w") as h5fh: + with h5py.File(tmp.name, "w", locking=False) as h5fh: h5fh.attrs["myfileattr"] = "myfileattrvalue" h5fh.attrs["myfileattrint"] = 1 dset = h5fh.create_dataset("myint", (100,), dtype="i")