From 8d194b67f7685584ba25adaab76b673e7f5694c5 Mon Sep 17 00:00:00 2001 From: Sid El Moctar AHMED MAOULOUD <56254328+SidAhmedMa@users.noreply.github.com> Date: Fri, 31 Jul 2020 15:27:47 +0200 Subject: [PATCH 01/10] Add files via upload --- strax/storage/rucio.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 strax/storage/rucio.py diff --git a/strax/storage/rucio.py b/strax/storage/rucio.py new file mode 100644 index 000000000..bbf13add8 --- /dev/null +++ b/strax/storage/rucio.py @@ -0,0 +1,41 @@ +import json +import hashlib +import os.path as osp + +import strax +from strax.storage.files import dirname_to_prefix + +export, __all__ = strax.exporter() + + +@export +class rucio(strax.StorageBackend): + """Get data from a rucio directory + """ + + def get_metadata(self, dirname, **kwargs): + dirname = str(dirname) + prefix = dirname_to_prefix(dirname) + metadata_json = f'{prefix}-metadata.json' + fn = rucio_path(metadata_json, dirname) + with open(fn, mode='r') as f: + return json.loads(f.read()) + + def _read_chunk(self, dirname, chunk_info, dtype, compressor): + #print('yes') + fn = rucio_path(chunk_info['filename'], dirname) + return strax.load_file(fn, dtype=dtype, compressor=compressor) + + def _saver(self, dirname, metadata): + raise NotImplementedError( + "Cannot save directly into rucio, upload with admix instead") + + +def rucio_path(filename, dirname): + root_path ='/dali/lgrandi/rucio' + scope = "xnt_"+dirname.split('-')[0] + rucio_did = "{0}:{1}".format(scope,filename) + rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest() + t1 = rucio_md5[0:2] + t2 = rucio_md5[2:4] + return osp.join(root_path,scope,t1,t2,filename) From 57012a506ec809be0f5b611e44023c167c150ceb Mon Sep 17 00:00:00 2001 From: Sid El Moctar AHMED MAOULOUD <56254328+SidAhmedMa@users.noreply.github.com> Date: Fri, 31 Jul 2020 15:28:06 +0200 Subject: [PATCH 02/10] Add files via upload --- rucio.py | 41 
+++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 rucio.py diff --git a/rucio.py b/rucio.py new file mode 100644 index 000000000..bbf13add8 --- /dev/null +++ b/rucio.py @@ -0,0 +1,41 @@ +import json +import hashlib +import os.path as osp + +import strax +from strax.storage.files import dirname_to_prefix + +export, __all__ = strax.exporter() + + +@export +class rucio(strax.StorageBackend): + """Get data from a rucio directory + """ + + def get_metadata(self, dirname, **kwargs): + dirname = str(dirname) + prefix = dirname_to_prefix(dirname) + metadata_json = f'{prefix}-metadata.json' + fn = rucio_path(metadata_json, dirname) + with open(fn, mode='r') as f: + return json.loads(f.read()) + + def _read_chunk(self, dirname, chunk_info, dtype, compressor): + #print('yes') + fn = rucio_path(chunk_info['filename'], dirname) + return strax.load_file(fn, dtype=dtype, compressor=compressor) + + def _saver(self, dirname, metadata): + raise NotImplementedError( + "Cannot save directly into rucio, upload with admix instead") + + +def rucio_path(filename, dirname): + root_path ='/dali/lgrandi/rucio' + scope = "xnt_"+dirname.split('-')[0] + rucio_did = "{0}:{1}".format(scope,filename) + rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest() + t1 = rucio_md5[0:2] + t2 = rucio_md5[2:4] + return osp.join(root_path,scope,t1,t2,filename) From 879db293e27093b0c829cbf331b0b850701f70c8 Mon Sep 17 00:00:00 2001 From: Sid El Moctar AHMED MAOULOUD <56254328+SidAhmedMa@users.noreply.github.com> Date: Fri, 31 Jul 2020 15:29:28 +0200 Subject: [PATCH 03/10] Update __init__.py --- strax/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/strax/__init__.py b/strax/__init__.py index a0137bb2e..0158e86f9 100644 --- a/strax/__init__.py +++ b/strax/__init__.py @@ -11,6 +11,7 @@ from .storage.common import * from .storage.files import * +from .storage.rucio import * from .storage.mongo import * from .storage.s3 import * from .storage.zipfiles 
import * From c4e55c2f80714fbb1215dad643f76f50fbfde733 Mon Sep 17 00:00:00 2001 From: Sid El Moctar AHMED MAOULOUD <56254328+SidAhmedMa@users.noreply.github.com> Date: Tue, 11 Aug 2020 14:00:08 +0200 Subject: [PATCH 04/10] Delete rucio.py File uploaded twice --- rucio.py | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 rucio.py diff --git a/rucio.py b/rucio.py deleted file mode 100644 index bbf13add8..000000000 --- a/rucio.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import hashlib -import os.path as osp - -import strax -from strax.storage.files import dirname_to_prefix - -export, __all__ = strax.exporter() - - -@export -class rucio(strax.StorageBackend): - """Get data from a rucio directory - """ - - def get_metadata(self, dirname, **kwargs): - dirname = str(dirname) - prefix = dirname_to_prefix(dirname) - metadata_json = f'{prefix}-metadata.json' - fn = rucio_path(metadata_json, dirname) - with open(fn, mode='r') as f: - return json.loads(f.read()) - - def _read_chunk(self, dirname, chunk_info, dtype, compressor): - #print('yes') - fn = rucio_path(chunk_info['filename'], dirname) - return strax.load_file(fn, dtype=dtype, compressor=compressor) - - def _saver(self, dirname, metadata): - raise NotImplementedError( - "Cannot save directly into rucio, upload with admix instead") - - -def rucio_path(filename, dirname): - root_path ='/dali/lgrandi/rucio' - scope = "xnt_"+dirname.split('-')[0] - rucio_did = "{0}:{1}".format(scope,filename) - rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest() - t1 = rucio_md5[0:2] - t2 = rucio_md5[2:4] - return osp.join(root_path,scope,t1,t2,filename) From 6404ae5f5733d1f53301bf0976fcc9f637acbfe3 Mon Sep 17 00:00:00 2001 From: Sid El Moctar AHMED MAOULOUD <56254328+SidAhmedMa@users.noreply.github.com> Date: Tue, 11 Aug 2020 14:04:28 +0200 Subject: [PATCH 05/10] Update rucio.py Removing a commented test --- strax/storage/rucio.py | 1 - 1 file changed, 1 deletion(-) diff
--git a/strax/storage/rucio.py b/strax/storage/rucio.py index bbf13add8..6df0a4f40 100644 --- a/strax/storage/rucio.py +++ b/strax/storage/rucio.py @@ -22,7 +22,6 @@ def get_metadata(self, dirname, **kwargs): return json.loads(f.read()) def _read_chunk(self, dirname, chunk_info, dtype, compressor): - #print('yes') fn = rucio_path(chunk_info['filename'], dirname) return strax.load_file(fn, dtype=dtype, compressor=compressor) From 92d2fa8aa5d0e01c86bcca2905cb2f382da27ee5 Mon Sep 17 00:00:00 2001 From: Joran Angevaare Date: Wed, 12 Aug 2020 16:49:59 +0200 Subject: [PATCH 06/10] Update rucio.py --- strax/storage/rucio.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/strax/storage/rucio.py b/strax/storage/rucio.py index 6df0a4f40..44f30b05e 100644 --- a/strax/storage/rucio.py +++ b/strax/storage/rucio.py @@ -12,17 +12,23 @@ class rucio(strax.StorageBackend): """Get data from a rucio directory """ + def __init__(self, root_dir, *args, **kwargs): + super().__init__(*args, **kwargs) + self.root_dir = root_dir - def get_metadata(self, dirname, **kwargs): - dirname = str(dirname) + def get_metadata(self, dirname:str, **kwargs): prefix = dirname_to_prefix(dirname) metadata_json = f'{prefix}-metadata.json' - fn = rucio_path(metadata_json, dirname) + fn = rucio_path(self.root_dir, metadata_json, dirname) + + if not osp.exists(fn): + raise strax.DataCorrupted(f"Data in {dirname} has no metadata") + with open(fn, mode='r') as f: return json.loads(f.read()) def _read_chunk(self, dirname, chunk_info, dtype, compressor): - fn = rucio_path(chunk_info['filename'], dirname) + fn = rucio_path(self.root_dir, chunk_info['filename'], dirname) return strax.load_file(fn, dtype=dtype, compressor=compressor) def _saver(self, dirname, metadata): @@ -30,11 +36,11 @@ def _saver(self, dirname, metadata): "Cannot save directly into rucio, upload with admix instead") -def rucio_path(filename, dirname): - root_path ='/dali/lgrandi/rucio' +def 
rucio_path(root_dir, filename, dirname): + """Convert target to path according to rucio convention""" scope = "xnt_"+dirname.split('-')[0] - rucio_did = "{0}:{1}".format(scope,filename) + rucio_did = "{0}:{1}".format(scope, filename) rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest() t1 = rucio_md5[0:2] t2 = rucio_md5[2:4] - return osp.join(root_path,scope,t1,t2,filename) + return osp.join(root_dir, scope, t1, t2, filename) From 0d9724d879dff48c947c1e334510339bbb16ed13 Mon Sep 17 00:00:00 2001 From: Joran Angevaare Date: Wed, 12 Aug 2020 17:01:22 +0200 Subject: [PATCH 07/10] remove pass for codefactor --- strax/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/strax/utils.py b/strax/utils.py index f906c961d..7e525f684 100644 --- a/strax/utils.py +++ b/strax/utils.py @@ -205,7 +205,6 @@ def profile_threaded(filename): monitoring_gil = True except (RuntimeError, ImportError): monitoring_gil = False - pass yappi.start() yield From 557480453d042e780b1aaec5830d2280f667bbde Mon Sep 17 00:00:00 2001 From: Joran Angevaare Date: Thu, 13 Aug 2020 14:12:02 +0200 Subject: [PATCH 08/10] remove todo for codefactor --- strax/processing/pulse_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strax/processing/pulse_processing.py b/strax/processing/pulse_processing.py index a1194c185..cc508dbbb 100644 --- a/strax/processing/pulse_processing.py +++ b/strax/processing/pulse_processing.py @@ -260,7 +260,7 @@ def _find_hits(records, min_amplitude, min_height_over_noise, for i in range(n_samples): # We can't use enumerate over r['data'], # numba gives errors if we do. - # TODO: file issue? + # maybe file an issue? 
x = r['data'][i] satisfy_threshold = x >= threshold From 52d6a99f52a19b69f0642dec2611d1d532a896dd Mon Sep 17 00:00:00 2001 From: Joran Angevaare Date: Fri, 14 Aug 2020 14:12:48 +0200 Subject: [PATCH 09/10] Update rucio.py raise data not available if folder does not exist --- strax/storage/rucio.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/strax/storage/rucio.py b/strax/storage/rucio.py index 44f30b05e..0d6222600 100644 --- a/strax/storage/rucio.py +++ b/strax/storage/rucio.py @@ -20,9 +20,11 @@ def get_metadata(self, dirname:str, **kwargs): prefix = dirname_to_prefix(dirname) metadata_json = f'{prefix}-metadata.json' fn = rucio_path(self.root_dir, metadata_json, dirname) - - if not osp.exists(fn): - raise strax.DataCorrupted(f"Data in {dirname} has no metadata") + folder = osp.join('/', *fn.split('/')[:-1]) + if not osp.exists(folder): + raise strax.DataNotAvailable(f"No folder for matadata at {fn}") + elif not osp.exists(fn): + raise strax.DataCorrupted(f"Folder exists but no matadata at {fn}") with open(fn, mode='r') as f: return json.loads(f.read()) From c05ef527db82795a11b21f393e4f8372315e3f29 Mon Sep 17 00:00:00 2001 From: Joran Angevaare Date: Fri, 14 Aug 2020 14:13:45 +0200 Subject: [PATCH 10/10] Update rucio.py fix codefactor --- strax/storage/rucio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strax/storage/rucio.py b/strax/storage/rucio.py index 0d6222600..926c5e7db 100644 --- a/strax/storage/rucio.py +++ b/strax/storage/rucio.py @@ -23,7 +23,7 @@ def get_metadata(self, dirname:str, **kwargs): folder = osp.join('/', *fn.split('/')[:-1]) if not osp.exists(folder): raise strax.DataNotAvailable(f"No folder for matadata at {fn}") - elif not osp.exists(fn): + if not osp.exists(fn): raise strax.DataCorrupted(f"Folder exists but no matadata at {fn}") with open(fn, mode='r') as f: