From 19f98cf27aefce71d4f75f7b399b8623ce6fabce Mon Sep 17 00:00:00 2001 From: Andrea Bocci <andrea.bocci@cern.ch> Date: Fri, 12 Aug 2022 16:48:13 +0200 Subject: [PATCH 1/3] Change the default to 100 events per file --- HLTrigger/Tools/scripts/convertToRaw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HLTrigger/Tools/scripts/convertToRaw b/HLTrigger/Tools/scripts/convertToRaw index ea688bcc71ca5..c9ff2e164c64d 100755 --- a/HLTrigger/Tools/scripts/convertToRaw +++ b/HLTrigger/Tools/scripts/convertToRaw @@ -27,8 +27,8 @@ def cmsRun(config, **args): # default values -events_per_file = 50 -events_per_lumi = 11650 +events_per_file = 100 +events_per_lumi = 11655 output_directory = '' parser = argparse.ArgumentParser(description='Convert RAW data from .root format to .raw format.', formatter_class = argparse.ArgumentDefaultsHelpFormatter) From 884604ab2c8568038e2dda287660cd46a9915f10 Mon Sep 17 00:00:00 2001 From: Andrea Bocci <andrea.bocci@cern.ch> Date: Fri, 12 Aug 2022 22:40:28 +0200 Subject: [PATCH 2/3] Add an option to select the run and lumi range Add the -r/--range option to restrict the processing to a range of runs and lumisections. 
--- HLTrigger/Tools/scripts/convertToRaw | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/HLTrigger/Tools/scripts/convertToRaw b/HLTrigger/Tools/scripts/convertToRaw index c9ff2e164c64d..0429d15567517 100755 --- a/HLTrigger/Tools/scripts/convertToRaw +++ b/HLTrigger/Tools/scripts/convertToRaw @@ -26,6 +26,43 @@ def cmsRun(config, **args): sys.exit(status.returncode) +class LuminosityBlockRange: + def __init__(self, value: str = '') -> None: + self.min_run = 0 + self.max_run = 0 + self.min_lumi = 0 + self.max_lumi = 0 + if value and value != 'all': + ((self.min_run, self.min_lumi), (self.max_run, self.max_lumi)) = LuminosityBlockRange.parse_range(value) + + @staticmethod + def parse_value(value: str) -> int: + return 0 if value in ('', 'min', 'max') else int(value) + + @staticmethod + def parse_value_pair(value: str) -> (int, int): + if value.count(':') > 1: + raise ValueError('invalid syntax') + (first, second) = value.split(':') if ':' in value else ('', value) + return LuminosityBlockRange.parse_value(first), LuminosityBlockRange.parse_value(second) + + @staticmethod + def parse_range(value: str) -> ((int, int), (int, int)): + if value.count('-') > 1: + raise ValueError('invalid syntax') + (first, second) = value.split('-') if '-' in value else (value, value) + return LuminosityBlockRange.parse_value_pair(first), LuminosityBlockRange.parse_value_pair(second) + + def is_in_range(self, run: int, lumi: int) -> bool: + return ( + (self.min_run == 0 or self.min_run == run) and (self.min_lumi == 0 or self.min_lumi <= lumi) or + (self.min_run != 0 and self.min_run < run) + ) and ( + (self.max_run == 0 or self.max_run == run) and (self.max_lumi == 0 or self.max_lumi >= lumi) or + (self.max_run != 0 and self.max_run > run) + ) + + # default values events_per_file = 100 events_per_lumi = 11655 @@ -36,6 +73,7 @@ parser.add_argument('files', type=str, metavar='FILES', nargs='+', help='input f parser.add_argument('-o', '--output', type=str, 
dest='output_directory', metavar='PATH', default='', help='base path to store the output files; subdirectories based on the run number are automatically created') parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events') parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection') +parser.add_argument('-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all', help='process only the runs and lumisections in the given range') parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)') # parse the command line arguments and options @@ -86,6 +124,8 @@ for f in files: if parsing: run, lumi, events = tuple(map(int, line.split())) + if not args.range.is_in_range(run, lumi): + continue if not run in content: content[run] = {} if not lumi in content[run]: From d9113f05b9eee4c49e967c2af254a4b9b712c745 Mon Sep 17 00:00:00 2001 From: Andrea Bocci <andrea.bocci@cern.ch> Date: Sat, 13 Aug 2022 14:32:08 +0200 Subject: [PATCH 3/3] Improve convertToRaw After processing each run, write a cff.py file with the Source and the necessary Services to process the converted files. Improve the handling of the output and exit status of the subprocesses, and add a -v/--verbose option to print additional messages while preprocessing and converting the input files. 
--- HLTrigger/Tools/scripts/convertToRaw | 92 +++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/HLTrigger/Tools/scripts/convertToRaw b/HLTrigger/Tools/scripts/convertToRaw index 0429d15567517..1af03633cfa1e 100755 --- a/HLTrigger/Tools/scripts/convertToRaw +++ b/HLTrigger/Tools/scripts/convertToRaw @@ -10,19 +10,27 @@ import socket import subprocess import sys -def cmsRun(config, **args): +def cmsRun(config: str, verbose: bool, **args): cmd = [ 'cmsRun', config ] + [ arg + '=' + str(val) for (arg, val) in args.items() ] sys.stdout.write(' \\\n '.join(cmd)) sys.stdout.write('\n\n') - status = subprocess.run(cmd, stdout=None, stderr=None) - status.check_returncode() + if verbose: + status = subprocess.run(cmd, stdout=None, stderr=None) + else: + status = subprocess.run(cmd, capture_output=True, text=True) # handle error conditions if status.returncode < 0: sys.stderr.write('error: cmsRun was killed by signal %d\n' % -status.returncode) + if not verbose: + sys.stderr.write('\n') + sys.stderr.write(status.stderr) sys.exit(status.returncode) elif status.returncode > 0: sys.stderr.write('error: cmsRun exited with error code %d\n' % status.returncode) + if not verbose: + sys.stderr.write('\n') + sys.stderr.write(status.stderr) sys.exit(status.returncode) @@ -74,6 +82,7 @@ parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events') parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection') parser.add_argument('-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all', help='process only the runs and lumisections in the given range') +parser.add_argument('-v', 
'--verbose', dest='verbose', action='store_true', default=False, help='print additional information while processing the input files') parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)') # parse the command line arguments and options @@ -97,16 +106,24 @@ content = {} for f in files: # run edmFileUtil --eventsInLumis ... - output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True) + print(f'preprocessing input file {f}') + if args.verbose: + output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], stdout=None, stderr=None) + else: + output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True) + + # handle error conditions if output.returncode < 0: sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode) - sys.stderr.write('\n') - sys.stderr.write(output.stderr) + if not args.verbose: + sys.stderr.write('\n') + sys.stderr.write(output.stderr) sys.exit(output.returncode) elif output.returncode > 0: sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode) - sys.stderr.write('\n') - sys.stderr.write(output.stderr) + if not args.verbose: + sys.stderr.write('\n') + sys.stderr.write(output.stderr) sys.exit(output.returncode) # parse the output of edmFileUtil @@ -125,15 +142,22 @@ for f in files: if parsing: run, lumi, events = tuple(map(int, line.split())) if not args.range.is_in_range(run, lumi): + print(f' run {run}, lumisetion {lumi} is outside of the given range and will be skipped') + continue + if events == 0: + print(f' run {run}, lumisetion {lumi} is empty and will be skipped') continue + print(f' run {run}, lumisetion {lumi} with {events} events will be processed') if not run in content: content[run] = {} if not lumi in content[run]: content[run][lumi] = FileInfo() 
content[run][lumi].events += events content[run][lumi].files.add(f) + print() # drop empty lumisections +# note: this may no longer be needed, but is left as a cross check for run in content: empty_lumis = [ lumi for lumi in content[run] if content[run][lumi].events == 0 ] for lumi in empty_lumis: @@ -157,6 +181,7 @@ if not os.path.exists(config_py): sys.exit(1) # convert the input data to FED RAW data format +converted_files = [] # process each run for run in sorted(content): @@ -170,7 +195,8 @@ for run in sorted(content): # process the whole run lumis = sorted(content[run]) print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis))) - cmsRun(config_py, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory) + cmsRun(config_py, args.verbose, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory) + converted_files = glob.glob(run_path + f'/run{run:06d}_ls{lumi:04d}_*.raw') else: # process lumisections individualy, then merge the output @@ -187,7 +213,7 @@ for run in sorted(content): lumi_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}' shutil.rmtree(lumi_path, ignore_errors=True) os.makedirs(lumi_path) - cmsRun(config_py, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path) + cmsRun(config_py, args.verbose, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path) # merge all lumisetions data @@ -214,7 +240,10 @@ for run in sorted(content): # lumisection data and EoLS files lumi_files = glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*') for f in lumi_files: - shutil.move(f, run_path + '/') + 
target = run_path + f.removeprefix(lumi_path) + shutil.move(f, target) + if f.endswith('.raw'): + converted_files.append(target) # read the partial EoR file eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn' @@ -239,7 +268,42 @@ for run in sorted(content): # write the final EoR file # implemented by hand instead of using json.dump() to match the style used by the DAQ tools + assert len(converted_files) == summary['data'][1] eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn' - f = open(eor_file, 'w') - f.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source'])) - f.close() + with open(eor_file, 'w') as file: + file.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source'])) + file.close() + + # mark the .raw files as not executable + for f in converted_files: + os.chmod(f, 0o644) + + # write a cff file for processing the converted files + cff_file = args.output_directory + f'/run{run:06d}_cff.py' + with open(cff_file, 'w') as file: + file.write("""import FWCore.ParameterSet.Config as cms + +from EventFilter.Utilities.FedRawDataInputSource_cfi import source as _source +source = _source.clone( + eventChunkSize = 200, # MB + eventChunkBlock = 200, # MB + numBuffers = 4, + maxBufferedFiles = 4, + fileListMode = True, + fileNames = ( +%s + ) +) + +from EventFilter.Utilities.EvFDaqDirector_cfi import EvFDaqDirector as _EvFDaqDirector +EvFDaqDirector = _EvFDaqDirector.clone( + buBaseDir = '%s', + runNumber = %d +) + +from EventFilter.Utilities.FastMonitoringService_cfi import FastMonitoringService as _FastMonitoringService +FastMonitoringService = _FastMonitoringService.clone() +""" % ('\n'.join(" '" + f + "'," for f in converted_files), 
args.output_directory, run)) + file.close() + + # all done