From 19f98cf27aefce71d4f75f7b399b8623ce6fabce Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Fri, 12 Aug 2022 16:48:13 +0200
Subject: [PATCH 1/3] Change the default to 100 events per file

Also change the default number of events per lumisection from 11650 to 11655.
---
 HLTrigger/Tools/scripts/convertToRaw | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/HLTrigger/Tools/scripts/convertToRaw b/HLTrigger/Tools/scripts/convertToRaw
index ea688bcc71ca5..c9ff2e164c64d 100755
--- a/HLTrigger/Tools/scripts/convertToRaw
+++ b/HLTrigger/Tools/scripts/convertToRaw
@@ -27,8 +27,8 @@ def cmsRun(config, **args):
 
 
 # default values
-events_per_file = 50
-events_per_lumi = 11650
+events_per_file = 100
+events_per_lumi = 11655
 output_directory = ''
 
 parser = argparse.ArgumentParser(description='Convert RAW data from .root format to .raw format.', formatter_class = argparse.ArgumentDefaultsHelpFormatter)

From 884604ab2c8568038e2dda287660cd46a9915f10 Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Fri, 12 Aug 2022 22:40:28 +0200
Subject: [PATCH 2/3] Add an option to select the run and lumi range

Add the -r/--range option to restrict the processing to a range of runs
and lumisections.
---
 HLTrigger/Tools/scripts/convertToRaw | 40 ++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/HLTrigger/Tools/scripts/convertToRaw b/HLTrigger/Tools/scripts/convertToRaw
index c9ff2e164c64d..0429d15567517 100755
--- a/HLTrigger/Tools/scripts/convertToRaw
+++ b/HLTrigger/Tools/scripts/convertToRaw
@@ -26,6 +26,43 @@ def cmsRun(config, **args):
         sys.exit(status.returncode)
 
 
+class LuminosityBlockRange:
+    """Inclusive range of (run, lumisection) pairs; a bound equal to 0 means "no limit"."""
+    def __init__(self, value: str = '') -> None:
+        # '' and 'all' leave every bound at 0; anything else is parsed as 'RUN:LUMI' or 'RUN:LUMI-RUN:LUMI'
+        self.min_run = self.max_run = self.min_lumi = self.max_lumi = 0
+        if value and value != 'all':
+            ((self.min_run, self.min_lumi), (self.max_run, self.max_lumi)) = LuminosityBlockRange.parse_range(value)
+
+    @staticmethod
+    def parse_value(value: str) -> int:
+        """Convert one field to an int; '', 'min' and 'max' map to 0, other non-numeric text raises ValueError."""
+        return 0 if value in ('', 'min', 'max') else int(value)
+
+    @staticmethod
+    def parse_value_pair(value: str) -> (int, int):
+        """Parse 'RUN:LUMI' into (run, lumi); a value without ':' is taken as a lumisection with run 0."""
+        if value.count(':') > 1:
+            raise ValueError('invalid syntax')
+        (first, second) = value.split(':') if ':' in value else ('', value)
+        return LuminosityBlockRange.parse_value(first), LuminosityBlockRange.parse_value(second)
+
+    @staticmethod
+    def parse_range(value: str) -> ((int, int), (int, int)):
+        """Parse 'FIRST-LAST' into ((run, lumi), (run, lumi)); without '-' the value is used for both ends."""
+        if value.count('-') > 1:
+            raise ValueError('invalid syntax')
+        (first, second) = value.split('-') if '-' in value else (value, value)
+        return LuminosityBlockRange.parse_value_pair(first), LuminosityBlockRange.parse_value_pair(second)
+
+    def is_in_range(self, run: int, lumi: int) -> bool:
+        """Return True if (run, lumi) lies within the range, bounds included."""
+        # a run strictly inside a run bound passes; on the boundary run (or with no run bound) the lumisection decides
+        lower_ok = (self.min_run in (0, run) and (self.min_lumi == 0 or self.min_lumi <= lumi)) or (self.min_run != 0 and self.min_run < run)
+        upper_ok = (self.max_run in (0, run) and (self.max_lumi == 0 or self.max_lumi >= lumi)) or (self.max_run != 0 and self.max_run > run)
+        return lower_ok and upper_ok
+
+
 # default values
 events_per_file = 100
 events_per_lumi = 11655
@@ -36,6 +73,7 @@ parser.add_argument('files', type=str, metavar='FILES', nargs='+', help='input f
 parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar='PATH', default='', help='base path to store the output files; subdirectories based on the run number are automatically created')
 parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events')
 parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection')
+parser.add_argument('-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all', help='process only the runs and lumisections in the given range')
 parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)')
 
 # parse the command line arguments and options
@@ -86,6 +124,8 @@ for f in files:
 
         if parsing:
             run, lumi, events = tuple(map(int, line.split()))
+            if not args.range.is_in_range(run, lumi):
+                continue
             if not run in content:
                 content[run] = {}
             if not lumi in content[run]:

From d9113f05b9eee4c49e967c2af254a4b9b712c745 Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Sat, 13 Aug 2022 14:32:08 +0200
Subject: [PATCH 3/3] Improve convertToRaw

After processing each run, write a cff.py file with the Source and the
necessary Services to process the converted files.

Improve the handling of the output and exit status of the subprocesses,
and add a -v/--verbose option to print additional messages while
preprocessing and converting the input files.
---
 HLTrigger/Tools/scripts/convertToRaw | 92 +++++++++++++++++++++++-----
 1 file changed, 78 insertions(+), 14 deletions(-)

diff --git a/HLTrigger/Tools/scripts/convertToRaw b/HLTrigger/Tools/scripts/convertToRaw
index 0429d15567517..1af03633cfa1e 100755
--- a/HLTrigger/Tools/scripts/convertToRaw
+++ b/HLTrigger/Tools/scripts/convertToRaw
@@ -10,19 +10,27 @@ import socket
 import subprocess
 import sys
 
-def cmsRun(config, **args):
+def cmsRun(config: str, verbose: bool, **args):
     cmd = [ 'cmsRun', config ] + [ arg + '=' + str(val) for (arg, val) in args.items() ]
     sys.stdout.write(' \\\n  '.join(cmd))
     sys.stdout.write('\n\n')
-    status = subprocess.run(cmd, stdout=None, stderr=None)
-    status.check_returncode()
+    if verbose:
+        status = subprocess.run(cmd, stdout=None, stderr=None)
+    else:
+        status = subprocess.run(cmd, capture_output=True, text=True)
 
     # handle error conditions
     if status.returncode < 0:
         sys.stderr.write('error: cmsRun was killed by signal %d\n' % -status.returncode)
+        if not verbose:
+            sys.stderr.write('\n')
+            sys.stderr.write(status.stderr)
         sys.exit(status.returncode)
     elif status.returncode > 0:
         sys.stderr.write('error: cmsRun exited with error code %d\n' % status.returncode)
+        if not verbose:
+            sys.stderr.write('\n')
+            sys.stderr.write(status.stderr)
         sys.exit(status.returncode)
 
 
@@ -74,6 +82,7 @@ parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar
 parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events')
 parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection')
 parser.add_argument('-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all', help='process only the runs and lumisections in the given range')
+parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='print additional information while processing the input files')
 parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)')
 
 # parse the command line arguments and options
@@ -97,16 +106,24 @@ content = {}
 for f in files:
 
     # run edmFileUtil --eventsInLumis ...
-    output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
+    print(f'preprocessing input file {f}')
+    if args.verbose:
+        output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], stdout=None, stderr=None)
+    else:
+        output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
+
+    # handle error conditions
     if output.returncode < 0:
         sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode)
-        sys.stderr.write('\n')
-        sys.stderr.write(output.stderr)
+        if not args.verbose:
+            sys.stderr.write('\n')
+            sys.stderr.write(output.stderr)
         sys.exit(output.returncode)
     elif output.returncode > 0:
         sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode)
-        sys.stderr.write('\n')
-        sys.stderr.write(output.stderr)
+        if not args.verbose:
+            sys.stderr.write('\n')
+            sys.stderr.write(output.stderr)
         sys.exit(output.returncode)
 
     # parse the output of edmFileUtil
@@ -125,15 +142,22 @@ for f in files:
         if parsing:
             run, lumi, events = tuple(map(int, line.split()))
             if not args.range.is_in_range(run, lumi):
+                print(f'  run {run}, lumisetion {lumi} is outside of the given range and will be skipped')
+                continue
+            if events == 0:
+                print(f'  run {run}, lumisetion {lumi} is empty and will be skipped')
                 continue
+            print(f'  run {run}, lumisetion {lumi} with {events} events will be processed')
             if not run in content:
                 content[run] = {}
             if not lumi in content[run]:
                 content[run][lumi] = FileInfo()
             content[run][lumi].events += events
             content[run][lumi].files.add(f)
+    print()
 
 # drop empty lumisections
+# note: this may no longer be needed, but is left as a cross check
 for run in content:
     empty_lumis = [ lumi for lumi in content[run] if content[run][lumi].events == 0 ]
     for lumi in empty_lumis:
@@ -157,6 +181,7 @@ if not os.path.exists(config_py):
     sys.exit(1)
 
 # convert the input data to FED RAW data format
+converted_files = []
 
 # process each run
 for run in sorted(content):
@@ -170,7 +195,8 @@ for run in sorted(content):
         # process the whole run
         lumis = sorted(content[run])
         print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis)))
-        cmsRun(config_py, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory)
+        cmsRun(config_py, args.verbose, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory)
+        converted_files = glob.glob(run_path + f'/run{run:06d}_ls{lumi:04d}_*.raw')
 
     else:
         # process lumisections individualy, then merge the output
@@ -187,7 +213,7 @@ for run in sorted(content):
             lumi_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
             shutil.rmtree(lumi_path, ignore_errors=True)
             os.makedirs(lumi_path)
-            cmsRun(config_py, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path)
+            cmsRun(config_py, args.verbose, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path)
 
             # merge all lumisetions data
 
@@ -214,7 +240,10 @@ for run in sorted(content):
             # lumisection data and EoLS files
             lumi_files = glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*')
             for f in lumi_files:
-                shutil.move(f, run_path + '/')
+                target = run_path + f.removeprefix(lumi_path)
+                shutil.move(f, target)
+                if f.endswith('.raw'):
+                    converted_files.append(target)
 
             # read the partial EoR file
             eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn'
@@ -239,7 +268,42 @@ for run in sorted(content):
 
         # write the final EoR file
         # implemented by hand instead of using json.dump() to match the style used by the DAQ tools
+        assert len(converted_files) == summary['data'][1]
         eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn'
-        f = open(eor_file, 'w')
-        f.write('{\n   "data" : [ "%d", "%d", "%d", "%d" ],\n   "definition" : "%s",\n   "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))
-        f.close()
+        with open(eor_file, 'w') as file:
+            file.write('{\n   "data" : [ "%d", "%d", "%d", "%d" ],\n   "definition" : "%s",\n   "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))
+            file.close()
+
+    # mark the .raw files as not executable
+    for f in converted_files:
+        os.chmod(f, 0o644)
+
+    # write a cff file for processing the converted files
+    cff_file = args.output_directory + f'/run{run:06d}_cff.py'
+    with open(cff_file, 'w') as file:
+        file.write("""import FWCore.ParameterSet.Config as cms
+
+from EventFilter.Utilities.FedRawDataInputSource_cfi import source as _source
+source = _source.clone(
+    eventChunkSize = 200,   # MB
+    eventChunkBlock = 200,  # MB
+    numBuffers = 4,
+    maxBufferedFiles = 4,
+    fileListMode = True,
+    fileNames = (
+%s
+    )
+)
+
+from EventFilter.Utilities.EvFDaqDirector_cfi import EvFDaqDirector as _EvFDaqDirector
+EvFDaqDirector = _EvFDaqDirector.clone(
+    buBaseDir = '%s',
+    runNumber = %d
+)
+
+from EventFilter.Utilities.FastMonitoringService_cfi import FastMonitoringService as _FastMonitoringService
+FastMonitoringService = _FastMonitoringService.clone()
+""" % ('\n'.join("        '" + f + "'," for f in converted_files), args.output_directory, run))
+        file.close()
+
+    # all done