Merge pull request #41168 from missirol/devel_testDataFormatsScouting_130X

a first unit test for backward compatibility of Scouting data formats [`13_0_X`]
cms-sw · Apr 26, 2023 · 93abea9 · 93abea9
2 parents dfb0603 + a777727
commit 93abea9
Show file tree

Hide file tree

Showing 7 changed files with 721 additions and 0 deletions.
diff --git a/DataFormats/Scouting/test/BuildFile.xml b/DataFormats/Scouting/test/BuildFile.xml
@@ -0,0 +1,2 @@
+<!-- Unit tests of this package: each entry gives the test name and the shell script run as its command -->
+<test name="testDataFormatsScoutingRun2" command="testDataFormatsScoutingRun2.sh"/>
+<test name="testDataFormatsScoutingRun3" command="testDataFormatsScoutingRun3.sh"/>
diff --git a/DataFormats/Scouting/test/scoutingCollectionsDumper.py b/DataFormats/Scouting/test/scoutingCollectionsDumper.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+import os
+import argparse
+import glob
+import fnmatch
+import ROOT
+
+from DataFormats.FWLite import Runs, Events, Handle
+
+def getScoutingProductsList(key):
+    '''Returns list of tuples (type, label) for EDM Scouting collections
+       - 'key' must be "Scouting" (data formats before Run 3), or "Run3Scouting"
+    '''
+    if key == 'Scouting':
+        return [
+            ('vector<ScoutingCaloJet>', 'hltScoutingCaloPacker'),
+            ('vector<ScoutingElectron>', 'hltScoutingEgammaPacker'),
+            ('vector<ScoutingMuon>', 'hltScoutingMuonPackerCalo'),
+            ('vector<ScoutingPFJet>', 'hltScoutingPFPacker'),
+            ('vector<ScoutingParticle>', 'hltScoutingPFPacker'),
+            ('vector<ScoutingPhoton>', 'hltScoutingEgammaPacker'),
+            ('vector<ScoutingTrack>', 'hltScoutingTrackPacker'),
+            ('vector<ScoutingVertex>', 'hltScoutingPrimaryVertexPacker:primaryVtx'),
+        ]
+    elif key == 'Run3Scouting':
+        return [
+            ('vector<Run3ScoutingElectron>', 'hltScoutingEgammaPacker'),
+            ('vector<Run3ScoutingMuon>', 'hltScoutingMuonPacker'),
+            ('vector<Run3ScoutingPFJet>', 'hltScoutingPFPacker'),
+            ('vector<Run3ScoutingParticle>', 'hltScoutingPFPacker'),
+            ('vector<Run3ScoutingPhoton>', 'hltScoutingEgammaPacker'),
+            ('vector<Run3ScoutingTrack>', 'hltScoutingTrackPacker'),
+            ('vector<Run3ScoutingVertex>', 'hltScoutingMuonPacker:displacedVtx'),
+            ('vector<Run3ScoutingVertex>', 'hltScoutingPrimaryVertexPacker:primaryVtx')
+        ]
+    else:
+        raise RuntimeError(f'getScoutingProductsList -- invalid key (must be "Scouting", or "Run3Scouting"): "{key}"')
+
+def printScoutingVar(name, value):
+    '''Print content of data member of Scouting object
+    '''
+    if isinstance(value, ROOT.Run3ScoutingHitPatternPOD):
+        for subvar in [
+            'hitCount',
+            'beginTrackHits',
+            'endTrackHits',
+            'beginInner',
+            'endInner',
+            'beginOuter',
+            'endOuter',
+            'hitPattern',
+        ]:
+            subvalue = getattr(value, subvar)
+            print(f'      {name}.{subvar} = {subvalue}')
+    else:
+        print(f'      {name} = {value}')
+
+def printScoutingProduct(product_label, product_type, product, verbosity):
+    '''Print content of EDM product
+    '''
+    if verbosity == 0:
+        return
+
+    productIsVector = product_type.startswith('vector')
+
+    productInfoStr = f'Product Label: "{product_label}" (type: "{product_type}")'
+    if productIsVector:
+        productInfoStr += f', size = {product.size()}'
+
+    print(f'\n  {productInfoStr}')
+
+    if not productIsVector:
+        printScoutingVar('value', product[0])
+        return
+
+    obj_idx = 0
+    for obj in product:
+        # print only first N objects, where N corresponds to verbosity (if positive)
+        if verbosity > 0 and obj_idx >= verbosity:
+            break
+
+        # names of data members to print
+        if obj_idx == 0:
+            varNames = sorted([foo for foo in dir(obj) if not fnmatch.fnmatch(foo, '__*__')])
+
+        print(f'\n    Object #{obj_idx}')
+        obj_idx += 1
+        for varName in varNames:
+            varValue = getattr(obj, varName)()
+            printScoutingVar(varName, varValue)
+
+def analyseEvent(event, productList, verbosity = -1):
+    '''Function to analyse a single EDM Event
+    '''
+    if verbosity != 0:
+        print('-'*50)
+        print(f'Run             = {event.eventAuxiliary().run()}')
+        print(f'LuminosityBlock = {event.eventAuxiliary().luminosityBlock()}')
+        print(f'Event           = {event.eventAuxiliary().event()}')
+
+    for productType, productLabel in productList:
+        productHandle = Handle(productType)
+        event.getByLabel(productLabel, productHandle)
+        if productHandle.isValid():
+            printScoutingProduct(productLabel, productType, productHandle.product(), verbosity)
+
+    if verbosity != 0:
+        print('-'*50)
+
+def getInputFiles(inputList):
+    '''List of input files (after resolving wildcards, removing duplicates, and sorting)
+    '''
+    ret = set()
+    for input_i in inputList:
+        inputFiles_i = glob.glob(input_i)
+        if len(inputFiles_i) == 0:
+            inputFiles_i = [input_i]
+        for input_j in inputFiles_i:
+            ret.add(os.path.abspath(os.path.realpath(input_j)) if os.path.isfile(input_j) else input_j)
+    return sorted(list(ret))
+
+###
+### main
+###
+if __name__ == '__main__':
+    ## args
+    parser = argparse.ArgumentParser(
+        description = 'FWLite script to print to stdout content of Scouting collections in EDM files.',
+        formatter_class = argparse.ArgumentDefaultsHelpFormatter
+    )
+
+    parser.add_argument('-i', '--inputs', dest='inputs', required=True, nargs='+', default=None,
+                        help='List of EDM files in ROOT format')
+
+    parser.add_argument('-s', '--skipEvents', dest='skipEvents', action='store', type=int, default=0,
+                        help='Index of first event to be processed (inclusive)')
+
+    parser.add_argument('-n', '--maxEvents', dest='maxEvents', action='store', type=int, default=-1,
+                        help='Maximum number of events to be processed (inclusive)')
+
+    parser.add_argument('-k', '--key', dest='key', action='store', type=str, choices=['Scouting', 'Run3Scouting'], default='Scouting',
+                        help='Keyword to select Scouting DataFormats (must be "Scouting", or "Run3Scouting")')
+
+    parser.add_argument('-v', '--verbosity', dest='verbosity', action='store', type=int, default=-1,
+                        help='Level of verbosity')
+
+    opts, opts_unknown = parser.parse_known_args()
+
+    log_prx = os.path.basename(__file__)+' --'
+
+    ## args validation
+    if len(opts_unknown) > 0:
+        raise RuntimeError(f'{log_prx} unrecognized command-line arguments: {opts_unknown}')
+
+    inputFiles = getInputFiles(opts.inputs)
+
+    if len(inputFiles) == 0:
+        raise RuntimeError(f'{log_prx} empty list of input files [-i]')
+
+    ## Event Loop
+    nEvtRead, nEvtProcessed = 0, 0
+    skipEvents = max(0, opts.skipEvents)
+
+    scoutingProductsList = getScoutingProductsList(opts.key)
+
+    for input_file in inputFiles:
+        try:
+            events = Events(input_file)
+            for event in events:
+                nEvtRead += 1
+                if (nEvtRead <= skipEvents) or ((opts.maxEvents >= 0) and (nEvtProcessed >= opts.maxEvents)):
+                    continue
+
+                analyseEvent(event = event, productList = scoutingProductsList, verbosity = opts.verbosity)
+                nEvtProcessed += 1
+
+        except:
+            print(f'{log_prx} failed to analyse TFile (file will be ignored): {input_file}')
+            continue
+
+    if opts.verbosity != 0:
+        print(f'Events processed = {nEvtProcessed}')
diff --git a/DataFormats/Scouting/test/scoutingCollectionsIO_cfg.py b/DataFormats/Scouting/test/scoutingCollectionsIO_cfg.py
@@ -0,0 +1,68 @@
+import FWCore.ParameterSet.Config as cms
+
+import argparse
+import sys
+
+# Command-line interface of this configuration file.
+# The 'prog' string only affects the usage/help text printed by argparse.
+parser = argparse.ArgumentParser(
+    prog = 'cmsRun '+sys.argv[0]+' --',
+    description = 'Configuration file to test I/O of Scouting collections.',
+    formatter_class = argparse.ArgumentDefaultsHelpFormatter
+)
+
+parser.add_argument('-t', '--nThreads', type = int, help = 'Number of threads',
+                    default = 1)
+
+parser.add_argument('-s', '--nStreams', type = int, help = 'Number of EDM streams',
+                    default = 0)
+
+parser.add_argument('-i', '--inputFiles', nargs = '+', help = 'List of EDM input files',
+                    default = ['/store/mc/Run3Summer22DR/GluGlutoHHto2B2Tau_kl-5p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV_powheg-pythia8/GEN-SIM-RAW/124X_mcRun3_2022_realistic_v12-v2/2550000/bbfb86f3-4073-47e3-967b-059aa6b904ad.root'])
+
+parser.add_argument('-n', '--maxEvents', type = int, help = 'Max number of input events to be processed',
+                    default = 10)
+
+parser.add_argument('--skipEvents', type = int, help = 'Number of input events to be skipped',
+                    default = 0)
+
+parser.add_argument('-o', '--outputFile', type = str, help = 'Path to output EDM file in ROOT format',
+                    default = 'scoutingCollectionsIO_output.root')
+
+parser.add_argument('--wantSummary', action = 'store_true', help = 'Value of process.options.wantSummary',
+                    default = False)
+
+# Work on a copy of sys.argv (sys.argv itself is left untouched),
+# and drop the first literal '--' separator before parsing.
+argv = sys.argv[:]
+if '--' in argv:
+    argv.remove('--')
+
+# parse_known_args: arguments not declared above end up in 'unknown', which is not used afterwards.
+# NOTE(review): 'argv' still contains sys.argv[0]; presumably it lands in 'unknown' as well -- confirm.
+args, unknown = parser.parse_known_args(argv)
+
+# Process
+process = cms.Process('TEST')
+
+process.options.numberOfThreads = args.nThreads
+process.options.numberOfStreams = args.nStreams
+process.options.wantSummary = args.wantSummary
+
+process.maxEvents.input = args.maxEvents
+
+# Source (EDM input)
+# NOTE(review): skipEvents is converted to an unsigned type; a negative --skipEvents is presumably rejected -- confirm.
+process.source = cms.Source('PoolSource',
+    fileNames = cms.untracked.vstring(args.inputFiles),
+    skipEvents = cms.untracked.uint32(args.skipEvents)
+)
+
+# MessageLogger (Service)
+# (loaded first, then the FwkReport frequency is set to 1)
+process.load('FWCore.MessageLogger.MessageLogger_cfi')
+process.MessageLogger.cerr.FwkReport.reportEvery = 1
+
+# Output module
+# (output commands: drop everything, then keep what matches '*Scouting*_*_*_*')
+process.testOutput = cms.OutputModule('PoolOutputModule',
+    fileName = cms.untracked.string( args.outputFile ),
+    outputCommands = cms.untracked.vstring(
+        'drop *',
+        'keep *Scouting*_*_*_*',
+    )
+)
+
+# EndPath
+# (the output module is the only module scheduled in this configuration)
+process.testEndPath = cms.EndPath( process.testOutput )
diff --git a/DataFormats/Scouting/test/testDataFormatsScoutingRun2.sh b/DataFormats/Scouting/test/testDataFormatsScoutingRun2.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Pass in name and status
+function die {
+  printf "\n%s: status %s\n" "$1" "$2"
+  if [ $# -gt 2 ]; then
+    printf "%s\n" "=== Log File =========="
+    cat $3
+    printf "%s\n" "=== End of Log File ==="
+  fi
+  exit $2
+}
+
+# read Scouting collections from existing EDM file, and write them to disk
+cmsRun "${SCRAM_TEST_PATH}"/scoutingCollectionsIO_cfg.py -- \
+  -i /store/mc/RunIISummer20UL18RECO/DoubleElectron_Pt-1To300-gun/AODSIM/FlatPU0to70EdalIdealGT_EdalIdealGT_106X_upgrade2018_realistic_v11_L1v1_EcalIdealIC-v2/270000/4CDD9457-E14C-D84A-9BD4-3140CB6AEEB6.root \
+  -n 150 --skip 900 -o testDataFormatsScoutingRun2_step1.root \
+  > testDataFormatsScoutingRun2_step1.log 2> testDataFormatsScoutingRun2_step1_stderr.log \
+  || die "Failure running scoutingCollectionsIO_cfg.py" $? testDataFormatsScoutingRun2_step1_stderr.log
+
+cat testDataFormatsScoutingRun2_step1.log
+
+# validate content of Scouting collections
+"${SCRAM_TEST_PATH}"/scoutingCollectionsDumper.py -v 1 -n 1 --skip 81 -i testDataFormatsScoutingRun2_step1.root -k Scouting \
+  > testDataFormatsScoutingRun2_step2.log 2> testDataFormatsScoutingRun2_step2_stderr.log \
+  || die "Failure running scoutingCollectionsDumper.py" $? testDataFormatsScoutingRun2_step2_stderr.log
+
+diff -q "${SCRAM_TEST_PATH}"/testDataFormatsScoutingRun2_expected.log testDataFormatsScoutingRun2_step2.log \
+  || die "Unexpected differences in outputs of testDataFormatsScoutingRun2 (step 2)" $?
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		<test name="testDataFormatsScoutingRun2" command="testDataFormatsScoutingRun2.sh"/>
		<test name="testDataFormatsScoutingRun3" command="testDataFormatsScoutingRun3.sh"/>