From 043b424fd8d46f28d58b1e8ef472df2b77d1e4fb Mon Sep 17 00:00:00 2001
From: Marino Missiroli <m.missiroli@cern.ch>
Date: Sun, 18 Feb 2024 20:44:59 +0100
Subject: [PATCH] add option to restrict single-Path jobs of
 hltIntegrationTests to a subset of triggers

---
 .../Configuration/scripts/hltIntegrationTests | 50 ++++++++-----
 HLTrigger/Configuration/scripts/hltListPaths  | 71 +++++++++++--------
 2 files changed, 71 insertions(+), 50 deletions(-)

diff --git a/HLTrigger/Configuration/scripts/hltIntegrationTests b/HLTrigger/Configuration/scripts/hltIntegrationTests
index b88614d1aced9..48be210599144 100755
--- a/HLTrigger/Configuration/scripts/hltIntegrationTests
+++ b/HLTrigger/Configuration/scripts/hltIntegrationTests
@@ -19,7 +19,7 @@ function err() {
 
 NAME=$(basename $0)
 
-HELP="Run the integration tests over all paths in a given HLT menu.
+HELP="Run the integration tests on a given HLT menu.
 
 Usage:
   $NAME -h|--help
@@ -42,6 +42,10 @@ Usage:
        --streams     STREAMS      Run with STREAMS parallel streams (i.e. events) (default 0 means as many streams as threads)
        --threads     THREADS      Run with THREADS threads when running the whole HLT (default 4)
   -a | --accelerator ACCELERATOR  Keyword to choose allowed accelerators (examples: \"*\", \"cpu\", \"gpu-nvidia\")
+  -p | --paths       PATHS        Comma-separated list of Path names (incl. wildcards)
+                                  to select which Paths are tested standalone.
+                                  If a Path-name pattern starts with the dash character (-),
+                                  the Paths whose name matches that pattern will be ignored.
   -x | --extra       OPTIONS      If the HLT menu is a local cmsRun cfg file, OPTIONS is used as
                                   additional arguments to cmsRun (i.e. \"cmsRun hlt.py [OPTIONS]\")
                                   If the HLT menu is the name of a ConfDB configuration, OPTIONS is used as
@@ -95,8 +99,8 @@ Examples:
 "
 
 # parse command line argument and options
-OPTS=$(getopt -n "$NAME" -o "s:d:i:j:n:k:e:a:x:h" \
- -l "setup:,dir:,input:,jobs:,size:,skip:,streams:,threads:,accelerator:,events:,mc,extra:,help,dbproxy,dbproxyhost:,dbproxyport:" -- "$@")
+OPTS=$(getopt -n "$NAME" -o "s:d:i:j:n:k:e:p:a:x:h" \
+ -l "setup:,dir:,input:,jobs:,size:,skip:,streams:,threads:,paths:,accelerator:,events:,mc,extra:,help,dbproxy,dbproxyhost:,dbproxyport:" -- "$@")
 
 # invalid options
 if [ $? != 0 ]; then
@@ -118,6 +122,7 @@ EVENTS=""
 JOBS=4
 THREADS=4
 STREAMS=0
+PATHS=""
 ACCELERATOR="cpu"
 WORKDIR="hltintegration"
 EXTRA=""
@@ -200,6 +205,10 @@ while true; do
       THREADS=$2
       shift 2
       ;;
+    "-p" | "--paths" )
+      PATHS="$2"
+      shift 2
+      ;;
     "-a" | "--accelerator" )
       ACCELERATOR="$2"
       shift 2
@@ -320,10 +329,10 @@ else
   # if ${DATA} is empty, set it to "--data"
   [ "${DATA}" ] || DATA="--data"
   # download HLT menu from ConfDB
-  HLTGETCMD="hltGetConfiguration ${MENU} \
+  HLTGETCMD="hltGetConfiguration ${MENU}
     --process \"TEST$(date -u +'%Y%m%d%H%M%S')\"
-    --full --offline ${DATA} --unprescale \
-    --max-events ${SIZE} ${EXTRA} ${DBPROXYOPTS} --input ${INPUT}"
+    --max-events ${SIZE} --no-prescale --no-output
+    ${DATA} --input ${INPUT} ${EXTRA} ${DBPROXYOPTS}"
   HLTGETCMD=$(echo "${HLTGETCMD}" | xargs)
   log "Creating HLT menu from ConfDB configuration:\n> ${HLTGETCMD}"
   ${HLTGETCMD} > hlt.py
@@ -355,11 +364,11 @@ if [ "${SELECTION}" == "complex" ]; then
 process.source.eventsToProcess = cms.untracked.VEventRange( '$(echo $EVENTS | sed -e"s/,/','/g")' )
 @EOF
 
-elif (( $SKIP > 0 )); then
+elif (( ${SKIP} > 0 )); then
   cat >> hlt.py << @EOF
 
 # event selection customised by hltIntegrationTests
-process.source.skipEvents = cms.untracked.uint32( $SKIP )
+process.source.skipEvents = cms.untracked.uint32( ${SKIP} )
 @EOF
 fi
 
@@ -367,8 +376,8 @@ fi
 cat >> hlt.py << @EOF
 
 # configure multithreading, and allocate 10 MB of stack space per thread
-process.options.numberOfThreads = $THREADS
-process.options.numberOfStreams = $STREAMS
+process.options.numberOfThreads = ${THREADS}
+process.options.numberOfStreams = ${STREAMS}
 process.options.sizeOfStackForThreadsInKB = 10*1024
 # set allowed accelerators
 process.options.accelerators = [ "$ACCELERATOR" ]
@@ -376,17 +385,20 @@ process.options.accelerators = [ "$ACCELERATOR" ]
 process.hltTriggerSummaryAOD.throw = cms.bool( True )
 @EOF
 
-# find the list of all trigger paths
-TRIGGERS=$(hltListPaths hlt.py -p --no-dep --exclude "^HLTriggerFinalPath$")
+# list of trigger Paths to be tested standalone (always exclude HLTriggerFinalPath)
+log "Preparing list of trigger Paths to be tested standalone (paths.txt)"
+[ "${PATHS}" ] || PATHS="*"
+PATHS+=",-HLTriggerFinalPath"
+TRIGGERS=$(hltListPaths hlt.py -p --no-dep --select-paths "${PATHS}")
 echo "${TRIGGERS[@]}" > paths.txt
 
 # print some info
-if [ "$SELECTION" == "complex" ]; then
-  log "Will run $(echo $TRIGGERS | wc -w) HLT paths over $(echo $EVENTS | tr ',' '\n' | wc -l) events, with $JOBS jobs in parallel"
-elif [ "$SIZE" == "-1" ]; then
-  log "Will run $(echo $TRIGGERS | wc -w) HLT paths over all events, with $JOBS jobs in parallel"
+if [ "${SELECTION}" == "complex" ]; then
+  log "Will run full menu and $(echo $TRIGGERS | wc -w) triggers standalone over $(echo ${EVENTS} | tr ',' '\n' | wc -l) events, with ${JOBS} jobs in parallel"
+elif [ "${SIZE}" == "-1" ]; then
+  log "Will run full menu and $(echo ${TRIGGERS} | wc -w) triggers standalone over all events, with ${JOBS} jobs in parallel"
 else
-  log "Will run $(echo $TRIGGERS | wc -w) HLT paths over $SIZE events, with $JOBS jobs in parallel"
+  log "Will run full menu and $(echo ${TRIGGERS} | wc -w) triggers standalone over ${SIZE} events, with ${JOBS} jobs in parallel"
 fi
 
 # check the prescale modules
@@ -427,7 +439,7 @@ if [ "${SETUP}" ]; then
     log "Creating setup_cff from ConfDB configuration: ${SETUP_Vx}/${SETUP_DB}:${SETUP}"
     hltConfigFromDB --${SETUP_Vx} --${SETUP_DB} ${DBPROXYOPTS} --cff --configName "$SETUP" \
       --nopaths --services -FUShmDQMOutputService,-PrescaleService,-EvFDaqDirector,-FastMonitoringService > setup_cff.py
-    sed -i -e's/process = cms.Process(.*)/&\nprocess.load("setup_cff")/' hlt.py $(for TRIGGER in $TRIGGERS; do echo "${TRIGGER}".py; done)
+    sed -i -e's/process = cms.Process(.*)/&\nprocess.load("setup_cff")/' hlt.py $(for TRIGGER in ${TRIGGERS}; do echo "${TRIGGER}".py; done)
   else
     printf "%s\n" "WARNING -- \"--setup ${SETUP}\" will be ignored (failed to deduce name of HLT menu from hlt.py)"
   fi
@@ -435,7 +447,7 @@ fi
 
 # run all HLT dumps
 cat > .makefile << @EOF
-TRIGGERS=$(echo $TRIGGERS)
+TRIGGERS=$(echo ${TRIGGERS})
 CFGS=\$(TRIGGERS:%=%.py)
 LOGS=\$(TRIGGERS:%=%.log)
 DONE=\$(TRIGGERS:%=%.done)
diff --git a/HLTrigger/Configuration/scripts/hltListPaths b/HLTrigger/Configuration/scripts/hltListPaths
index 525b1a60aa081..4f215948c2efc 100755
--- a/HLTrigger/Configuration/scripts/hltListPaths
+++ b/HLTrigger/Configuration/scripts/hltListPaths
@@ -2,28 +2,29 @@
 import os
 import sys
 import argparse
-import re
+import fnmatch
+
 import FWCore.ParameterSet.Config as cms
 import HLTrigger.Configuration.Tools.pipe as pipe
 import HLTrigger.Configuration.Tools.options as options
 from HLTrigger.Configuration.extend_argparse import *
 
-def getPathList(config):
+def getPathList(args):
 
-  if isinstance(config.menu, options.ConnectionHLTMenu):
+  if isinstance(args.menu, options.ConnectionHLTMenu):
     # cmd to download HLT configuration
     cmdline = 'hltConfigFromDB'
-    if config.menu.run:
-      cmdline += f' --runNumber {config.menu.run}'
+    if args.menu.run:
+      cmdline += f' --runNumber {args.menu.run}'
     else:
-      cmdline += f' --{config.menu.database} --{config.menu.version} --configName {config.menu.name}'
+      cmdline += f' --{args.menu.database} --{args.menu.version} --configName {args.menu.name}'
     cmdline += ' --noedsources --noes --noservices'
-    if config.proxy:
-      cmdline += f' --dbproxy --dbproxyhost {config.proxy_host} --dbproxyport {config.proxy_port}'
+    if args.proxy:
+      cmdline += f' --dbproxy --dbproxyhost {args.proxy_host} --dbproxyport {args.proxy_port}'
 
   else:
     # use edmConfigDump to ensure the config can be executed
-    cmdline = f'edmConfigDump {config.menu}'
+    cmdline = f'edmConfigDump {args.menu}'
 
   # load HLT configuration
   try:
@@ -39,24 +40,33 @@ def getPathList(config):
   usePaths, useEndPaths, useFinalPaths = False, False, False
 
   # Paths only
-  if config.selection == 'paths':
+  if args.selection == 'paths':
     usePaths = True
 
   # EndPaths only
-  elif config.selection == 'endpaths':
+  elif args.selection == 'endpaths':
     useEndPaths = True
 
   # FinalPaths only
-  elif config.selection == 'finalpaths':
+  elif args.selection == 'finalpaths':
     useFinalPaths = True
 
   # Paths, EndPaths, and FinalPaths ('all')
-  elif config.selection == 'all':
+  elif args.selection == 'all':
     usePaths, useEndPaths, useFinalPaths = True, True, True
 
   # invalid value
   else:
-    raise RuntimeError(f'ERROR: invalid value for option "--selection" (must be "paths", "endpaths", "finalpaths", or "all"): {config.selection}')
+    raise RuntimeError(f'ERROR: invalid value for option "--selection" (must be "paths", "endpaths", "finalpaths", or "all"): {args.selection}')
+
+  path_keep_rules = []
+  for path_keep_rule in args.path_keep_rules.split(','):
+    if not path_keep_rule:
+      continue
+    keep_rule = not path_keep_rule.startswith('-')
+    pattern_idx = 0 if keep_rule else 1
+    rule_pattern = path_keep_rule[pattern_idx:]
+    path_keep_rules += [(keep_rule, rule_pattern)]
 
   ret = []
   for pathDict in [
@@ -69,17 +79,16 @@ def getPathList(config):
 
     for pathName in pathDict:
 
-      # skip if name of the path matches any of
-      # the regular expressions listed in "--exclude"
-      skipPath = False
-      for excludeRegExpr in config.excludeRegExprs:
-        if bool(re.search(excludeRegExpr, pathName)):
-          skipPath = True
-          break
-      if skipPath:
+      # keep or drop the Path according to path_keep_rules: the last matching rule wins,
+      # and a Path matching no rule is kept only if path_keep_rules is empty
+      keepPath = not path_keep_rules
+      for (keep_rule, rule_pattern) in path_keep_rules:
+        if fnmatch.fnmatch(pathName, rule_pattern):
+          keepPath = keep_rule
+      if not keepPath:
         continue
 
-      if config.no_dependent_paths:
+      if args.no_dependent_paths:
         # do not include "dependent paths", i.e. paths that depend on the result of other paths in the same job
         # the current criterion to identify a path as "dependent" is that
         # (1) the path contains a "TriggerResultsFilter" module and
@@ -146,7 +155,7 @@ parser.add_argument('menu',
                     action  = 'store',
                     type    = hltMenu,
                     metavar = 'MENU',
-                    help    = 'HLT menu (can be a local cmsRun configuration file, or the name of a configuration in the ConfDB database). For ConfDB configurations, supported formats are:\n  - /path/to/configuration[/Vn]\n  - [[{v1|v2|v3}/]{run3|run2|online|adg}:]/path/to/configuration[/Vn]\n  - run:runnumber\nThe possible converters are "v1", "v2, and "v3" (default).\nThe possible databases are "run3" (default, used for offline development), "run2" (used for accessing run2 offline development menus), "online" (used to extract online menus within Point 5) and "adg" (used to extract the online menus outside Point 5).\nIf no menu version is specified, the latest one is automatically used.\nIf "run:" is used instead, the HLT menu used for the given run number is looked up and used.\nNote other converters and databases exist as options but they are only for expert/special use.' )
+                    help    = 'HLT menu (can be a local cmsRun configuration file, or the name of a configuration in the ConfDB database).\nFor ConfDB configurations, supported formats are:\n- /path/to/configuration[/Vn]\n- [[{v1|v2|v3}/]{run3|run2|online|adg}:]/path/to/configuration[/Vn]\n- run:runnumber\nThe possible converters are "v1", "v2", and "v3" (default).\nThe possible databases are\n"run3" (default, used for offline development in Run 3),\n"run2" (used for accessing Run-2 offline development menus),\n"online" (used to extract online menus from inside Point 5) and\n"adg" (used to extract the online menus from outside Point 5).\nIf no menu version is specified, the latest one is automatically used.\nIf "run:" is used instead, the HLT menu used for the given run number is looked up and used.\nNote: other converters and databases exist, but they are only for expert/special use.' )
 
 # options
 parser.add_argument('--dbproxy',
@@ -196,11 +205,11 @@ parser.add_argument('--no-dependent-paths',
                     default = False,
                     help    = 'Do not list paths which depend on the result of other paths (default: false)' )
 
-parser.add_argument('--exclude',
-                    dest    = 'excludeRegExprs',
-                    nargs   = '+',
-                    default = [],
-                    help    = 'List of regular expressions to select names of paths to be ignored with re.search (default: empty)' )
+parser.add_argument('-s', '--select-paths',
+                    dest    = 'path_keep_rules',
+                    action  = 'store',
+                    default = '',
+                    help    = 'Comma-separated list of Path-name patterns (incl. wildcards) to select a subset of Paths using fnmatch.\nIf a Path-name pattern starts with the dash character (-), the Paths whose name matches that pattern are not selected.\nThe patterns are ordered: a given pattern can override previous ones (example: "*,-Foo,*" retains all Paths)\n(default: empty, meaning all Paths are kept)')
 
 # redefine "--help" to be the last option, and use a customized message 
 parser.add_argument('-h', '--help', 
@@ -208,8 +217,8 @@ parser.add_argument('-h', '--help',
                     help    = 'Show this help message and exit' )
 
 # parse command line arguments and options
-config = parser.parse_args()
+args = parser.parse_args()
 
-paths = getPathList(config)
+paths = getPathList(args)
 for path in paths:
   print(path)