Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to latest 2022pre-EE recommendations; sync W+c selections #85

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
3fa956f
fix : remove bad files & add cross-sections
Ming-Yan Oct 12, 2023
7c5005d
feat: add features for plotting scripts
Ming-Yan Oct 17, 2023
1282411
Merge branch 'div' into btv_upstream
Ming-Yan Oct 17, 2023
dcb4254
fix: add only for standalone condor& plot code
Ming-Yan Oct 26, 2023
44435c9
[run array] ci: customize ci , fix bugs in workflow
Ming-Yan Oct 27, 2023
32f6ec8
Fix compatibility with Nanov12, changes to --only
mondalspandan Nov 6, 2023
a2c1d2e
[run array] ci: customize ci , fix bugs in workflow
Ming-Yan Oct 27, 2023
af51af0
Fixing Summer22/EE PU files
mondalspandan Nov 6, 2023
dc1a473
Merge branch 'master' into master
mondalspandan Nov 6, 2023
f3517a1
Fixing file handling
mondalspandan Nov 7, 2023
10ee3a0
Fixing file handling
mondalspandan Nov 7, 2023
3132d96
Switch back PU files
mondalspandan Nov 7, 2023
f420b68
Fix output dir
mondalspandan Nov 7, 2023
ca11c36
Fix xrdcp
mondalspandan Nov 7, 2023
b98aa19
Merge pull request #3 from mondalspandan/master
Ming-Yan Nov 7, 2023
c40d26a
fix: merge Spandan's dev ci:skip syst
Ming-Yan Nov 7, 2023
2e2d36c
Unify Wc mu, elec and ttbarsemi mu, elec workflows; new common arrayw…
mondalspandan Nov 14, 2023
bc667ba
Merge branch 'master' into master
mondalspandan Nov 14, 2023
e7d7e1b
Add back soft mu iso cut
mondalspandan Nov 14, 2023
ff69e01
Merge branch 'master' of github.com:mondalspandan/BTVNanoCommissioning
mondalspandan Nov 14, 2023
25cc257
Sync variable names
mondalspandan Nov 14, 2023
9e6e9cd
Merge branch 'master' into BTVbranch
mondalspandan Nov 14, 2023
8a05e0a
Compatibility with missing DeepCSV info in inputs
mondalspandan Nov 14, 2023
9c7e5b1
Lint
mondalspandan Nov 14, 2023
17370a9
Merge pull request #1 from mondalspandan/master
mondalspandan Nov 14, 2023
f794f3b
Lint
mondalspandan Nov 14, 2023
965df29
Merge DY mu and DY e workflows
mondalspandan Nov 16, 2023
8978fba
Lint
mondalspandan Nov 16, 2023
ae9ca15
Merge branch 'master' of github.com:cms-btv-pog/BTVNanoCommissioning
mondalspandan Nov 28, 2023
84ceba8
Update to latest 2022pre-EE recommendations; sync W+c selections
mondalspandan Nov 29, 2023
6381f3c
feat: add jr in Summer23 and minor fixes
Ming-Yan May 10, 2024
c4d2210
add jec
Ming-Yan May 11, 2024
25bee05
Merge branch 'master' into master
mondalspandan May 19, 2024
95da839
Merge with new JEC/jet map features
mondalspandan May 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ python runner.py --workflow emctag_ttdilep_sf --json metadata/test_bta_run3.json
- `ci:skip syst` : remove `--isSyst all` option
- `ci:JERC_split` : change systematic option to split JERC uncertainty sources `--isSyst JERC_split`
- `ci:weight_only` : change systematic option to weight only variations `--isSyst weight_only`

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason to add an extra blank line here?


### Running jupyter remotely
1. On your local machine, edit `.ssh/config`:
Expand Down

Large diffs are not rendered by default.

2,448 changes: 10 additions & 2,438 deletions metadata/data_Summer23_2023_jetmet_BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json

Large diffs are not rendered by default.

11 changes: 4 additions & 7 deletions runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ def get_main_parser():
if args.output == parser.get_default("output"):
index = args.samplejson.rfind("/") + 1
sample_json = args.samplejson[index:]
histoutdir = f"hists_{args.workflow}_{sample_json.rstrip('.json')}"
outdir = f"arrays_{args.workflow}_{sample_json.rstrip('.json')}"
histoutdir = f"hists_{args.workflow}"
outdir = f"arrays_{args.workflow}"
coffeaoutput = (
f'{histoutdir}/hists_{args.workflow}_{(sample_json).rstrip(".json")}.coffea'
)
Expand Down Expand Up @@ -267,7 +267,7 @@ def get_main_parser():
isamp = int(args.only)
nsamp = len(sample_dict.keys())
if isamp >= nsamp:
print(
raise RuntimeError(
f"There are {nsamp} datasets, please use --only n with n<{nsamp}."
)
key = list(sample_dict.keys())[isamp]
Expand Down Expand Up @@ -350,10 +350,7 @@ def get_main_parser():
raise Exception(f"{coffeaoutput} exists")

if args.isArray:
if path.exists(outdir) and args.overwrite == False and args.only is None:
raise Exception("Directory exists")
else:
os.system(f"mkdir -p {outdir}")
os.system(f"mkdir -p {outdir}")

if args.executor not in ["futures", "iterative", "dask/lpc", "dask/casa"]:
"""
Expand Down
23 changes: 23 additions & 0 deletions scripts/dohadd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os, sys
from glob import glob


def main():
    """Write ``hadd.sh``, a shell script that merges per-sample ROOT files.

    Expects one CLI argument: the top-level input directory, laid out as
    ``<indir>/<syst>/<sample>/*.root``. For every systematic/sample pair
    that actually contains ROOT files, one ``hadd`` line is emitted that
    merges ``<indir>/<syst>/<sample>/*.root`` into
    ``<indir>/<syst>/<sample>.root``. Directories with no ROOT files at
    the expected depth are reported and skipped.
    """
    indir = sys.argv[1]

    systs = os.listdir(indir)

    # Use a context manager so hadd.sh is flushed and closed even on error
    # (the original left the handle open for the interpreter to clean up).
    with open("hadd.sh", "w") as outfile:
        for syst in systs:
            # Quick structure check: any ROOT file two levels down?
            roots = glob(f"{indir}/{syst}/*/*.root")
            if len(roots) == 0:
                print(f"Skipping {indir}/{syst}. Not the right directory structure.")
                continue
            samps = os.listdir(f"{indir}/{syst}")
            for samp in samps:
                # Skip sample directories (or stray files) with nothing to merge.
                if len(glob(f"{indir}/{syst}/{samp}/*.root")) == 0:
                    continue
                outfile.write(
                    f"hadd -v 0 {indir}/{syst}/{samp}.root {indir}/{syst}/{samp}/*.root\n"
                )

    print("Now run `parallel :::: hadd.sh` from an environment with ROOT installed. E.g. \nconda activate rootenv\nparallel :::: hadd.sh\nconda activate btv_coffea")


# Guarded so importing this module has no side effects (the original ran
# everything at import time and crashed on a missing sys.argv[1]).
if __name__ == "__main__":
    main()
9 changes: 6 additions & 3 deletions scripts/dump_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,17 @@ def dump_dataset(output, fname, alljson):
original_list, list_from_coffea = {}, {}
for j in jsonlist:
old = json.load(open(j))
original_list[j] = []
for o in old.keys():
if o not in original_list.keys():
original_list[o] = []
original_list[o].append(old[o])

for m in output.keys():
list_from_coffea[m] = []
for f in output[m].keys():
list_from_coffea[f].append(list(set(output[m][f]["fname"])))
if f not in list_from_coffea.keys():
list_from_coffea[f] = []
else:
list_from_coffea[f] += list(set(output[m][f]["fname"]))
failed = {}
for t in original_list.keys():
failed[t] = []
Expand Down
33 changes: 29 additions & 4 deletions scripts/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,24 @@
action="store_true",
default=False,
)
parser.add_argument(
'-r', "--redirector",
help="xrootd ridirector in case sites are not found",
choices=["infn","fnal","cern"],
default="infn"
)
parser.add_argument(
'-j', "--ncpus",
help="Number of CPUs to use for validation",
default="4"
)
parser.add_argument(
"--skipvalidation",
action="store_true",
help="If true, the readability of files will not be validated.",
default=False,
)

parser.add_argument("--campaign", help="campaign info", default=None, type=str)


Expand Down Expand Up @@ -210,9 +228,14 @@ def getFilesFromDas(args):

if xrd is None:
print(
f"No SITE available in the whitelist for file {dsname}, change to global redirector"
f"No SITE available in the whitelist for file {dsname}, change to global redirector: {args.redirector}"
)
xrd = "root://xrootd-cms.infn.it//"
redirector = {
"infn": "root://xrootd-cms.infn.it//",
"fnal": "root://cmsxrootd.fnal.gov/",
"cern": "root://cms-xrd-global.cern.ch/"
}
xrd = redirector[args.redirector]
if args.limit is not None:
flist = flist[: args.limit]
if dsname not in fdict:
Expand Down Expand Up @@ -335,7 +358,7 @@ def remove_bad_files(sample_dict, outname, remove_bad=True):
_rmap = p_map(
validate,
sample_dict[sample],
num_cpus=4,
num_cpus=int(args.ncpus),
desc=f"Validating {sample[:20]}...",
)

Expand Down Expand Up @@ -373,6 +396,7 @@ def main(args):
outf = open(args.input + "_DAS_" + args.campaign, "w")
short_campaign = args.campaign
for l in f.readlines():
print(l)
l = l.replace("\n", "")
dataset = (
os.popen(
Expand Down Expand Up @@ -418,7 +442,8 @@ def main(args):
empty = False
assert empty, "you have empty lists"
output_file = "./%s" % (args.output)
# fdict = remove_bad_files(fdict, args.output, True) # remove bad files
if not args.skipvalidation:
fdict = remove_bad_files(fdict, args.output, True) # remove bad files
with open(output_file, "w") as fp:
json.dump(fdict, fp, indent=4)
print("The file is saved at: ", output_file)
Expand Down
Binary file modified src/BTVNanoCommissioning/data/JME/Summer22/jec_compiled.pkl.gz
Binary file not shown.
Loading
Loading