fix: restructure files
- scripts: add debug info
- wf: adapt campaign and years
- utils, helper: clean up JEC
Ming-Yan committed Nov 5, 2024
1 parent 63a3025 commit 526f8be
Showing 21 changed files with 386 additions and 454 deletions.
29 changes: 29 additions & 0 deletions .gitlab-ci.yml
@@ -0,0 +1,29 @@

# This file is a template, and might need editing before it works on your project.
# This is a sample GitLab CI/CD configuration file that should run without any modifications.
# It demonstrates a basic 3 stage CI/CD pipeline. Instead of real tests or scripts,
# it uses echo commands to simulate the pipeline execution.
#
# A pipeline is composed of independent jobs that run scripts, grouped into stages.
# Stages run in sequential order, but jobs within stages run in parallel.
#
# For more information, see: https://docs.gitlab.com/ee/ci/yaml/index.html#stages
#
# You can copy and paste this template into a new `.gitlab-ci.yml` file.
# You should not add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword.
#
# To contribute improvements to CI/CD templates, please follow the Development guide at:
# https://docs.gitlab.com/ee/development/cicd/templates.html
# This specific template is located at:
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Getting-Started.gitlab-ci.yml

stages: # List of stages for jobs, and their order of execution
- deploy

deploy-job: # This job runs in the deploy stage.
stage: deploy # Runs only for tagged commits (see the rules below) and triggers the AutoBTV pipeline.
script:
- 'curl --fail --request POST --form token=$MY_TRIGGER_TOKEN --form ref=master "https://gitlab.cern.ch/cms-analysis/btv/software-and-algorithms/autobtv/trigger/pipeline"'
rules:
- if: $CI_COMMIT_TAG
environment: production
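
For reference, the same trigger request can be reproduced outside CI. A minimal Python sketch, assuming MY_TRIGGER_TOKEN is exported in the environment (the test script itself is hypothetical, not part of this commit):

import os
import requests

# Mirror the curl call above: POST the trigger token and target ref.
resp = requests.post(
    "https://gitlab.cern.ch/cms-analysis/btv/software-and-algorithms/autobtv/trigger/pipeline",
    data={"token": os.environ["MY_TRIGGER_TOKEN"], "ref": "master"},
)
resp.raise_for_status()  # equivalent of curl --fail
print(resp.json())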
4 changes: 2 additions & 2 deletions scripts/fetch.py
@@ -61,7 +61,7 @@
parser.add_argument(
"--whitelist_sites",
help="White list fot sites",
default="T2_DE_DESY,T2_DE_RWTH,T2_CH_CERN",
default=None,
)
parser.add_argument(
"--blacklist_sites",
@@ -197,7 +197,7 @@ def getFilesFromDas(args):
.read()
.split("\n")
)

print("Number of files: ", len(flist))
import json

dataset = dataset[:-1] if "\n" in dataset else dataset
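
One caveat in the hunk above: the conditional slice drops only the final character whenever any newline is present anywhere in the string. A short sketch of the behaviour and of the more robust rstrip alternative:

dataset = "ZZ_TuneCP5\n"
assert (dataset[:-1] if "\n" in dataset else dataset) == "ZZ_TuneCP5"
# rstrip("\n") strips trailing newlines without relying on their position
assert dataset.rstrip("\n") == "ZZ_TuneCP5"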
126 changes: 119 additions & 7 deletions scripts/suball.py
@@ -1,6 +1,7 @@
import os, argparse
from BTVNanoCommissioning.workflows import workflows
from BTVNanoCommissioning.utils.sample import predefined_sample
from BTVNanoCommissioning.utils.AK4_parameters import correction_config
import os, sys, inspect

current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
@@ -10,6 +11,42 @@
from runner import config_parser, scaleout_parser, debug_parser


# Fetch the latest golden JSON lumi mask for a given year
def get_lumi_from_web(year):
import requests
import re

year = str(year)
# Define the URL of the directory
url = (
f"https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions{year[2:]}/"
)

# Send a request to fetch the HTML content of the webpage
response = requests.get(url)
html_content = response.text

# Use regex to find all href links that contain 'Golden.json' but do not contain 'era'
# Ensures it only captures the URL part within href="..." and not any other content.
goldenjson_files = re.findall(r'href="([^"]*Golden\.json[^"]*)"', html_content)

# Filter out any matches that contain 'era' in the filename
goldenjson_files = [file for file in goldenjson_files if "era" not in file]

# If there are any such files, find the latest one (assuming the files are sorted lexicographically)
if goldenjson_files:
latest_file = sorted(goldenjson_files)[
-1
] # Assuming lexicographical sorting works for the dates
os.system(f"wget {url}/{latest_file}")
os.system(f"mv {latest_file} src/BTVNanoCommissioning/data/lumiMasks/.")
return latest_file
else:
raise Exception(
f"No files for year {year} containing 'Golden.json' (excluding 'era') were found."
)
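
To illustrate the href filtering above, a small self-contained check with made-up file names (the sample HTML is illustrative, not a real DQM certification page):

import re

html = (
    '<a href="Cert_Collisions2023_366442_370790_Golden.json">all</a>'
    '<a href="Cert_Collisions2023_eraD_Golden.json">eraD</a>'
)
files = re.findall(r'href="([^"]*Golden\.json[^"]*)"', html)
# 'era' files are dropped; only the full-year certification survives
files = [f for f in files if "era" not in f]
assert files == ["Cert_Collisions2023_366442_370790_Golden.json"]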


### Manage workflow in one script
# EXAMPLE: python scripts/suball.py --scheme default_comissioning --campaign Summer23 --DAS_campaign "*Run2023D*Sep2023*,*Run3Summer23BPixNanoAODv12-130X*" --year 2023
# prerequisite: a new campaign should create an entry in AK4_parameters.py
@@ -26,7 +63,7 @@
parser.add_argument(
"-sc",
"--scheme",
default="CAMPAIGN_prompt_dataMC",
default="Validation",
choices=list(workflows.keys()) + ["Validation", "SF", "default_comissioning"],
help="Choose the function for dump luminosity(`lumi`)/failed files(`failed`) into json",
)
@@ -43,6 +80,11 @@
action="store_true",
help="not transfered to https://btvweb.web.cern.ch/Commissioning/dataMC/",
)
parser.add_argument(
"--debug",
action="store_true",
help="Run local debug test with small set of dataset with iterative executor",
)

args = parser.parse_args()
# summarize different groups for study
@@ -64,8 +106,26 @@
if args.scheme in workflows.keys():
workflow_group["test"] = [args.scheme]
args.scheme = "test"
# Check that the lumiMask exists; for prompt_dataMC, replace the $PROMPT_DATAMC placeholder
input_lumi_json = correction_config[args.campaign]["lumiMask"]
if args.campaign != "prompt_dataMC" and not os.path.exists(
f"src/BTVNanoCommissioning/data/lumiMasks/{input_lumi_json}"
):
raise f"src/BTVNanoCommissioning/data/lumiMasks/{input_lumi_json} not exist"

if (
args.campaign == "prompt_dataMC"
and correction_config[args.campaign]["lumiMask"] == "$PROMPT_DATAMC"
):
input_lumi_json = get_lumi_from_web(args.year)
os.system(
f"sed -i 's/$PROMPT_DATAMC/{input_lumi_json}/g' src/BTVNanoCommissioning/utils/AK4_parameters.py"
)
print(f"======>{input_lumi_json} is used for {args.year}")

for wf in workflow_group[args.scheme]:
if args.debug:
print(f"======{wf} in {args.scheme}=====")
overwrite = "--overwrite" if args.overwrite else ""
## creating dataset
if (
@@ -74,19 +134,37 @@
)
or args.overwrite
):
if args.debug:
print(
f"Creating MC dataset: python scripts/fetch.py -c {args.campaign} --from_workflow {wf} --DAS_campaign {args.DAS_campaign} --year {args.year} {overwrite} --skipvalidation"
)

os.system(
f"python scripts/fetch.py -c {args.campaign} --from_workflow {wf} --DAS_campaign {args.DAS_campaign} --year {args.year} {overwrite} --skipvalidation"
)
if args.debug:
os.system(f"ls metadata/{args.campaign}/*.json")

## Run the workflows
for types in predefined_sample[wf].keys():

if (types != "data" or types != "MC") and args.scheme == "Validation":
continue
print(
f"hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}/hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}.coffea"
)
if (
not os.path.exists(
f"hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}/hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}.coffea"
)
or args.overwrite
):
if not os.path.exists(
f"metadata/{args.campaign}/{types}_{args.campaign}_{args.year}_{wf}.json"
):
raise Exception(
f"metadata/{args.campaign}/{types}_{args.campaign}_{args.year}_{wf}.json not exist"
)
runner_config_required = f"python runner.py --wf {wf} --json metadata/{args.campaign}/{types}_{args.campaign}_{args.year}_{wf}.json {overwrite} --campaign {args.campaign} --year {args.year}"
runner_config = ""
for key, value in vars(args).items():
@@ -100,6 +178,7 @@
"DAS_campaign",
"version",
"local",
"debug",
]:
continue
if key in [
@@ -112,25 +191,48 @@
if value == True:
runner_config += f" --{key}"
elif value is not None:
if "Validation" == args.scheme and types == "MC":
if (
"Validation" == args.scheme
and types == "MC"
and "limit" not in key
):
runner_config += " --limit 50"

else:
runner_config += f" --{key}={value}"
runner_config = runner_config_required + runner_config
-print(runner_config)
-os.system(runner_config)
if args.debug:
print(f"run the workflow: {runner_config}")
with open(
f"config_{args.year}_{args.campaign}_{args.scheme}_{args.version}.txt",
"w",
) as config_list:
config_list.write(runner_config)

os.system(runner_config)
if args.debug:
print(f"workflow is finished for {wf}!")
# Get luminosity
if (
os.path.exists(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
)
or args.overwrite
):
if args.debug:
print(
f"Get the luminosity from hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
)
if not os.path.exists(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
):
raise Exception(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea does not exist"
)
lumi = os.popen(
f"python scripts/dump_processed.py -t all -c hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea --json metadata/{args.campaign}/data_{args.campaign}_{args.year}_{wf}.json -n {args.campaign}_{args.year}_{wf}"
).read()

print(lumi)
lumi = int(
round(
float(
@@ -145,14 +247,19 @@
)
if os.path.exists(
f"hists_{wf}_MC_{args.campaign}_{args.year}_{wf}/hists_{wf}_MC_{args.campaign}_{args.year}_{wf}.coffea"
) and os.path.exists(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
):
print(lumi)
if args.debug:
print(f"Plot the dataMC for {wf}")
os.system(
f'python scripts/plotdataMC.py -i "hists_{wf}_*_{args.campaign}_{args.year}_{wf}/hists_{wf}_*_{args.campaign}_{args.year}_{wf}.coffea" --lumi {lumi} -p {wf} -v all --ext {args.campaign}_{args.year}{args.version}'
)
## Inspired by Uttiya: create the remote directory
# https://github.com/cms-btv-pog/BTVNanoCommissioning/blob/14e654feeb4b4d738ee43ab913efb343ea65fd1d/scripts/submit/createremotedir.sh
if args.debug:
print(f"Upload plots&coffea to eos: {wf}")
if not args.local:
os.system(f"mkdir -p {args.campaign}{args.version}/{wf}")
os.system(f"cp scripts/index.php {args.campaign}{args.version}/.")
@@ -172,5 +279,10 @@
)
else:
raise Exception(
f"No input coffea hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
f"No input coffea hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea or hists_{wf}_MC_{args.campaign}_{args.year}_{wf}/hists_{wf}_MC_{args.campaign}_{args.year}_{wf}.coffea"
)
# revert the prompt_dataMC lumiMask placeholder
if args.campaign == "prompt_dataMC":
os.system(
f"sed -i 's/{input_lumi_json}/$PROMPT_DATAMC/g' src/BTVNanoCommissioning/utils/AK4_parameters.py"
)
4 changes: 1 addition & 3 deletions src/BTVNanoCommissioning/helpers/definitions.py
@@ -6382,14 +6382,12 @@ def axes_name(var):
elif "UParT" in var:
unit = unit + " UParTAK4"
else:
unit = unit + " DeepCSV"
unit = unit
# output node
if "CvL" in var:
unit = unit + " CvL"
elif "CvB" in var:
unit = unit + " CvB"
elif "CvNotB" in var:
unit = unit + " CvNotB"
elif "B_b" in var or "ProbB" in var:
unit = unit + " Prob(b)"
elif "B_bb" in var:
