fix: restructure files
- scripts: add debug info
- wf: adapt campaign and years
- utils, helper: clean up JEC
Ming-Yan committed Nov 5, 2024
1 parent 63a3025 commit 526f8be
Showing 21 changed files with 386 additions and 454 deletions.
29 changes: 29 additions & 0 deletions .gitlab-ci.yml
@@ -0,0 +1,29 @@

# This file is a template, and might need editing before it works on your project.
# This is a sample GitLab CI/CD configuration file that should run without any modifications.
# It demonstrates a basic 3 stage CI/CD pipeline. Instead of real tests or scripts,
# it uses echo commands to simulate the pipeline execution.
#
# A pipeline is composed of independent jobs that run scripts, grouped into stages.
# Stages run in sequential order, but jobs within stages run in parallel.
#
# For more information, see: https://docs.gitlab.com/ee/ci/yaml/index.html#stages
#
# You can copy and paste this template into a new `.gitlab-ci.yml` file.
# You should not add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword.
#
# To contribute improvements to CI/CD templates, please follow the Development guide at:
# https://docs.gitlab.com/ee/development/cicd/templates.html
# This specific template is located at:
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Getting-Started.gitlab-ci.yml

stages: # List of stages for jobs, and their order of execution
- deploy

deploy-job: # This job runs in the deploy stage.
stage: deploy # Runs only for tagged commits (see the rules below) and triggers the AutoBTV pipeline.
script:
- 'curl --fail --request POST --form token=$MY_TRIGGER_TOKEN --form ref=master "https://gitlab.cern.ch/cms-analysis/btv/software-and-algorithms/autobtv/trigger/pipeline"'
rules:
- if: $CI_COMMIT_TAG
environment: production
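
For reference, the same trigger request can be reproduced outside CI. A minimal Python sketch, assuming MY_TRIGGER_TOKEN is exported in the environment (the test script itself is hypothetical, not part of this commit):

import os
import requests

# Mirror the curl call above: POST the trigger token and target ref.
resp = requests.post(
    "https://gitlab.cern.ch/cms-analysis/btv/software-and-algorithms/autobtv/trigger/pipeline",
    data={"token": os.environ["MY_TRIGGER_TOKEN"], "ref": "master"},
)
resp.raise_for_status()  # equivalent of curl --fail
print(resp.json())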
4 changes: 2 additions & 2 deletions scripts/fetch.py
@@ -61,7 +61,7 @@
parser.add_argument(
"--whitelist_sites",
help="White list fot sites",
default="T2_DE_DESY,T2_DE_RWTH,T2_CH_CERN",
default=None,
)
parser.add_argument(
"--blacklist_sites",
@@ -197,7 +197,7 @@ def getFilesFromDas(args):
.read()
.split("\n")
)

print("Number of files: ", len(flist))
import json

dataset = dataset[:-1] if "\n" in dataset else dataset
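
One caveat in the hunk above: the conditional slice drops only the final character whenever any newline is present anywhere in the string. A short sketch of the behaviour and of the more robust rstrip alternative:

dataset = "ZZ_TuneCP5\n"
assert (dataset[:-1] if "\n" in dataset else dataset) == "ZZ_TuneCP5"
# rstrip("\n") strips trailing newlines without relying on their position
assert dataset.rstrip("\n") == "ZZ_TuneCP5"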
126 changes: 119 additions & 7 deletions scripts/suball.py
@@ -1,6 +1,7 @@
import os, argparse
from BTVNanoCommissioning.workflows import workflows
from BTVNanoCommissioning.utils.sample import predefined_sample
from BTVNanoCommissioning.utils.AK4_parameters import correction_config
import os, sys, inspect

current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
@@ -10,6 +11,42 @@
from runner import config_parser, scaleout_parser, debug_parser


# Fetch the latest golden JSON lumi mask for a given year
def get_lumi_from_web(year):
import requests
import re

year = str(year)
# Define the URL of the directory
url = (
f"https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions{year[2:]}/"
)

# Send a request to fetch the HTML content of the webpage
response = requests.get(url)
html_content = response.text

# Use regex to find all href links that contain 'Golden.json' but do not contain 'era'
# Ensures it only captures the URL part within href="..." and not any other content.
goldenjson_files = re.findall(r'href="([^"]*Golden\.json[^"]*)"', html_content)

# Filter out any matches that contain 'era' in the filename
goldenjson_files = [file for file in goldenjson_files if "era" not in file]

# If there are any such files, find the latest one (assuming the files are sorted lexicographically)
if goldenjson_files:
latest_file = sorted(goldenjson_files)[
-1
] # Assuming lexicographical sorting works for the dates
os.system(f"wget {url}/{latest_file}")
os.system(f"mv {latest_file} src/BTVNanoCommissioning/data/lumiMasks/.")
return latest_file
else:
raise Exception(
f"No files for year {year} containing 'Golden.json' (excluding 'era') were found."
)
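
To illustrate the href filtering above, a small self-contained check with made-up file names (the sample HTML is illustrative, not a real DQM certification page):

import re

html = (
    '<a href="Cert_Collisions2023_366442_370790_Golden.json">all</a>'
    '<a href="Cert_Collisions2023_eraD_Golden.json">eraD</a>'
)
files = re.findall(r'href="([^"]*Golden\.json[^"]*)"', html)
# 'era' files are dropped; only the full-year certification survives
files = [f for f in files if "era" not in f]
assert files == ["Cert_Collisions2023_366442_370790_Golden.json"]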


### Manage workflow in one script
# EXAMPLE: python scripts/suball.py --scheme default_comissioning --campaign Summer23 --DAS_campaign "*Run2023D*Sep2023*,*Run3Summer23BPixNanoAODv12-130X*" --year 2023
# prerequisite: a new campaign should create an entry in AK4_parameters.py
@@ -26,7 +63,7 @@
parser.add_argument(
"-sc",
"--scheme",
default="CAMPAIGN_prompt_dataMC",
default="Validation",
choices=list(workflows.keys()) + ["Validation", "SF", "default_comissioning"],
help="Choose the function for dump luminosity(`lumi`)/failed files(`failed`) into json",
)
@@ -43,6 +80,11 @@
action="store_true",
help="not transfered to https://btvweb.web.cern.ch/Commissioning/dataMC/",
)
parser.add_argument(
"--debug",
action="store_true",
help="Run local debug test with small set of dataset with iterative executor",
)

args = parser.parse_args()
# summarize different groups for study
@@ -64,8 +106,26 @@
if args.scheme in workflows.keys():
workflow_group["test"] = [args.scheme]
args.scheme = "test"
# Check that the lumiMask exists; for prompt_dataMC, replace the $PROMPT_DATAMC placeholder
input_lumi_json = correction_config[args.campaign]["lumiMask"]
if args.campaign != "prompt_dataMC" and not os.path.exists(
f"src/BTVNanoCommissioning/data/lumiMasks/{input_lumi_json}"
):
raise f"src/BTVNanoCommissioning/data/lumiMasks/{input_lumi_json} not exist"

if (
args.campaign == "prompt_dataMC"
and correction_config[args.campaign]["lumiMask"] == "$PROMPT_DATAMC"
):
input_lumi_json = get_lumi_from_web(args.year)
os.system(
f"sed -i 's/$PROMPT_DATAMC/{input_lumi_json}/g' src/BTVNanoCommissioning/utils/AK4_parameters.py"
)
print(f"======>{input_lumi_json} is used for {args.year}")

for wf in workflow_group[args.scheme]:
if args.debug:
print(f"======{wf} in {args.scheme}=====")
overwrite = "--overwrite" if args.overwrite else ""
## creating dataset
if (
@@ -74,19 +134,37 @@
)
or args.overwrite
):
if args.debug:
print(
f"Creating MC dataset: python scripts/fetch.py -c {args.campaign} --from_workflow {wf} --DAS_campaign {args.DAS_campaign} --year {args.year} {overwrite} --skipvalidation"
)

os.system(
f"python scripts/fetch.py -c {args.campaign} --from_workflow {wf} --DAS_campaign {args.DAS_campaign} --year {args.year} {overwrite} --skipvalidation"
)
if args.debug:
os.system(f"ls metadata/{args.campaign}/*.json")

## Run the workflows
for types in predefined_sample[wf].keys():

if (types != "data" or types != "MC") and args.scheme == "Validation":
continue
print(
f"hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}/hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}.coffea"
)
if (
not os.path.exists(
f"hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}/hists_{wf}_{types}_{args.campaign}_{args.year}_{wf}.coffea"
)
or args.overwrite
):
if not os.path.exists(
f"metadata/{args.campaign}/{types}_{args.campaign}_{args.year}_{wf}.json"
):
raise Exception(
f"metadata/{args.campaign}/{types}_{args.campaign}_{args.year}_{wf}.json not exist"
)
runner_config_required = f"python runner.py --wf {wf} --json metadata/{args.campaign}/{types}_{args.campaign}_{args.year}_{wf}.json {overwrite} --campaign {args.campaign} --year {args.year}"
runner_config = ""
for key, value in vars(args).items():
@@ -100,6 +178,7 @@
"DAS_campaign",
"version",
"local",
"debug",
]:
continue
if key in [
@@ -112,25 +191,48 @@
if value == True:
runner_config += f" --{key}"
elif value is not None:
if "Validation" == args.scheme and types == "MC":
if (
"Validation" == args.scheme
and types == "MC"
and "limit" not in key
):
runner_config += " --limit 50"

else:
runner_config += f" --{key}={value}"
runner_config = runner_config_required + runner_config
-print(runner_config)
-os.system(runner_config)
if args.debug:
print(f"run the workflow: {runner_config}")
with open(
f"config_{args.year}_{args.campaign}_{args.scheme}_{args.version}.txt",
"w",
) as config_list:
config_list.write(runner_config)

os.system(runner_config)
if args.debug:
print(f"workflow is finished for {wf}!")
# Get luminosity
if (
os.path.exists(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
)
or args.overwrite
):
if args.debug:
print(
f"Get the luminosity from hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
)
if not os.path.exists(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
):
raise Exception(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea does not exist"
)
lumi = os.popen(
f"python scripts/dump_processed.py -t all -c hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea --json metadata/{args.campaign}/data_{args.campaign}_{args.year}_{wf}.json -n {args.campaign}_{args.year}_{wf}"
).read()

print(lumi)
lumi = int(
round(
float(
@@ -145,14 +247,19 @@
)
if os.path.exists(
f"hists_{wf}_MC_{args.campaign}_{args.year}_{wf}/hists_{wf}_MC_{args.campaign}_{args.year}_{wf}.coffea"
) and os.path.exists(
f"hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
):
print(lumi)
if args.debug:
print(f"Plot the dataMC for {wf}")
os.system(
f'python scripts/plotdataMC.py -i "hists_{wf}_*_{args.campaign}_{args.year}_{wf}/hists_{wf}_*_{args.campaign}_{args.year}_{wf}.coffea" --lumi {lumi} -p {wf} -v all --ext {args.campaign}_{args.year}{args.version}'
)
## Inspired by Uttiya: create the remote directory
# https://github.com/cms-btv-pog/BTVNanoCommissioning/blob/14e654feeb4b4d738ee43ab913efb343ea65fd1d/scripts/submit/createremotedir.sh
if args.debug:
print(f"Upload plots&coffea to eos: {wf}")
if not args.local:
os.system(f"mkdir -p {args.campaign}{args.version}/{wf}")
os.system(f"cp scripts/index.php {args.campaign}{args.version}/.")
@@ -172,5 +279,10 @@
)
else:
raise Exception(
f"No input coffea hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
f"No input coffea hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea or hists_{wf}_MC_{args.campaign}_{args.year}_{wf}/hists_{wf}_MC_{args.campaign}_{args.year}_{wf}.coffea"
)
# revert the prompt_dataMC lumiMask placeholder
if args.campaign == "prompt_dataMC":
os.system(
f"sed -i 's/{input_lumi_json}/$PROMPT_DATAMC/g' src/BTVNanoCommissioning/utils/AK4_parameters.py"
)
4 changes: 1 addition & 3 deletions src/BTVNanoCommissioning/helpers/definitions.py
@@ -6382,14 +6382,12 @@ def axes_name(var):
elif "UParT" in var:
unit = unit + " UParTAK4"
else:
unit = unit + " DeepCSV"
unit = unit
# output node
if "CvL" in var:
unit = unit + " CvL"
elif "CvB" in var:
unit = unit + " CvB"
elif "CvNotB" in var:
unit = unit + " CvNotB"
elif "B_b" in var or "ProbB" in var:
unit = unit + " Prob(b)"
elif "B_bb" in var:
