diff --git a/params/trainings.yaml b/params/trainings.yaml index 1e6b0ef..237ad89 100644 --- a/params/trainings.yaml +++ b/params/trainings.yaml @@ -2,172 +2,172 @@ Models: 'BDT': '2L_high': '2016_PreVFP': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2016_PostVFP': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2017': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2018': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2022_preEE': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' - '2022_post_EE': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + '2022_postEE': + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2023_preBPix': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2023_postBPix': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_high_model_DY.txt' '2L_low': '2016_PreVFP': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2016_PostVFP': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2017': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2018': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2022_preEE': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2022_postEE': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2023_preBPix': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2023_postBPix': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_low_model_DY.txt' '2L': '2016_PreVFP': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2016_PostVFP': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2017': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2018': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2022_preEE': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2022_postEE': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2023_preBPix': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '2023_postBPix': - model_file: 'MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_Hto2C_Zto2L_2022_postEE_model_DY.txt' '1L': '2016_PreVFP': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2016_PostVFP': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2017': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2018': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2022_preEE': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2022_postEE': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2023_preBPix': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '2023_postBPix': - model_file: 'MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/signalCombo_WminusH_HToCC_WToLNu_2017_model_QCD.txt' '0L': '2016_PreVFP': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2016_PostVFP': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2017': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2018': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2022_preEE': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2022_postEE': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2023_preBPix': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' '2023_postBPix': - model_file: 'MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' + model_file: '${config_dir:}/../MVA/BDT_models/ZH_HToCC_ZToNuNu_2017_model_QCD.txt' 'DNN': '2L_high': '2016_PreVFP': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2016_PostVFP': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2017': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2018': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2022_preEE': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' - '2022_post_EE': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + '2022_postEE': + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2023_preBPix': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2023_postBPix': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_high_dnn_model_DY.h5' '2L_low': '2016_PreVFP': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2016_PostVFP': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2017': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2018': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2022_preEE': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' - '2022_PostEE': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + '2022_postEE': + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2023_preBPix': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2023_postBPix': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_low_dnn_model_DY.h5' '2L': '2016_PreVFP': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '2016_PostVFP': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '2017': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '2018': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '2022_preEE': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' - '2022_PostEE': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + '2022_postEE': + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '2023_preBPix': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '2023_postBPix': - model_file: 'MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_Hto2C_Zto2L_2022_postEE_dnn_model.h5' '1L': '2016_PreVFP': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2016_PostVFP': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2017': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2018': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2022_preEE': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2022_postEE': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2023_preBPix': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '2023_postBPix': - model_file: 'MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/signalCombo_WminusH_HToCC_WToLNu_2017_dnn_model_QCD.h5' '0L': '2016_PreVFP': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2016_PostVFP': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2017': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2018': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2022_preEE': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2022_postEE': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2023_preBPix': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' '2023_postBPix': - model_file: 'MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' + model_file: '${config_dir:}/../MVA/DNN_models/ZH_HToCC_ZToNuNu_2017_dnn_model_QCD.h5' diff --git a/workflow_VHcc.py b/workflow_VHcc.py index bd8cc3f..9cc6c29 100644 --- a/workflow_VHcc.py +++ b/workflow_VHcc.py @@ -14,6 +14,8 @@ from keras.models import load_model import CommonSelectors from CommonSelectors import * +from pocket_coffea.utils.utils import dump_ak_array + from pocket_coffea.workflows.base import BaseProcessorABC from pocket_coffea.utils.configurator import Configurator @@ -39,16 +41,16 @@ def get_nu_4momentum(Lepton, MET): py = Lepton.pt * np.sin(Lepton.phi) pz = Lepton.pt * np.sinh(Lepton.eta) E = np.sqrt(Lepton.mass**2 + Lepton.pt**2 * np.cosh(Lepton.eta)**2) - + MET_px = MET.pt * np.cos(MET.phi) MET_py = MET.pt * np.sin(MET.phi) - + MisET2 = (MET_px**2 + MET_py**2) mu = (mW**2) / 2 + MET_px * px + MET_py * py a = (mu * pz) / (E**2 - pz**2) a2 = a**2 b = ((E**2) * (MisET2) - mu**2) / (E**2 - pz**2) - + condition = a2 - b >= 0 # Vectorized handling of conditions @@ -76,7 +78,7 @@ def get_nu_4momentum(Lepton, MET): class VHccBaseProcessor(BaseProcessorABC): def __init__(self, cfg: Configurator): super().__init__(cfg) - + self.proc_type = self.params["proc_type"] self.save_arrays = self.params["save_arrays"] self.run_dnn = self.params["run_dnn"] @@ -86,17 +88,17 @@ def __init__(self, cfg: Configurator): #self.dnn_model = load_model(self.params.DNN_model) #self.dnn_low_model = load_model(self.params.DNN_low) #self.dnn_high_model = load_model(self.params.DNN_high) - + # Define the prediction functions with @tf.function #self.predict_dnn = tf.function(self.model.predict, reduce_retracing=True) #self.predict_dnn_low = tf.function(self.model_low.predict, reduce_retracing=True) #self.predict_dnn_high = tf.function(self.model_high.predict, reduce_retracing=True) print("Processor initialized") - + def apply_object_preselection(self, variation): ''' - + ''' # Include the supercluster pseudorapidity variable electron_etaSC = self.events.Electron.eta + self.events.Electron.deltaEtaSC @@ -119,20 +121,17 @@ def apply_object_preselection(self, variation): self.events["ll"] = get_dilepton( self.events.ElectronGood, self.events.MuonGood ) - - + + self.events["JetGood"], self.jetGoodMask = jet_selection( self.events, "Jet", self.params, self._year, "LeptonGood" ) self.events['EventNr'] = self.events.event - print("Before preselection len(events_nr)", len(self.events.EventNr), self.events.EventNr) - print() - self.events["BJetGood"] = btagging( self.events["JetGood"], self.params.btagging.working_point[self._year], wp=self.params.object_preselection.bJetWP) - + def count_objects(self, variation): self.events["nMuonGood"] = ak.num(self.events.MuonGood) self.events["nElectronGood"] = ak.num(self.events.ElectronGood) @@ -156,15 +155,15 @@ def evaluateBDT(self, data): gc.collect() return bdt_score - + def evaluateseparateBDTs(self, data): data_df_low = data[data['dilep_pt'] < 150] data_df_high = data[data['dilep_pt'] >= 150] - + # Initialize empty arrays for scores bdt_score_low = np.array([]) bdt_score_high = np.array([]) - + # Read the model files model_low = lgb.Booster(model_file=self.params.Models.BDT[f'{self.channel}_low'][self.events.metadata["year"]].model_file) model_high = lgb.Booster(model_file=self.params.Models.BDT[f'{self.channel}_high'][self.events.metadata["year"]].model_file) @@ -173,23 +172,23 @@ def evaluateseparateBDTs(self, data): if not data_df_low.empty: #bdt_score_low = self.bdt_low_model.predict(data_df_low) bdt_score_low = model_low.predict(data_df_low) - + # Predict only if data_df_high is non-empty if not data_df_high.empty: #bdt_score_high = self.bdt_high_model.predict(data_df_high) bdt_score_high = model_high.predict(data_df_high) - + # Concatenate the scores from low and high dataframes bdt_score = np.concatenate((bdt_score_low, bdt_score_high), axis=0) - + # Release memory del model_low, model_high gc.collect() return bdt_score - + def evaluateDNN(self, data): - + print("Evaluating DNN...") print(self.params.Models.DNN[self.channel][self.events.metadata["year"]].model_file) print() @@ -203,16 +202,16 @@ def evaluateDNN(self, data): gc.collect() print("DNN evaluation completed.") return dnn_score - + def evaluateseparateDNNs(self, data): - + data_df_low = data[data['dilep_pt'] < 150] data_df_high = data[data['dilep_pt'] >= 150] - + # Initialize empty arrays for scores dnn_score_low = np.array([]) dnn_score_high = np.array([]) - + # Read the model file model_low = load_model(self.params.Models.DNN[f'{self.channel}_low'][self.events.metadata["year"]].model_file) model_high = load_model(self.params.Models.DNN[f'{self.channel}_high'][self.events.metadata["year"]].model_file) @@ -227,7 +226,7 @@ def evaluateseparateDNNs(self, data): del model_low gc.collect() print("Prediction for low dilep_pt completed.") - + # Predict only if data_df_high is non-empty if not data_df_high.empty: print("Predicting for high dilep_pt...") @@ -238,15 +237,15 @@ def evaluateseparateDNNs(self, data): del model_high gc.collect() print("Prediction for high dilep_pt completed.") - - - + + + dnn_score = np.concatenate((dnn_score_low, dnn_score_high), axis=0) - + print("Separate DNN evaluation completed.") - + return dnn_score - + # Function that defines common variables employed in analyses and save them as attributes of `events` def define_common_variables_before_presel(self, variation): self.events["JetGood_Ht"] = ak.sum(abs(self.events.JetGood.pt), axis=1) @@ -279,10 +278,6 @@ def define_common_variables_after_presel(self, variation): raise NotImplementedError(f"This tagger is not implemented: {self.myJetTagger}") #self.events["dijet_pt"] = self.events.dijet.pt odd_event_mask = (self.events.EventNr % 2 == 1) - print(odd_event_mask, len (odd_event_mask)) - print() - - print("True enties in the mask", len(odd_event_mask[odd_event_mask])) if self.proc_type=="ZLL": @@ -304,14 +299,14 @@ def define_common_variables_after_presel(self, variation): self.events["dilep_dr"] = self.events.ll.deltaR self.events["dilep_deltaPhi"] = self.events.ll.deltaPhi self.events["dilep_deltaEta"] = self.events.ll.deltaEta - + self.events["ZH_pt_ratio"] = self.events.dijet_csort.pt/self.events.ll.pt self.events["ZH_deltaPhi"] = np.abs(self.events.ll.delta_phi(self.events.dijet_csort)) # why cant't we use delta_phi function here? self.angle21_gen = (abs(self.events.ll.l2phi - self.events.dijet_csort.j1Phi) < np.pi) self.angle22_gen = (abs(self.events.ll.l2phi - self.events.dijet_csort.j2Phi) < np.pi) - self.events["deltaPhi_l2_j1"] = ak.where(self.angle21_gen, abs(self.events.ll.l2phi - self.events.dijet_csort.j1Phi), 2*np.pi - abs(self.events.ll.l2phi - self.events.dijet_csort.j1Phi)) + self.events["deltaPhi_l2_j1"] = ak.where(self.angle21_gen, abs(self.events.ll.l2phi - self.events.dijet_csort.j1Phi), 2*np.pi - abs(self.events.ll.l2phi - self.events.dijet_csort.j1Phi)) self.events["deltaPhi_l2_j2"] = ak.where(self.angle22_gen, abs(self.events.ll.l2phi - self.events.dijet_csort.j2Phi), 2*np.pi - abs(self.events.ll.l2phi - self.events.dijet_csort.j2Phi)) self.events["deltaPhi_l2_j1"] = np.abs(delta_phi(self.events.ll.l2phi, self.events.dijet_csort.j1Phi)) @@ -343,7 +338,7 @@ def define_common_variables_after_presel(self, variation): "deltaPhi_l2_j1": odd_events["deltaPhi_l2_j1"], "deltaPhi_l2_j2": odd_events["deltaPhi_l2_j2"], }) - + df = ak.to_pandas(variables_to_process) columns_to_exclude = ['dilep_m'] df = df.drop(columns=columns_to_exclude, errors='ignore') @@ -378,7 +373,7 @@ def define_common_variables_after_presel(self, variation): mask = ((self.events.nJetGood >= 2) & (self.events.nLeptonGood>=2)) & (self.events.ll.pt > 60) & (self.events.ll.mass > 75) & (self.events.ll.mass < 115) & ((self.events.nJetGood >= 2) & (self.events.dijet_csort.mass > 75) & (self.events.dijet_csort.mass < 200)) & ((self.events.JetsCvsL.btagDeepFlavCvL[:,0]>0.2) & (self.events.JetsCvsL.btagDeepFlavCvB[:,0]>0.4)) selection_ZLL = ak.where(ak.is_none(mask), False, mask) - + if self.proc_type=="WLNu": self.events["MET_used"] = ak.zip({ "pt": self.events.MET.pt, @@ -418,10 +413,10 @@ def define_common_variables_after_presel(self, variation): self.events["dijet_CvsB_min"] = self.events.dijet_csort.j2CvsB self.events["dijet_pt_max"] = self.events.dijet_csort.j1pt self.events["dijet_pt_min"] = self.events.dijet_csort.j2pt - + self.events["deltaPhi_jet1_MET"] = np.abs(self.events.MET.delta_phi(self.events.JetGood[:,0])) self.events["deltaPhi_jet2_MET"] = np.abs(self.events.MET.delta_phi(self.events.JetGood[:,1])) - + self.events["WH_deltaPhi"] = np.abs(self.events.W_candidate.delta_phi(self.events.dijet_csort)) self.events["deltaPhi_l1_j1"] = np.abs(delta_phi(self.events.lead_lep.phi, self.events.dijet_csort.j1Phi)) self.events["deltaPhi_l1_MET"] = np.abs(delta_phi(self.events.lead_lep.phi, self.events.MET_used.phi)) @@ -466,13 +461,6 @@ def define_common_variables_after_presel(self, variation): df = ak.to_pandas(variables_to_process) # Remove the 'subentry' column df = df.reset_index(level='subentry', drop=True) - - - print(df) - print() - - print(len(df)) - print() # Ensure the DataFrame has a simple index if isinstance(df.index, pd.MultiIndex): @@ -513,7 +501,7 @@ def define_common_variables_after_presel(self, variation): },with_name="PtEtaPhiMCandidate") self.events["Z_candidate"] = self.events.MET_used self.events["Z_pt"] = self.events.Z_candidate.pt - + self.events["dijet_m"] = self.events.dijet_csort.mass self.events["dijet_pt"] = self.events.dijet_csort.pt self.events["dijet_dr"] = self.events.dijet_csort.deltaR @@ -525,7 +513,7 @@ def define_common_variables_after_presel(self, variation): self.events["dijet_CvsB_min"] = self.events.dijet_csort.j2CvsB self.events["dijet_pt_max"] = self.events.dijet_csort.j1pt self.events["dijet_pt_min"] = self.events.dijet_csort.j2pt - + self.events["ZH_pt_ratio"] = self.events.dijet_csort.pt/self.events.Z_candidate.pt self.events["ZH_deltaPhi"] = np.abs(self.events.Z_candidate.delta_phi(self.events.dijet_csort)) self.events["deltaPhi_jet1_MET"] = np.abs(self.events.MET.delta_phi(self.events.JetGood[:,0])) @@ -549,7 +537,7 @@ def define_common_variables_after_presel(self, variation): "ZH_deltaPhi": odd_events["ZH_deltaPhi"], "Z_pt": odd_events["Z_pt"], }) - + df = ak.to_pandas(variables_to_process) #columns_to_exclude = [] #df = df.drop(columns=columns_to_exclude, errors='ignore') @@ -567,6 +555,8 @@ def define_common_variables_after_presel(self, variation): self.events["DNN"] = np.zeros_like(self.events["BDT"]) mask = ((self.events.nJetGood >= 2) & (self.events.dijet_csort.pt > 120)) & ( (self.events.deltaPhi_jet1_MET > 0.6) & (self.events.deltaPhi_jet2_MET > 0.6)) & ((self.events.JetsCvsL.btagDeepFlavCvL[:,0]>0.2) & (self.events.JetsCvsL.btagDeepFlavCvB[:,0]>0.4)) & ((self.events.nJetGood >= 2) & (self.events.dijet_csort.mass > 75) & (self.events.dijet_csort.mass < 200)) selection_ZNuNu = ak.where(ak.is_none(mask), False, mask) + + @@ -619,7 +609,7 @@ def define_common_variables_after_presel(self, variation): "dilep_dr": SR_Data["dilep_dr"], "dilep_deltaPhi": SR_Data["dilep_deltaPhi"], "dilep_deltaEta": SR_Data["dilep_deltaEta"], - + "dijet_m": SR_Data["dijet_m"], "dijet_pt": SR_Data["dijet_pt"], "dijet_dr": SR_Data["dijet_dr"], @@ -631,13 +621,13 @@ def define_common_variables_after_presel(self, variation): "dijet_CvsB_min": SR_Data["dijet_CvsB_min"], "dijet_pt_max": SR_Data["dijet_pt_max"], "dijet_pt_min": SR_Data["dijet_pt_min"], - + "ZH_pt_ratio": SR_Data["ZH_pt_ratio"], "ZH_deltaPhi": SR_Data["ZH_deltaPhi"], "deltaPhi_l2_j1": SR_Data["deltaPhi_l2_j1"], "deltaPhi_l2_j2": SR_Data["deltaPhi_l2_j2"], }) - + if self.proc_type=="ZNuNu": SR_Data = self.events[~odd_event_mask & selection_ZNuNu] variables_to_save = ak.zip({ @@ -661,15 +651,15 @@ def define_common_variables_after_presel(self, variation): with warnings.catch_warnings(): # Suppress FutureWarning warnings.filterwarnings("ignore", category=FutureWarning) - + # Check if the directory exists - if not os.path.exists(f"Saved_root_files_{self.proc_type}/{self.events.metadata['dataset']}"): - # If not, create it - os.system(f"mkdir -p Saved_root_files_{self.proc_type}/{self.events.metadata['dataset']}") + # Define the directory path and subdirectories + base_dir = f"Saved_root_files_{self.proc_type}" + dataset_dir = self.events.metadata['dataset'] + subdirs = [dataset_dir] - # Write the events to a ROOT file - #with uproot.recreate(f"Saved_root_files/{self.events.metadata['dataset']}/{self.events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(self.events.metadata['entrystart'])}_{int(self.events.metadata['entrystop'])}.root") as f: - # f["variables"] = ak.to_pandas(variables_to_save) + # Define the file name + file_name = f"{self.events.metadata['filename'].split('/')[-1].replace('.root', '')}_{int(self.events.metadata['entrystart'])}_{int(self.events.metadata['entrystop'])}_vars.parquet" - # Write the events to a Parquet file - ak.to_pandas(variables_to_save).to_parquet(f"Saved_root_files_{self.proc_type}/{self.events.metadata['dataset']}/{self.events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(self.events.metadata['entrystart'])}_{int(self.events.metadata['entrystop'])}_vars.parquet") + # Use dump_ak_array to save the array with subdirectories + dump_ak_array(variables_to_save, file_name, base_dir, subdirs) \ No newline at end of file