From 486e6c86f30eb735f4939bf80cbbe8341895e072 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 9 Dec 2024 17:03:40 +0000 Subject: [PATCH 01/16] New implementation of CMS_WCHARM_13TEV_WPWM-TOT-UNNORM --- ...=> kinematics_legacy_WPWM-TOT-UNNORM.yaml} | 0 .../commondata/CMS_WCHARM_13TEV/metadata.yaml | 48 +++++++++---------- ...inties_legacy_WPWM-TOT-UNNORM_sys_10.yaml} | 0 3 files changed, 22 insertions(+), 26 deletions(-) rename nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/{kinematics_WPWM-TOT-UNNORM.yaml => kinematics_legacy_WPWM-TOT-UNNORM.yaml} (100%) rename nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/{uncertainties_WPWM-TOT-UNNORM_sys_10.yaml => uncertainties_legacy_WPWM-TOT-UNNORM_sys_10.yaml} (100%) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_legacy_WPWM-TOT-UNNORM.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_WPWM-TOT-UNNORM.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_legacy_WPWM-TOT-UNNORM.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml index 0999feed3c..139ac71f76 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml @@ -1,51 +1,47 @@ setname: CMS_WCHARM_13TEV -version: 1 -version_comment: Port of old commondata + nnpdf_metadata: nnpdf31_process: DY CC experiment: CMS + arXiv: url: https://arxiv.org/abs/1811.10021 journal: Eur. Phys. J. C 79 (2019) 269 iNSPIRE: - url: '' + url: https://inspirehep.net/literature/1705068 hepdata: - url: '' - version: -1 + url: https://www.hepdata.net/record/ins1705068 + version: 1 + +version: 2 +version_comment: Implementation in the new format + implemented_observables: - observable_name: WPWM-TOT-UNNORM observable: description: Jet Rapidity Distribution label: CMS $W+c$ total 13 TeV - units: '' - process_type: EWJ_RAP - tables: [] - npoints: [] + units: '[fb]' + process_type: DY_W_ETA + tables: [1] ndata: 5 plotting: - kinematics_override: ewj_rap_sqrt_scale + kinematics_override: identity dataset_label: CMS $W+c$ total 13 TeV y_label: $d\sigma(W+c)/d|\eta^\mu|$ (fb) x_label: $|\eta^\mu|$ - plot_x: k1 - kinematic_coverage: - - k1 - - k2 - - k3 + plot_x: abs_eta + kinematic_coverage: [abs_eta, m_W2] kinematics: variables: - k1: - description: Variable k1 - label: k1 + abs_eta: + description: Absolute pseudo-rapidity of the Z boson + label: $|\eta|$ units: '' k2: - description: Variable k2 - label: k2 - units: '' - k3: - description: Variable k3 - label: k3 - units: '' + description: Mass of the W boson squared + label: $m_W^2$ + units: GeV$^{2}$ file: kinematics_WPWM-TOT-UNNORM.yaml theory: conversion_factor: 1.0 @@ -60,6 +56,6 @@ implemented_observables: - uncertainties_legacy_WPWM-TOT-UNNORM.yaml legacy_10: data_uncertainties: - - uncertainties_WPWM-TOT-UNNORM_sys_10.yaml + - uncertainties_legacy_WPWM-TOT-UNNORM_sys_10.yaml data_central: data_legacy_WPWM-TOT-UNNORM.yaml ported_from: CMS_WCHARM_DIFF_UNNORM_13TEV diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_legacy_WPWM-TOT-UNNORM_sys_10.yaml similarity index 100% rename from nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml rename to nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_legacy_WPWM-TOT-UNNORM_sys_10.yaml From f19ed6916ced1e74b7fc91cf46915f85a6c01a18 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 9 Dec 2024 18:56:13 +0000 Subject: [PATCH 02/16] Implementation in the new format - WIP --- .../data_WPWM-TOT-UNNORM.yaml | 6 + .../commondata/CMS_WCHARM_13TEV/filter.py | 17 + .../CMS_WCHARM_13TEV/filter_utils.py | 313 ++++++++++++++++++ .../kinematics_WPWM-TOT-UNNORM.yaml | 41 +++ .../commondata/CMS_WCHARM_13TEV/metadata.yaml | 9 +- .../rawdata/WplusCharm_AbsEtaMu_Table.yaml | 42 +++ .../CMS_WCHARM_13TEV/sys_uncertainties.py | 143 ++++++++ .../uncertainties_WPWM-TOT-UNNORM.yaml | 123 +++++++ .../uncertainties_WPWM-TOT-UNNORM_sys_10.yaml | 132 ++++++++ 9 files changed, 823 insertions(+), 3 deletions(-) create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_WPWM-TOT-UNNORM.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/rawdata/WplusCharm_AbsEtaMu_Table.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml create mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml new file mode 100644 index 0000000000..f462c3da21 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml @@ -0,0 +1,6 @@ +data_central: +- 5.68699350e+05 +- 4.66912350e+05 +- 4.79309450e+05 +- 3.95159750e+05 +- 282824.0 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py new file mode 100644 index 0000000000..f7d00a0e76 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py @@ -0,0 +1,17 @@ +''' +Filter script for CMS_WCHARM_13TEV +''' + +import logging + +from filter_utils import Extractor +import numpy as np +import yaml + +logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') + + +if __name__ == "__main__": + CMS_WCHARM = Extractor("./metadata.yaml", "WPWM-TOT-UNNORM", mult_factor=1000) + _, _, _ = CMS_WCHARM.generate_data(variant='default', save_to_yaml=True) + _, _, _ = CMS_WCHARM.generate_data(variant='sys_10', save_to_yaml=True) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py new file mode 100644 index 0000000000..4d249e661d --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -0,0 +1,313 @@ +import logging +import os + +import numpy as np +import pandas as pd +from sys_uncertainties import SYS_DEFINITIONS, SYS_UNC_BY_BIN +import yaml + +from nnpdf_data.filter_utils.utils import prettify_float, symmetrize_errors + +yaml.add_representer(float, prettify_float) + +SQRTS = 8000 +MW2 = 80.385**2 +CMSLUMI13 = 2.5 # % + +# List of systematic uncertainties that shuold +# be considered uncorrelated +UNCORR_SYS_UNC = ['UnfoldMCstat', 'UnfoldOtherGen', 'UnfoldReweight'] +ART_LABEL = 'art_corr_unc' +STAT_LABEL = 'stat_uncorr_unc' +TABLE = '' + +# From Table 1 of the paper +SYS_UNC_by_bin = [{}] + + +class Extractor: + """ + Extracts kinematics, central data, and uncertainties for a given dataset + + Parameters + ---------- + metadata_file: str + Path to the metadata file + observable: str + The name of the observable for which the data is extracted. The name must + be listed in the metadata file. + """ + + def __init__(self, metadata_file, observable, mult_factor=1): + + # Open metadata and select process + with open(metadata_file, 'r') as file: + metadata = yaml.safe_load(file) + self.metadata = next( + ( + md + for md in metadata["implemented_observables"] + if md['observable_name'] == observable + ), + None, + ) + if self.metadata is None: + raise Exception(f"{observable} is not listed in the metadata file.") + + # Initialise dict of tables + self.tables = {} + self.observable = observable + self.mult_factor = mult_factor + self.kin_labels = self.metadata['kinematic_coverage'] + self.ndata = self.metadata['ndata'] + + # Collect diagonal absoulute uncertainties + # self.diag_unc = self.__collect_diag_unc() + # self.unc_labels = list(self.diag_unc[0].keys()) + # self.unc_labels.pop(0) + + def __retrieve_table(self, table_id): + """ + Implementation of the lazy loading for the tables. If the table + is loaded for the first time, it is stored into an internal + container of the class, so that it will not be loaded each time. + + When called, this functions checks if the table has already been stored + and, if that is the case, returns the stored table. + + Parameters + ---------- + table_id: int + Index that specifies the table + + Return + ------ + The table specified by `table_id`. If not previously loaded, it is also + stored into the internal container for future use. + """ + try: + table = self.tables[str(table_id)] + except KeyError: + logging.debug( + f'Table {table_id} has not already been used or stored.' f' Storing the table...' + ) + with open(f'./rawdata/{TABLE}{table_id}.yaml', 'r') as tab: + tab_dict = yaml.safe_load(tab) + self.tables[str(table_id)] = tab_dict + table = tab_dict + return table + + def __extract_kinematics(self, table: dict): + """ + Extracts the kinematic variables of the single differential + distribution given a table. + + For each bin, it computes the max, min, and mid value of the transverse + momentum of the boson. + + Parameters + ---------- + table: dict + Dictionary containing the bins in the transverse momentum + tab_number: int + Index to select the range of the second kinematic variable + + Return + ------ + List of bins containing min, max, and mid values for each of the kinematic + observables listed in the `kinematic_coverage` of the metadata file. + + """ + data = table['independent_variables'][0] + label = self.kin_labels + kinematics = [] + for bin in data['values']: + abs_eta_min = bin['low'] + abs_eta_max = bin['high'] + kin_bin = { + label[0]: { + 'min': abs_eta_min, + 'mid': (abs_eta_max + abs_eta_min) / 2, + 'max': abs_eta_max, + }, + label[1]: {'min': None, 'mid': MW2, 'max': None}, + } + kinematics.append(kin_bin) + return kinematics + + def generate_kinematics(self): + """ + Function that generates the kinematics by looping over all the + tables specified in the metadata file. The resulting kinematics + is then saved to a yaml file. It relies on the method + `__extract_kinematics`. + """ + + logging.info(f"Generating kinematics for ATLAS_{self.observable}...") + + # Initialise kinematics list + kinematics = [] + ndata = 0 + table = self.metadata["tables"][0] + tab_dict = self.__retrieve_table(table) + kin = self.__extract_kinematics(tab_dict) + kinematics = np.concatenate([kinematics, kin]) + ndata += len(kin) + + # Check number of data agrees with metadata + try: + assert self.metadata['ndata'] is not None + assert self.metadata['ndata'] == ndata + except AssertionError as e: + logging.warning( + f"The number of data in the metafile is either wrong or unspecified." + f" The correct number is {ndata}. Please, update the metafile." + ) + return + return kinematics.tolist() + + def generate_data_and_unc(self, mult_factor=1.0): + """ + Same as `generate_kinematics`, but for central data points. + """ + logging.info(f"Generating central data for CMS_{self.observable}...") + dat_central = [] + stat_unc = [] + asy_sys_unc = [] + table = self.metadata['tables'][0] + tab_dict = self.__retrieve_table(table) + tab_dict = tab_dict['dependent_variables'][0]['values'] + + # Loop over bins + for rap_bin in tab_dict: + dat_central.append(rap_bin['value'] * mult_factor) + stat_unc.append(rap_bin['errors'][0]['symerror'] * mult_factor) + asy_sys_unc.append( + { + key: value * mult_factor + for key, value in rap_bin['errors'][1]['asymerror'].items() + } + ) + return dat_central, stat_unc, asy_sys_unc + + def symmetrized_sys_unc(self): + """Symmetrise systematic uncertainties. Returns the symmetrized uncertainty + and the shift to the central data + """ + symmetrized_uncs = [] + for bin in SYS_UNC_BY_BIN: + unc_dict = {} + for source in bin: + if 'asyserror' in source.keys(): + error = source['asyserror'] + plus = error['high'] + minus = error['low'] + data_delta, sym_error = symmetrize_errors(plus, minus) + unc_dict[source['label']] = {'shift': data_delta, 'sym_error': sym_error} + elif 'syserror' in source.keys(): + unc_dict[source['label']] = {'shift': 0.0, 'sym_error': source['syserror']} + symmetrized_uncs.append(unc_dict) + return symmetrized_uncs + + def __build_unc_definitions(self, variant='default'): + unc_definitions = {} + + # Statistical uncertainty + unc_definitions[STAT_LABEL] = { + 'description': f'Statistical uncertainty', + 'treatment': 'ADD', + 'type': 'UNCORR', + } + + # Add lumi uncertainty + unc_definitions['corr_lumi_unc'] = { + 'description': f'Luminosity uncertainty 2.6%', + 'treatment': 'MULT', + 'type': 'CMSLUMI13', + } + + # Add systematic uncertainty + unc_definitions = unc_definitions | SYS_DEFINITIONS + + if variant == 'sys_10': + unc_definitions['uncorr_mc_unc'] = { + 'description': f'MC uncertainty', + 'treatment': 'MULT', + 'type': 'UNCORR', + } + elif variant != 'default': + raise ValueError(f'The variant {variant} is not implemented yet.') + + return unc_definitions + + def generate_data(self, variant='default', save_to_yaml=False, path='./'): + # Get central data and kinematics + central_data, stat_unc, _ = self.generate_data_and_unc(self.mult_factor) + kinematics = self.generate_kinematics() + + # Uncertainty definitions + unc_definitions = self.__build_unc_definitions(variant=variant) + + sys_artificial = [] # Initialize vector of artificial uncertainties + + symmetrized_sys_uncs = self.symmetrized_sys_unc() + for data_idx, data in enumerate(central_data): + shift = 0 + sys_unc_bin = symmetrized_sys_uncs[data_idx] + + # Add shift from symmetrization + tmp = {} + for key, value in sys_unc_bin.items(): + shift += value['shift'] + tmp[key] = value['sym_error'] + + # Shift central data + central_data[data_idx] = central_data[data_idx] + shift + + # Statistical uncertainty + unc_dict = {STAT_LABEL: stat_unc[data_idx]} + + # Lumi uncertainty + unc_dict = {'corr_lumi_unc': central_data[data_idx] * CMSLUMI13 * 0.01} + + # Add systematic uncertainties + unc_dict = unc_dict | tmp + + if variant == 'sys_10': + unc_dict['uncorr_mc_unc'] = central_data[data_idx] * 0.01 + + sys_artificial.append(unc_dict) + + if save_to_yaml: + # Save kinematics into file + logging.info("Dumping kinematics to file...") + kinematics_yaml = {'bins': kinematics} + with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: + yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) + logging.info("Done!") + + # Save central data into file + logging.info("Dumping kinematics to file...") + dat_central_yaml = {'data_central': central_data} + file_name = self.metadata['data_central'] + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + + # Save unertainties + logging.info("Dumping kinematics to file...") + uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} + file_name = ( + self.metadata['data_uncertainties'][0] + if variant == 'default' + else self.metadata['variants'][variant]['data_uncertainties'][0] + ) + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + return kinematics, central_data, sys_artificial + else: + return kinematics, central_data, sys_artificial + + def get_table(self, table_id): + return self.__retrieve_table(table_id) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_WPWM-TOT-UNNORM.yaml new file mode 100644 index 0000000000..40784420b6 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/kinematics_WPWM-TOT-UNNORM.yaml @@ -0,0 +1,41 @@ +bins: +- abs_eta: + min: 0.0 + mid: 0.2 + max: 0.4 + m_W2: + min: null + mid: 6.46174823e+03 + max: null +- abs_eta: + min: 0.4 + mid: 6.00000000e-01 + max: 0.8 + m_W2: + min: null + mid: 6.46174823e+03 + max: null +- abs_eta: + min: 0.8 + mid: 1.05 + max: 1.3 + m_W2: + min: null + mid: 6.46174823e+03 + max: null +- abs_eta: + min: 1.3 + mid: 1.55 + max: 1.8 + m_W2: + min: null + mid: 6.46174823e+03 + max: null +- abs_eta: + min: 1.8 + mid: 2.1 + max: 2.4 + m_W2: + min: null + mid: 6.46174823e+03 + max: null diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml index 139ac71f76..68235e7601 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml @@ -23,7 +23,7 @@ implemented_observables: label: CMS $W+c$ total 13 TeV units: '[fb]' process_type: DY_W_ETA - tables: [1] + tables: ['WplusCharm_AbsEtaMu_Table'] ndata: 5 plotting: kinematics_override: identity @@ -43,19 +43,22 @@ implemented_observables: label: $m_W^2$ units: GeV$^{2}$ file: kinematics_WPWM-TOT-UNNORM.yaml + data_central: data_WPWM-TOT-UNNORM.yaml + data_uncertainties: [uncertainties_WPWM-TOT-UNNORM.yaml] theory: conversion_factor: 1.0 operation: add FK_tables: - - CMS_WCHARM_DIFF_UNNORM_13TEV-CMS_WCHARM_13TEV_WPCB_leptrap - - CMS_WCHARM_DIFF_UNNORM_13TEV-CMS_WCHARM_13TEV_WMC_leptrap - data_uncertainties: [] variants: + sys_10: + data_uncertainties: + - uncertainties_WPWM-TOT-UNNORM_sys_10.yaml legacy: data_uncertainties: - uncertainties_legacy_WPWM-TOT-UNNORM.yaml legacy_10: data_uncertainties: - uncertainties_legacy_WPWM-TOT-UNNORM_sys_10.yaml - data_central: data_legacy_WPWM-TOT-UNNORM.yaml ported_from: CMS_WCHARM_DIFF_UNNORM_13TEV diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/rawdata/WplusCharm_AbsEtaMu_Table.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/rawdata/WplusCharm_AbsEtaMu_Table.yaml new file mode 100644 index 0000000000..5e02825090 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/rawdata/WplusCharm_AbsEtaMu_Table.yaml @@ -0,0 +1,42 @@ +dependent_variables: +- header: {name: D(SIG(W + CQ))/DETA(MU), units: PB} + qualifiers: + - {name: MU ETA, value: -2.4-2.4} + - {name: MU PT, value: 26 GEV} + - {name: CQ PT, value: 5 GEV} + - {name: SQRT(S), units: GeV, value: '13000.0'} + values: + - errors: + - {label: stat, symerror: 37.17} + - asymerror: {minus: -52.74, plus: 61.16} + label: sys + value: 568.70 + - errors: + - {label: stat, symerror: 28.41} + - asymerror: {minus: -66.08, plus: 61.39} + label: sys + value: 466.91 + - errors: + - {label: stat, symerror: 30.21} + - asymerror: {minus: -53.87, plus: 57.25} + label: sys + value: 479.31 + - errors: + - {label: stat, symerror: 27.45} + - asymerror: {minus: -63.02, plus: 49.44} + label: sys + value: 395.16 + - errors: + - {label: stat, symerror: 24.68} + - asymerror: {minus: -33.91, plus: 38.73} + label: sys + value: 282.83 + +independent_variables: +- header: {name: ABS(ETA(MU))} + values: + - {high: 0.4, low: 0.0} + - {high: 0.8, low: 0.4} + - {high: 1.3, low: 0.8} + - {high: 1.8, low: 1.3} + - {high: 2.4, low: 1.8} diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py new file mode 100644 index 0000000000..ed4753a5da --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py @@ -0,0 +1,143 @@ +import numpy as np + +# Common dict independent of the kinematics +IND_KIN_DICT = [ + {'label': 'track_sys_unc', 'syserror': 2.3}, + {'label': 'brch_sys_unc', 'syserror': 2.4}, + {'label': 'muons_sys_unc', 'syserror': 1.2}, + {'label': 'nsel_sys_unc', 'syserror': 1.5}, + {'label': 'dstar_sys_unc', 'syserror': 0.5}, +] + +SYS_UNC_BY_BIN = [ + # First bin [0, 2.4] + [ + *IND_KIN_DICT, + {'label': 'bkgnorm_sys_unc', 'syserror': 0.5}, + {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +0.7, 'high': -0.9}}, + {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.0, 'high': -1.9}}, + {'label': 'secvrx_sys_unc', 'asyserror': {'low': -1.1, 'high': -1.1}}, + {'label': 'pdf_sys_unc', 'syserror': 1.2}, + {'label': 'frag_sys_unc', 'asyserror': {'low': +3.9, 'high': -3.2}}, + {'label': 'mc_sys_unc', 'asyserror': {'low': +3.6, 'high': -3.3}}, + ], + # Second bin [0, 0.4] + [ + *IND_KIN_DICT, + {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +0.9, 'high': -0.8}}, + {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +0.4, 'high': -1.2}}, + {'label': 'pileup_sys_unc', 'asyserror': {'low': +0.4, 'high': -0.5}}, + {'label': 'secvrx_sys_unc', 'asyserror': {'low': +1.3, 'high': +1.3}}, + {'label': 'pdf_sys_unc', 'syserror': 1.3}, + {'label': 'frag_sys_unc', 'asyserror': {'low': +3.4, 'high': -1.8}}, + {'label': 'mc_sys_unc', 'asyserror': {'low': +8.8, 'high': -7.5}}, + ], + # Third bin [0.4, 0.8] + [ + *IND_KIN_DICT, + {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +1.9, 'high': -0.8}}, + {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +1.3, 'high': -0.3}}, + {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.9, 'high': -3.0}}, + {'label': 'secvrx_sys_unc', 'asyserror': {'low': -1.2, 'high': -1.2}}, + {'label': 'pdf_sys_unc', 'syserror': 0.9}, + {'label': 'frag_sys_unc', 'asyserror': {'low': +7.4, 'high': -5.2}}, + {'label': 'mc_sys_unc', 'asyserror': {'low': +9.0, 'high': -11.9}}, + ], + # Fourth bin [0.8, 1.3] + [ + *IND_KIN_DICT, + {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +1.4, 'high': -0.5}}, + {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +1.1, 'high': -1.0}}, + {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.0, 'high': -1.9}}, + {'label': 'secvrx_sys_unc', 'asyserror': {'low': -1.5, 'high': -1.5}}, + {'label': 'pdf_sys_unc', 'syserror': 1.4}, + {'label': 'frag_sys_unc', 'asyserror': {'low': +3.3, 'high': -3.0}}, + {'label': 'mc_sys_unc', 'asyserror': {'low': +7.9, 'high': -6.8}}, + ], + # Fifth bin [1.3, 1.8] + [ + *IND_KIN_DICT, + {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +0.8, 'high': -1.0}}, + {'label': 'ptmiss_sys_unc', 'asyserror': {'low': 0.0, 'high': -2.6}}, + {'label': 'pileup_sys_unc', 'asyserror': {'low': +4.6, 'high': -5.1}}, + {'label': 'secvrx_sys_unc', 'asyserror': {'low': -2.7, 'high': -2.7}}, + {'label': 'pdf_sys_unc', 'syserror': 1.5}, + {'label': 'frag_sys_unc', 'asyserror': {'low': +2.2, 'high': -1.2}}, + {'label': 'mc_sys_unc', 'asyserror': {'low': +9.8, 'high': -14.1}}, + ], + # Sixth bin [1.8, 2.4] + [ + *IND_KIN_DICT, + {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +0.0, 'high': -0.6}}, + {'label': 'ptmiss_sys_unc', 'asyserror': {'low': 0.0, 'high': +1.5}}, + {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.7, 'high': -2.6}}, + {'label': 'secvrx_sys_unc', 'asyserror': {'low': -2.5, 'high': -2.5}}, + {'label': 'pdf_sys_unc', 'syserror': 1.7}, + {'label': 'frag_sys_unc', 'asyserror': {'low': +7.4, 'high': -5.7}}, + {'label': 'mc_sys_unc', 'asyserror': {'low': +10.1, 'high': -8.5}}, + ], +] + + +SYS_DEFINITIONS = { + 'track_sys_unc': { + 'description': f'Tracking efficiency systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'brch_sys_unc': { + 'description': f'Branching fraction systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'muons_sys_unc': { + 'description': f'Muon identification systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'nsel_sys_unc': { + 'description': f'N_sel determination systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'dstar_sys_unc': { + 'description': f'D*(2010)+- systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'bkgnorm_sys_unc': { + 'description': f'Background normalization systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'ptmiss_sys_unc': { + 'description': f'pT miss systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'pileup_sys_unc': { + 'description': f'Pileup systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'secvrx_sys_unc': { + 'description': f'PDF systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'pdf_sys_unc': { + 'description': f'Fragmentation systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'frag_sys_unc': { + 'description': f'MC statistics systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, + 'mc_sys_unc': { + 'description': f'Symmetrized systematic uncertainty', + 'treatment': 'MULT', + 'type': 'CORR', + }, +} diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml new file mode 100644 index 0000000000..7437071f34 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml @@ -0,0 +1,123 @@ +definitions: + stat_uncorr_unc: + description: Statistical uncertainty + treatment: ADD + type: UNCORR + corr_lumi_unc: + description: Luminosity uncertainty 2.6% + treatment: MULT + type: CMSLUMI13 + track_sys_unc: + description: Tracking efficiency systematic uncertainty + treatment: MULT + type: CORR + brch_sys_unc: + description: Branching fraction systematic uncertainty + treatment: MULT + type: CORR + muons_sys_unc: + description: Muon identification systematic uncertainty + treatment: MULT + type: CORR + nsel_sys_unc: + description: N_sel determination systematic uncertainty + treatment: MULT + type: CORR + dstar_sys_unc: + description: D*(2010)+- systematic uncertainty + treatment: MULT + type: CORR + bkgnorm_sys_unc: + description: Background normalization systematic uncertainty + treatment: MULT + type: CORR + ptmiss_sys_unc: + description: pT miss systematic uncertainty + treatment: MULT + type: CORR + pileup_sys_unc: + description: Pileup systematic uncertainty + treatment: MULT + type: CORR + secvrx_sys_unc: + description: PDF systematic uncertainty + treatment: MULT + type: CORR + pdf_sys_unc: + description: Fragmentation systematic uncertainty + treatment: MULT + type: CORR + frag_sys_unc: + description: MC statistics systematic uncertainty + treatment: MULT + type: CORR + mc_sys_unc: + description: Symmetrized systematic uncertainty + treatment: MULT + type: CORR +bins: +- corr_lumi_unc: 1.42174837e+04 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 0.5 + ptmiss_sys_unc: 8.12403840e-01 + pileup_sys_unc: 1.95128163e+00 + secvrx_sys_unc: 1.55563492e+00 + pdf_sys_unc: 1.2 + frag_sys_unc: 3.58434094e+00 + mc_sys_unc: 3.45651559e+00 +- corr_lumi_unc: 1.16728088e+04 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 8.52936105e-01 + ptmiss_sys_unc: 9.79795897e-01 + pileup_sys_unc: 4.55521679e-01 + secvrx_sys_unc: 1.83847763e+00 + pdf_sys_unc: 1.3 + frag_sys_unc: 2.83548938e+00 + mc_sys_unc: 8.20167666e+00 +- corr_lumi_unc: 1.19827362e+04 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 1.55804365e+00 + ptmiss_sys_unc: 1.06770783e+00 + pileup_sys_unc: 2.95084734e+00 + secvrx_sys_unc: 1.69705627e+00 + pdf_sys_unc: 0.9 + frag_sys_unc: 6.48922183e+00 + mc_sys_unc: 1.06492958e+01 +- corr_lumi_unc: 9.87899375e+03 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 1.14345966e+00 + ptmiss_sys_unc: 1.05237826e+00 + pileup_sys_unc: 1.95128163e+00 + secvrx_sys_unc: 2.12132034e+00 + pdf_sys_unc: 1.4 + frag_sys_unc: 3.15713478e+00 + mc_sys_unc: 7.39104188e+00 +- corr_lumi_unc: 7070.6 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 9.11043358e-01 + ptmiss_sys_unc: 2.25166605e+00 + pileup_sys_unc: 4.86286952e+00 + secvrx_sys_unc: 3.81837662e+00 + pdf_sys_unc: 1.5 + frag_sys_unc: 1.84119526e+00 + mc_sys_unc: 1.23307542e+01 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml new file mode 100644 index 0000000000..82f36ed94b --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml @@ -0,0 +1,132 @@ +definitions: + stat_uncorr_unc: + description: Statistical uncertainty + treatment: ADD + type: UNCORR + corr_lumi_unc: + description: Luminosity uncertainty 2.6% + treatment: MULT + type: CMSLUMI13 + track_sys_unc: + description: Tracking efficiency systematic uncertainty + treatment: MULT + type: CORR + brch_sys_unc: + description: Branching fraction systematic uncertainty + treatment: MULT + type: CORR + muons_sys_unc: + description: Muon identification systematic uncertainty + treatment: MULT + type: CORR + nsel_sys_unc: + description: N_sel determination systematic uncertainty + treatment: MULT + type: CORR + dstar_sys_unc: + description: D*(2010)+- systematic uncertainty + treatment: MULT + type: CORR + bkgnorm_sys_unc: + description: Background normalization systematic uncertainty + treatment: MULT + type: CORR + ptmiss_sys_unc: + description: pT miss systematic uncertainty + treatment: MULT + type: CORR + pileup_sys_unc: + description: Pileup systematic uncertainty + treatment: MULT + type: CORR + secvrx_sys_unc: + description: PDF systematic uncertainty + treatment: MULT + type: CORR + pdf_sys_unc: + description: Fragmentation systematic uncertainty + treatment: MULT + type: CORR + frag_sys_unc: + description: MC statistics systematic uncertainty + treatment: MULT + type: CORR + mc_sys_unc: + description: Symmetrized systematic uncertainty + treatment: MULT + type: CORR + uncorr_mc_unc: + description: MC uncertainty + treatment: MULT + type: UNCORR +bins: +- corr_lumi_unc: 1.42174837e+04 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 0.5 + ptmiss_sys_unc: 8.12403840e-01 + pileup_sys_unc: 1.95128163e+00 + secvrx_sys_unc: 1.55563492e+00 + pdf_sys_unc: 1.2 + frag_sys_unc: 3.58434094e+00 + mc_sys_unc: 3.45651559e+00 + uncorr_mc_unc: 5.68699350e+03 +- corr_lumi_unc: 1.16728088e+04 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 8.52936105e-01 + ptmiss_sys_unc: 9.79795897e-01 + pileup_sys_unc: 4.55521679e-01 + secvrx_sys_unc: 1.83847763e+00 + pdf_sys_unc: 1.3 + frag_sys_unc: 2.83548938e+00 + mc_sys_unc: 8.20167666e+00 + uncorr_mc_unc: 4.66912350e+03 +- corr_lumi_unc: 1.19827362e+04 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 1.55804365e+00 + ptmiss_sys_unc: 1.06770783e+00 + pileup_sys_unc: 2.95084734e+00 + secvrx_sys_unc: 1.69705627e+00 + pdf_sys_unc: 0.9 + frag_sys_unc: 6.48922183e+00 + mc_sys_unc: 1.06492958e+01 + uncorr_mc_unc: 4.79309450e+03 +- corr_lumi_unc: 9.87899375e+03 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 1.14345966e+00 + ptmiss_sys_unc: 1.05237826e+00 + pileup_sys_unc: 1.95128163e+00 + secvrx_sys_unc: 2.12132034e+00 + pdf_sys_unc: 1.4 + frag_sys_unc: 3.15713478e+00 + mc_sys_unc: 7.39104188e+00 + uncorr_mc_unc: 3.95159750e+03 +- corr_lumi_unc: 7070.6 + track_sys_unc: 2.3 + brch_sys_unc: 2.4 + muons_sys_unc: 1.2 + nsel_sys_unc: 1.5 + dstar_sys_unc: 0.5 + bkgnorm_sys_unc: 9.11043358e-01 + ptmiss_sys_unc: 2.25166605e+00 + pileup_sys_unc: 4.86286952e+00 + secvrx_sys_unc: 3.81837662e+00 + pdf_sys_unc: 1.5 + frag_sys_unc: 1.84119526e+00 + mc_sys_unc: 1.23307542e+01 + uncorr_mc_unc: 2.82824000e+03 From db6cb12020e2e6414fc485ae955a02f6da13d224 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 9 Dec 2024 18:58:13 +0000 Subject: [PATCH 03/16] Correct metadata --- nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml index 68235e7601..d64a3c14a9 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml @@ -38,7 +38,7 @@ implemented_observables: description: Absolute pseudo-rapidity of the Z boson label: $|\eta|$ units: '' - k2: + m_W2: description: Mass of the W boson squared label: $m_W^2$ units: GeV$^{2}$ From 5a440b1647e6cc4b54dcb8e85a1b7c7a96a75387 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 9 Dec 2024 19:02:03 +0000 Subject: [PATCH 04/16] Correct bug in filter --- .../commondata/CMS_WCHARM_13TEV/filter_utils.py | 2 +- .../uncertainties_WPWM-TOT-UNNORM.yaml | 15 ++++++++++----- .../uncertainties_WPWM-TOT-UNNORM_sys_10.yaml | 15 ++++++++++----- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 4d249e661d..74a9af15e1 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -268,7 +268,7 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): unc_dict = {STAT_LABEL: stat_unc[data_idx]} # Lumi uncertainty - unc_dict = {'corr_lumi_unc': central_data[data_idx] * CMSLUMI13 * 0.01} + unc_dict['corr_lumi_unc'] = central_data[data_idx] * CMSLUMI13 * 0.01 # Add systematic uncertainties unc_dict = unc_dict | tmp diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml index 7437071f34..755b916c68 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml @@ -56,7 +56,8 @@ definitions: treatment: MULT type: CORR bins: -- corr_lumi_unc: 1.42174837e+04 +- stat_uncorr_unc: 37170.0 + corr_lumi_unc: 1.42174837e+04 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -69,7 +70,8 @@ bins: pdf_sys_unc: 1.2 frag_sys_unc: 3.58434094e+00 mc_sys_unc: 3.45651559e+00 -- corr_lumi_unc: 1.16728088e+04 +- stat_uncorr_unc: 28410.0 + corr_lumi_unc: 1.16728088e+04 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -82,7 +84,8 @@ bins: pdf_sys_unc: 1.3 frag_sys_unc: 2.83548938e+00 mc_sys_unc: 8.20167666e+00 -- corr_lumi_unc: 1.19827362e+04 +- stat_uncorr_unc: 30210.0 + corr_lumi_unc: 1.19827362e+04 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -95,7 +98,8 @@ bins: pdf_sys_unc: 0.9 frag_sys_unc: 6.48922183e+00 mc_sys_unc: 1.06492958e+01 -- corr_lumi_unc: 9.87899375e+03 +- stat_uncorr_unc: 27450.0 + corr_lumi_unc: 9.87899375e+03 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -108,7 +112,8 @@ bins: pdf_sys_unc: 1.4 frag_sys_unc: 3.15713478e+00 mc_sys_unc: 7.39104188e+00 -- corr_lumi_unc: 7070.6 +- stat_uncorr_unc: 24680.0 + corr_lumi_unc: 7070.6 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml index 82f36ed94b..74dd7ceabc 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml @@ -60,7 +60,8 @@ definitions: treatment: MULT type: UNCORR bins: -- corr_lumi_unc: 1.42174837e+04 +- stat_uncorr_unc: 37170.0 + corr_lumi_unc: 1.42174837e+04 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -74,7 +75,8 @@ bins: frag_sys_unc: 3.58434094e+00 mc_sys_unc: 3.45651559e+00 uncorr_mc_unc: 5.68699350e+03 -- corr_lumi_unc: 1.16728088e+04 +- stat_uncorr_unc: 28410.0 + corr_lumi_unc: 1.16728088e+04 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -88,7 +90,8 @@ bins: frag_sys_unc: 2.83548938e+00 mc_sys_unc: 8.20167666e+00 uncorr_mc_unc: 4.66912350e+03 -- corr_lumi_unc: 1.19827362e+04 +- stat_uncorr_unc: 30210.0 + corr_lumi_unc: 1.19827362e+04 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -102,7 +105,8 @@ bins: frag_sys_unc: 6.48922183e+00 mc_sys_unc: 1.06492958e+01 uncorr_mc_unc: 4.79309450e+03 -- corr_lumi_unc: 9.87899375e+03 +- stat_uncorr_unc: 27450.0 + corr_lumi_unc: 9.87899375e+03 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 @@ -116,7 +120,8 @@ bins: frag_sys_unc: 3.15713478e+00 mc_sys_unc: 7.39104188e+00 uncorr_mc_unc: 3.95159750e+03 -- corr_lumi_unc: 7070.6 +- stat_uncorr_unc: 24680.0 + corr_lumi_unc: 7070.6 track_sys_unc: 2.3 brch_sys_unc: 2.4 muons_sys_unc: 1.2 From 220300d3d1a4f72222a52c5ca948872b19a9985c Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 10:49:30 +0000 Subject: [PATCH 05/16] Correct percentage for lumi description --- .../nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 74a9af15e1..9a4c533c43 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -221,7 +221,7 @@ def __build_unc_definitions(self, variant='default'): # Add lumi uncertainty unc_definitions['corr_lumi_unc'] = { - 'description': f'Luminosity uncertainty 2.6%', + 'description': f'Luminosity uncertainty 2.5%', 'treatment': 'MULT', 'type': 'CMSLUMI13', } From 64148c1fb2530e6bb9ca1c9f620e236736666ce1 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 10:50:24 +0000 Subject: [PATCH 06/16] Comment out unused bin --- .../CMS_WCHARM_13TEV/sys_uncertainties.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py index ed4753a5da..a759011699 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py @@ -11,16 +11,16 @@ SYS_UNC_BY_BIN = [ # First bin [0, 2.4] - [ - *IND_KIN_DICT, - {'label': 'bkgnorm_sys_unc', 'syserror': 0.5}, - {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +0.7, 'high': -0.9}}, - {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.0, 'high': -1.9}}, - {'label': 'secvrx_sys_unc', 'asyserror': {'low': -1.1, 'high': -1.1}}, - {'label': 'pdf_sys_unc', 'syserror': 1.2}, - {'label': 'frag_sys_unc', 'asyserror': {'low': +3.9, 'high': -3.2}}, - {'label': 'mc_sys_unc', 'asyserror': {'low': +3.6, 'high': -3.3}}, - ], + # [ + # *IND_KIN_DICT, + # {'label': 'bkgnorm_sys_unc', 'syserror': 0.5}, + # {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +0.7, 'high': -0.9}}, + # {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.0, 'high': -1.9}}, + # {'label': 'secvrx_sys_unc', 'asyserror': {'low': -1.1, 'high': -1.1}}, + # {'label': 'pdf_sys_unc', 'syserror': 1.2}, + # {'label': 'frag_sys_unc', 'asyserror': {'low': +3.9, 'high': -3.2}}, + # {'label': 'mc_sys_unc', 'asyserror': {'low': +3.6, 'high': -3.3}}, + # ], # Second bin [0, 0.4] [ *IND_KIN_DICT, From 06daacf9476a833ee079d1d9de209493c0639be7 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 10:52:23 +0000 Subject: [PATCH 07/16] Remove sys_10 variant --- .../commondata/CMS_WCHARM_13TEV/filter.py | 1 - .../CMS_WCHARM_13TEV/filter_utils.py | 11 +- .../commondata/CMS_WCHARM_13TEV/metadata.yaml | 3 - .../uncertainties_WPWM-TOT-UNNORM_sys_10.yaml | 137 ------------------ 4 files changed, 1 insertion(+), 151 deletions(-) delete mode 100644 nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py index f7d00a0e76..4e7e98cfbd 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py @@ -14,4 +14,3 @@ if __name__ == "__main__": CMS_WCHARM = Extractor("./metadata.yaml", "WPWM-TOT-UNNORM", mult_factor=1000) _, _, _ = CMS_WCHARM.generate_data(variant='default', save_to_yaml=True) - _, _, _ = CMS_WCHARM.generate_data(variant='sys_10', save_to_yaml=True) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 9a4c533c43..f13b7cb6f6 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -229,13 +229,7 @@ def __build_unc_definitions(self, variant='default'): # Add systematic uncertainty unc_definitions = unc_definitions | SYS_DEFINITIONS - if variant == 'sys_10': - unc_definitions['uncorr_mc_unc'] = { - 'description': f'MC uncertainty', - 'treatment': 'MULT', - 'type': 'UNCORR', - } - elif variant != 'default': + if variant != 'default': raise ValueError(f'The variant {variant} is not implemented yet.') return unc_definitions @@ -273,9 +267,6 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): # Add systematic uncertainties unc_dict = unc_dict | tmp - if variant == 'sys_10': - unc_dict['uncorr_mc_unc'] = central_data[data_idx] * 0.01 - sys_artificial.append(unc_dict) if save_to_yaml: diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml index d64a3c14a9..e28be8bbc5 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml @@ -52,9 +52,6 @@ implemented_observables: - - CMS_WCHARM_DIFF_UNNORM_13TEV-CMS_WCHARM_13TEV_WPCB_leptrap - - CMS_WCHARM_DIFF_UNNORM_13TEV-CMS_WCHARM_13TEV_WMC_leptrap variants: - sys_10: - data_uncertainties: - - uncertainties_WPWM-TOT-UNNORM_sys_10.yaml legacy: data_uncertainties: - uncertainties_legacy_WPWM-TOT-UNNORM.yaml diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml deleted file mode 100644 index 74dd7ceabc..0000000000 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM_sys_10.yaml +++ /dev/null @@ -1,137 +0,0 @@ -definitions: - stat_uncorr_unc: - description: Statistical uncertainty - treatment: ADD - type: UNCORR - corr_lumi_unc: - description: Luminosity uncertainty 2.6% - treatment: MULT - type: CMSLUMI13 - track_sys_unc: - description: Tracking efficiency systematic uncertainty - treatment: MULT - type: CORR - brch_sys_unc: - description: Branching fraction systematic uncertainty - treatment: MULT - type: CORR - muons_sys_unc: - description: Muon identification systematic uncertainty - treatment: MULT - type: CORR - nsel_sys_unc: - description: N_sel determination systematic uncertainty - treatment: MULT - type: CORR - dstar_sys_unc: - description: D*(2010)+- systematic uncertainty - treatment: MULT - type: CORR - bkgnorm_sys_unc: - description: Background normalization systematic uncertainty - treatment: MULT - type: CORR - ptmiss_sys_unc: - description: pT miss systematic uncertainty - treatment: MULT - type: CORR - pileup_sys_unc: - description: Pileup systematic uncertainty - treatment: MULT - type: CORR - secvrx_sys_unc: - description: PDF systematic uncertainty - treatment: MULT - type: CORR - pdf_sys_unc: - description: Fragmentation systematic uncertainty - treatment: MULT - type: CORR - frag_sys_unc: - description: MC statistics systematic uncertainty - treatment: MULT - type: CORR - mc_sys_unc: - description: Symmetrized systematic uncertainty - treatment: MULT - type: CORR - uncorr_mc_unc: - description: MC uncertainty - treatment: MULT - type: UNCORR -bins: -- stat_uncorr_unc: 37170.0 - corr_lumi_unc: 1.42174837e+04 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 0.5 - ptmiss_sys_unc: 8.12403840e-01 - pileup_sys_unc: 1.95128163e+00 - secvrx_sys_unc: 1.55563492e+00 - pdf_sys_unc: 1.2 - frag_sys_unc: 3.58434094e+00 - mc_sys_unc: 3.45651559e+00 - uncorr_mc_unc: 5.68699350e+03 -- stat_uncorr_unc: 28410.0 - corr_lumi_unc: 1.16728088e+04 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 8.52936105e-01 - ptmiss_sys_unc: 9.79795897e-01 - pileup_sys_unc: 4.55521679e-01 - secvrx_sys_unc: 1.83847763e+00 - pdf_sys_unc: 1.3 - frag_sys_unc: 2.83548938e+00 - mc_sys_unc: 8.20167666e+00 - uncorr_mc_unc: 4.66912350e+03 -- stat_uncorr_unc: 30210.0 - corr_lumi_unc: 1.19827362e+04 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 1.55804365e+00 - ptmiss_sys_unc: 1.06770783e+00 - pileup_sys_unc: 2.95084734e+00 - secvrx_sys_unc: 1.69705627e+00 - pdf_sys_unc: 0.9 - frag_sys_unc: 6.48922183e+00 - mc_sys_unc: 1.06492958e+01 - uncorr_mc_unc: 4.79309450e+03 -- stat_uncorr_unc: 27450.0 - corr_lumi_unc: 9.87899375e+03 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 1.14345966e+00 - ptmiss_sys_unc: 1.05237826e+00 - pileup_sys_unc: 1.95128163e+00 - secvrx_sys_unc: 2.12132034e+00 - pdf_sys_unc: 1.4 - frag_sys_unc: 3.15713478e+00 - mc_sys_unc: 7.39104188e+00 - uncorr_mc_unc: 3.95159750e+03 -- stat_uncorr_unc: 24680.0 - corr_lumi_unc: 7070.6 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 9.11043358e-01 - ptmiss_sys_unc: 2.25166605e+00 - pileup_sys_unc: 4.86286952e+00 - secvrx_sys_unc: 3.81837662e+00 - pdf_sys_unc: 1.5 - frag_sys_unc: 1.84119526e+00 - mc_sys_unc: 1.23307542e+01 - uncorr_mc_unc: 2.82824000e+03 From e74f01f7f4dd3044e8bd83f28fcbf8d99d4d3a0d Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 10:53:10 +0000 Subject: [PATCH 08/16] Correct bug in data generation --- .../data_WPWM-TOT-UNNORM.yaml | 10 +- .../CMS_WCHARM_13TEV/filter_utils.py | 12 +- .../uncertainties_WPWM-TOT-UNNORM.yaml | 132 +++++++++--------- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml index f462c3da21..2170cc1648 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml @@ -1,6 +1,6 @@ data_central: -- 5.68699350e+05 -- 4.66912350e+05 -- 4.79309450e+05 -- 3.95159750e+05 -- 282824.0 +- 5.82064450e+05 +- 4.64341995e+05 +- 4.78111725e+05 +- 371450.4 +- 2.81840095e+05 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index f13b7cb6f6..2f2fc55886 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -249,17 +249,17 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): shift = 0 sys_unc_bin = symmetrized_sys_uncs[data_idx] + # Statistical uncertainty + unc_dict = {STAT_LABEL: stat_unc[data_idx]} + # Add shift from symmetrization tmp = {} for key, value in sys_unc_bin.items(): shift += value['shift'] - tmp[key] = value['sym_error'] + tmp[key] = value['sym_error'] * central_data[data_idx] * 0.01 - # Shift central data - central_data[data_idx] = central_data[data_idx] + shift - - # Statistical uncertainty - unc_dict = {STAT_LABEL: stat_unc[data_idx]} + # Shift central + central_data[data_idx] = central_data[data_idx] * (1.0 + shift * 0.01) # Lumi uncertainty unc_dict['corr_lumi_unc'] = central_data[data_idx] * CMSLUMI13 * 0.01 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml index 755b916c68..f735ebca5c 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml @@ -4,7 +4,7 @@ definitions: treatment: ADD type: UNCORR corr_lumi_unc: - description: Luminosity uncertainty 2.6% + description: Luminosity uncertainty 2.5% treatment: MULT type: CMSLUMI13 track_sys_unc: @@ -57,72 +57,72 @@ definitions: type: CORR bins: - stat_uncorr_unc: 37170.0 - corr_lumi_unc: 1.42174837e+04 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 0.5 - ptmiss_sys_unc: 8.12403840e-01 - pileup_sys_unc: 1.95128163e+00 - secvrx_sys_unc: 1.55563492e+00 - pdf_sys_unc: 1.2 - frag_sys_unc: 3.58434094e+00 - mc_sys_unc: 3.45651559e+00 + corr_lumi_unc: 1.45516113e+04 + track_sys_unc: 13080.1 + brch_sys_unc: 1.36488000e+04 + muons_sys_unc: 6.82440000e+03 + nsel_sys_unc: 8530.5 + dstar_sys_unc: 2843.5 + bkgnorm_sys_unc: 4.85064763e+03 + ptmiss_sys_unc: 5.57209927e+03 + pileup_sys_unc: 2.59055179e+03 + secvrx_sys_unc: 1.04554223e+04 + pdf_sys_unc: 7393.1 + frag_sys_unc: 1.61254281e+04 + mc_sys_unc: 4.66429352e+04 - stat_uncorr_unc: 28410.0 - corr_lumi_unc: 1.16728088e+04 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 8.52936105e-01 - ptmiss_sys_unc: 9.79795897e-01 - pileup_sys_unc: 4.55521679e-01 - secvrx_sys_unc: 1.83847763e+00 - pdf_sys_unc: 1.3 - frag_sys_unc: 2.83548938e+00 - mc_sys_unc: 8.20167666e+00 + corr_lumi_unc: 1.16085499e+04 + track_sys_unc: 10738.93 + brch_sys_unc: 11205.84 + muons_sys_unc: 5602.92 + nsel_sys_unc: 7.00365000e+03 + dstar_sys_unc: 2334.55 + bkgnorm_sys_unc: 7.27466158e+03 + ptmiss_sys_unc: 4.98523461e+03 + pileup_sys_unc: 1.37778013e+04 + secvrx_sys_unc: 7.92372545e+03 + pdf_sys_unc: 4.20219000e+03 + frag_sys_unc: 3.02988257e+04 + mc_sys_unc: 4.97226268e+04 - stat_uncorr_unc: 30210.0 - corr_lumi_unc: 1.19827362e+04 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 1.55804365e+00 - ptmiss_sys_unc: 1.06770783e+00 - pileup_sys_unc: 2.95084734e+00 - secvrx_sys_unc: 1.69705627e+00 - pdf_sys_unc: 0.9 - frag_sys_unc: 6.48922183e+00 - mc_sys_unc: 1.06492958e+01 + corr_lumi_unc: 1.19527931e+04 + track_sys_unc: 1.10241300e+04 + brch_sys_unc: 11503.44 + muons_sys_unc: 5751.72 + nsel_sys_unc: 7.18965000e+03 + dstar_sys_unc: 2396.55 + bkgnorm_sys_unc: 5.48071651e+03 + ptmiss_sys_unc: 5.04415423e+03 + pileup_sys_unc: 9.35268798e+03 + secvrx_sys_unc: 1.01677005e+04 + pdf_sys_unc: 6710.34 + frag_sys_unc: 1.51324627e+04 + mc_sys_unc: 3.54260028e+04 - stat_uncorr_unc: 27450.0 - corr_lumi_unc: 9.87899375e+03 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 1.14345966e+00 - ptmiss_sys_unc: 1.05237826e+00 - pileup_sys_unc: 1.95128163e+00 - secvrx_sys_unc: 2.12132034e+00 - pdf_sys_unc: 1.4 - frag_sys_unc: 3.15713478e+00 - mc_sys_unc: 7.39104188e+00 + corr_lumi_unc: 9286.26 + track_sys_unc: 9.08868000e+03 + brch_sys_unc: 9483.84 + muons_sys_unc: 4741.92 + nsel_sys_unc: 5.92740000e+03 + dstar_sys_unc: 1975.8 + bkgnorm_sys_unc: 3.60007893e+03 + ptmiss_sys_unc: 8.89768356e+03 + pileup_sys_unc: 1.92161152e+04 + secvrx_sys_unc: 1.50886970e+04 + pdf_sys_unc: 5.92740000e+03 + frag_sys_unc: 7.27566721e+03 + mc_sys_unc: 4.87262084e+04 - stat_uncorr_unc: 24680.0 - corr_lumi_unc: 7070.6 - track_sys_unc: 2.3 - brch_sys_unc: 2.4 - muons_sys_unc: 1.2 - nsel_sys_unc: 1.5 - dstar_sys_unc: 0.5 - bkgnorm_sys_unc: 9.11043358e-01 - ptmiss_sys_unc: 2.25166605e+00 - pileup_sys_unc: 4.86286952e+00 - secvrx_sys_unc: 3.81837662e+00 - pdf_sys_unc: 1.5 - frag_sys_unc: 1.84119526e+00 - mc_sys_unc: 1.23307542e+01 + corr_lumi_unc: 7.04600238e+03 + track_sys_unc: 6505.09 + brch_sys_unc: 6787.92 + muons_sys_unc: 3393.96 + nsel_sys_unc: 4242.45 + dstar_sys_unc: 1414.15 + bkgnorm_sys_unc: 1.46962779e+03 + ptmiss_sys_unc: 3.67406947e+03 + pileup_sys_unc: 7.49766273e+03 + secvrx_sys_unc: 9.99955055e+03 + pdf_sys_unc: 4808.11 + frag_sys_unc: 1.88347580e+04 + mc_sys_unc: 2.64971109e+04 From 4e6a5037a8dfcc3d17b0914d689ed4a0450e602b Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Dec 2024 10:59:14 +0000 Subject: [PATCH 09/16] Correct order for shifts --- .../commondata/CMS_WCHARM_13TEV/filter_utils.py | 6 +++--- .../uncertainties_WPWM-TOT-UNNORM.yaml | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 2f2fc55886..2bf20feda4 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -258,12 +258,12 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): shift += value['shift'] tmp[key] = value['sym_error'] * central_data[data_idx] * 0.01 - # Shift central - central_data[data_idx] = central_data[data_idx] * (1.0 + shift * 0.01) - # Lumi uncertainty unc_dict['corr_lumi_unc'] = central_data[data_idx] * CMSLUMI13 * 0.01 + # Shift central + central_data[data_idx] = central_data[data_idx] * (1.0 + shift * 0.01) + # Add systematic uncertainties unc_dict = unc_dict | tmp diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml index f735ebca5c..1f65c9407b 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/uncertainties_WPWM-TOT-UNNORM.yaml @@ -57,7 +57,7 @@ definitions: type: CORR bins: - stat_uncorr_unc: 37170.0 - corr_lumi_unc: 1.45516113e+04 + corr_lumi_unc: 14217.5 track_sys_unc: 13080.1 brch_sys_unc: 1.36488000e+04 muons_sys_unc: 6.82440000e+03 @@ -71,7 +71,7 @@ bins: frag_sys_unc: 1.61254281e+04 mc_sys_unc: 4.66429352e+04 - stat_uncorr_unc: 28410.0 - corr_lumi_unc: 1.16085499e+04 + corr_lumi_unc: 11672.75 track_sys_unc: 10738.93 brch_sys_unc: 11205.84 muons_sys_unc: 5602.92 @@ -85,7 +85,7 @@ bins: frag_sys_unc: 3.02988257e+04 mc_sys_unc: 4.97226268e+04 - stat_uncorr_unc: 30210.0 - corr_lumi_unc: 1.19527931e+04 + corr_lumi_unc: 11982.75 track_sys_unc: 1.10241300e+04 brch_sys_unc: 11503.44 muons_sys_unc: 5751.72 @@ -99,7 +99,7 @@ bins: frag_sys_unc: 1.51324627e+04 mc_sys_unc: 3.54260028e+04 - stat_uncorr_unc: 27450.0 - corr_lumi_unc: 9286.26 + corr_lumi_unc: 9879.0 track_sys_unc: 9.08868000e+03 brch_sys_unc: 9483.84 muons_sys_unc: 4741.92 @@ -113,7 +113,7 @@ bins: frag_sys_unc: 7.27566721e+03 mc_sys_unc: 4.87262084e+04 - stat_uncorr_unc: 24680.0 - corr_lumi_unc: 7.04600238e+03 + corr_lumi_unc: 7070.75 track_sys_unc: 6505.09 brch_sys_unc: 6787.92 muons_sys_unc: 3393.96 From 5e98fcf7f8ab008b5e382f5e0542c737e0fa3440 Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:53:21 +0100 Subject: [PATCH 10/16] Clean code + remove unused code --- .../commondata/CMS_WCHARM_13TEV/filter.py | 5 +- .../CMS_WCHARM_13TEV/filter_utils.py | 92 +++++++------------ 2 files changed, 35 insertions(+), 62 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py index 4e7e98cfbd..e91f0a0fa3 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py @@ -5,12 +5,9 @@ import logging from filter_utils import Extractor -import numpy as np -import yaml logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') - if __name__ == "__main__": CMS_WCHARM = Extractor("./metadata.yaml", "WPWM-TOT-UNNORM", mult_factor=1000) - _, _, _ = CMS_WCHARM.generate_data(variant='default', save_to_yaml=True) + CMS_WCHARM.generate_data() diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 2bf20feda4..a96cca22e8 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -1,8 +1,6 @@ import logging -import os import numpy as np -import pandas as pd from sys_uncertainties import SYS_DEFINITIONS, SYS_UNC_BY_BIN import yaml @@ -10,21 +8,12 @@ yaml.add_representer(float, prettify_float) -SQRTS = 8000 MW2 = 80.385**2 CMSLUMI13 = 2.5 # % -# List of systematic uncertainties that shuold -# be considered uncorrelated -UNCORR_SYS_UNC = ['UnfoldMCstat', 'UnfoldOtherGen', 'UnfoldReweight'] -ART_LABEL = 'art_corr_unc' STAT_LABEL = 'stat_uncorr_unc' TABLE = '' -# From Table 1 of the paper -SYS_UNC_by_bin = [{}] - - class Extractor: """ Extracts kinematics, central data, and uncertainties for a given dataset @@ -61,11 +50,6 @@ def __init__(self, metadata_file, observable, mult_factor=1): self.kin_labels = self.metadata['kinematic_coverage'] self.ndata = self.metadata['ndata'] - # Collect diagonal absoulute uncertainties - # self.diag_unc = self.__collect_diag_unc() - # self.unc_labels = list(self.diag_unc[0].keys()) - # self.unc_labels.pop(0) - def __retrieve_table(self, table_id): """ Implementation of the lazy loading for the tables. If the table @@ -109,8 +93,6 @@ def __extract_kinematics(self, table: dict): ---------- table: dict Dictionary containing the bins in the transverse momentum - tab_number: int - Index to select the range of the second kinematic variable Return ------ @@ -209,7 +191,7 @@ def symmetrized_sys_unc(self): symmetrized_uncs.append(unc_dict) return symmetrized_uncs - def __build_unc_definitions(self, variant='default'): + def __build_unc_definitions(self): unc_definitions = {} # Statistical uncertainty @@ -229,18 +211,19 @@ def __build_unc_definitions(self, variant='default'): # Add systematic uncertainty unc_definitions = unc_definitions | SYS_DEFINITIONS - if variant != 'default': - raise ValueError(f'The variant {variant} is not implemented yet.') - return unc_definitions - def generate_data(self, variant='default', save_to_yaml=False, path='./'): + def generate_data(self): + ''' + Collect central data, kinematics, and uncertainties ans save them + into yaml files. + ''' # Get central data and kinematics central_data, stat_unc, _ = self.generate_data_and_unc(self.mult_factor) kinematics = self.generate_kinematics() # Uncertainty definitions - unc_definitions = self.__build_unc_definitions(variant=variant) + unc_definitions = self.__build_unc_definitions() sys_artificial = [] # Initialize vector of artificial uncertainties @@ -268,37 +251,30 @@ def generate_data(self, variant='default', save_to_yaml=False, path='./'): unc_dict = unc_dict | tmp sys_artificial.append(unc_dict) - - if save_to_yaml: - # Save kinematics into file - logging.info("Dumping kinematics to file...") - kinematics_yaml = {'bins': kinematics} - with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: - yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) - logging.info("Done!") - - # Save central data into file - logging.info("Dumping kinematics to file...") - dat_central_yaml = {'data_central': central_data} - file_name = self.metadata['data_central'] - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) - logging.info("Done!") - - # Save unertainties - logging.info("Dumping kinematics to file...") - uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} - file_name = ( - self.metadata['data_uncertainties'][0] - if variant == 'default' - else self.metadata['variants'][variant]['data_uncertainties'][0] - ) - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) - logging.info("Done!") - return kinematics, central_data, sys_artificial - else: - return kinematics, central_data, sys_artificial - - def get_table(self, table_id): - return self.__retrieve_table(table_id) + + # Local path for yaml files + path = './' + + # Save kinematics into file + logging.info("Dumping kinematics to file...") + kinematics_yaml = {'bins': kinematics} + with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: + yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) + logging.info("Done!") + + # Save central data into file + logging.info("Dumping kinematics to file...") + dat_central_yaml = {'data_central': central_data} + file_name = self.metadata['data_central'] + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + + # Save unertainties + logging.info("Dumping kinematics to file...") + uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} + file_name = self.metadata['data_uncertainties'][0] + with open(path + file_name, 'w') as dat_out_file: + yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) + logging.info("Done!") + return kinematics, central_data, sys_artificial From db7524f01ec0fed7129140cef286c75ce0121ec2 Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:57:25 +0100 Subject: [PATCH 11/16] Add docstring in `sys_uncertainties.py` --- .../commondata/CMS_WCHARM_13TEV/sys_uncertainties.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py index a759011699..0d35e707d4 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py @@ -1,4 +1,9 @@ -import numpy as np +''' +The full break-down of the systematic uncertainties is not given in the +HepData format. However, Table 1 of the referenced paper provides the +different sources of systematic uncertainties bin-by-bin. This table +is reproduced in the following. +''' # Common dict independent of the kinematics IND_KIN_DICT = [ From f79bed31f0d12b67ab0dbfb4050d81adeb5ccf6d Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:33:23 +0000 Subject: [PATCH 12/16] Clean up filter files --- .../commondata/CMS_WCHARM_13TEV/filter.py | 6 +- .../CMS_WCHARM_13TEV/filter_utils.py | 267 ++++++++---------- .../CMS_WCHARM_13TEV/sys_uncertainties.py | 23 +- 3 files changed, 129 insertions(+), 167 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py index e91f0a0fa3..c8dc6a5327 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter.py @@ -3,11 +3,15 @@ ''' import logging +import os from filter_utils import Extractor logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') +current_dir = os.path.dirname(os.path.abspath(__file__)) + if __name__ == "__main__": - CMS_WCHARM = Extractor("./metadata.yaml", "WPWM-TOT-UNNORM", mult_factor=1000) + + CMS_WCHARM = Extractor(f"{current_dir}/metadata.yaml", "WPWM-TOT-UNNORM", mult_factor=1000) CMS_WCHARM.generate_data() diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index a96cca22e8..5a0a8d5a01 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -1,33 +1,36 @@ +import functools import logging +import yaml +import os import numpy as np -from sys_uncertainties import SYS_DEFINITIONS, SYS_UNC_BY_BIN -import yaml +from sys_uncertainties import SYS_DEFINITIONS, SYS_UNC_BY_BIN from nnpdf_data.filter_utils.utils import prettify_float, symmetrize_errors +current_dir = os.path.dirname(os.path.abspath(__file__)) + yaml.add_representer(float, prettify_float) MW2 = 80.385**2 CMSLUMI13 = 2.5 # % STAT_LABEL = 'stat_uncorr_unc' -TABLE = '' class Extractor: - """ - Extracts kinematics, central data, and uncertainties for a given dataset - - Parameters - ---------- - metadata_file: str - Path to the metadata file - observable: str - The name of the observable for which the data is extracted. The name must - be listed in the metadata file. - """ def __init__(self, metadata_file, observable, mult_factor=1): + """ + Extracts kinematics, central data, and uncertainties for a given dataset + + Parameters + ---------- + metadata_file: str + Path to the metadata file + observable: str + Name of the observable for which the data is extracted. The name must + be listed in the metadata file. + """ # Open metadata and select process with open(metadata_file, 'r') as file: @@ -41,71 +44,35 @@ def __init__(self, metadata_file, observable, mult_factor=1): None, ) if self.metadata is None: - raise Exception(f"{observable} is not listed in the metadata file.") + raise ValueError(f"{observable} is not listed in the metadata file.") - # Initialise dict of tables - self.tables = {} self.observable = observable self.mult_factor = mult_factor - self.kin_labels = self.metadata['kinematic_coverage'] - self.ndata = self.metadata['ndata'] - - def __retrieve_table(self, table_id): - """ - Implementation of the lazy loading for the tables. If the table - is loaded for the first time, it is stored into an internal - container of the class, so that it will not be loaded each time. - - When called, this functions checks if the table has already been stored - and, if that is the case, returns the stored table. - - Parameters - ---------- - table_id: int - Index that specifies the table - - Return - ------ - The table specified by `table_id`. If not previously loaded, it is also - stored into the internal container for future use. - """ - try: - table = self.tables[str(table_id)] - except KeyError: - logging.debug( - f'Table {table_id} has not already been used or stored.' f' Storing the table...' - ) - with open(f'./rawdata/{TABLE}{table_id}.yaml', 'r') as tab: - tab_dict = yaml.safe_load(tab) - self.tables[str(table_id)] = tab_dict - table = tab_dict - return table + + # Load the (only) table used for this dataset + table_id = self.metadata["tables"][0] + with open(f"{current_dir}/rawdata/{table_id}.yaml") as tab: + self.tab_dict = yaml.safe_load(tab) - def __extract_kinematics(self, table: dict): + def _generate_kinematics(self): """ - Extracts the kinematic variables of the single differential - distribution given a table. - - For each bin, it computes the max, min, and mid value of the transverse - momentum of the boson. - - Parameters - ---------- - table: dict - Dictionary containing the bins in the transverse momentum - - Return - ------ - List of bins containing min, max, and mid values for each of the kinematic - observables listed in the `kinematic_coverage` of the metadata file. - + The function generates the kinematics by reading and processing it from + the referenced table. Kinematics is processed in the format of a list of + dictionaries. The keys in each dictionaries specify the label (i.e. name) + for the kinematic variables. For this dataset, they are 'abs_eta' and 'm_W2'. + The labels are taken from the matadata file. The corresponding values are + 'min', 'mid', and 'max'. + + For this dataset, 'm_W2' is used in the computation of the (x,Q2)-map and + does not have any active role in the fit. For that reason, every bin has the + same value. Moreover, only the mid value is used. """ - data = table['independent_variables'][0] - label = self.kin_labels + data = self.tab_dict['independent_variables'][0] + label = self.metadata['kinematic_coverage'] kinematics = [] - for bin in data['values']: - abs_eta_min = bin['low'] - abs_eta_max = bin['high'] + for eta_bin in data['values']: + abs_eta_min = eta_bin['low'] + abs_eta_max = eta_bin['high'] kin_bin = { label[0]: { 'min': abs_eta_min, @@ -115,75 +82,66 @@ def __extract_kinematics(self, table: dict): label[1]: {'min': None, 'mid': MW2, 'max': None}, } kinematics.append(kin_bin) - return kinematics - - def generate_kinematics(self): - """ - Function that generates the kinematics by looping over all the - tables specified in the metadata file. The resulting kinematics - is then saved to a yaml file. It relies on the method - `__extract_kinematics`. - """ - - logging.info(f"Generating kinematics for ATLAS_{self.observable}...") - - # Initialise kinematics list - kinematics = [] - ndata = 0 - table = self.metadata["tables"][0] - tab_dict = self.__retrieve_table(table) - kin = self.__extract_kinematics(tab_dict) - kinematics = np.concatenate([kinematics, kin]) - ndata += len(kin) # Check number of data agrees with metadata - try: - assert self.metadata['ndata'] is not None - assert self.metadata['ndata'] == ndata - except AssertionError as e: - logging.warning( - f"The number of data in the metafile is either wrong or unspecified." - f" The correct number is {ndata}. Please, update the metafile." + ndata = len(kinematics) + if not self.metadata['ndata'] == ndata: + raise ValueError( + f"Mismatch in 'ndata': expected {self.metadata['ndata']}, but got {ndata}" ) - return - return kinematics.tolist() + return kinematics - def generate_data_and_unc(self, mult_factor=1.0): + def _generate_data_and_unc(self): """ - Same as `generate_kinematics`, but for central data points. + Return a list with central data points and two additional lists with the corresponding + statistical uncertainties. For this dataset, statistical uncertainties + are always symmetric. + + The table also provides the corresponding (asymmetric) systematic ucertainty for + data point. However, this uncertainty is not used as it is preferred to adopt the + full break-down of the systematic uncertainties. See `_generate_sym_sys_unc` """ logging.info(f"Generating central data for CMS_{self.observable}...") - dat_central = [] - stat_unc = [] - asy_sys_unc = [] - table = self.metadata['tables'][0] - tab_dict = self.__retrieve_table(table) - tab_dict = tab_dict['dependent_variables'][0]['values'] + + tab_dict = self.tab_dict['dependent_variables'][0]['values'] # Loop over bins + dat_central = [] + stat_unc = [] for rap_bin in tab_dict: - dat_central.append(rap_bin['value'] * mult_factor) - stat_unc.append(rap_bin['errors'][0]['symerror'] * mult_factor) - asy_sys_unc.append( - { - key: value * mult_factor - for key, value in rap_bin['errors'][1]['asymerror'].items() - } - ) - return dat_central, stat_unc, asy_sys_unc + dat_central.append(rap_bin['value'] * self.mult_factor) + stat_unc.append(rap_bin['errors'][0]['symerror'] * self.mult_factor) + return dat_central, stat_unc - def symmetrized_sys_unc(self): - """Symmetrise systematic uncertainties. Returns the symmetrized uncertainty - and the shift to the central data + def _generate_sym_sys_unc(self): + """ + The function reads the full break-down of the systematic uncertainties + as given in the paper. Since such a break-down is not provided in the form of + a table, but rather given as a table in the paper, the list of sources of + systematic uncertainties is read from an external file (`sys_uncertainties.py`) + that copies the table in the paper. + + Some of the uncertainties are given in the form of asymmetric uncertainties. These + asymmetric uncertainties are symmetrized using the usual prescription (see `symmetrize_errors`). + + It returns a list containing a dict for each bin in the absolute rapidity. The keys + in each dictionary are the names of the sources of uncertainties. The values + are dicts with keys 'shift', cotaining the shift from the symmetric prescription, and 'sym_error', + which is the (symmetrized) value of the uncertainty. Note that the shift is zero if the + original source of uncertainty is already symmetric. + + Note that uncertainties are given in percentage relative to the central data point + of the corresponding bin. Moreover, also the shift is a relative value to the central + data point. """ symmetrized_uncs = [] for bin in SYS_UNC_BY_BIN: unc_dict = {} for source in bin: if 'asyserror' in source.keys(): - error = source['asyserror'] - plus = error['high'] - minus = error['low'] + error_high_low = source['asyserror'] + plus = error_high_low['high'] + minus = error_high_low['low'] data_delta, sym_error = symmetrize_errors(plus, minus) unc_dict[source['label']] = {'shift': data_delta, 'sym_error': sym_error} elif 'syserror' in source.keys(): @@ -191,7 +149,14 @@ def symmetrized_sys_unc(self): symmetrized_uncs.append(unc_dict) return symmetrized_uncs - def __build_unc_definitions(self): + def _build_unc_definitions(self): + """ + Build the dictionary containing the definitions of the uncertainties to be + used in the uncertainty data file. + + The definitions of the systematic uncertainties are given in the external + file `sys_uncertainties.py`. + """ unc_definitions = {} # Statistical uncertainty @@ -215,25 +180,32 @@ def __build_unc_definitions(self): def generate_data(self): ''' - Collect central data, kinematics, and uncertainties ans save them + The function collects central data, kinematics, and uncertainties ans save them into yaml files. - ''' - # Get central data and kinematics - central_data, stat_unc, _ = self.generate_data_and_unc(self.mult_factor) - kinematics = self.generate_kinematics() - # Uncertainty definitions - unc_definitions = self.__build_unc_definitions() + The function adds the shifts from the symmetrization prescription to the central + data points before saving them to the yaml file. - sys_artificial = [] # Initialize vector of artificial uncertainties + The systematic uncertainties are given as percentages relative the central data point. + The absolute value of the uncertainty is obtained from the central data point before + the shifts are applied. + ''' + # Get central data, kinematics, and sys uncertainties + central_data, stat_unc = self._generate_data_and_unc() + kinematics = self._generate_kinematics() + symmetrized_sys_uncs = self._generate_sym_sys_unc() - symmetrized_sys_uncs = self.symmetrized_sys_unc() + # Uncertainty definitions + unc_definitions = self._build_unc_definitions() + + # Loop over the bins + sys_artificial = [] # Initialize vector of artificial uncertainties for data_idx, data in enumerate(central_data): shift = 0 - sys_unc_bin = symmetrized_sys_uncs[data_idx] + sys_unc_bin = symmetrized_sys_uncs[data_idx] # Dict of sys sources for the bin - # Statistical uncertainty - unc_dict = {STAT_LABEL: stat_unc[data_idx]} + # Initialize dict of uncertainties + unc_dict = {STAT_LABEL: stat_unc[data_idx]} # Statistical uncertainty # Add shift from symmetrization tmp = {} @@ -251,30 +223,27 @@ def generate_data(self): unc_dict = unc_dict | tmp sys_artificial.append(unc_dict) - - # Local path for yaml files - path = './' # Save kinematics into file logging.info("Dumping kinematics to file...") kinematics_yaml = {'bins': kinematics} - with open(path + self.metadata['kinematics']['file'], 'w') as kin_out_file: - yaml.dump(kinematics_yaml, kin_out_file, sort_keys=False) + kins_file_name = self.metadata['kinematics']['file'] + with open(current_dir + "/" + kins_file_name, 'w') as file: + yaml.dump(kinematics_yaml, file, sort_keys=False) logging.info("Done!") # Save central data into file logging.info("Dumping kinematics to file...") dat_central_yaml = {'data_central': central_data} - file_name = self.metadata['data_central'] - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(dat_central_yaml, dat_out_file, sort_keys=False) + dat_file_name = self.metadata['data_central'] + with open(current_dir + "/" + dat_file_name, 'w') as file: + yaml.dump(dat_central_yaml, file, sort_keys=False) logging.info("Done!") # Save unertainties logging.info("Dumping kinematics to file...") uncertainties_yaml = {'definitions': unc_definitions, 'bins': sys_artificial} - file_name = self.metadata['data_uncertainties'][0] - with open(path + file_name, 'w') as dat_out_file: - yaml.dump(uncertainties_yaml, dat_out_file, sort_keys=False) + unc_file_name = self.metadata['data_uncertainties'][0] + with open(current_dir + "/" + unc_file_name, 'w') as file: + yaml.dump(uncertainties_yaml, file, sort_keys=False) logging.info("Done!") - return kinematics, central_data, sys_artificial diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py index 0d35e707d4..09a81b220d 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/sys_uncertainties.py @@ -2,7 +2,7 @@ The full break-down of the systematic uncertainties is not given in the HepData format. However, Table 1 of the referenced paper provides the different sources of systematic uncertainties bin-by-bin. This table -is reproduced in the following. +is reproduced in the following list of dicts. ''' # Common dict independent of the kinematics @@ -15,18 +15,7 @@ ] SYS_UNC_BY_BIN = [ - # First bin [0, 2.4] - # [ - # *IND_KIN_DICT, - # {'label': 'bkgnorm_sys_unc', 'syserror': 0.5}, - # {'label': 'ptmiss_sys_unc', 'asyserror': {'low': +0.7, 'high': -0.9}}, - # {'label': 'pileup_sys_unc', 'asyserror': {'low': +2.0, 'high': -1.9}}, - # {'label': 'secvrx_sys_unc', 'asyserror': {'low': -1.1, 'high': -1.1}}, - # {'label': 'pdf_sys_unc', 'syserror': 1.2}, - # {'label': 'frag_sys_unc', 'asyserror': {'low': +3.9, 'high': -3.2}}, - # {'label': 'mc_sys_unc', 'asyserror': {'low': +3.6, 'high': -3.3}}, - # ], - # Second bin [0, 0.4] + # First bin [0, 0.4] [ *IND_KIN_DICT, {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +0.9, 'high': -0.8}}, @@ -37,7 +26,7 @@ {'label': 'frag_sys_unc', 'asyserror': {'low': +3.4, 'high': -1.8}}, {'label': 'mc_sys_unc', 'asyserror': {'low': +8.8, 'high': -7.5}}, ], - # Third bin [0.4, 0.8] + # Second bin [0.4, 0.8] [ *IND_KIN_DICT, {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +1.9, 'high': -0.8}}, @@ -48,7 +37,7 @@ {'label': 'frag_sys_unc', 'asyserror': {'low': +7.4, 'high': -5.2}}, {'label': 'mc_sys_unc', 'asyserror': {'low': +9.0, 'high': -11.9}}, ], - # Fourth bin [0.8, 1.3] + # Third bin [0.8, 1.3] [ *IND_KIN_DICT, {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +1.4, 'high': -0.5}}, @@ -59,7 +48,7 @@ {'label': 'frag_sys_unc', 'asyserror': {'low': +3.3, 'high': -3.0}}, {'label': 'mc_sys_unc', 'asyserror': {'low': +7.9, 'high': -6.8}}, ], - # Fifth bin [1.3, 1.8] + # Fourth bin [1.3, 1.8] [ *IND_KIN_DICT, {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +0.8, 'high': -1.0}}, @@ -70,7 +59,7 @@ {'label': 'frag_sys_unc', 'asyserror': {'low': +2.2, 'high': -1.2}}, {'label': 'mc_sys_unc', 'asyserror': {'low': +9.8, 'high': -14.1}}, ], - # Sixth bin [1.8, 2.4] + # Fifth bin [1.8, 2.4] [ *IND_KIN_DICT, {'label': 'bkgnorm_sys_unc', 'asyserror': {'low': +0.0, 'high': -0.6}}, From 10575e9f12bdd8773233452a74f67f0826646a24 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Tue, 7 Jan 2025 14:35:12 +0000 Subject: [PATCH 13/16] run pre-commit --- .../CMS_WCHARM_13TEV/filter_utils.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 5a0a8d5a01..9641ec3c42 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -1,11 +1,9 @@ -import functools import logging -import yaml -import os - -import numpy as np +import os from sys_uncertainties import SYS_DEFINITIONS, SYS_UNC_BY_BIN +import yaml + from nnpdf_data.filter_utils.utils import prettify_float, symmetrize_errors current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -17,6 +15,7 @@ STAT_LABEL = 'stat_uncorr_unc' + class Extractor: def __init__(self, metadata_file, observable, mult_factor=1): @@ -33,7 +32,7 @@ def __init__(self, metadata_file, observable, mult_factor=1): """ # Open metadata and select process - with open(metadata_file, 'r') as file: + with open(metadata_file) as file: metadata = yaml.safe_load(file) self.metadata = next( ( @@ -48,7 +47,7 @@ def __init__(self, metadata_file, observable, mult_factor=1): self.observable = observable self.mult_factor = mult_factor - + # Load the (only) table used for this dataset table_id = self.metadata["tables"][0] with open(f"{current_dir}/rawdata/{table_id}.yaml") as tab: @@ -56,7 +55,7 @@ def __init__(self, metadata_file, observable, mult_factor=1): def _generate_kinematics(self): """ - The function generates the kinematics by reading and processing it from + The function generates the kinematics by reading and processing it from the referenced table. Kinematics is processed in the format of a list of dictionaries. The keys in each dictionaries specify the label (i.e. name) for the kinematic variables. For this dataset, they are 'abs_eta' and 'm_W2'. @@ -127,7 +126,7 @@ def _generate_sym_sys_unc(self): It returns a list containing a dict for each bin in the absolute rapidity. The keys in each dictionary are the names of the sources of uncertainties. The values are dicts with keys 'shift', cotaining the shift from the symmetric prescription, and 'sym_error', - which is the (symmetrized) value of the uncertainty. Note that the shift is zero if the + which is the (symmetrized) value of the uncertainty. Note that the shift is zero if the original source of uncertainty is already symmetric. Note that uncertainties are given in percentage relative to the central data point @@ -197,15 +196,15 @@ def generate_data(self): # Uncertainty definitions unc_definitions = self._build_unc_definitions() - + # Loop over the bins - sys_artificial = [] # Initialize vector of artificial uncertainties + sys_artificial = [] # Initialize vector of artificial uncertainties for data_idx, data in enumerate(central_data): shift = 0 - sys_unc_bin = symmetrized_sys_uncs[data_idx] # Dict of sys sources for the bin + sys_unc_bin = symmetrized_sys_uncs[data_idx] # Dict of sys sources for the bin # Initialize dict of uncertainties - unc_dict = {STAT_LABEL: stat_unc[data_idx]} # Statistical uncertainty + unc_dict = {STAT_LABEL: stat_unc[data_idx]} # Statistical uncertainty # Add shift from symmetrization tmp = {} From 831345ff53ce518f5039b5e0cffda5fde0781140 Mon Sep 17 00:00:00 2001 From: Roy Stegeman Date: Sun, 12 Jan 2025 14:45:19 +0000 Subject: [PATCH 14/16] add some comments to new data implementation --- .../data_WPWM-TOT-UNNORM.yaml | 2 +- .../CMS_WCHARM_13TEV/filter_utils.py | 71 +++++++++++-------- 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml index 2170cc1648..3c03add16f 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/data_WPWM-TOT-UNNORM.yaml @@ -2,5 +2,5 @@ data_central: - 5.82064450e+05 - 4.64341995e+05 - 4.78111725e+05 -- 371450.4 +- 3.71450400e+05 - 2.81840095e+05 diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 9641ec3c42..097818318b 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -10,8 +10,8 @@ yaml.add_representer(float, prettify_float) -MW2 = 80.385**2 -CMSLUMI13 = 2.5 # % +MW2 = 80.385**2 # W mass squared in GeV^2 +CMSLUMI13 = 2.5 # Luminosity uncertainty in percentage STAT_LABEL = 'stat_uncorr_unc' @@ -25,10 +25,13 @@ def __init__(self, metadata_file, observable, mult_factor=1): Parameters ---------- metadata_file: str - Path to the metadata file + Path to the metadata file observable: str - Name of the observable for which the data is extracted. The name must - be listed in the metadata file. + Name of the observable for which the data is extracted. The name + must be listed in the metadata file. + mult_factor : float, optional + Multiplication factor to scale the data. For this dataset it is used + for a scaling from pb to fb, so a factor 1000. """ # Open metadata and select process @@ -66,8 +69,8 @@ def _generate_kinematics(self): does not have any active role in the fit. For that reason, every bin has the same value. Moreover, only the mid value is used. """ - data = self.tab_dict['independent_variables'][0] - label = self.metadata['kinematic_coverage'] + [data] = self.tab_dict['independent_variables'] + label = self.metadata['kinematic_coverage'] # ['abs_eta', 'm_W2'] kinematics = [] for eta_bin in data['values']: abs_eta_min = eta_bin['low'] @@ -102,21 +105,23 @@ def _generate_data_and_unc(self): """ logging.info(f"Generating central data for CMS_{self.observable}...") - tab_dict = self.tab_dict['dependent_variables'][0]['values'] + [data] = self.tab_dict['dependent_variables'] # Loop over bins dat_central = [] stat_unc = [] - for rap_bin in tab_dict: + for rap_bin in data['values']: dat_central.append(rap_bin['value'] * self.mult_factor) - stat_unc.append(rap_bin['errors'][0]['symerror'] * self.mult_factor) + symerror_dict, _asymerror_dict = rap_bin['errors'] + stat_unc.append(symerror_dict['symerror'] * self.mult_factor) + return dat_central, stat_unc def _generate_sym_sys_unc(self): """ The function reads the full break-down of the systematic uncertainties as given in the paper. Since such a break-down is not provided in the form of - a table, but rather given as a table in the paper, the list of sources of + a table in HEPData, but rather given as a table in the paper, the list of sources of systematic uncertainties is read from an external file (`sys_uncertainties.py`) that copies the table in the paper. @@ -125,7 +130,7 @@ def _generate_sym_sys_unc(self): It returns a list containing a dict for each bin in the absolute rapidity. The keys in each dictionary are the names of the sources of uncertainties. The values - are dicts with keys 'shift', cotaining the shift from the symmetric prescription, and 'sym_error', + are dicts with keys 'shift', containing the shift from the symmetric prescription, and 'sym_error', which is the (symmetrized) value of the uncertainty. Note that the shift is zero if the original source of uncertainty is already symmetric. @@ -153,7 +158,7 @@ def _build_unc_definitions(self): Build the dictionary containing the definitions of the uncertainties to be used in the uncertainty data file. - The definitions of the systematic uncertainties are given in the external + The definitions of the systematic uncertainties are given in the file `sys_uncertainties.py`. """ unc_definitions = {} @@ -179,7 +184,7 @@ def _build_unc_definitions(self): def generate_data(self): ''' - The function collects central data, kinematics, and uncertainties ans save them + The function collects central data, kinematics, and uncertainties and saves them into yaml files. The function adds the shifts from the symmetrization prescription to the central @@ -197,31 +202,35 @@ def generate_data(self): # Uncertainty definitions unc_definitions = self._build_unc_definitions() - # Loop over the bins + # This loop iterates over the bins of the data.For each bin, it + # 1) computes the sys_artificial uncertainties, consisting of: + # - The effect of symmetrized systematic uncertainties (shift and + # sym_error). + # - The statistical uncertainty from stat_unc array. + # - The luminosity uncertainty. + # 2) Shifts the central data points central_data[data_idx] to account + # for the shift due to the uncertainty symmetrization sys_artificial = [] # Initialize vector of artificial uncertainties - for data_idx, data in enumerate(central_data): - shift = 0 + for data_idx, central_value in enumerate(central_data): sys_unc_bin = symmetrized_sys_uncs[data_idx] # Dict of sys sources for the bin + shift = 0 # Initialize shift from symmetrization - # Initialize dict of uncertainties - unc_dict = {STAT_LABEL: stat_unc[data_idx]} # Statistical uncertainty + # Statistical uncertainty + unc_dict = {STAT_LABEL: stat_unc[data_idx]} + # Lmi uncertainty, 0.01 is to convert from percentage to relative value + unc_dict['corr_lumi_unc'] = central_value * CMSLUMI13 * 0.01 # Add shift from symmetrization - tmp = {} for key, value in sys_unc_bin.items(): - shift += value['shift'] - tmp[key] = value['sym_error'] * central_data[data_idx] * 0.01 - - # Lumi uncertainty - unc_dict['corr_lumi_unc'] = central_data[data_idx] * CMSLUMI13 * 0.01 - - # Shift central - central_data[data_idx] = central_data[data_idx] * (1.0 + shift * 0.01) - - # Add systematic uncertainties - unc_dict = unc_dict | tmp + # 0.01 is to convert from percentage to relative value + shift += value['shift'] * 0.01 + unc_dict[key] = value['sym_error'] * central_value * 0.01 + # output of this loop to be saved in the YAML file: + # 1) list containg uncertainties and + # 2) central values updated to account for the shift due to symmetization sys_artificial.append(unc_dict) + central_data[data_idx] *= 1.0 + shift # Save kinematics into file logging.info("Dumping kinematics to file...") From b95aaf18e96f2acaf996ccabc7dcb021c612de48 Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Sun, 12 Jan 2025 17:11:28 +0000 Subject: [PATCH 15/16] Correct docstring --- .../nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py index 097818318b..2934b735b0 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/filter_utils.py @@ -95,7 +95,7 @@ def _generate_kinematics(self): def _generate_data_and_unc(self): """ - Return a list with central data points and two additional lists with the corresponding + Return a list with central data points and a list with the corresponding statistical uncertainties. For this dataset, statistical uncertainties are always symmetric. From be779ee6dc5d04107ba028b201ff927833561a0b Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 13 Jan 2025 10:41:01 +0000 Subject: [PATCH 16/16] Add legacy data central to legacy variants --- nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml index e28be8bbc5..bf7e62fe65 100644 --- a/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml +++ b/nnpdf_data/nnpdf_data/commondata/CMS_WCHARM_13TEV/metadata.yaml @@ -55,7 +55,9 @@ implemented_observables: legacy: data_uncertainties: - uncertainties_legacy_WPWM-TOT-UNNORM.yaml + data_central: data_legacy_WPWM-TOT-UNNORM.yaml legacy_10: data_uncertainties: - uncertainties_legacy_WPWM-TOT-UNNORM_sys_10.yaml + data_central: data_legacy_WPWM-TOT-UNNORM.yaml ported_from: CMS_WCHARM_DIFF_UNNORM_13TEV