diff --git a/Calibration/HcalCalibAlgos/macros/CalibCorr.C b/Calibration/HcalCalibAlgos/macros/CalibCorr.C
index 98c60921e3cd8..6374f6d76b01b 100644
--- a/Calibration/HcalCalibAlgos/macros/CalibCorr.C
+++ b/Calibration/HcalCalibAlgos/macros/CalibCorr.C
@@ -7,8 +7,16 @@
 // Defines depth of the DetId guided by the value of truncateFlag
 // double puFactor(type, ieta, pmom, eHcal, ediff, debug)
 //      Returns a multiplicative factor to the energy as PU correction
+//      int type = The type of data being considered
+//                 (1: Run 1 old; 2: Run 1 new; 3: Run 2 2016;
+//                  4: Run 2 2017; 5: Run 2 2018; 6: Run 3 old;
+//                  7: Run 3 June 2021; 97: dlphin Try 3;
+//                  98: dlphin Try 2; 99: dlphin Try 1)
 // double puFactorRho(type, ieta, rho, double eHcal)
 //      Returns a multiplicative factor as PU correction evaluated from rho
+//      int type = The type of data being considered
+//                 (1: 2017 Data; 2: 2017 MC; 3: 2018 MC; 4: 2018AB;
+//                  5: 2018BC; 6: 2016 MC)
 // double puweight(vtx)
 //      Return PU weight for QCD PU sample
 // bool fillChain(chain, inputFileList)
diff --git a/Calibration/HcalCalibAlgos/macros/CalibSort.C b/Calibration/HcalCalibAlgos/macros/CalibSort.C
index 2db36a495f84c..2a6092a8b9166 100644
--- a/Calibration/HcalCalibAlgos/macros/CalibSort.C
+++ b/Calibration/HcalCalibAlgos/macros/CalibSort.C
@@ -1635,6 +1635,11 @@ void CalibFitPU::Loop(bool extract_PU_parameters, std::string fileName) {
   }
   //End of Event Loop to extract PU correction parameters

+  for (int k = 0; k < n; k++)
+    std::cout << "Bin " << k << " for 2D Hist " << vec_h2[k] << ":" << vec_h2[k]->GetEntries() << " Graph "
+              << vec_gr[k] << ":" << points[k] << " Profile " << vec_hp[k] << ":" << vec_hp[k]->GetEntries()
+              << std::endl;
+
   std::ofstream myfile0, myfile1, myfile2;
   sprintf(filename, "%s_par2d.txt", fileName.c_str());
   myfile0.open(filename);
@@ -1677,7 +1682,7 @@ void CalibFitPU::Loop(bool extract_PU_parameters, std::string fileName) {
     sprintf(namepng, "%s.png", pad1->GetName());
     pad1->Print(namepng);

-    TF1 *f2 = ((k < 2) ? (new TF1("f2", "[0]+[1]*x", 0, 5)) : (new TF1("f1", "[0]+[1]*x+[2]*x*x", 0, 5)));
+    TF1 *f2 = ((k < 2) ? (new TF1("f2", "[0]+[1]*x", 0, 5)) : (new TF1("f2", "[0]+[1]*x+[2]*x*x", 0, 5)));
     sprintf(name, "c_ieta%dPr", k);
     TCanvas *pad2 = new TCanvas(name, name, 500, 500);
     pad2->SetLeftMargin(0.10);
@@ -1706,7 +1711,7 @@ void CalibFitPU::Loop(bool extract_PU_parameters, std::string fileName) {
     sprintf(namepng, "%s.png", pad2->GetName());
     pad2->Print(namepng);

-    TF1 *f3 = ((k < 2) ? (new TF1("f3", "[0]+[1]*x", 0, 5)) : (new TF1("f1", "[0]+[1]*x+[2]*x*x", 0, 5)));
+    TF1 *f3 = ((k < 2) ? (new TF1("f3", "[0]+[1]*x", 0, 5)) : (new TF1("f3", "[0]+[1]*x+[2]*x*x", 0, 5)));
     sprintf(name, "c_ieta%dGr", k);
     TCanvas *pad3 = new TCanvas(name, name, 500, 500);
     pad3->SetLeftMargin(0.10);
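The one-character renames above matter because ROOT registers every TF1 under the name passed to its constructor: the quadratic branch used to create an object named "f1" while assigning it to the variables f2/f3, so any later name-based lookup could pick up the wrong function. For illustration only, a minimal PyROOT sketch of the same linear-versus-quadratic selection with a consistently named function (make_fit_fn is a hypothetical helper, not part of the patch):

    import ROOT

    def make_fit_fn(k):
        # the first two ieta bins get a linear form, the others a quadratic,
        # mirroring the (k < 2) ternary in CalibFitPU::Loop; ROOT registers
        # the TF1 under the constructor name, so both branches must use the
        # name that the rest of the macro expects
        if k < 2:
            return ROOT.TF1("f2", "[0]+[1]*x", 0, 5)
        return ROOT.TF1("f2", "[0]+[1]*x+[2]*x*x", 0, 5)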
(new TF1("f3", "[0]+[1]*x", 0, 5)) : (new TF1("f3", "[0]+[1]*x+[2]*x*x", 0, 5))); sprintf(name, "c_ieta%dGr", k); TCanvas *pad3 = new TCanvas(name, name, 500, 500); pad3->SetLeftMargin(0.10); diff --git a/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py b/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py index af41cb0f0ec1b..f2aea052e93fb 100644 --- a/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py +++ b/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py @@ -1,14 +1,14 @@ ###################################################################################### # Evaluates regressor from loaded model # Usage: -# python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model1.h5 -B endcap -O corrfac1.txt -# python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model2.h5 -B barrel -O corrfac2.txt +# source /cvmfs/sft.cern.ch/lcg/views/LCG_97apython3/x86_64-centos7-gcc8-opt/setup.csh +# python3 isotrackApplyRegressor.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -M ./models/model1.h5 -B endcap -O corrfac1.txt +# python3 isotrackApplyRegressor.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -M ./models/model2.h5 -B barrel -O corrfac2.txt ###################################################################################### # coding: utf-8 # In[1]: - import pandas as pd import numpy as np import matplotlib.pyplot as plt @@ -30,8 +30,7 @@ # In[2]: parser = argparse.ArgumentParser() -parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root") -#parser.add_argument("-PU", "--filePU",help="input PU file",default="/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root") +parser.add_argument("-PU", "--filePU",help="input PU file",default="root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root") parser.add_argument("-B", "--ifbarrel",help="barrel / endcap",default='barrel') parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5") parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt") diff --git a/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py b/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py index de33ab542d99f..c44a1f492cfb5 100644 --- a/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py +++ b/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py @@ -1,7 +1,8 @@ ###################################################################################### # Makes pkl and text files comparing PU and noPU samples for training regressor and other stuff # Usage: -# python3 isotrackNtupler.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -NPU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root -O isotk_relval +# source /cvmfs/sft.cern.ch/lcg/views/LCG_97apython3/x86_64-centos7-gcc8-opt/setup.csh +# python3 isotrackNtupler.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -NPU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -O isotk_relval 
diff --git a/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py b/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py
index de33ab542d99f..c44a1f492cfb5 100644
--- a/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py
+++ b/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py
@@ -1,7 +1,8 @@
 ######################################################################################
 # Makes pkl and text files comparing PU and noPU samples for training regressor and other stuff
 # Usage:
-# python3 isotrackNtupler.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -NPU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root -O isotk_relval
+# source /cvmfs/sft.cern.ch/lcg/views/LCG_97apython3/x86_64-centos7-gcc8-opt/setup.csh
+# python3 isotrackNtupler.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -NPU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -O isotk_relval
 ######################################################################################
@@ -13,8 +14,8 @@
 import matplotlib.pyplot as plt

 parser = argparse.ArgumentParser()
-parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root")
-parser.add_argument("-NPU", "--fileNPU",help="input no PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root")
+parser.add_argument("-PU", "--filePU",help="input PU file",default="root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root")
+parser.add_argument("-NPU", "--fileNPU",help="input no PU file",default="root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root")
 parser.add_argument("-O", "--opfilename",help="ouput file name",default="isotk_relval")
diff --git a/Calibration/HcalCalibAlgos/macros/isotrackRootTreeMaker.py b/Calibration/HcalCalibAlgos/macros/isotrackRootTreeMaker.py
index 96c5efc4f63f9..0e911628efa59 100644
--- a/Calibration/HcalCalibAlgos/macros/isotrackRootTreeMaker.py
+++ b/Calibration/HcalCalibAlgos/macros/isotrackRootTreeMaker.py
@@ -1,7 +1,8 @@
 ######################################################################################
 # Makes pkl, root and text files comparing PU and noPU samples for training regressor and other stuff
 # Usage:
-# python3 isotrackRootTreeMaker.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -NPU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root -O isotrackRelval
+# source /cvmfs/sft.cern.ch/lcg/views/LCG_97apython3/x86_64-centos7-gcc8-opt/setup.csh
+# python3 isotrackRootTreeMaker.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -NPU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -O isotrackRelval
 ######################################################################################

 import uproot
diff --git a/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py b/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py
index 4abcc261de16a..309e47196d272 100644
--- a/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py
+++ b/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py
@@ -143,7 +143,7 @@ def propweights(y_true):
 # In[6]:

-from keras import optimizers
+from tensorflow.keras import optimizers
 print ("creating model=========>")
 model = Sequential()
 model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
@@ -153,7 +153,7 @@ def propweights(y_true):
 model.add(Dense(60, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
 model.add(Dense(1))

-RMS = keras.optimizers.RMSprop(lr=0.001, rho=0.9)
+RMS = tensorflow.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)
 # Compile model
 print ("compilation up next=======>")
 model.compile(loss='logcosh',optimizer='adam')
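The two hunks above track the tensorflow.keras API: the optimizers module moved under tensorflow.keras, and RMSprop's lr argument became learning_rate. A minimal self-contained sketch with a toy one-layer model, illustrative only; note that the script still compiles with optimizer='adam', so the RMS instance it builds takes effect only if passed to compile():

    from tensorflow.keras import optimizers
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense

    model = Sequential([Dense(1, input_shape=(12,))])
    RMS = optimizers.RMSprop(learning_rate=0.001, rho=0.9)
    # passing the instance (rather than the string 'adam') is what makes
    # the RMSprop settings above take effect during training
    model.compile(loss='logcosh', optimizer=RMS)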
diff --git a/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressorNoOptimization.py b/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressorNoOptimization.py
new file mode 100644
index 0000000000000..08b8817ebc6e6
--- /dev/null
+++ b/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressorNoOptimization.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+######################################################################################
+# Trains regressor and saves model for evaluation
+# Usage:
+# python3 isotrackTrainRegressorNoOptimization.py -I isotk_relval_hi.pkl -V 1
+# python3 isotrackTrainRegressorNoOptimization.py -I isotk_relval_lo.pkl -V 2
+######################################################################################
+
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import argparse
+import tensorflow.keras
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.layers import Dropout
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.utils import plot_model
+from tensorflow.keras import regularizers
+from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split, cross_val_score
+from sklearn.metrics import roc_curve, auc
+from tensorflow.keras.layers import Activation
+from tensorflow.keras import backend as K
+from tensorflow.keras.models import save_model
+import tensorflow as tf
+from tensorflow import keras
+
+
+# In[37]:
+
+
+parser1 = argparse.ArgumentParser()
+parser1.add_argument("-I", "--input",help="input file",default="isotk_relval_lowinter1.pkl")
+parser1.add_argument("-V", "--modelv",help="model version (any number)",default="2")
+
+args = parser1.parse_args()
+
+fName = args.input
+modv = args.modelv
+
+
+# In[38]:
+
+
+print ("file Name:", fName)
+print ("modv :", modv)
+
+
+# In[39]:
+
+
+df = pd.read_pickle(fName)
+print ("vars in file:",df.keys())
+print("events in df original:",df.shape[0])
+df = df.loc[df['t_eHcal_y'] > 20]
+print("events in df after energy cut:",df.shape[0])
+df['t_eHcal_xun'] = df['t_eHcal_x']
+df['t_delta_un'] = df['t_delta']
+df['t_ieta_un'] = df['t_ieta']
+
+
+# In[40]:
+
+
+mina = []
+maxa = []
+keya = []
+
+for i in df.keys():
+    keya.append(i)
+    mina.append(df[i].min())
+    maxa.append(df[i].max())
+
+print('var = ',keya)
+print('mina = ',mina)
+print('maxa = ',maxa)
+
+
+# In[41]:
+
+
+cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
+#df[cols_to_stand] = df[cols_to_stand].apply(lambda x: (x - x.mean()) /(x.std()))
+df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
+
+data = df.values
+print ('data shape:',data.shape)
+targets = data[:,12]
+print ('targets shape:', targets.shape)
+print ("vars in file:",df.keys())
+
+
+# In[42]:
+
+
+data = df.values
+ntest = 20000
+testindx = data.shape[0] - ntest
+# data.shape[0] is the number of events and data.shape[1] the number of
+# columns (18); the last ntest events are kept aside as the test sample
+print ('data shape:',data.shape[0])
+print ('testindx: ',testindx)
+X_train = data[:testindx,0:12]
+Y_train = data[:testindx,12]
+X_test = data[testindx:,:]
+print ("shape of X_train:",X_train.shape)
+print ("shape of Y_train:",Y_train.shape)
+print ("shape of X_test:",X_test.shape)
+meany = np.mean(Y_train)
+print ("mean y:",meany)
+stdy = np.std(Y_train)
+print ("std y:", stdy)
+
+
+# In[43]:
+
+
+############################################# Marina's correction formula
+a0 = [0.973, 0.998, 0.992, 0.965]
+a1 = [0, -0.318, -0.261, -0.406]
+a2 = [0, 0, 0.047, 0.089]
+
+def fac0(jeta):
+    PU_IETA_1 = 9
+    PU_IETA_2 = 16
+    PU_IETA_3 = 25
+    idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
+    return a0[idx]
+
+def fac1(jeta):
+    PU_IETA_1 = 9
+    PU_IETA_2 = 16
+    PU_IETA_3 = 25
+    idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
+    return a1[idx]
+
+def fac2(jeta):
+    PU_IETA_1 = 9
+    PU_IETA_2 = 16
+    PU_IETA_3 = 25
+    idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
+    return a2[idx]
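fac0/fac1/fac2 repeat the same ieta binning three times. For illustration only (not part of the file), an equivalent lookup with np.searchsorted that returns all three coefficients at once and already accepts arrays, which would also make the np.vectorize wrappers below unnecessary:

    import numpy as np

    a0 = np.array([0.973, 0.998, 0.992, 0.965])
    a1 = np.array([0.0, -0.318, -0.261, -0.406])
    a2 = np.array([0.0, 0.0, 0.047, 0.089])

    def coeffs(jeta):
        # side='right' reproduces int(jeta >= 9) + int(jeta >= 16) + int(jeta >= 25)
        idx = np.searchsorted([9, 16, 25], np.abs(jeta), side='right')
        return a0[idx], a1[idx], a2[idx]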
+
+
+# In[44]:
+
+
+vec0 = np.vectorize(fac0)
+vec1 = np.vectorize(fac1)
+vec2 = np.vectorize(fac2)
+
+eop = (X_test[:,15]/X_test[:,13])
+dop = (X_test[:,16]/X_test[:,13])
+print ("eop: ", eop)
+
+mcorrval = vec0(abs(X_test[:,17])) + vec1(abs(X_test[:,17]))*eop*dop*( 1 + vec2(abs(X_test[:,17]))*dop)
+
+
+# In[45]:
+
+
+def propweights(y_true):
+    weight = np.copy(y_true)
+    weight[abs(y_true - meany) > 0] = 1.90*abs(y_true - meany)/stdy
+    weight[abs(y_true - meany) < stdy] = 1
+    print ("weight : ", weight)
+    return weight
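The masked assignments in propweights amount to: weight 1 inside one standard deviation of the mean, and 1.90·|z| outside, where z is the standardized residual of the target. A compact equivalent, shown only as a cross-check of that reading:

    import numpy as np

    def propweights_vec(y_true, meany, stdy):
        # 1 inside one sigma, 1.90*|z| outside, matching propweights above
        z = np.abs(y_true - meany) / stdy
        return np.where(z < 1.0, 1.0, 1.90 * z)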
+
+
+# In[46]:
+
+
+from tensorflow.keras import optimizers
+print ("creating model=========>")
+model = Sequential()
+model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
+#model.add(Dropout(0.2))
+model.add(Dense(271, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(301, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(241, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(1))
+
+
+# In[47]:
+
+
+print ("compilation up next=======>")
+model.compile(loss='logcosh',optimizer='adam')
+model.summary()
+#print ("Y_train : ", Y_train)
+print ("fitting now=========>")
+history = model.fit(X_train,Y_train , batch_size=5000, epochs=50, validation_split=0.2, verbose=1,sample_weight=propweights(Y_train))
+
+
+# In[48]:
+
+
+plt.plot(history.history['loss'])
+plt.plot(history.history['val_loss'])
+plt.title('model loss')
+plt.ylabel('loss')
+plt.xlabel('epoch')
+plt.legend(['train', 'validation'], loc='upper left')
+plt.savefig(modv+'_loss_distributions_lowWinter.png')
+#plt.show()
+plt.close()
+
+
+# In[49]:
+
+
+preds = model.predict(X_test[:,0:12])
+targets = X_test[:,12]
+uncorrected = X_test[:,15]
+marinascorr = X_test[:,15]*mcorrval
+plt.hist(preds, bins =100, range=(0,200),label='PU regression',alpha=0.6)
+plt.hist(targets, bins =100, range=(0,200),label='no PU',alpha=0.6)
+plt.hist(uncorrected, bins =100, range=(0,200),label='uncorrected',alpha=0.6)
+#plt.hist(marinascorr, bins =100, range=(0,200),label='marinas correction',alpha=0.6)
+plt.yscale('log')
+plt.title("Energy distribution")
+plt.legend(loc='upper right')
+plt.savefig(modv+'_energy_distributions_lowWinter.png')
+#plt.show()
+plt.close()
+
+preds = preds.reshape(preds.shape[0])
+print (preds.shape)
+
+
+# In[50]:
+
+
+plt.hist(abs(uncorrected -targets)/targets*100, bins =100, range=(0,100),label='uncorrected',alpha=0.6)
+#plt.hist(abs(marinascorr -targets)/targets*100, bins =100, range=(0,100),label='marinas correction',alpha=0.6)
+plt.hist(100*abs(preds -targets)/targets, bins =100, range=(0,100),label='PU correction',alpha=0.6)
+#plt.yscale('log')
+plt.title("Error distribution")
+plt.legend(loc='upper right')
+plt.savefig(modv+'_errors_low.png')
+#plt.show()
+plt.close()
+
+
+# In[51]:
+
+
+plt.scatter(targets, uncorrected,alpha=0.3,label='uncorr')
+plt.scatter(targets, preds,alpha=0.3,label='PUcorr')
+plt.xlabel('True Values')
+plt.ylabel('Predictions')
+plt.legend(loc='upper right')
+lims = [0, 200]
+plt.xlim(lims)
+plt.ylim(lims)
+_ = plt.plot(lims, lims)
+plt.savefig(modv+'_energyscatt_lowWinter.png')
+#plt.show()
+plt.close()
+
+
+# In[52]:
+
+
+pmom = X_test[:,13]
+plt.hist(targets/pmom, bins =100, range=(0,5),label='E/p noPU',alpha=0.6)
+#plt.hist(preds/pmom, bins =100, range=(0,5),label='E/p PUcorr',alpha=0.6)
+#plt.hist(uncorrected/pmom, bins =100, range=(0,5),label='E/p uncorr',alpha=0.6)
+plt.hist(marinascorr/pmom, bins =100, range=(0,5),label='E/p marina corr',alpha=0.6)
+plt.legend(loc='upper right')
+plt.yscale('log')
+plt.title("E/p distributions")
+plt.savefig(modv+'_eopdist_lowWinter.png')
+#plt.show()
+plt.close()
+
+
+# In[53]:
+
+
+import os
+# save the trained model for later evaluation by isotrackApplyRegressor.py
+if not os.path.exists('models'):
+    os.makedirs('models')
+model.save('models/model_BarrelWinter'+modv+'.h5')
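As a closing sanity check, a short sketch of the save/load roundtrip; it assumes the script's model, modv, and X_test are still in scope, and load_model is the tensorflow.keras counterpart of the save_model import at the top of the file:

    import numpy as np
    from tensorflow.keras.models import load_model

    # reload the file written by model.save() above and confirm the
    # reloaded network reproduces the in-memory predictions
    new_model = load_model('models/model_BarrelWinter' + modv + '.h5')
    preds_reloaded = new_model.predict(X_test[:, 0:12])
    assert np.allclose(preds_reloaded, model.predict(X_test[:, 0:12]), atol=1e-5)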