Skip to content

Commit

Permalink
Merge pull request #31680 from kdlong/nanoParsingAndreasFix
Browse files Browse the repository at this point in the history
Workaround for NanoAOD LHE weights in newer MadGraph
  • Loading branch information
cmsbuild authored Nov 9, 2020
2 parents 92154ca + 9dc6703 commit 5c3035d
Showing 1 changed file with 44 additions and 12 deletions.
56 changes: 44 additions & 12 deletions PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::vector<ScaleVarWeight> scaleVariationIDs;
std::vector<PDFSetWeights> pdfSetWeightIDs;
std::vector<std::string> lheReweighingIDs;
bool isFirstGroup = true;

std::regex weightgroupmg26x("<weightgroup\\s+(?:name|type)=\"(.*)\"\\s+combine=\"(.*)\"\\s*>");
std::regex weightgroup("<weightgroup\\s+combine=\"(.*)\"\\s+(?:name|type)=\"(.*)\"\\s*>");
Expand All @@ -557,6 +558,11 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::regex scalewmg26x(
"<weight\\s+(?:.*\\s+)?id=\"(\\d+)\"\\s*(?:lhapdf=\\d+|dyn=\\s*-?\\d+)?\\s*((?:[mM][uU][rR]|renscfact)=\"("
"\\S+)\"\\s+(?:[mM][uU][Ff]|facscfact)=\"(\\S+)\")(\\s+.*)?</weight>");
std::regex scalewmg26xNew(
"<weight\\s*((?:[mM][uU][fF]|facscfact)=\"(\\S+)\"\\s+(?:[mM][uU][Rr]|renscfact)=\"(\\S+)\").+id=\"(\\d+)\"(."
"*)?</weight>");

// Example of a weight line matched by scalewmg26xNew (MUF/MUR attributes come before id):
//   <weight MUF="1.0" MUR="2.0" PDF="306000" id="1006"> MUR=2.0 </weight>
std::regex scalew(
"<weight\\s+(?:.*\\s+)?id=\"(\\d+)\">\\s*(?:lhapdf=\\d+|dyn=\\s*-?\\d+)?\\s*((?:mu[rR]|renscfact)=(\\S+)\\s+("
"?:mu[Ff]|facscfact)=(\\S+)(\\s+.*)?)</weight>");
Expand All @@ -567,6 +573,14 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
"<weight\\s+id=\"(\\d+)\"\\s*MUR=\"(?:\\S+)\"\\s*MUF=\"(?:\\S+)\"\\s*(?:PDF "
"set|lhapdf|PDF|pdfset)\\s*=\\s*\"(\\d+)\"\\s*>\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?</"
"weight>");
// Example of a weightgroup header with the combine attribute before name:
//   <weightgroup combine="symmhessian+as" name="NNPDF31_nnlo_as_0118_mc_hessian_pdfas">

// Example of a weight line matched by pdfwmg26xNew (MUF, MUR and PDF attributes come before id):
//   <weight MUF="1.0" MUR="1.0" PDF="325300" id="1048"> PDF=325300 MemberID=0 </weight>
std::regex pdfwmg26xNew(
"<weight\\s+MUF=\"(?:\\S+)\"\\s*MUR=\"(?:\\S+)\"\\s*PDF=\"(?:\\S+)\"\\s*id=\"(\\S+)\"\\s*>"
"\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?</"
"weight>");

std::regex rwgt("<weight\\s+id=\"(.+)\">(.+)?(</weight>)?");
std::smatch groups;
for (auto iter = lheInfo->headers_begin(), end = lheInfo->headers_end(); iter != end; ++iter) {
Expand All @@ -581,12 +595,16 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
bool missed_weightgroup =
false; //Needed because in some of the samples ( produced with MG26X ) a small part of the header info is ordered incorrectly
bool ismg26x = false;
bool ismg26xNew = false;
for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines;
++iLine) { //First start looping through the lines to see which weightgroup pattern is matched
boost::replace_all(lines[iLine], "&lt;", "<");
boost::replace_all(lines[iLine], "&gt;", ">");
if (std::regex_search(lines[iLine], groups, weightgroupmg26x)) {
ismg26x = true;
} else if (std::regex_search(lines[iLine], groups, scalewmg26xNew) ||
std::regex_search(lines[iLine], groups, pdfwmg26xNew)) {
ismg26xNew = true;
}
}
for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; ++iLine) {
Expand All @@ -598,17 +616,26 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
groupname = groups.str(1);
if (lheDebug)
std::cout << ">>> Looks like the beginning of a weight group for '" << groupname << "'" << std::endl;
if (groupname.find("scale_variation") == 0 || groupname == "Central scale variation") {
if (lheDebug)
if (groupname.find("scale_variation") == 0 || groupname == "Central scale variation" || isFirstGroup) {
if (lheDebug && groupname.find("scale_variation") != 0 && groupname != "Central scale variation")
std::cout << ">>> First weight is not scale variation, but assuming is the Central Weight" << std::endl;
else if (lheDebug)
std::cout << ">>> Looks like scale variation for theory uncertainties" << std::endl;
isFirstGroup = false;
for (++iLine; iLine < nLines; ++iLine) {
if (lheDebug)
if (lheDebug) {
std::cout << " " << lines[iLine];
if (std::regex_search(lines[iLine], groups, ismg26x ? scalewmg26x : scalew)) {
}
if (std::regex_search(
lines[iLine], groups, ismg26x ? scalewmg26x : (ismg26xNew ? scalewmg26xNew : scalew))) {
if (lheDebug)
std::cout << " >>> Scale weight " << groups[1].str() << " for " << groups[3].str() << " , "
<< groups[4].str() << " , " << groups[5].str() << std::endl;
scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4));
if (ismg26xNew) {
scaleVariationIDs.emplace_back(groups.str(4), groups.str(1), groups.str(3), groups.str(2));
} else {
scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4));
}
} else if (std::regex_search(lines[iLine], endweightgroup)) {
if (lheDebug)
std::cout << ">>> Looks like the end of a weight group" << std::endl;
Expand All @@ -621,7 +648,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end "
"of the group."
<< std::endl;
if (ismg26x)
if (ismg26x || ismg26xNew)
missed_weightgroup = true;
--iLine; // rewind by one, and go back to the outer loop
break;
Expand Down Expand Up @@ -653,7 +680,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end "
"of the group."
<< std::endl;
if (ismg26x)
if (ismg26x || ismg26xNew)
missed_weightgroup = true;
--iLine; // rewind by one, and go back to the outer loop
break;
Expand Down Expand Up @@ -690,7 +717,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end "
"of the group."
<< std::endl;
if (ismg26x)
if (ismg26x || ismg26xNew)
missed_weightgroup = true;
--iLine; // rewind by one, and go back to the outer loop
break;
Expand All @@ -704,10 +731,15 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
for (++iLine; iLine < nLines; ++iLine) {
if (lheDebug)
std::cout << " " << lines[iLine];
if (std::regex_search(lines[iLine], groups, ismg26x ? pdfwmg26x : pdfwOld)) {
if (std::regex_search(
lines[iLine], groups, ismg26x ? pdfwmg26x : (ismg26xNew ? pdfwmg26xNew : pdfwOld))) {
unsigned int member = 0;
if (ismg26x == 0) {
if (!ismg26x && !ismg26xNew) {
member = std::stoi(groups.str(2));
} else if (ismg26xNew) {
if (!groups.str(3).empty()) {
member = std::stoi(groups.str(3));
}
} else {
if (!groups.str(4).empty()) {
member = std::stoi(groups.str(4));
Expand Down Expand Up @@ -736,7 +768,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end "
"of the group."
<< std::endl;
if (ismg26x)
if (ismg26x || ismg26xNew)
missed_weightgroup = true;
--iLine; // rewind by one, and go back to the outer loop
break;
Expand Down Expand Up @@ -778,7 +810,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end "
"of the group."
<< std::endl;
if (ismg26x)
if (ismg26x || ismg26xNew)
missed_weightgroup = true;
--iLine; // rewind by one, and go back to the outer loop
break;
Expand Down

0 comments on commit 5c3035d

Please sign in to comment.