From 8169bf63ef1155cfb60d435eaf46fd70e0333178 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Wed, 4 Dec 2019 21:13:52 +0100 Subject: [PATCH 1/7] Add option to enforce GUID in file name to (input) RootFile --- .../test/cmsExceptionsFatalOption_cff.py | 3 +- FWCore/Utilities/interface/EDMException.h | 2 ++ FWCore/Utilities/src/EDMException.cc | 1 + IOPool/Input/src/RootEmbeddedFileSequence.cc | 10 ++++-- IOPool/Input/src/RootEmbeddedFileSequence.h | 1 + IOPool/Input/src/RootFile.cc | 25 ++++++++++++++- IOPool/Input/src/RootFile.h | 16 +++++++--- IOPool/Input/src/RootPrimaryFileSequence.cc | 10 ++++-- IOPool/Input/src/RootPrimaryFileSequence.h | 1 + IOPool/Input/src/RootSecondaryFileSequence.cc | 9 ++++-- IOPool/Input/src/RootSecondaryFileSequence.h | 1 + IOPool/Input/test/PoolGUIDTest_cfg.py | 31 +++++++++++++++++++ IOPool/Input/test/PrePoolInputTest_cfg.py | 22 ++++++++----- IOPool/Input/test/TestPoolInput.sh | 8 ++++- 14 files changed, 119 insertions(+), 21 deletions(-) create mode 100644 IOPool/Input/test/PoolGUIDTest_cfg.py diff --git a/FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py b/FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py index c78bf7f515d12..9253a546b5fbd 100644 --- a/FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py +++ b/FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py @@ -30,5 +30,6 @@ 'ProductDoesNotSupportViews', 'ProductDoesNotSupportPtr', 'NotFound', - 'FormatIncompatibility' + 'FormatIncompatibility', + 'FileNameInconsistentWithGUID', ) diff --git a/FWCore/Utilities/interface/EDMException.h b/FWCore/Utilities/interface/EDMException.h index e092475f849b4..8f9d31c0c208d 100644 --- a/FWCore/Utilities/interface/EDMException.h +++ b/FWCore/Utilities/interface/EDMException.h @@ -66,6 +66,8 @@ namespace edm { FileWriteError = 8033, + FileNameInconsistentWithGUID = 8034, + EventGenerationFailure = 8501, CaughtSignal = 9000 diff --git a/FWCore/Utilities/src/EDMException.cc b/FWCore/Utilities/src/EDMException.cc index 0b16f8641a702..93d667507ed7d 100644 --- a/FWCore/Utilities/src/EDMException.cc +++ b/FWCore/Utilities/src/EDMException.cc @@ -42,6 +42,7 @@ namespace edm { FILLENTRY(ExceededResourceRSS), FILLENTRY(ExceededResourceTime), FILLENTRY(FileWriteError), + FILLENTRY(FileNameInconsistentWithGUID), FILLENTRY(EventGenerationFailure), FILLENTRY(CaughtSignal)}; static const std::string kUnknownCode("unknownCode"); diff --git a/IOPool/Input/src/RootEmbeddedFileSequence.cc b/IOPool/Input/src/RootEmbeddedFileSequence.cc index 544c698059bcf..e3c9217158480 100644 --- a/IOPool/Input/src/RootEmbeddedFileSequence.cc +++ b/IOPool/Input/src/RootEmbeddedFileSequence.cc @@ -42,7 +42,8 @@ namespace edm { // and should be deleted from the code. initialNumberOfEventsToSkip_(pset.getUntrackedParameter("skipEvents", 0U)), treeCacheSize_(pset.getUntrackedParameter("cacheSize", roottree::defaultCacheSize)), - enablePrefetching_(false) { + enablePrefetching_(false), + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName", false)) { if (noFiles()) { throw Exception(errors::NoSecondaryFiles) << "RootEmbeddedFileSequence no input files specified for secondary input source.\n"; @@ -138,7 +139,8 @@ namespace edm { currentIndexIntoFile, orderedProcessHistoryIDs_, input_.bypassVersionCheck(), - enablePrefetching_); + enablePrefetching_, + enforceGUIDInFileName_); } void RootEmbeddedFileSequence::skipEntries(unsigned int offset) { @@ -336,5 +338,9 @@ namespace edm { "Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False"); desc.addUntracked("cacheSize", roottree::defaultCacheSize) ->setComment("Size of ROOT TTree prefetch cache. Affects performance."); + desc.addUntracked("enforceGUIDInFileName", false) + ->setComment( + "True: file name part is required to be equal to the GUID of the file\n" + "False: file name can be anything"); } } // namespace edm diff --git a/IOPool/Input/src/RootEmbeddedFileSequence.h b/IOPool/Input/src/RootEmbeddedFileSequence.h index b01dc310fa567..2480ef8e19d43 100644 --- a/IOPool/Input/src/RootEmbeddedFileSequence.h +++ b/IOPool/Input/src/RootEmbeddedFileSequence.h @@ -70,6 +70,7 @@ namespace edm { int initialNumberOfEventsToSkip_; unsigned int treeCacheSize_; bool enablePrefetching_; + bool enforceGUIDInFileName_; }; // class RootEmbeddedFileSequence } // namespace edm #endif diff --git a/IOPool/Input/src/RootFile.cc b/IOPool/Input/src/RootFile.cc index b6e6ed6b2212d..26edcf006927c 100644 --- a/IOPool/Input/src/RootFile.cc +++ b/IOPool/Input/src/RootFile.cc @@ -164,7 +164,8 @@ namespace edm { bool bypassVersionCheck, bool labelRawDataLikeMC, bool usingGoToEvent, - bool enablePrefetching) + bool enablePrefetching, + bool enforceGUIDInFileName) : file_(fileName), logicalFile_(logicalFileName), processConfiguration_(processConfiguration), @@ -187,6 +188,7 @@ namespace edm { savedRunAuxiliary_(), skipAnyEvents_(skipAnyEvents), noEventSort_(noEventSort), + enforceGUIDInFileName_(enforceGUIDInFileName), whyNotFastClonable_(0), hasNewlyDroppedBranch_(), branchListIndexesUnchanged_(false), @@ -1139,6 +1141,27 @@ namespace edm { throw Exception(errors::EventCorruption) << "'Events' tree is corrupted or not present\n" << "in the input file.\n"; } + if (enforceGUIDInFileName_) { + auto begin = file_.rfind("/"); + if (begin == std::string::npos) { + begin = file_.rfind(":"); + if (begin == std::string::npos) { + // shouldn't really happen? + begin = 0; + } else { + begin += 1; + } + } else { + begin += 1; + } + auto end = file_.find(".", begin); + auto guidFromName = std::string_view(file_).substr(begin, end - begin); + if (guidFromName != fid_.fid()) { + throw edm::Exception(edm::errors::FileNameInconsistentWithGUID) + << "GUID " << guidFromName << " extracted from file name " << file_ + << " is inconsistent with the GUID read from the file " << fid_.fid(); + } + } if (fileFormatVersion().hasIndexIntoFile()) { if (runTree().entries() > 0) { diff --git a/IOPool/Input/src/RootFile.h b/IOPool/Input/src/RootFile.h index 0003beb464c99..65cf33caf8ca6 100644 --- a/IOPool/Input/src/RootFile.h +++ b/IOPool/Input/src/RootFile.h @@ -91,7 +91,8 @@ namespace edm { bool bypassVersionCheck, bool labelRawDataLikeMC, bool usingGoToEvent, - bool enablePrefetching); + bool enablePrefetching, + bool enforceGUIDInFileName); RootFile(std::string const& fileName, ProcessConfiguration const& processConfiguration, @@ -113,7 +114,8 @@ namespace edm { std::vector& orderedProcessHistoryIDs, bool bypassVersionCheck, bool labelRawDataLikeMC, - bool enablePrefetching) + bool enablePrefetching, + bool enforceGUIDInFileName) : RootFile(fileName, processConfiguration, logicalFileName, @@ -142,7 +144,8 @@ namespace edm { bypassVersionCheck, labelRawDataLikeMC, false, - enablePrefetching) {} + enablePrefetching, + enforceGUIDInFileName) {} RootFile(std::string const& fileName, ProcessConfiguration const& processConfiguration, @@ -159,7 +162,8 @@ namespace edm { std::vector>::size_type currentIndexIntoFile, std::vector& orderedProcessHistoryIDs, bool bypassVersionCheck, - bool enablePrefetching) + bool enablePrefetching, + bool enforceGUIDInFileName) : RootFile(fileName, processConfiguration, logicalFileName, @@ -188,7 +192,8 @@ namespace edm { bypassVersionCheck, false, false, - enablePrefetching) {} + enablePrefetching, + enforceGUIDInFileName) {} ~RootFile(); @@ -325,6 +330,7 @@ namespace edm { edm::propagate_const> savedRunAuxiliary_; bool skipAnyEvents_; bool noEventSort_; + bool enforceGUIDInFileName_; int whyNotFastClonable_; std::array hasNewlyDroppedBranch_; bool branchListIndexesUnchanged_; diff --git a/IOPool/Input/src/RootPrimaryFileSequence.cc b/IOPool/Input/src/RootPrimaryFileSequence.cc index ff8b17bba1c16..48ecd0115df87 100644 --- a/IOPool/Input/src/RootPrimaryFileSequence.cc +++ b/IOPool/Input/src/RootPrimaryFileSequence.cc @@ -32,7 +32,8 @@ namespace edm { treeCacheSize_(noEventSort_ ? pset.getUntrackedParameter("cacheSize") : 0U), duplicateChecker_(new DuplicateChecker(pset)), usingGoToEvent_(false), - enablePrefetching_(false) { + enablePrefetching_(false), + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName")) { // The SiteLocalConfig controls the TTreeCache size and the prefetching settings. Service pSLC; if (pSLC.isAvailable()) { @@ -137,7 +138,8 @@ namespace edm { input_.bypassVersionCheck(), input_.labelRawDataLikeMC(), usingGoToEvent_, - enablePrefetching_); + enablePrefetching_, + enforceGUIDInFileName_); } bool RootPrimaryFileSequence::nextFile() { @@ -322,6 +324,10 @@ namespace edm { ->setComment( "'strict': Branches in each input file must match those in the first file.\n" "'permissive': Branches in each input file may be any subset of those in the first file."); + desc.addUntracked("enforceGUIDInFileName", false) + ->setComment( + "True: file name part is required to be equal to the GUID of the file\n" + "False: file name can be anything"); EventSkipperByID::fillDescription(desc); DuplicateChecker::fillDescription(desc); diff --git a/IOPool/Input/src/RootPrimaryFileSequence.h b/IOPool/Input/src/RootPrimaryFileSequence.h index e365c109125c9..903e1001c82f5 100644 --- a/IOPool/Input/src/RootPrimaryFileSequence.h +++ b/IOPool/Input/src/RootPrimaryFileSequence.h @@ -77,6 +77,7 @@ namespace edm { edm::propagate_const> duplicateChecker_; bool usingGoToEvent_; bool enablePrefetching_; + bool enforceGUIDInFileName_; }; // class RootPrimaryFileSequence } // namespace edm #endif diff --git a/IOPool/Input/src/RootSecondaryFileSequence.cc b/IOPool/Input/src/RootSecondaryFileSequence.cc index cf5cbe46a7899..e0f14147c48c0 100644 --- a/IOPool/Input/src/RootSecondaryFileSequence.cc +++ b/IOPool/Input/src/RootSecondaryFileSequence.cc @@ -21,7 +21,11 @@ namespace edm { RootSecondaryFileSequence::RootSecondaryFileSequence(ParameterSet const& pset, PoolSource& input, InputFileCatalog const& catalog) - : RootInputFileSequence(pset, catalog), input_(input), orderedProcessHistoryIDs_(), enablePrefetching_(false) { + : RootInputFileSequence(pset, catalog), + input_(input), + orderedProcessHistoryIDs_(), + enablePrefetching_(false), + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName")) { // The SiteLocalConfig controls the TTreeCache size and the prefetching settings. Service pSLC; if (pSLC.isAvailable()) { @@ -85,7 +89,8 @@ namespace edm { orderedProcessHistoryIDs_, input_.bypassVersionCheck(), input_.labelRawDataLikeMC(), - enablePrefetching_); + enablePrefetching_, + enforceGUIDInFileName_); } void RootSecondaryFileSequence::initAssociationsFromSecondary(std::set const& associationsFromSecondary) { diff --git a/IOPool/Input/src/RootSecondaryFileSequence.h b/IOPool/Input/src/RootSecondaryFileSequence.h index 1f12aec0fdc81..6ff9be7c5f884 100644 --- a/IOPool/Input/src/RootSecondaryFileSequence.h +++ b/IOPool/Input/src/RootSecondaryFileSequence.h @@ -45,6 +45,7 @@ namespace edm { std::vector associationsFromSecondary_; std::vector orderedProcessHistoryIDs_; bool enablePrefetching_; + bool enforceGUIDInFileName_; }; // class RootSecondaryFileSequence } // namespace edm #endif diff --git a/IOPool/Input/test/PoolGUIDTest_cfg.py b/IOPool/Input/test/PoolGUIDTest_cfg.py new file mode 100644 index 0000000000000..cc0db354e3559 --- /dev/null +++ b/IOPool/Input/test/PoolGUIDTest_cfg.py @@ -0,0 +1,31 @@ +# Configuration file for PoolInputTest + +import FWCore.ParameterSet.Config as cms +import sys + +argv = [] +foundpy = False +for a in sys.argv: + if foundpy: + argv.append(a) + if ".py" in a: + foundpy = True + +process = cms.Process("TESTRECO") +process.load("FWCore.Framework.test.cmsExceptionsFatal_cff") + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32(-1) +) +process.OtherThing = cms.EDProducer("OtherThingProducer") +process.Analysis = cms.EDAnalyzer("OtherThingAnalyzer") + +process.source = cms.Source("PoolSource", + setRunNumber = cms.untracked.uint32(621), + fileNames = cms.untracked.vstring(argv[0]), + enforceGUIDInFileName = cms.untracked.bool(True) +) + +process.p = cms.Path(process.OtherThing*process.Analysis) + + diff --git a/IOPool/Input/test/PrePoolInputTest_cfg.py b/IOPool/Input/test/PrePoolInputTest_cfg.py index f3d0a9e308576..767a704daf0fc 100644 --- a/IOPool/Input/test/PrePoolInputTest_cfg.py +++ b/IOPool/Input/test/PrePoolInputTest_cfg.py @@ -3,26 +3,34 @@ # Configuration file for PrePoolInputTest import FWCore.ParameterSet.Config as cms -from sys import argv +import sys + +argv = [] +foundpy = False +for a in sys.argv: + if foundpy: + argv.append(a) + if ".py" in a: + foundpy = True process = cms.Process("TESTPROD") process.load("FWCore.Framework.test.cmsExceptionsFatal_cff") process.maxEvents = cms.untracked.PSet( - input = cms.untracked.int32(int(argv[3])) + input = cms.untracked.int32(int(argv[1])) ) process.Thing = cms.EDProducer("ThingProducer") process.output = cms.OutputModule("PoolOutputModule", - fileName = cms.untracked.string(argv[2]) + fileName = cms.untracked.string(argv[0]) ) process.source = cms.Source("EmptySource", - firstRun = cms.untracked.uint32(int(argv[4])), - numberEventsInRun = cms.untracked.uint32(int(argv[5])), - firstLuminosityBlock = cms.untracked.uint32(int(argv[6])), - numberEventsInLuminosityBlock = cms.untracked.uint32(int(argv[7])) + firstRun = cms.untracked.uint32(int(argv[2])), + numberEventsInRun = cms.untracked.uint32(int(argv[3])), + firstLuminosityBlock = cms.untracked.uint32(int(argv[4])), + numberEventsInLuminosityBlock = cms.untracked.uint32(int(argv[5])) ) process.p = cms.Path(process.Thing) diff --git a/IOPool/Input/test/TestPoolInput.sh b/IOPool/Input/test/TestPoolInput.sh index a6ceaf7675bf3..519470d552130 100755 --- a/IOPool/Input/test/TestPoolInput.sh +++ b/IOPool/Input/test/TestPoolInput.sh @@ -4,7 +4,13 @@ function die { echo $1: status $2 ; exit $2; } pushd ${LOCAL_TMP_DIR} -cmsRun ${LOCAL_TEST_DIR}/PrePoolInputTest_cfg.py PoolInputTest.root 11 561 7 6 3 || die 'Failure using PrePoolInputTest_cfg.py' $? +cmsRun -j PoolInputTest_jobreport.xml ${LOCAL_TEST_DIR}/PrePoolInputTest_cfg.py PoolInputTest.root 11 561 7 6 3 || die 'Failure using PrePoolInputTest_cfg.py' $? + +cmsRun ${LOCAL_TEST_DIR}/PoolGUIDTest_cfg.py file:PoolInputTest.root && die 'PoolGUIDTest_cfg.py PoolInputTest.root did not throw an exception' 1 +GUID_NAME=$(fgrep GUID PoolInputTest_jobreport.xml | sed -E 's/<.?GUID>//g').root +cp PoolInputTest.root ${GUID_NAME} +cmsRun ${LOCAL_TEST_DIR}/PoolGUIDTest_cfg.py file:${GUID_NAME} || die 'Failure using PoolGUIDTest_cfg.py ${GUID_NAME}' $? + cp PoolInputTest.root PoolInputOther.root From 81592199412a8a5291a568e0f470ce589f5410e3 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Wed, 4 Dec 2019 23:09:54 +0100 Subject: [PATCH 2/7] Update comment Actions.cc does not exist anymore --- FWCore/Utilities/interface/EDMException.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/FWCore/Utilities/interface/EDMException.h b/FWCore/Utilities/interface/EDMException.h index 8f9d31c0c208d..1b4b08e16d4c7 100644 --- a/FWCore/Utilities/interface/EDMException.h +++ b/FWCore/Utilities/interface/EDMException.h @@ -16,8 +16,7 @@ namespace edm { namespace errors { // If you add a new entry to the set of values, make sure to - // update the translation map in EDMException.cc, the actions - // table in FWCore/Framework/src/Actions.cc, and the configuration + // update the translation map in EDMException.cc, and the configuration // fragment FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py. enum ErrorCodes { From 162d51bfc0ee2af4869d90296baa1edbd372455d Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Thu, 5 Dec 2019 18:49:48 +0100 Subject: [PATCH 3/7] Simplify maxEvents in test --- IOPool/Input/test/PoolGUIDTest_cfg.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/IOPool/Input/test/PoolGUIDTest_cfg.py b/IOPool/Input/test/PoolGUIDTest_cfg.py index cc0db354e3559..e1a2b89fa7ed8 100644 --- a/IOPool/Input/test/PoolGUIDTest_cfg.py +++ b/IOPool/Input/test/PoolGUIDTest_cfg.py @@ -14,9 +14,7 @@ process = cms.Process("TESTRECO") process.load("FWCore.Framework.test.cmsExceptionsFatal_cff") -process.maxEvents = cms.untracked.PSet( - input = cms.untracked.int32(-1) -) +process.maxEvents.input = -1 process.OtherThing = cms.EDProducer("OtherThingProducer") process.Analysis = cms.EDAnalyzer("OtherThingAnalyzer") From 3534c0372497ee32cf5a1df0274220c5aa5659a7 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 6 Dec 2019 16:21:54 +0100 Subject: [PATCH 4/7] Add script to extract GUID and exit code from Framework Job Report --- FWCore/MessageLogger/scripts/edmFjrDump | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100755 FWCore/MessageLogger/scripts/edmFjrDump diff --git a/FWCore/MessageLogger/scripts/edmFjrDump b/FWCore/MessageLogger/scripts/edmFjrDump new file mode 100755 index 0000000000000..7ba788686f5e6 --- /dev/null +++ b/FWCore/MessageLogger/scripts/edmFjrDump @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +from __future__ import print_function +import xml.etree.ElementTree as ET +import argparse + +def printGUID(root): + for f in root.findall("File"): + print(f.find("GUID").text) + +def printExitCode(root): + error = root.find("FrameworkError") + if error is None: + print("0") + return + print(error.get("ExitStatus")) + +def main(opts): + tree = ET.parse(opts.file) + root = tree.getroot() + if opts.guid: + printGUID(root) + if opts.exitCode: + printExitCode(root) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Extract some values from the Framework Job Report") + parser.add_argument("file", type=str, + help="Framework Job Report XML file") + parser.add_argument("--guid", action="store_true", + help="GUID of the output file") + parser.add_argument("--exitCode", action="store_true", + help="Job exit code") + + opts = parser.parse_args() + main(opts) From 2745370749405d1302ce68efb60b48d29f669c1d Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 6 Dec 2019 17:25:43 +0100 Subject: [PATCH 5/7] Check the exit code in the unit test --- IOPool/Input/test/TestPoolInput.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/IOPool/Input/test/TestPoolInput.sh b/IOPool/Input/test/TestPoolInput.sh index 519470d552130..211f3f6107002 100755 --- a/IOPool/Input/test/TestPoolInput.sh +++ b/IOPool/Input/test/TestPoolInput.sh @@ -6,8 +6,13 @@ pushd ${LOCAL_TMP_DIR} cmsRun -j PoolInputTest_jobreport.xml ${LOCAL_TEST_DIR}/PrePoolInputTest_cfg.py PoolInputTest.root 11 561 7 6 3 || die 'Failure using PrePoolInputTest_cfg.py' $? -cmsRun ${LOCAL_TEST_DIR}/PoolGUIDTest_cfg.py file:PoolInputTest.root && die 'PoolGUIDTest_cfg.py PoolInputTest.root did not throw an exception' 1 -GUID_NAME=$(fgrep GUID PoolInputTest_jobreport.xml | sed -E 's/<.?GUID>//g').root +cmsRun -j PoolGuidTest_jobreport.xml ${LOCAL_TEST_DIR}/PoolGUIDTest_cfg.py file:PoolInputTest.root && die 'PoolGUIDTest_cfg.py PoolInputTest.root did not throw an exception' 1 +GUID_EXIT_CODE=$(edmFjrDump --exitCode PoolGuidTest_jobreport.xml) +if [ "x${GUID_EXIT_CODE}" != "x8034" ]; then + echo "Inconsistent GUID test reported exit code ${GUID_EXIT_CODE} which is different from the expected 8034" + exit 1 +fi +GUID_NAME=$(edmFjrDump --guid PoolInputTest_jobreport.xml).root cp PoolInputTest.root ${GUID_NAME} cmsRun ${LOCAL_TEST_DIR}/PoolGUIDTest_cfg.py file:${GUID_NAME} || die 'Failure using PoolGUIDTest_cfg.py ${GUID_NAME}' $? From 04139429ae84dcff5a87dfa37f8279db13b44bcd Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 6 Dec 2019 21:03:41 +0100 Subject: [PATCH 6/7] Move the stem extraction to its own function This way the function can be tested, and the functionality shared elsewhere. --- FWCore/Utilities/interface/stemFromPath.h | 20 +++++++++++++++++++ FWCore/Utilities/src/stemFromPath.cc | 20 +++++++++++++++++++ FWCore/Utilities/test/BuildFile.xml | 4 ++++ FWCore/Utilities/test/test_catch2_main.cc | 2 ++ .../test/test_catch2_stemFromPath.cc | 15 ++++++++++++++ IOPool/Input/src/RootFile.cc | 16 ++------------- 6 files changed, 63 insertions(+), 14 deletions(-) create mode 100644 FWCore/Utilities/interface/stemFromPath.h create mode 100644 FWCore/Utilities/src/stemFromPath.cc create mode 100644 FWCore/Utilities/test/test_catch2_main.cc create mode 100644 FWCore/Utilities/test/test_catch2_stemFromPath.cc diff --git a/FWCore/Utilities/interface/stemFromPath.h b/FWCore/Utilities/interface/stemFromPath.h new file mode 100644 index 0000000000000..b0e8000720e61 --- /dev/null +++ b/FWCore/Utilities/interface/stemFromPath.h @@ -0,0 +1,20 @@ +#ifndef FWCore_Utilities_stemFromPath_h +#define FWCore_Utilities_stemFromPath_h + +#include +#include + +namespace edm { + // This functions extracts the stem of a file from the path (= file + // name without the extension). The returned value is a string_view + // to the input string. Caller should ensure that the input string + // object lives long enough. + // + // The reason to have our own function instead of + // std/boost::filesystem is that tehcnically these paths are not + // filesystem paths, but paths in CMS LFN/PFN space that (may) have + // different rules. + std::string_view stemFromPath(const std::string& path); +} // namespace edm + +#endif diff --git a/FWCore/Utilities/src/stemFromPath.cc b/FWCore/Utilities/src/stemFromPath.cc new file mode 100644 index 0000000000000..c97f6e2a83571 --- /dev/null +++ b/FWCore/Utilities/src/stemFromPath.cc @@ -0,0 +1,20 @@ +#include "FWCore/Utilities/interface/stemFromPath.h" + +namespace edm { + std::string_view stemFromPath(const std::string& path) { + auto begin = path.rfind("/"); + if (begin == std::string::npos) { + begin = path.rfind(":"); + if (begin == std::string::npos) { + // shouldn't really happen? + begin = 0; + } else { + begin += 1; + } + } else { + begin += 1; + } + auto end = path.find(".", begin); + return std::string_view(path).substr(begin, end - begin); + } +} // namespace edm diff --git a/FWCore/Utilities/test/BuildFile.xml b/FWCore/Utilities/test/BuildFile.xml index ff886e58929bc..be3cefb0f8e3c 100644 --- a/FWCore/Utilities/test/BuildFile.xml +++ b/FWCore/Utilities/test/BuildFile.xml @@ -34,6 +34,10 @@ + + + + diff --git a/FWCore/Utilities/test/test_catch2_main.cc b/FWCore/Utilities/test/test_catch2_main.cc new file mode 100644 index 0000000000000..0c7c351f437f5 --- /dev/null +++ b/FWCore/Utilities/test/test_catch2_main.cc @@ -0,0 +1,2 @@ +#define CATCH_CONFIG_MAIN +#include "catch.hpp" diff --git a/FWCore/Utilities/test/test_catch2_stemFromPath.cc b/FWCore/Utilities/test/test_catch2_stemFromPath.cc new file mode 100644 index 0000000000000..f815b7ba0515f --- /dev/null +++ b/FWCore/Utilities/test/test_catch2_stemFromPath.cc @@ -0,0 +1,15 @@ +#include "FWCore/Utilities/interface/stemFromPath.h" + +#include "catch.hpp" + +TEST_CASE("Test stemFromPath", "[sources]") { + CHECK(edm::stemFromPath("foo.root") == "foo"); + CHECK(edm::stemFromPath("/foo.root") == "foo"); + CHECK(edm::stemFromPath("/bar/foo.root") == "foo"); + CHECK(edm::stemFromPath("/bar///....//...///foo.root") == "foo"); + CHECK(edm::stemFromPath("/bar/foo.xyzzy") == "foo"); + CHECK(edm::stemFromPath("/bar/xyzzy.foo.root") == "xyzzy"); + CHECK(edm::stemFromPath("file:foo.root") == "foo"); + CHECK(edm::stemFromPath("file:/path/to/bar.txt") == "bar"); + CHECK(edm::stemFromPath("root://server.somewhere:port/whatever?param=path/to/bar.txt") == "bar"); +} diff --git a/IOPool/Input/src/RootFile.cc b/IOPool/Input/src/RootFile.cc index 26edcf006927c..fb32b64d33e01 100644 --- a/IOPool/Input/src/RootFile.cc +++ b/IOPool/Input/src/RootFile.cc @@ -43,6 +43,7 @@ #include "FWCore/Utilities/interface/FriendlyName.h" #include "FWCore/Utilities/interface/GlobalIdentifier.h" #include "FWCore/Utilities/interface/ReleaseVersion.h" +#include "FWCore/Utilities/interface/stemFromPath.h" #include "FWCore/Version/interface/GetReleaseVersion.h" #include "IOPool/Common/interface/getWrapperBasePtr.h" @@ -1142,20 +1143,7 @@ namespace edm { << "in the input file.\n"; } if (enforceGUIDInFileName_) { - auto begin = file_.rfind("/"); - if (begin == std::string::npos) { - begin = file_.rfind(":"); - if (begin == std::string::npos) { - // shouldn't really happen? - begin = 0; - } else { - begin += 1; - } - } else { - begin += 1; - } - auto end = file_.find(".", begin); - auto guidFromName = std::string_view(file_).substr(begin, end - begin); + auto guidFromName = stemFromPath(file_); if (guidFromName != fid_.fid()) { throw edm::Exception(edm::errors::FileNameInconsistentWithGUID) << "GUID " << guidFromName << " extracted from file name " << file_ From d7ff8529f8fb19937ae26e0deeb953dd6d6e3b0b Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Sat, 7 Dec 2019 03:14:47 +0100 Subject: [PATCH 7/7] Take parameter via string_view as well To avoid creating temporary strings from const char* arguments. --- FWCore/Utilities/interface/stemFromPath.h | 3 +-- FWCore/Utilities/src/stemFromPath.cc | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/FWCore/Utilities/interface/stemFromPath.h b/FWCore/Utilities/interface/stemFromPath.h index b0e8000720e61..6453e71d770aa 100644 --- a/FWCore/Utilities/interface/stemFromPath.h +++ b/FWCore/Utilities/interface/stemFromPath.h @@ -1,7 +1,6 @@ #ifndef FWCore_Utilities_stemFromPath_h #define FWCore_Utilities_stemFromPath_h -#include #include namespace edm { @@ -14,7 +13,7 @@ namespace edm { // std/boost::filesystem is that tehcnically these paths are not // filesystem paths, but paths in CMS LFN/PFN space that (may) have // different rules. - std::string_view stemFromPath(const std::string& path); + std::string_view stemFromPath(std::string_view path); } // namespace edm #endif diff --git a/FWCore/Utilities/src/stemFromPath.cc b/FWCore/Utilities/src/stemFromPath.cc index c97f6e2a83571..b5a2c4f0e15ca 100644 --- a/FWCore/Utilities/src/stemFromPath.cc +++ b/FWCore/Utilities/src/stemFromPath.cc @@ -1,11 +1,11 @@ #include "FWCore/Utilities/interface/stemFromPath.h" namespace edm { - std::string_view stemFromPath(const std::string& path) { + std::string_view stemFromPath(std::string_view path) { auto begin = path.rfind("/"); - if (begin == std::string::npos) { + if (begin == std::string_view::npos) { begin = path.rfind(":"); - if (begin == std::string::npos) { + if (begin == std::string_view::npos) { // shouldn't really happen? begin = 0; } else { @@ -15,6 +15,6 @@ namespace edm { begin += 1; } auto end = path.find(".", begin); - return std::string_view(path).substr(begin, end - begin); + return path.substr(begin, end - begin); } } // namespace edm