Skip to content

Commit

Permalink
Merge pull request #41309 from nhduongvn/new-data-access_125X
Browse files Browse the repository at this point in the history
Backport codes for adding new data catalogs from Rucio storage description (RucioCatalog) and use it by default instead of trivial data catalogs (TrivialCatalog) #37278, and related bug fixes
  • Loading branch information
cmsbuild authored Apr 19, 2023
2 parents 10fa4ed + 63ca0fd commit 7302051
Show file tree
Hide file tree
Showing 15 changed files with 687 additions and 197 deletions.
1 change: 1 addition & 0 deletions Configuration/PyReleaseValidation/scripts/runTheMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ def runSelected(opt):
if os.path.exists(cmssw_base):
os.environ["PATH"]=cmssw_base+":"+os.getenv("PATH")
os.environ["CMS_PATH"]="/cvmfs/cms-ib.cern.ch"
os.environ["SITECONFIG_PATH"]="/cvmfs/cms-ib.cern.ch/SITECONF/local"
os.environ["CMSSW_USE_IBEOS"]="true"
print(">> WARNING: You are using SITECONF from /cvmfs/cms-ib.cern.ch")
break
Expand Down
28 changes: 22 additions & 6 deletions FWCore/Catalog/interface/FileLocator.h
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
#ifndef FWCore_Catalog_FileLocator_h
#define FWCore_Catalog_FileLocator_h

#include "FWCore/Catalog/interface/SiteLocalConfig.h"
#include <string>
#include <list>
#include <map>
#include <utility>
#include <regex>
#include "tinyxml2.h"
#include <boost/property_tree/ptree.hpp>

namespace edm {

class FileLocator {
public:
explicit FileLocator(
edm::CatalogAttributes const& catAttr,
unsigned iCatalog = 0,
//storageDescriptionPath is used to override path provided by SiteLocalConfig. This is used in FileLocator_t.cpp tests
std::string const& storageDescriptionPath = std::string());
explicit FileLocator(std::string const& catUrl, unsigned iCatalog = 0);

~FileLocator();

std::string pfn(std::string const& ilfn) const;
std::string lfn(std::string const& ipfn) const;
std::string pfn(std::string const& ilfn, edm::CatalogType catType) const;

private:
/** For the time being the only allowed configuration item is a
Expand All @@ -34,9 +41,15 @@ namespace edm {
typedef std::vector<Rule> Rules;
typedef std::map<std::string, Rules> ProtocolRules;

void init(std::string const& catUrl, unsigned iCatalog);

void parseRule(tinyxml2::XMLElement* ruleNode, ProtocolRules& rules);
void init_trivialCatalog(std::string const& catUrl, unsigned iCatalog);
void parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleNode, ProtocolRules& rules);
//using data-access
void init(edm::CatalogAttributes const& input_dataCatalog,
unsigned iCatalog,
std::string const& storageDescriptionPath);
void parseRule(boost::property_tree::ptree::value_type const& storageRule,
std::string const& protocol,
ProtocolRules& rules);

std::string applyRules(ProtocolRules const& protocolRules,
std::string const& protocol,
Expand All @@ -47,14 +60,17 @@ namespace edm {
std::string convert(std::string const& input, ProtocolRules const& rules, bool direct) const;

/** Direct rules are used to do the mapping from LFN to PFN.*/
ProtocolRules m_directRules;
ProtocolRules m_directRules_trivialCatalog;
/** Inverse rules are used to do the mapping from PFN to LFN*/
ProtocolRules m_inverseRules;
/** Direct rules are used to do the mapping from LFN to PFN taken from storage.json*/
ProtocolRules m_directRules;

std::string m_fileType;
std::string m_filename;
std::vector<std::string> m_protocols;
std::string m_destination;
std::string m_prefix;
};
} // namespace edm

Expand Down
15 changes: 12 additions & 3 deletions FWCore/Catalog/interface/InputFileCatalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
// Class InputFileCatalog. Services to manage InputFile catalog.
// Physical file names, pfns_ of FileCatalogItem, are constructed from multiple data catalogs in site-local-config.xml. Each member of pfns_ corresponds to a data catalog.
// Note that fileNames(unsigned iCatalog) of InputFileCatalog returns the physical file names of all input files corresponding to one data catalog (for example, if a job has 10 input files provided as a PoolSource, fileNames(unsigned iCatalog) will return the PFNs of these 10 files constructed from that data catalog)
// Set catType=TrivialCatalog: use trivial data catalogs from <event-data>
// Set catType=RucioCatalog: use data catalogs from <data-access> and storage.json
//
//////////////////////////////////////////////////////////////////////

Expand Down Expand Up @@ -33,7 +35,10 @@ namespace edm {
public:
InputFileCatalog(std::vector<std::string> const& fileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound = false);
bool useLFNasPFNifLFNnotFound = false,
//switching between two catalog types
//edm::CatalogType catType = edm::CatalogType::TrivialCatalog);
edm::CatalogType catType = edm::CatalogType::RucioCatalog);

~InputFileCatalog();
std::vector<FileCatalogItem> const& fileCatalogItems() const { return fileCatalogItems_; }
Expand All @@ -43,13 +48,17 @@ namespace edm {
static bool isPhysical(std::string const& name) { return (name.empty() || name.find(':') != std::string::npos); }

private:
void init(std::string const& override, bool useLFNasPFNifLFNnotFound);
void findFile(std::string const& lfn, std::vector<std::string>& pfns, bool useLFNasPFNifLFNnotFound);
void init(std::string const& override, bool useLFNasPFNifLFNnotFound, edm::CatalogType catType);
void findFile(std::string const& lfn,
std::vector<std::string>& pfns,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType);
std::vector<std::string> logicalFileNames_;
std::vector<std::string> fileNames_;
std::vector<FileCatalogItem> fileCatalogItems_;
edm::propagate_const<std::unique_ptr<FileLocator>> overrideFileLocator_;

std::vector<edm::propagate_const<std::unique_ptr<FileLocator>>> fileLocators_trivalCatalog_;
std::vector<edm::propagate_const<std::unique_ptr<FileLocator>>> fileLocators_;
};
} // namespace edm
Expand Down
34 changes: 32 additions & 2 deletions FWCore/Catalog/interface/SiteLocalConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,40 @@
#include <string>
#include <vector>
#include <netdb.h>

#include <filesystem>
// PUBLIC DEFINES
// PUBLIC CONSTANTS
// PUBLIC TYPES
namespace edm {
class ParameterSet;
class ActivityRegistry;

//Attributes of one data catalog (Rucio format) as defined in the <data-access> block of site-local-config.xml.
//See further description in SiteLocalConfigService.cc
struct CatalogAttributes {
  std::string site;
  std::string subSite;
  std::string storageSite;  //site where storage description is used
  std::string volume;
  std::string protocol;

  //leaves every attribute as an empty string
  CatalogAttributes() = default;

  //each argument is taken by value and moved into the member of the same name
  CatalogAttributes(std::string input_site,
                    std::string input_subSite,
                    std::string input_storageSite,
                    std::string input_volume,
                    std::string input_protocol)
      : site(std::move(input_site)),
        subSite(std::move(input_subSite)),
        storageSite(std::move(input_storageSite)),
        volume(std::move(input_volume)),
        protocol(std::move(input_protocol)) {}

  //two catalogs are equal only when all five attributes match
  bool operator==(const CatalogAttributes& rhs) const {
    if (site != rhs.site) {
      return false;
    }
    if (subSite != rhs.subSite) {
      return false;
    }
    if (storageSite != rhs.storageSite) {
      return false;
    }
    if (volume != rhs.volume) {
      return false;
    }
    return protocol == rhs.protocol;
  }

  //true when site, storageSite, volume and protocol are all empty; subSite is not part of this check
  bool empty() const {
    const bool anyAttributeSet = !site.empty() || !storageSite.empty() || !volume.empty() || !protocol.empty();
    return !anyAttributeSet;
  }
};

//Kind of data catalog used when constructing physical file names:
//  TrivialCatalog: trivial data catalogs from <event-data>
//  RucioCatalog: data catalogs from <data-access> and storage.json
enum class CatalogType {
  TrivialCatalog,
  RucioCatalog
};
} // namespace edm

// PUBLIC VARIABLES
Expand All @@ -30,7 +57,9 @@ namespace edm {
SiteLocalConfig() {}
virtual ~SiteLocalConfig() {}

virtual std::vector<std::string> const& dataCatalogs(void) const = 0;
virtual std::vector<std::string> const& trivialDataCatalogs() const = 0;
virtual std::vector<edm::CatalogAttributes> const& dataCatalogs() const = 0;
virtual std::filesystem::path const storageDescriptionPath(const edm::CatalogAttributes& aDataCatalog) const = 0;
virtual std::string const lookupCalibConnect(std::string const& input) const = 0;
virtual std::string const rfioType(void) const = 0;

Expand All @@ -47,6 +76,7 @@ namespace edm {
virtual struct addrinfo const* statisticsDestination() const = 0;
virtual std::set<std::string> const* statisticsInfo() const = 0;
virtual std::string const& siteName(void) const = 0;
virtual std::string const& subSiteName(void) const = 0;
virtual bool useLocalConnectString() const = 0;
virtual std::string const& localConnectPrefix() const = 0;
virtual std::string const& localConnectSuffix() const = 0;
Expand Down
Loading

0 comments on commit 7302051

Please sign in to comment.