-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NanoAOD prototype [RFC] #20626
NanoAOD prototype [RFC] #20626
Changes from 18 commits
1d61603
edae6c2
06da61b
29879a4
010e333
372bb17
1f8f91d
f27f976
89d6897
c09afb6
d7dc162
22ee0e2
19d62c4
6faa025
1eabc06
4c8d6a7
c27a59b
fae71a4
70e3238
860a578
dcd0f32
ceacf76
097fd26
58986b1
3f52220
ea650eb
7d6154b
fae6ee6
80d7284
6d36855
6fc5d71
b6ecbc3
c43e136
a36123f
711bfda
a8aba11
53674c0
54ccc00
794b16f
ef23232
35ca40d
a23e554
3e9aab9
5647425
4d5e095
144f8b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<use name="FWCore/Utilities"/> | ||
<use name="FWCore/Common"/> | ||
<use name="DataFormats/Common"/> | ||
<use name="DataFormats/StdDictionaries"/> | ||
<use name="boost"/> | ||
<flags LCG_DICT_HEADER="classes.h"/> | ||
<flags LCG_DICT_XML="classes_def.xml"/> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: these LCG_DICT flags are apparently not needed (they are not present in other DataFormats (DF) packages). |
||
<export> | ||
<lib name="1"/> | ||
</export> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
#ifndef DataFormats_NanoAOD_FlatTable_h | ||
#define DataFormats_NanoAOD_FlatTable_h | ||
|
||
#include <cstdint> | ||
#include <vector> | ||
#include <string> | ||
#include <boost/range/sub_range.hpp> | ||
#include <FWCore/Utilities/interface/Exception.h> | ||
#include <DataFormats/PatCandidates/interface/libminifloat.h> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: please use double quotes instead of angle brackets for these project #include directives. |
||
|
||
namespace flatTableHelper { | ||
template<typename T> struct MaybeMantissaReduce { | ||
MaybeMantissaReduce(int mantissaBits) {} | ||
inline T one(const T &val) const { return val; } | ||
inline void bulk(boost::sub_range<std::vector<T>> data) const { } | ||
}; | ||
template<> struct MaybeMantissaReduce<float> { | ||
int bits_; | ||
MaybeMantissaReduce(int mantissaBits) : bits_(mantissaBits) {} | ||
inline float one(const float &val) const { return (bits_ > 0 ? MiniFloatConverter::reduceMantissaToNbitsRounding(val, bits_) : val); } | ||
inline void bulk(boost::sub_range<std::vector<float>> data) const { if (bits_ > 0) MiniFloatConverter::reduceMantissaToNbitsRounding(bits_, data.begin(), data.end(), data.begin()); } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pass by const ref? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sub_range is an iterator pair, it's to be passed by value |
||
}; | ||
} | ||
class FlatTable { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this and others be in a namespace? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will put them in a namespace |
||
public: | ||
enum ColumnType { FloatColumn, IntColumn, UInt8Column, BoolColumn }; // We could have other Float types with reduced mantissa, and similar | ||
|
||
FlatTable() : size_(0) {} | ||
FlatTable(unsigned int size, const std::string & name, bool singleton, bool extension=false) : size_(size), name_(name), singleton_(singleton), extension_(extension) {} | ||
~FlatTable() {} | ||
|
||
unsigned int nColumns() const { return columns_.size(); }; | ||
unsigned int nRows() const { return size_; }; | ||
unsigned int size() const { return size_; } | ||
bool singleton() const { return singleton_; } | ||
bool extension() const { return extension_; } | ||
const std::string & name() const { return name_; } | ||
|
||
const std::string & columnName(unsigned int col) const { return columns_[col].name; } | ||
int columnIndex(const std::string & name) const ; | ||
|
||
ColumnType columnType(unsigned int col) const { return columns_[col].type; } | ||
|
||
void setDoc(const std::string & doc) { doc_ = doc; } | ||
const std::string & doc() const { return doc_; } | ||
const std::string & columnDoc(unsigned int col) const { return columns_[col].doc; } | ||
|
||
/// get a column by index (const) | ||
template<typename T> | ||
boost::sub_range<const std::vector<T>> columnData(unsigned int column) const { | ||
auto begin = beginData<T>(column); | ||
return boost::sub_range<const std::vector<T>>(begin, begin+size_); | ||
} | ||
|
||
/// get a column by index (non-const) | ||
template<typename T> | ||
boost::sub_range<std::vector<T>> columnData(unsigned int column) { | ||
auto begin = beginData<T>(column); | ||
return boost::sub_range<std::vector<T>>(begin, begin+size_); | ||
} | ||
|
||
/// get a column value for singleton (const) | ||
template<typename T> | ||
const T & columValue(unsigned int column) const { | ||
if (!singleton()) throw cms::Exception("LogicError", "columnValue works only for singleton tables"); | ||
return * beginData<T>(column); | ||
} | ||
|
||
template<typename T, typename C = std::vector<T>> | ||
void addColumn(const std::string & name, const C & values, const std::string & docString, ColumnType type = defaultColumnType<T>(),int mantissaBits=-1) { | ||
if (columnIndex(name) != -1) throw cms::Exception("LogicError", "Duplicated column: "+name); | ||
if (values.size() != size()) throw cms::Exception("LogicError", "Mismatched size for "+name); | ||
check_type<T>(type); // throws if type is wrong | ||
auto & vec = bigVector<T>(); | ||
columns_.emplace_back(name,docString,type,vec.size()); | ||
vec.insert(vec.end(), values.begin(), values.end()); | ||
if (type == FloatColumn) { | ||
flatTableHelper::MaybeMantissaReduce<T>(mantissaBits).bulk(columnData<T>(columns_.size()-1)); | ||
} | ||
} | ||
template<typename T, typename C> | ||
void addColumnValue(const std::string & name, const C & value, const std::string & docString, ColumnType type = defaultColumnType<T>(),int mantissaBits=-1) { | ||
if (!singleton()) throw cms::Exception("LogicError", "addColumnValue works only for singleton tables"); | ||
if (columnIndex(name) != -1) throw cms::Exception("LogicError", "Duplicated column: "+name); | ||
check_type<T>(type); // throws if type is wrong | ||
auto & vec = bigVector<T>(); | ||
columns_.emplace_back(name,docString,type,vec.size()); | ||
if (type == FloatColumn) { | ||
vec.push_back( flatTableHelper::MaybeMantissaReduce<T>(mantissaBits).one(value) ); | ||
} else { | ||
vec.push_back( value ); | ||
} | ||
} | ||
|
||
template<typename T> static ColumnType defaultColumnType() { throw cms::Exception("unsupported type"); } | ||
|
||
// this below needs to be public for ROOT, but it is to be considered private otherwise | ||
struct Column { | ||
std::string name, doc; | ||
ColumnType type; | ||
unsigned int firstIndex; | ||
Column() {} // for ROOT | ||
Column(const std::string & aname, const std::string & docString, ColumnType atype, unsigned int anIndex) : name(aname), doc(docString), type(atype), firstIndex(anIndex) {} | ||
}; | ||
|
||
private: | ||
|
||
template<typename T> | ||
typename std::vector<T>::const_iterator beginData(unsigned int column) const { | ||
const Column & col = columns_[column]; | ||
check_type<T>(col.type); // throws if type is wrong | ||
return bigVector<T>().begin() + col.firstIndex; | ||
} | ||
template<typename T> | ||
typename std::vector<T>::iterator beginData(unsigned int column) { | ||
const Column & col = columns_[column]; | ||
check_type<T>(col.type); // throws if type is wrong | ||
return bigVector<T>().begin() + col.firstIndex; | ||
} | ||
|
||
template<typename T> | ||
const std::vector<T> & bigVector() const { throw cms::Exception("unsupported type"); } | ||
template<typename T> | ||
std::vector<T> & bigVector() { throw cms::Exception("unsupported type"); } | ||
|
||
|
||
unsigned int size_; | ||
std::string name_, doc_; | ||
bool singleton_, extension_; | ||
std::vector<Column> columns_; | ||
std::vector<float> floats_; | ||
std::vector<int> ints_; | ||
std::vector<uint8_t> uint8s_; | ||
|
||
template<typename T> | ||
static void check_type(FlatTable::ColumnType type) { throw cms::Exception("unsupported type"); } | ||
}; | ||
|
||
// Type checks for the supported storage types: each one returns quietly when
// the column type may be stored as T and throws cms::Exception otherwise.

/// float storage accepts FloatColumn only.
template<>
inline void FlatTable::check_type<float>(FlatTable::ColumnType type) {
    if (type == FlatTable::FloatColumn) return;
    throw cms::Exception("mismatched type");
}

/// int storage accepts IntColumn only.
template<>
inline void FlatTable::check_type<int>(FlatTable::ColumnType type) {
    if (type == FlatTable::IntColumn) return;
    throw cms::Exception("mismatched type");
}

/// uint8_t storage accepts both UInt8Column and BoolColumn.
template<>
inline void FlatTable::check_type<uint8_t>(FlatTable::ColumnType type) {
    if (type == FlatTable::UInt8Column || type == FlatTable::BoolColumn) return;
    throw cms::Exception("mismatched type");
}
|
||
|
||
|
||
// Storage selection: map each supported value type onto the member vector
// that holds it. Read-only accessors first, mutable ones after.

template<>
inline const std::vector<float> & FlatTable::bigVector<float>() const {
    return floats_;
}

template<>
inline const std::vector<int> & FlatTable::bigVector<int>() const {
    return ints_;
}

template<>
inline const std::vector<uint8_t> & FlatTable::bigVector<uint8_t>() const {
    return uint8s_;
}

template<>
inline std::vector<float> & FlatTable::bigVector<float>() {
    return floats_;
}

template<>
inline std::vector<int> & FlatTable::bigVector<int>() {
    return ints_;
}

template<>
inline std::vector<uint8_t> & FlatTable::bigVector<uint8_t>() {
    return uint8s_;
}
|
||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#ifndef DataFormats_NanoAOD_MergeableCounterTable_h | ||
#define DataFormats_NanoAOD_MergeableCounterTable_h | ||
|
||
#include <FWCore/Utilities/interface/Exception.h> | ||
#include <vector> | ||
#include <string> | ||
|
||
/// Set of named scalar and vector counters (integer and floating point) that
/// can be added, column by column, to another table with the same layout.
class MergeableCounterTable {
  public:
    MergeableCounterTable() {}
    typedef long long int_accumulator; // we accumulate in long long int, to avoid overflow
    typedef double float_accumulator; // we accumulate in double, to preserve precision

    /// One named scalar counter.
    template<typename T>
    struct SingleColumn {
        typedef T value_type;
        SingleColumn() {}
        SingleColumn(const std::string & aname, const std::string & adoc, T avalue = T()) : name(aname), doc(adoc), value(avalue) {}
        std::string name, doc;
        T value = T(); // defined (zero) value even for a default-constructed column
        /// Adds the other counter's value. The names are not checked here:
        /// callers are expected to test compatible() first.
        void operator+=(const SingleColumn<T> & other) {
            value += other.value;
        }
        /// True when both counters have the same name.
        bool compatible(const SingleColumn<T> & other) const {
            return name == other.name; // we don't check the doc, not needed
        }
    };
    typedef SingleColumn<float_accumulator> FloatColumn;
    typedef SingleColumn<int_accumulator> IntColumn;

    /// One named vector of counters.
    template<typename T>
    struct VectorColumn {
        typedef T element_type;
        VectorColumn() {}
        VectorColumn(const std::string & aname, const std::string & adoc, unsigned int size) : name(aname), doc(adoc), values(size, T()) {}
        VectorColumn(const std::string & aname, const std::string & adoc, const std::vector<T> & somevalues) : name(aname), doc(adoc), values(somevalues) {}
        std::string name, doc;
        std::vector<T> values;
        /// Adds the other counters element by element. Names and sizes are
        /// not validated here (callers are expected to test compatible()
        /// first); if the other vector is shorter only the common prefix is
        /// summed, so that nothing is ever read past its end.
        void operator+=(const VectorColumn<T> & other) {
            typedef typename std::vector<T>::size_type size_type;
            const size_type n = values.size() < other.values.size() ? values.size() : other.values.size();
            for (size_type i = 0; i < n; ++i) {
                values[i] += other.values[i];
            }
        }
        /// True when both columns have the same name and the same length.
        bool compatible(const VectorColumn<T> & other) const {
            return name == other.name && values.size() == other.values.size(); // we don't check the doc, not needed
        }
    };
    typedef VectorColumn<float_accumulator> VFloatColumn;
    typedef VectorColumn<int_accumulator> VIntColumn;

    const std::vector<FloatColumn> & floatCols() const { return floatCols_; }
    const std::vector<VFloatColumn> & vfloatCols() const { return vfloatCols_; }
    const std::vector<IntColumn> & intCols() const { return intCols_; }
    const std::vector<VIntColumn> & vintCols() const { return vintCols_; }

    /// Appends a scalar floating-point counter initialised to @p value.
    template<typename F>
    void addFloat(const std::string & name, const std::string & doc, F value) { floatCols_.push_back(FloatColumn(name, doc, value)); }

    /// Appends a scalar integer counter initialised to @p value.
    template<typename I>
    void addInt(const std::string & name, const std::string & doc, I value) { intCols_.push_back(IntColumn(name, doc, value)); }

    /// Appends a vector floating-point counter initialised from @p values
    /// (each element is converted to float_accumulator).
    template<typename F>
    void addVFloat(const std::string & name, const std::string & doc, const std::vector<F> & values) {
        vfloatCols_.push_back(VFloatColumn(name, doc, std::vector<float_accumulator>(values.begin(), values.end())));
    }

    /// Appends a vector integer counter initialised from @p values
    /// (each element is converted to int_accumulator).
    template<typename I>
    void addVInt(const std::string & name, const std::string & doc, const std::vector<I> & values) {
        vintCols_.push_back(VIntColumn(name, doc, std::vector<int_accumulator>(values.begin(), values.end())));
    }

    /// Adds every counter of @p other to the matching counter of this table.
    /// Returns false, leaving this table untouched, when the two tables do
    /// not have the same columns (number, order, names, vector lengths).
    bool mergeProduct(const MergeableCounterTable & other) {
        // validate everything first, so a failure cannot leave a partial sum
        if (!canMerge(intCols_, other.intCols_)) return false;
        if (!canMerge(vintCols_, other.vintCols_)) return false;
        if (!canMerge(floatCols_, other.floatCols_)) return false;
        if (!canMerge(vfloatCols_, other.vfloatCols_)) return false;
        accumulate(intCols_, other.intCols_);
        accumulate(vintCols_, other.vintCols_);
        accumulate(floatCols_, other.floatCols_);
        accumulate(vfloatCols_, other.vfloatCols_);
        return true;
    }

  private:
    std::vector<FloatColumn> floatCols_;
    std::vector<VFloatColumn> vfloatCols_;
    std::vector<IntColumn> intCols_;
    std::vector<VIntColumn> vintCols_;

    /// True when both lists have the same length and pairwise compatible columns.
    template<typename T>
    static bool canMerge(const std::vector<T> & one, const std::vector<T> & two) {
        if (one.size() != two.size()) return false;
        for (typename std::vector<T>::size_type i = 0, n = one.size(); i < n; ++i) {
            if (!one[i].compatible(two[i])) return false;
        }
        return true;
    }

    /// Adds two[i] to one[i] for every column; requires canMerge(one, two).
    template<typename T>
    static void accumulate(std::vector<T> & one, const std::vector<T> & two) {
        for (typename std::vector<T>::size_type i = 0, n = one.size(); i < n; ++i) {
            one[i] += two[i];
        }
    }

    /// Merges a single list of columns; on failure @p one is left unchanged.
    template<typename T>
    bool tryMerge(std::vector<T> & one, const std::vector<T> & two) {
        if (!canMerge(one, two)) return false;
        accumulate(one, two);
        return true;
    }
};
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#ifndef PhysicsTools_NanoAOD_UniqueString_h | ||
#define PhysicsTools_NanoAOD_UniqueString_h | ||
|
||
#include <string> | ||
|
||
/// Thin wrapper around a std::string; two instances are "product equal"
/// exactly when the wrapped strings are equal.
class UniqueString {
  public:
    /// Wraps an empty string.
    UniqueString() {}

    /// Wraps a copy of @p str.
    UniqueString(const std::string & str) : str_(str) {}

    /// Read-only access to the wrapped string.
    const std::string & str() const {
        return str_;
    }

    /// Compares the wrapped string with another wrapper's string.
    bool operator==(const UniqueString & other) const {
        return str() == other.str();
    }

    /// Compares the wrapped string with a plain string.
    bool operator==(const std::string & other) const {
        return str() == other;
    }

    /// Same result as operator== on another UniqueString.
    bool isProductEqual(const UniqueString & other) const {
        return operator==(other);
    }

  private:
    std::string str_;
};
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#include <DataFormats/NanoAOD/interface/FlatTable.h> | ||
|
||
int FlatTable::columnIndex(const std::string & name) const { | ||
for (unsigned int i = 0, n = columns_.size(); i < n; ++i) { | ||
if (columns_[i].name == name) return i; | ||
} | ||
return -1; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
#include "DataFormats/NanoAOD/interface/MergeableCounterTable.h" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#include "Rtypes.h" | ||
|
||
#include <DataFormats/NanoAOD/interface/FlatTable.h> | ||
#include <DataFormats/NanoAOD/interface/MergeableCounterTable.h> | ||
#include <DataFormats/NanoAOD/interface/UniqueString.h> | ||
#include "DataFormats/Common/interface/Wrapper.h" | ||
|
||
namespace DataFormats_NanoAOD { | ||
struct dictionary { | ||
edm::Wrapper<FlatTable> w_table; | ||
edm::Wrapper<MergeableCounterTable> w_mtable; | ||
edm::Wrapper<UniqueString> w_ustr; | ||
}; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if this runs already, perhaps a step can be added to a standard short matrix workflow
so that the code is tested regularly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
are the externals already in the IB?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the cmsDriver integration works already in this PR, but I'm a bit lost at where and how to insert it into PyReleaseValidation for the matrix (for the standard configuration, we probably want to run it on top of the miniAOD produced by the same 94X release)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense to do that yes - this should just be a new step added to ~all run2 workflows that already produce miniAOD output. (relval_steps.py is the fun place to start)