From 159a737baffe3128200aee44fc6ff61ba1f7cba2 Mon Sep 17 00:00:00 2001 From: Chris Jones Date: Mon, 18 Nov 2024 15:30:57 -0600 Subject: [PATCH] Avoid allocations in ParentageID We were cycling memory each time a module put a data product into the event. This now has the ParentageID hold the 16 bytes directly instead of indirectly via a std::string. --- .../Provenance/interface/CompactHash.h | 163 ++++++++++++++++++ .../Provenance/interface/ParentageID.h | 7 +- DataFormats/Provenance/src/CompactHash.cc | 71 ++++++++ DataFormats/Provenance/src/classes_def.xml | 9 +- DataFormats/Provenance/test/BuildFile.xml | 7 +- DataFormats/Provenance/test/CompactHash_t.cpp | 35 ++++ 6 files changed, 287 insertions(+), 5 deletions(-) create mode 100644 DataFormats/Provenance/interface/CompactHash.h create mode 100644 DataFormats/Provenance/src/CompactHash.cc create mode 100644 DataFormats/Provenance/test/CompactHash_t.cpp diff --git a/DataFormats/Provenance/interface/CompactHash.h b/DataFormats/Provenance/interface/CompactHash.h new file mode 100644 index 0000000000000..7f9517d60a5b4 --- /dev/null +++ b/DataFormats/Provenance/interface/CompactHash.h @@ -0,0 +1,163 @@ +#ifndef DataFormats_Provenance_CompactHash_h +#define DataFormats_Provenance_CompactHash_h + +#include +#include +#include + +namespace cms { + class Digest; +} + +namespace edm { + + namespace detail { + // This string is the 16-byte, non-printable version. + std::array const& InvalidCompactHash(); + } // namespace detail + + namespace compact_hash_detail { + using value_type = std::array; + void toString_(std::string& result, value_type const& hash); + void toDigest_(cms::Digest& digest, value_type const& hash); + std::ostream& print_(std::ostream& os, value_type const& hash); + bool isValid_(value_type const& hash); + size_t smallHash_(value_type const& hash); + value_type fromHex_(std::string_view); + void throwIfIllFormed(std::string_view v); + } // namespace compact_hash_detail + + template + class CompactHash { + public: + typedef compact_hash_detail::value_type value_type; + + CompactHash(); + explicit CompactHash(value_type const& v); + explicit CompactHash(std::string_view v); + + CompactHash(CompactHash const&) = default; + CompactHash& operator=(CompactHash const& iRHS) = default; + + CompactHash(CompactHash&&) = default; + CompactHash& operator=(CompactHash&&) = default; + + void reset(); + + // For now, just check the most basic: a default constructed + // ParameterSetID is not valid. This is very crude: we are + // assuming that nobody created a ParameterSetID from an empty + // string, nor from any string that is not a valid string + // representation of an MD5 checksum. + bool isValid() const; + + bool operator<(CompactHash const& other) const; + bool operator>(CompactHash const& other) const; + bool operator==(CompactHash const& other) const; + bool operator!=(CompactHash const& other) const; + std::ostream& print(std::ostream& os) const; + void toString(std::string& result) const; + void toDigest(cms::Digest& digest) const; + + // Return the 16-byte (non-printable) string form. + value_type const& compactForm() const; + + ///returns a short hash which can be used with hashing containers + size_t smallHash() const; + + //Used by ROOT storage + // CMS_CLASS_VERSION(3) // This macro is not defined here, so expand it. + static short Class_Version() { return 3; } + + private: + template + bool compareUsing(CompactHash const& iOther, Op op) const { + return op(this->hash_, iOther.hash_); + } + + value_type hash_; + }; + + //-------------------------------------------------------------------- + // + // Implementation details follow... + //-------------------------------------------------------------------- + + template + inline CompactHash::CompactHash() : hash_(detail::InvalidCompactHash()) {} + + template + inline CompactHash::CompactHash(value_type const& v) : hash_(v) {} + + template + inline CompactHash::CompactHash(std::string_view v) { + if (v.size() == 32) { + hash_ = compact_hash_detail::fromHex_(v); + } else { + compact_hash_detail::throwIfIllFormed(v); + std::copy(v.begin(), v.end(), hash_.begin()); + } + } + + template + inline void CompactHash::reset() { + hash_ = detail::InvalidCompactHash(); + } + + template + inline bool CompactHash::isValid() const { + return compact_hash_detail::isValid_(hash_); + } + + template + inline bool CompactHash::operator<(CompactHash const& other) const { + return this->compareUsing(other, std::less()); + } + + template + inline bool CompactHash::operator>(CompactHash const& other) const { + return this->compareUsing(other, std::greater()); + } + + template + inline bool CompactHash::operator==(CompactHash const& other) const { + return this->compareUsing(other, std::equal_to()); + } + + template + inline bool CompactHash::operator!=(CompactHash const& other) const { + return this->compareUsing(other, std::not_equal_to()); + } + + template + inline std::ostream& CompactHash::print(std::ostream& os) const { + return compact_hash_detail::print_(os, hash_); + } + + template + inline void CompactHash::toString(std::string& result) const { + compact_hash_detail::toString_(result, hash_); + } + + template + inline void CompactHash::toDigest(cms::Digest& digest) const { + compact_hash_detail::toDigest_(digest, hash_); + } + + template + inline typename CompactHash::value_type const& CompactHash::compactForm() const { + return hash_; + } + + template + inline size_t CompactHash::smallHash() const { + return compact_hash_detail::smallHash_(hash_); + } + + template + inline std::ostream& operator<<(std::ostream& os, CompactHash const& h) { + return h.print(os); + } + +} // namespace edm +#endif diff --git a/DataFormats/Provenance/interface/ParentageID.h b/DataFormats/Provenance/interface/ParentageID.h index 4721d99b0525a..7d1b3e75a4f0f 100644 --- a/DataFormats/Provenance/interface/ParentageID.h +++ b/DataFormats/Provenance/interface/ParentageID.h @@ -2,10 +2,13 @@ #define DataFormats_Provenance_ParentageID_h #include "DataFormats/Provenance/interface/HashedTypes.h" +#include "DataFormats/Provenance/interface/CompactHash.h" #include "DataFormats/Provenance/interface/Hash.h" namespace edm { - typedef Hash ParentageID; -} + using ParentageID = CompactHash; + + using StoredParentageID = Hash; +} // namespace edm #endif diff --git a/DataFormats/Provenance/src/CompactHash.cc b/DataFormats/Provenance/src/CompactHash.cc new file mode 100644 index 0000000000000..de311d5a9aaf1 --- /dev/null +++ b/DataFormats/Provenance/src/CompactHash.cc @@ -0,0 +1,71 @@ +#include "DataFormats/Provenance/interface/CompactHash.h" +#include "FWCore/Utilities/interface/Algorithms.h" +#include "FWCore/Utilities/interface/Digest.h" +#include "FWCore/Utilities/interface/EDMException.h" + +#include +#include + +namespace { + std::array convert(std::string const& v) { + assert(v.size() == 16); + std::array retValue; + std::copy(v.begin(), v.end(), retValue.begin()); + return retValue; + } +} // namespace +namespace edm { + namespace detail { + // This string is the 16-byte, non-printable version. + std::array const& InvalidCompactHash() { + static std::array const invalid = convert(cms::MD5Result().compactForm()); + return invalid; + } + } // namespace detail + + namespace compact_hash_detail { + size_t smallHash_(value_type const& hash) { + //NOTE: In future we could try to xor the first 8bytes into the second 8bytes of the string to make the hash + std::hash h; + return h(std::string_view(reinterpret_cast(hash.data()), hash.size())); + } + + std::array fromHex_(std::string_view v) { + cms::MD5Result temp; + temp.fromHexifiedString(std::string(v)); + auto hash = temp.compactForm(); + std::array ret; + std::copy(hash.begin(), hash.end(), ret.begin()); + return ret; + } + + bool isValid_(value_type const& hash) { return hash != detail::InvalidCompactHash(); } + + void throwIfIllFormed(std::string_view v) { + // Fixup not needed here. + if (v.size() != 16) { + throw Exception(errors::LogicError) << "Ill-formed CompactHash instance. " + << "A string_view of size " << v.size() << " passed to constructor."; + } + } + + void toString_(std::string& result, value_type const& hash) { + cms::MD5Result temp; + copy_all(hash, temp.bytes.begin()); + result += temp.toString(); + } + + void toDigest_(cms::Digest& digest, value_type const& hash) { + cms::MD5Result temp; + copy_all(hash, temp.bytes.begin()); + digest.append(temp.toString()); + } + + std::ostream& print_(std::ostream& os, value_type const& hash) { + cms::MD5Result temp; + copy_all(hash, temp.bytes.begin()); + os << temp.toString(); + return os; + } + } // namespace compact_hash_detail +} // namespace edm diff --git a/DataFormats/Provenance/src/classes_def.xml b/DataFormats/Provenance/src/classes_def.xml index 0cd4c91e3409d..cdb733eff0e4a 100644 --- a/DataFormats/Provenance/src/classes_def.xml +++ b/DataFormats/Provenance/src/classes_def.xml @@ -73,7 +73,8 @@ - + + @@ -162,6 +163,7 @@ + @@ -259,5 +261,10 @@ newObj->initializeTransients(); ]]> + + (onfile.hash_); + ]]> + diff --git a/DataFormats/Provenance/test/BuildFile.xml b/DataFormats/Provenance/test/BuildFile.xml index d5cb05c7beeec..688b612b633ba 100644 --- a/DataFormats/Provenance/test/BuildFile.xml +++ b/DataFormats/Provenance/test/BuildFile.xml @@ -1,12 +1,15 @@ + - - + + + + diff --git a/DataFormats/Provenance/test/CompactHash_t.cpp b/DataFormats/Provenance/test/CompactHash_t.cpp new file mode 100644 index 0000000000000..5cef47f2a6a8e --- /dev/null +++ b/DataFormats/Provenance/test/CompactHash_t.cpp @@ -0,0 +1,35 @@ +#include "catch.hpp" + +#include "DataFormats/Provenance/interface/CompactHash.h" +#include "FWCore/Utilities/interface/Digest.h" + +namespace { + using TestHash = edm::CompactHash<100>; +} + +TEST_CASE("CompactHash", "[CompactHash]") { + SECTION("Default construction is invalid") { REQUIRE(TestHash{}.isValid() == false); } + + SECTION("Basic operations") { + cms::Digest d("foo"); + auto result = d.digest().bytes; + + TestHash id{result}; + REQUIRE(id.isValid() == true); + REQUIRE(id.compactForm() == result); + + TestHash id2 = id; + REQUIRE(id2.isValid() == true); + REQUIRE(id2.compactForm() == result); + + cms::Digest b("bar"); + auto diffResult = b.digest().bytes; + REQUIRE(id2 == TestHash{result}); + REQUIRE(id2 != TestHash{diffResult}); + + REQUIRE(id2 > TestHash{diffResult}); + REQUIRE(TestHash{diffResult} < id2); + + REQUIRE(not(id2 < id2)); + } +}