Skip to content

Commit

Permalink
Avoid allocations in ParentageID
Browse files Browse the repository at this point in the history
We were cycling memory each time a module put a data product into
the event. This now has the ParentageID hold the 16 bytes directly
instead of indirectly via a std::string.
  • Loading branch information
Dr15Jones committed Nov 18, 2024
1 parent 6c29499 commit 159a737
Show file tree
Hide file tree
Showing 6 changed files with 287 additions and 5 deletions.
163 changes: 163 additions & 0 deletions DataFormats/Provenance/interface/CompactHash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#ifndef DataFormats_Provenance_CompactHash_h
#define DataFormats_Provenance_CompactHash_h

#include <string_view>
#include <array>
#include <functional>

namespace cms {
class Digest;
}

namespace edm {

namespace detail {
// This string is the 16-byte, non-printable version.
std::array<unsigned char, 16> const& InvalidCompactHash();
} // namespace detail

namespace compact_hash_detail {
using value_type = std::array<unsigned char, 16>;
void toString_(std::string& result, value_type const& hash);
void toDigest_(cms::Digest& digest, value_type const& hash);
std::ostream& print_(std::ostream& os, value_type const& hash);
bool isValid_(value_type const& hash);
size_t smallHash_(value_type const& hash);
value_type fromHex_(std::string_view);
void throwIfIllFormed(std::string_view v);
} // namespace compact_hash_detail

template <int I>
class CompactHash {
public:
typedef compact_hash_detail::value_type value_type;

CompactHash();
explicit CompactHash(value_type const& v);
explicit CompactHash(std::string_view v);

CompactHash(CompactHash<I> const&) = default;
CompactHash<I>& operator=(CompactHash<I> const& iRHS) = default;

CompactHash(CompactHash<I>&&) = default;
CompactHash<I>& operator=(CompactHash<I>&&) = default;

void reset();

// For now, just check the most basic: a default constructed
// ParameterSetID is not valid. This is very crude: we are
// assuming that nobody created a ParameterSetID from an empty
// string, nor from any string that is not a valid string
// representation of an MD5 checksum.
bool isValid() const;

bool operator<(CompactHash<I> const& other) const;
bool operator>(CompactHash<I> const& other) const;
bool operator==(CompactHash<I> const& other) const;
bool operator!=(CompactHash<I> const& other) const;
std::ostream& print(std::ostream& os) const;
void toString(std::string& result) const;
void toDigest(cms::Digest& digest) const;

// Return the 16-byte (non-printable) string form.
value_type const& compactForm() const;

///returns a short hash which can be used with hashing containers
size_t smallHash() const;

//Used by ROOT storage
// CMS_CLASS_VERSION(3) // This macro is not defined here, so expand it.
static short Class_Version() { return 3; }

private:
template <typename Op>
bool compareUsing(CompactHash<I> const& iOther, Op op) const {
return op(this->hash_, iOther.hash_);
}

value_type hash_;
};

//--------------------------------------------------------------------
//
// Implementation details follow...
//--------------------------------------------------------------------

template <int I>
inline CompactHash<I>::CompactHash() : hash_(detail::InvalidCompactHash()) {}

template <int I>
inline CompactHash<I>::CompactHash(value_type const& v) : hash_(v) {}

template <int I>
inline CompactHash<I>::CompactHash(std::string_view v) {
if (v.size() == 32) {
hash_ = compact_hash_detail::fromHex_(v);
} else {
compact_hash_detail::throwIfIllFormed(v);
std::copy(v.begin(), v.end(), hash_.begin());
}
}

template <int I>
inline void CompactHash<I>::reset() {
hash_ = detail::InvalidCompactHash();
}

template <int I>
inline bool CompactHash<I>::isValid() const {
return compact_hash_detail::isValid_(hash_);
}

template <int I>
inline bool CompactHash<I>::operator<(CompactHash<I> const& other) const {
return this->compareUsing(other, std::less<value_type>());
}

template <int I>
inline bool CompactHash<I>::operator>(CompactHash<I> const& other) const {
return this->compareUsing(other, std::greater<value_type>());
}

template <int I>
inline bool CompactHash<I>::operator==(CompactHash<I> const& other) const {
return this->compareUsing(other, std::equal_to<value_type>());
}

template <int I>
inline bool CompactHash<I>::operator!=(CompactHash<I> const& other) const {
return this->compareUsing(other, std::not_equal_to<value_type>());
}

template <int I>
inline std::ostream& CompactHash<I>::print(std::ostream& os) const {
return compact_hash_detail::print_(os, hash_);
}

template <int I>
inline void CompactHash<I>::toString(std::string& result) const {
compact_hash_detail::toString_(result, hash_);
}

template <int I>
inline void CompactHash<I>::toDigest(cms::Digest& digest) const {
compact_hash_detail::toDigest_(digest, hash_);
}

template <int I>
inline typename CompactHash<I>::value_type const& CompactHash<I>::compactForm() const {
return hash_;
}

template <int I>
inline size_t CompactHash<I>::smallHash() const {
return compact_hash_detail::smallHash_(hash_);
}

template <int I>
inline std::ostream& operator<<(std::ostream& os, CompactHash<I> const& h) {
return h.print(os);
}

} // namespace edm
#endif
7 changes: 5 additions & 2 deletions DataFormats/Provenance/interface/ParentageID.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
#define DataFormats_Provenance_ParentageID_h

#include "DataFormats/Provenance/interface/HashedTypes.h"
#include "DataFormats/Provenance/interface/CompactHash.h"
#include "DataFormats/Provenance/interface/Hash.h"

namespace edm {
typedef Hash<ParentageType> ParentageID;
}
using ParentageID = CompactHash<ParentageType>;

using StoredParentageID = Hash<ParentageType>;
} // namespace edm

#endif
71 changes: 71 additions & 0 deletions DataFormats/Provenance/src/CompactHash.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "DataFormats/Provenance/interface/CompactHash.h"
#include "FWCore/Utilities/interface/Algorithms.h"
#include "FWCore/Utilities/interface/Digest.h"
#include "FWCore/Utilities/interface/EDMException.h"

#include <functional>
#include <cassert>

namespace {
std::array<unsigned char, 16> convert(std::string const& v) {
assert(v.size() == 16);
std::array<unsigned char, 16> retValue;
std::copy(v.begin(), v.end(), retValue.begin());
return retValue;
}
} // namespace
namespace edm {
namespace detail {
// This string is the 16-byte, non-printable version.
std::array<unsigned char, 16> const& InvalidCompactHash() {
static std::array<unsigned char, 16> const invalid = convert(cms::MD5Result().compactForm());
return invalid;
}
} // namespace detail

namespace compact_hash_detail {
size_t smallHash_(value_type const& hash) {
//NOTE: In future we could try to xor the first 8bytes into the second 8bytes of the string to make the hash
std::hash<std::string_view> h;
return h(std::string_view(reinterpret_cast<const char*>(hash.data()), hash.size()));
}

std::array<unsigned char, 16> fromHex_(std::string_view v) {
cms::MD5Result temp;
temp.fromHexifiedString(std::string(v));
auto hash = temp.compactForm();
std::array<unsigned char, 16> ret;
std::copy(hash.begin(), hash.end(), ret.begin());
return ret;
}

bool isValid_(value_type const& hash) { return hash != detail::InvalidCompactHash(); }

void throwIfIllFormed(std::string_view v) {
// Fixup not needed here.
if (v.size() != 16) {
throw Exception(errors::LogicError) << "Ill-formed CompactHash instance. "
<< "A string_view of size " << v.size() << " passed to constructor.";
}
}

void toString_(std::string& result, value_type const& hash) {
cms::MD5Result temp;
copy_all(hash, temp.bytes.begin());
result += temp.toString();
}

void toDigest_(cms::Digest& digest, value_type const& hash) {
cms::MD5Result temp;
copy_all(hash, temp.bytes.begin());
digest.append(temp.toString());
}

std::ostream& print_(std::ostream& os, value_type const& hash) {
cms::MD5Result temp;
copy_all(hash, temp.bytes.begin());
os << temp.toString();
return os;
}
} // namespace compact_hash_detail
} // namespace edm
9 changes: 8 additions & 1 deletion DataFormats/Provenance/src/classes_def.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
<class name="edm::BranchChildren" ClassVersion="10">
<version ClassVersion="10" checksum="137742168"/>
</class>
<class name="edm::ProductProvenance" ClassVersion="11">
<class name="edm::ProductProvenance" ClassVersion="12">
<version ClassVersion="12" checksum="737844716"/>
<version ClassVersion="11" checksum="3594205707"/>
<version ClassVersion="10" checksum="3685381930"/>
</class>
Expand Down Expand Up @@ -162,6 +163,7 @@
<class name="std::pair<const edm::ParameterSetID,edm::ParameterSetBlob>"/>
<class name="edm::ProcessConfigurationID"/>
<class name="std::vector<edm::ProcessConfigurationID>"/>
<class name="edm::StoredParentageID"/>
<class name="edm::ParentageID"/>
<class name="edm::Parentage" ClassVersion="11">
<version ClassVersion="11" checksum="3091321815"/>
Expand Down Expand Up @@ -259,5 +261,10 @@
newObj->initializeTransients();
]]>
</ioread>
<ioread sourceClass="edm::Hash<5>" targetClass="edm::CompactHash<5>" version="[1-]" source="std::string hash_" target="hash_">
<![CDATA[
*newObj = edm::CompactHash<5>(onfile.hash_);
]]>
</ioread>

</lcgdict>
7 changes: 5 additions & 2 deletions DataFormats/Provenance/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
<bin name="testDataFormatsProvenance" file="testRunner.cpp,eventid_t.cppunit.cc,timestamp_t.cppunit.cc,parametersetid_t.cppunit.cc,indexIntoFile_t.cppunit.cc,indexIntoFile1_t.cppunit.cc,indexIntoFile2_t.cppunit.cc,indexIntoFile3_t.cppunit.cc,indexIntoFile4_t.cppunit.cc,indexIntoFile5_t.cppunit.cc,lumirange_t.cppunit.cc,eventrange_t.cppunit.cc,compactEventAuxiliaryVector_t.cppunit.cc">
<use name="cppunit"/>
<use name="DataFormats/Provenance"/>
<bin name="testDataFormatsProvenance" file="testRunner.cpp,eventid_t.cppunit.cc,timestamp_t.cppunit.cc,parametersetid_t.cppunit.cc,indexIntoFile_t.cppunit.cc,indexIntoFile1_t.cppunit.cc,indexIntoFile2_t.cppunit.cc,indexIntoFile3_t.cppunit.cc,indexIntoFile4_t.cppunit.cc,indexIntoFile5_t.cppunit.cc,lumirange_t.cppunit.cc,eventrange_t.cppunit.cc,compactEventAuxiliaryVector_t.cppunit.cc">
</bin>

<bin name="testDataFormatsProvenanceCatch2" file="ElementID_t.cpp,Parentage_t.cpp,StoredProcessBlockHelper_t.cpp">
<bin name="testDataFormatsProvenanceCatch2" file="ElementID_t.cpp,Parentage_t.cpp,StoredProcessBlockHelper_t.cpp,CompactHash_t.cpp">
<use name="DataFormats/Provenance"/>
<use name="catch2"/>
</bin>

<bin name="testProductResolverIndexHelper" file="testRunner.cpp productResolverIndexHelper_t.cppunit.cc">
<use name="DataFormats/TestObjects"/>
<use name="cppunit"/>
<use name="DataFormats/Provenance"/>
</bin>
35 changes: 35 additions & 0 deletions DataFormats/Provenance/test/CompactHash_t.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "catch.hpp"

#include "DataFormats/Provenance/interface/CompactHash.h"
#include "FWCore/Utilities/interface/Digest.h"

namespace {
using TestHash = edm::CompactHash<100>;
}

TEST_CASE("CompactHash", "[CompactHash]") {
SECTION("Default construction is invalid") { REQUIRE(TestHash{}.isValid() == false); }

SECTION("Basic operations") {
cms::Digest d("foo");
auto result = d.digest().bytes;

TestHash id{result};
REQUIRE(id.isValid() == true);
REQUIRE(id.compactForm() == result);

TestHash id2 = id;
REQUIRE(id2.isValid() == true);
REQUIRE(id2.compactForm() == result);

cms::Digest b("bar");
auto diffResult = b.digest().bytes;
REQUIRE(id2 == TestHash{result});
REQUIRE(id2 != TestHash{diffResult});

REQUIRE(id2 > TestHash{diffResult});
REQUIRE(TestHash{diffResult} < id2);

REQUIRE(not(id2 < id2));
}
}

0 comments on commit 159a737

Please sign in to comment.