Skip to content

Commit

Permalink
[BOLT] Add pseudo probe inline tree to YAML profile
Browse files Browse the repository at this point in the history
To be used for pseudo probe function matching (#100446).

Test Plan: updated pseudoprobe-decoding-inline.test

Pull Request: #107137
  • Loading branch information
aaupov committed Sep 4, 2024
1 parent e0a705e commit 66fe5d5
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 39 deletions.
49 changes: 38 additions & 11 deletions bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,28 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {

namespace bolt {
struct PseudoProbeInfo {
llvm::yaml::Hex64 GUID;
uint64_t Index;
uint32_t InlineTreeIndex;
llvm::yaml::Hex32 Offset{0};
uint8_t Type;

bool operator==(const PseudoProbeInfo &Other) const {
return GUID == Other.GUID && Index == Other.Index;
return InlineTreeIndex == Other.InlineTreeIndex && Index == Other.Index;
}
bool operator!=(const PseudoProbeInfo &Other) const {
return !(*this == Other);
bool operator<(const PseudoProbeInfo &Other) const {
if (InlineTreeIndex == Other.InlineTreeIndex)
return Index < Other.Index;
return InlineTreeIndex < Other.InlineTreeIndex;
}
};
} // end namespace bolt

template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
YamlIO.mapRequired("guid", PI.GUID);
YamlIO.mapRequired("id", PI.Index);
YamlIO.mapRequired("type", PI.Type);
YamlIO.mapOptional("inline_tree_id", PI.InlineTreeIndex, (uint32_t)0);
YamlIO.mapOptional("offset", PI.Offset, (uint32_t)0);
}

static const bool flow = true;
Expand All @@ -122,7 +126,7 @@ template <> struct MappingTraits<bolt::PseudoProbeInfo> {

LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)

namespace llvm {
namespace yaml {
Expand Down Expand Up @@ -163,10 +167,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
}
};

namespace bolt {
/// One node of a function's pseudo probe inline tree, as serialized in the
/// "inline_tree" sequence of a YAML function profile.
///
/// The struct is kept an aggregate and the member order is significant:
/// profile writers brace-initialize it positionally as
/// {Index, ParentIndex, CallSiteProbe, GUID, Hash}.
struct InlineTreeInfo {
  /// Identifier of this node within its function's inline tree (YAML "id").
  uint32_t Index{0};
  /// Identifier of the parent node (YAML "parent"); 0 is also the default
  /// used when the key is absent.
  uint32_t ParentIndex{0};
  /// Call site probe associated with this node (YAML "callsite").
  /// NOTE(review): writers fill this from the second element of the decoded
  /// node's inline site -- confirm the exact meaning against the decoder.
  uint32_t CallSiteProbe{0};
  /// GUID of the function this node refers to (YAML "guid").
  llvm::yaml::Hex64 GUID{0};
  /// Hash stored alongside the GUID (YAML "hash").
  llvm::yaml::Hex64 Hash{0};

  /// Two nodes compare equal when their indices match; the remaining fields
  /// are intentionally not part of the comparison.
  bool operator==(const InlineTreeInfo &Other) const {
    return Index == Other.Index;
  }
};
} // end namespace bolt

/// YAML (de)serialization of a single inline tree node.
///
/// "guid", "hash" and "id" are always present; "parent" and "callsite" are
/// optional with a default of 0, so zero values are omitted on output.
template <> struct MappingTraits<bolt::InlineTreeInfo> {
  static void mapping(IO &YamlIO, bolt::InlineTreeInfo &ITI) {
    YamlIO.mapRequired("guid", ITI.GUID);
    YamlIO.mapRequired("hash", ITI.Hash);
    YamlIO.mapRequired("id", ITI.Index);
    // Spell the defaults with the field's own type, matching the other
    // optional uint32_t keys in this file.
    YamlIO.mapOptional("parent", ITI.ParentIndex, (uint32_t)0);
    YamlIO.mapOptional("callsite", ITI.CallSiteProbe, (uint32_t)0);
  }

  // Emit each node as a single-line flow mapping: { guid: ..., id: ... }.
  static const bool flow = true;
};
} // end namespace yaml
} // end namespace llvm

LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeInfo)

namespace llvm {
namespace yaml {
Expand All @@ -179,8 +208,7 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
llvm::yaml::Hex64 GUID{0};
llvm::yaml::Hex64 PseudoProbeDescHash{0};
std::vector<InlineTreeInfo> InlineTree;
bool Used{false};
};
} // end namespace bolt
Expand All @@ -194,9 +222,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
(uint64_t)0);
YamlIO.mapOptional("inline_tree", BFP.InlineTree,
std::vector<bolt::InlineTreeInfo>());
}
};

Expand Down
53 changes: 46 additions & 7 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <optional>
#include <queue>
#include <unordered_map>
#include <utility>

Expand Down Expand Up @@ -2402,12 +2403,43 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF->getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
InlineTreeNodeId;
if (PseudoProbeDecoder && BF->getGUID()) {
std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
// FIXME: faster inline tree lookup by top-level GUID
if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
[&](const auto &InlineTree) {
return InlineTree.Guid == BF->getGUID();
})) {
ITWorklist.push(InlineTree);
InlineTreeNodeId[InlineTree] = 0;
auto Hash =
PseudoProbeDecoder->getFuncDescForGUID(BF->getGUID())->FuncHash;
YamlBF.InlineTree.emplace_back(
yaml::bolt::InlineTreeInfo{0, 0, 0, BF->getGUID(), Hash});
}
uint32_t ParentId = 0;
uint32_t NodeId = 1;
while (!ITWorklist.empty()) {
const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
for (const MCDecodedPseudoProbeInlineTree &Child :
Cur->getChildren()) {
InlineTreeNodeId[&Child] = NodeId;
auto Hash =
PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
NodeId++, ParentId, std::get<1>(Child.getInlineSite()),
Child.Guid, Hash});
ITWorklist.push(&Child);
}
ITWorklist.pop();
++ParentId;
}
}

if (PseudoProbeDecoder) {
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
Expand All @@ -2420,12 +2452,19 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
const auto [BlockOffset, BlockIndex] = getBlock(InputOffset);
uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
uint32_t Offset = InputOffset - BlockOffset;
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
yaml::bolt::PseudoProbeInfo{Probe.getIndex(), NodeId, Offset,
Probe.getType()});
}
}
for (yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
llvm::sort(YamlBB.PseudoProbes);
YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
YamlBB.PseudoProbes.end());
}
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
Expand Down
59 changes: 49 additions & 10 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <deque>
#include <queue>

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"
Expand Down Expand Up @@ -77,13 +81,6 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF.getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
}
}

BinaryFunction::BasicBlockOrderType Order;
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
Expand All @@ -92,6 +89,40 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const FunctionLayout Layout = BF.getLayout();
Layout.updateLayoutIndices(Order);

DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
// FIXME: faster inline tree lookup by top-level GUID
if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
[&](const auto &InlineTree) {
return InlineTree.Guid == BF.getGUID();
})) {
ITWorklist.push(InlineTree);
InlineTreeNodeId[InlineTree] = 0;
auto Hash =
PseudoProbeDecoder->getFuncDescForGUID(BF.getGUID())->FuncHash;
YamlBF.InlineTree.emplace_back(
yaml::bolt::InlineTreeInfo{0, 0, 0, BF.getGUID(), Hash});
}
uint32_t ParentId = 0;
uint32_t NodeId = 1;
while (!ITWorklist.empty()) {
const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) {
InlineTreeNodeId[&Child] = NodeId;
auto Hash =
PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
NodeId++, ParentId, std::get<1>(Child.getInlineSite()), Child.Guid,
Hash});
ITWorklist.push(&Child);
}
ITWorklist.pop();
++ParentId;
}
}

for (const BinaryBasicBlock *BB : Order) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
YamlBB.Index = BB->getLayoutIndex();
Expand Down Expand Up @@ -198,10 +229,18 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
const std::pair<uint64_t, uint64_t> BlockAddrRange = {
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
for (const MCDecodedPseudoProbe &Probe :
ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second)) {
uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
uint32_t Offset = Probe.getAddress() - BlockAddrRange.first;
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
Probe.getIndex(), NodeId, Offset, Probe.getType()});
}
llvm::sort(YamlBB.PseudoProbes);
YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
YamlBB.PseudoProbes.end());
}

YamlBF.Blocks.emplace_back(YamlBB);
Expand Down
31 changes: 20 additions & 11 deletions bolt/test/X86/pseudoprobe-decoding-inline.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,38 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ]
# CHECK-YAML: guid: 0xE413754A191DB537
# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94
# CHECK-YAML: pseudo_probes:
# CHECK-YAML-NEXT: - { id: 1, type: 0
# CHECK-YAML-NEXT: - { id: 4, type: 0
# CHECK-YAML: inline_tree:
# CHECK-YAML-NEXT: - { guid: 0xE413754A191DB537, hash: 0x10E852DA94, id: 0 }
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC
# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4
# CHECK-YAML: pseudo_probes:
# CHECK-YAML-NEXT: - { id: 1, type: 0 }
# CHECK-YAML-NEXT: - { id: 2, type: 0 }
# CHECK-YAML: inline_tree:
# CHECK-YAML-NEXT: - { guid: 0x5CF8C24CDB18BDAC, hash: 0x200205A19C5B4, id: 0 }
# CHECK-YAML-NEXT: - { guid: 0xE413754A191DB537, hash: 0x10E852DA94, id: 1, callsite: 8 }
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
# CHECK-YAML: guid: 0xDB956436E78DD5FA
# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
# CHECK-YAML: pseudo_probes:
# CHECK-YAML-NEXT: - { id: 1, type: 0 }
# CHECK-YAML-NEXT: - { id: 1, type: 0, inline_tree_id: 1 }
# CHECK-YAML-NEXT: - { id: 2, type: 0, inline_tree_id: 1 }
# CHECK-YAML: inline_tree:
# CHECK-YAML-NEXT: - { guid: 0xDB956436E78DD5FA, hash: 0x10000FFFFFFFF, id: 0 }
# CHECK-YAML-NEXT: - { guid: 0x5CF8C24CDB18BDAC, hash: 0x200205A19C5B4, id: 1, callsite: 2 }
# CHECK-YAML-NEXT: - { guid: 0xE413754A191DB537, hash: 0x10E852DA94, id: 2, parent: 1, callsite: 8 }
#
## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
# CHECK-NO-OPT-NOT: pseudo_probes
# CHECK-NO-OPT-NOT: guid
# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash
# CHECK-NO-OPT-NOT: inline_tree

CHECK: Report of decoding input pseudo probe binaries

Expand Down

0 comments on commit 66fe5d5

Please sign in to comment.