Mirror of https://github.com/intel/llvm.git
using offsets for CG
Summary: Arc->AvgOffset can be used for function/block ordering to distinguish between calls from the beginning of a function and calls from its end. This makes a difference for large functions.

(cherry picked from FBD6094221)
Committed by: Maksim Panchenko
Parent: 61e5fbf8c3
Commit: 244a476a2e
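
In short, each call-graph arc now carries the average byte offset of its call sites from the start of the caller, so the ordering passes can tell a function that calls its target from its first few instructions apart from one that calls from near its end. The sketch below is a minimal, self-contained illustration of the bookkeeping this patch spreads across incArcWeight() and normalizeArcWeights(): accumulate Offset * Weight while arcs are built, then divide by the total weight at normalization time. ToyArc, addCall(), and finalize() are illustrative stand-ins, not BOLT's actual API.

#include <iostream>

// Stand-in for CallGraph::Arc: Weight is the total call count on the arc;
// AvgCallOffset holds sum(Offset * W) during construction and the average
// call offset after finalize().
struct ToyArc {
  double Weight = 0.0;
  double AvgCallOffset = 0.0;
};

// Mirrors the incArcWeight() change: weight each call-site offset by its count.
void addCall(ToyArc &A, double W, double Offset) {
  A.Weight += W;
  A.AvgCallOffset += Offset * W;
}

// Mirrors the normalizeArcWeights() change: divide by the total weight,
// guarding against arcs that never accumulated any weight.
void finalize(ToyArc &A) {
  if (A.Weight > 0)
    A.AvgCallOffset /= A.Weight;
}

int main() {
  ToyArc A;
  addCall(A, 90, 0x10);   // 90 calls near the caller's entry
  addCall(A, 10, 0x400);  // 10 calls near the caller's end
  finalize(A);
  std::cout << A.AvgCallOffset << "\n";  // (90*0x10 + 10*0x400) / 100 = 116.8
  return 0;
}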
@@ -89,6 +89,13 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
   BinaryFunctionCallGraph Cg;
   static constexpr auto COUNT_NO_PROFILE = BinaryBasicBlock::COUNT_NO_PROFILE;
 
+  // Compute function size
+  auto functionSize = [&](const BinaryFunction *Function) {
+    return UseFunctionHotSize && Function->isSplit()
+               ? Function->estimateHotSize(UseSplitHotSize)
+               : Function->estimateSize();
+  };
+
   // Add call graph nodes.
   auto lookupNode = [&](BinaryFunction *Function) {
     const auto Id = Cg.maybeGetNodeId(Function);
@@ -97,9 +104,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
     // because emitFunctions will emit the hot part first in the order that is
     // computed by ReorderFunctions. The cold part will be emitted with the
    // rest of the cold functions and code.
-    const auto Size = UseFunctionHotSize && Function->isSplit()
-                          ? Function->estimateHotSize(UseSplitHotSize)
-                          : Function->estimateSize();
+    const auto Size = functionSize(Function);
     // NOTE: for functions without a profile, we set the number of samples
     // to zero. This will keep these functions from appearing in the hot
     // section. This is a little weird because we wouldn't be trying to
@@ -125,14 +130,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
   for (auto &It : BFs) {
     auto *Function = &It.second;
 
-    if(Filter(*Function)) {
+    if (Filter(*Function)) {
       continue;
     }
 
     const auto *BranchData = Function->getBranchData();
     const auto SrcId = lookupNode(Function);
-    uint64_t Offset = Function->getAddress();
-    uint64_t LastInstSize = 0;
+    // Offset of the current basic block from the beginning of the function
+    uint64_t Offset = 0;
 
     auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
       if (auto *DstFunc =
@@ -145,11 +150,11 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
           return false;
         }
         const auto DstId = lookupNode(DstFunc);
-        const auto AvgDelta = UseEdgeCounts ? 0 : Offset - DstFunc->getAddress();
        const bool IsValidCount = Count != COUNT_NO_PROFILE;
         const auto AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
-        if (!IsValidCount) ++NoProfileCallsites;
-        Cg.incArcWeight(SrcId, DstId, AdjCount, AvgDelta);
+        if (!IsValidCount)
+          ++NoProfileCallsites;
+        Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
         DEBUG(
           if (opts::Verbosity > 1) {
             dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
@@ -157,6 +162,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
           });
         return true;
       }
+
       return false;
     };
 
@@ -209,8 +215,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
       DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                    << " for " << *Function << "\n");
       ++NumFallbacks;
+      const auto Size = functionSize(Function);
       for (const auto &BI : BranchData->Data) {
-        Offset = Function->getAddress() + BI.From.Offset;
+        Offset = BI.From.Offset;
+        // The computed offset may exceed the hot part of the function; hence,
+        // bound it by the size
+        if (Offset > Size)
+          Offset = Size;
+
         const auto CI = getCallInfoFromBranchData(BI, true);
         if (!CI.first && CI.second == COUNT_NO_PROFILE) // probably a branch
           continue;
@@ -225,30 +237,38 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
       if (BB->isCold() && !IncludeColdCalls)
         continue;
 
+      // Determine whether the block is included in Function's (hot) size
+      // See BinaryFunction::estimateHotSize
+      bool BBIncludedInFunctionSize = false;
+      if (UseFunctionHotSize && Function->isSplit()) {
+        if (UseSplitHotSize)
+          BBIncludedInFunctionSize = !BB->isCold();
+        else
+          BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
+      } else {
+        BBIncludedInFunctionSize = true;
+      }
+
       for (auto &Inst : *BB) {
-        if (!UseEdgeCounts) {
-          Offset += LastInstSize;
-          LastInstSize = BC.computeCodeSize(&Inst, &Inst + 1);
-        }
-
         // Find call instructions and extract target symbols from each one.
-        if (!BC.MIA->isCall(Inst))
-          continue;
-
-        const auto CallInfo = getCallInfo(BB, Inst);
-
-        if (CallInfo.empty()) {
-          ++TotalCallsites;
-          ++NotProcessed;
-          continue;
-        }
-
-        for (const auto &CI : CallInfo) {
-          ++TotalCallsites;
-          if (!recordCall(CI.first, CI.second)) {
+        if (BC.MIA->isCall(Inst)) {
+          const auto CallInfo = getCallInfo(BB, Inst);
+
+          if (!CallInfo.empty()) {
+            for (const auto &CI : CallInfo) {
+              ++TotalCallsites;
+              if (!recordCall(CI.first, CI.second))
                 ++NotProcessed;
             }
-        }
+          } else {
+            ++TotalCallsites;
+            ++NotProcessed;
+          }
+        }
+        // Increase Offset if needed
+        if (BBIncludedInFunctionSize) {
+          Offset += BC.computeCodeSize(&Inst, &Inst + 1);
+        }
       }
     }
   }
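
This hunk changes how the call-site offset is derived: instead of tracking absolute addresses, buildCallGraph() now sums the encoded sizes of the instructions that precede each call, and it only advances the running offset through blocks that count toward the caller's (hot) size, so the value stays within the Size recorded for the caller node (see the assertion added to incArcWeight() further down). Below is a rough sketch of that accumulation, using a flat list of per-instruction records in place of BOLT's MCInst and BinaryBasicBlock types; the Instr struct and callOffsets() helper are illustrative only.

#include <cstdint>
#include <iostream>
#include <vector>

struct Instr {
  uint64_t Size;     // encoded size in bytes (BC.computeCodeSize in BOLT)
  bool IsCall;       // what BC.MIA->isCall(Inst) reports in BOLT
  bool InHotLayout;  // BBIncludedInFunctionSize for the enclosing block
};

// Returns the byte offset, from the start of the caller's (hot) layout,
// of every call instruction in Body.
std::vector<uint64_t> callOffsets(const std::vector<Instr> &Body) {
  std::vector<uint64_t> Offsets;
  uint64_t Offset = 0;  // offset of the current instruction
  for (const Instr &I : Body) {
    if (I.IsCall)
      Offsets.push_back(Offset);
    // Advance only through code that contributes to the caller's size,
    // mirroring the BBIncludedInFunctionSize check above.
    if (I.InHotLayout)
      Offset += I.Size;
  }
  return Offsets;
}

int main() {
  std::vector<Instr> Body = {{4, false, true},
                             {5, true, true},
                             {3, false, true},
                             {5, true, true}};
  for (uint64_t O : callOffsets(Body))
    std::cout << "call at offset " << O << "\n";  // prints 4 and 12
  return 0;
}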
@@ -65,8 +65,8 @@ inline bool NoFilter(const BinaryFunction &) { return false; }
 /// graph, otherwise they are ignored.
 /// UseFunctionHotSize controls whether the hot size of a function is used when
 /// filling in the Size attribute of new Nodes.
-/// UseEdgeCounts is used to control if the AvgCallOffset attribute on Arcs is
-/// computed using the offsets of call instructions.
+/// UseEdgeCounts is used to control if the Weight attribute on Arcs is computed
+/// using the number of calls.
 BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
                                        std::map<uint64_t, BinaryFunction> &BFs,
                                        CgFilterFunction Filter = NoFilter,
@@ -44,7 +44,7 @@ inline size_t hash_int64(int64_t k) {
   return hash_int64_fallback(k);
 #endif
 }
-
+
 inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
 #if defined(USE_SSECRC) && defined(__SSE4_2__)
   // crc32 is commutative, so we need to perturb k1 so that (k1, k2) hashes
@@ -56,7 +56,7 @@ inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
   return (hash_int64(k1) << 1) ^ hash_int64(k2);
 #endif
 }
-
+
 }
 
 namespace llvm {
@@ -79,36 +79,31 @@ CallGraph::NodeId CallGraph::addNode(uint32_t Size, uint64_t Samples) {
 
 const CallGraph::Arc &CallGraph::incArcWeight(NodeId Src, NodeId Dst, double W,
                                               double Offset) {
+  assert(Offset <= size(Src) && "Call offset exceeds function size");
+
   auto Res = Arcs.emplace(Src, Dst, W);
   if (!Res.second) {
     Res.first->Weight += W;
+    Res.first->AvgCallOffset += Offset * W;
     return *Res.first;
   }
-  Res.first->AvgCallOffset += Offset;
+  Res.first->AvgCallOffset = Offset * W;
   Nodes[Src].Succs.push_back(Dst);
   Nodes[Dst].Preds.push_back(Src);
   return *Res.first;
 }
 
-void CallGraph::normalizeArcWeights(bool UseEdgeCounts) {
-  // Normalize arc weights.
-  if (!UseEdgeCounts) {
-    for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
-      auto& Func = getNode(FuncId);
-      for (auto Caller : Func.predecessors()) {
-        auto Arc = findArc(Caller, FuncId);
-        Arc->NormalizedWeight = Arc->weight() / Func.samples();
-        Arc->AvgCallOffset /= Arc->weight();
-        assert(Arc->AvgCallOffset < size(Caller));
-      }
-    }
-  } else {
-    for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
-      auto &Func = getNode(FuncId);
-      for (auto Caller : Func.predecessors()) {
-        auto Arc = findArc(Caller, FuncId);
-        Arc->NormalizedWeight = Arc->weight() / Func.samples();
-      }
+void CallGraph::normalizeArcWeights() {
+  // Normalize arc weights
+  for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
+    auto& Func = getNode(FuncId);
+    for (auto Caller : Func.predecessors()) {
+      auto Arc = findArc(Caller, FuncId);
+      Arc->NormalizedWeight = Arc->weight() / Func.samples();
+      if (Arc->weight() > 0)
+        Arc->AvgCallOffset /= Arc->weight();
+      assert(Arc->AvgCallOffset <= size(Caller) &&
+             "Avg call offset exceeds function size");
     }
   }
 }
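
The two assertions above work as a pair: incArcWeight() checks every individual call offset against the caller's size, and because AvgCallOffset ends up as a weighted average of those offsets, the same bound carries over to the value normalizeArcWeights() asserts on. With weights W_i >= 0 (total > 0) and each Offset_i <= size(Caller):

  AvgCallOffset = (sum_i W_i * Offset_i) / (sum_i W_i)
               <= (sum_i W_i * size(Caller)) / (sum_i W_i)
                = size(Caller)

The if (Arc->weight() > 0) guard only prevents a division by zero for arcs that never accumulated a positive weight.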
@@ -153,7 +153,7 @@ public:
     return double(Arcs.size()) / (Nodes.size()*Nodes.size());
   }
 
-  void normalizeArcWeights(bool UseEdgeCounts);
+  void normalizeArcWeights();
 
   template <typename L>
   void printDot(char* fileName, L getLabel) const;
@@ -134,7 +134,7 @@ namespace bolt {
 
 using NodeId = CallGraph::NodeId;
 using Arc = CallGraph::Arc;
-using Node = CallGraph::Node;
+using Node = CallGraph::Node;
 
 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
                                std::map<uint64_t, BinaryFunction> &BFs) {
@@ -310,7 +310,7 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
                                  opts::CgUseSplitHotSize,
                                  opts::UseEdgeCounts,
                                  opts::CgIgnoreRecursiveCalls);
-    Cg.normalizeArcWeights(opts::UseEdgeCounts);
+    Cg.normalizeArcWeights();
   }
 
   std::vector<Cluster> Clusters;