using offsets for CG

Summary: Arc->AvgOffset can be used for function/block ordering to distinguish between calls from the beginning of a function and calls from the end of the function. This makes a difference for large functions.

(cherry picked from FBD6094221)
This commit is contained in:
spupyrev
2017-10-18 15:18:52 -07:00
committed by Maksim Panchenko
parent 61e5fbf8c3
commit 244a476a2e
5 changed files with 69 additions and 54 deletions

View File

@@ -89,6 +89,13 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
BinaryFunctionCallGraph Cg;
static constexpr auto COUNT_NO_PROFILE = BinaryBasicBlock::COUNT_NO_PROFILE;
// Compute function size
auto functionSize = [&](const BinaryFunction *Function) {
return UseFunctionHotSize && Function->isSplit()
? Function->estimateHotSize(UseSplitHotSize)
: Function->estimateSize();
};
// Add call graph nodes.
auto lookupNode = [&](BinaryFunction *Function) {
const auto Id = Cg.maybeGetNodeId(Function);
@@ -97,9 +104,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
// because emitFunctions will emit the hot part first in the order that is
// computed by ReorderFunctions. The cold part will be emitted with the
// rest of the cold functions and code.
const auto Size = UseFunctionHotSize && Function->isSplit()
? Function->estimateHotSize(UseSplitHotSize)
: Function->estimateSize();
const auto Size = functionSize(Function);
// NOTE: for functions without a profile, we set the number of samples
// to zero. This will keep these functions from appearing in the hot
// section. This is a little weird because we wouldn't be trying to
@@ -125,14 +130,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
for (auto &It : BFs) {
auto *Function = &It.second;
if(Filter(*Function)) {
if (Filter(*Function)) {
continue;
}
const auto *BranchData = Function->getBranchData();
const auto SrcId = lookupNode(Function);
uint64_t Offset = Function->getAddress();
uint64_t LastInstSize = 0;
// Offset of the current basic block from the beginning of the function
uint64_t Offset = 0;
auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
if (auto *DstFunc =
@@ -145,11 +150,11 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
return false;
}
const auto DstId = lookupNode(DstFunc);
const auto AvgDelta = UseEdgeCounts ? 0 : Offset - DstFunc->getAddress();
const bool IsValidCount = Count != COUNT_NO_PROFILE;
const auto AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
if (!IsValidCount) ++NoProfileCallsites;
Cg.incArcWeight(SrcId, DstId, AdjCount, AvgDelta);
if (!IsValidCount)
++NoProfileCallsites;
Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
DEBUG(
if (opts::Verbosity > 1) {
dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
@@ -157,6 +162,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
});
return true;
}
return false;
};
@@ -209,8 +215,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
<< " for " << *Function << "\n");
++NumFallbacks;
const auto Size = functionSize(Function);
for (const auto &BI : BranchData->Data) {
Offset = Function->getAddress() + BI.From.Offset;
Offset = BI.From.Offset;
      // The computed offset may exceed the hot part of the function; hence,
      // bound it by the size
if (Offset > Size)
Offset = Size;
const auto CI = getCallInfoFromBranchData(BI, true);
if (!CI.first && CI.second == COUNT_NO_PROFILE) // probably a branch
continue;
@@ -225,30 +237,38 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
if (BB->isCold() && !IncludeColdCalls)
continue;
// Determine whether the block is included in Function's (hot) size
// See BinaryFunction::estimateHotSize
bool BBIncludedInFunctionSize = false;
if (UseFunctionHotSize && Function->isSplit()) {
if (UseSplitHotSize)
BBIncludedInFunctionSize = !BB->isCold();
else
BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
} else {
BBIncludedInFunctionSize = true;
}
for (auto &Inst : *BB) {
if (!UseEdgeCounts) {
Offset += LastInstSize;
LastInstSize = BC.computeCodeSize(&Inst, &Inst + 1);
}
// Find call instructions and extract target symbols from each one.
if (!BC.MIA->isCall(Inst))
continue;
if (BC.MIA->isCall(Inst)) {
const auto CallInfo = getCallInfo(BB, Inst);
const auto CallInfo = getCallInfo(BB, Inst);
if (CallInfo.empty()) {
++TotalCallsites;
++NotProcessed;
continue;
}
for (const auto &CI : CallInfo) {
++TotalCallsites;
if (!recordCall(CI.first, CI.second)) {
if (!CallInfo.empty()) {
for (const auto &CI : CallInfo) {
++TotalCallsites;
if (!recordCall(CI.first, CI.second))
++NotProcessed;
}
} else {
++TotalCallsites;
++NotProcessed;
}
}
// Increase Offset if needed
if (BBIncludedInFunctionSize) {
Offset += BC.computeCodeSize(&Inst, &Inst + 1);
}
}
}
}

View File

@@ -65,8 +65,8 @@ inline bool NoFilter(const BinaryFunction &) { return false; }
/// graph, otherwise they are ignored.
/// UseFunctionHotSize controls whether the hot size of a function is used when
/// filling in the Size attribute of new Nodes.
/// UseEdgeCounts is used to control if the AvgCallOffset attribute on Arcs is
/// computed using the offsets of call instructions.
/// UseEdgeCounts is used to control if the Weight attribute on Arcs is computed
/// using the number of calls.
BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
CgFilterFunction Filter = NoFilter,

View File

@@ -44,7 +44,7 @@ inline size_t hash_int64(int64_t k) {
return hash_int64_fallback(k);
#endif
}
inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
#if defined(USE_SSECRC) && defined(__SSE4_2__)
// crc32 is commutative, so we need to perturb k1 so that (k1, k2) hashes
@@ -56,7 +56,7 @@ inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
return (hash_int64(k1) << 1) ^ hash_int64(k2);
#endif
}
}
namespace llvm {
@@ -79,36 +79,31 @@ CallGraph::NodeId CallGraph::addNode(uint32_t Size, uint64_t Samples) {
const CallGraph::Arc &CallGraph::incArcWeight(NodeId Src, NodeId Dst, double W,
double Offset) {
assert(Offset <= size(Src) && "Call offset exceeds function size");
auto Res = Arcs.emplace(Src, Dst, W);
if (!Res.second) {
Res.first->Weight += W;
Res.first->AvgCallOffset += Offset * W;
return *Res.first;
}
Res.first->AvgCallOffset += Offset;
Res.first->AvgCallOffset = Offset * W;
Nodes[Src].Succs.push_back(Dst);
Nodes[Dst].Preds.push_back(Src);
return *Res.first;
}
void CallGraph::normalizeArcWeights(bool UseEdgeCounts) {
// Normalize arc weights.
if (!UseEdgeCounts) {
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
auto& Func = getNode(FuncId);
for (auto Caller : Func.predecessors()) {
auto Arc = findArc(Caller, FuncId);
Arc->NormalizedWeight = Arc->weight() / Func.samples();
void CallGraph::normalizeArcWeights() {
// Normalize arc weights
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
auto& Func = getNode(FuncId);
for (auto Caller : Func.predecessors()) {
auto Arc = findArc(Caller, FuncId);
Arc->NormalizedWeight = Arc->weight() / Func.samples();
if (Arc->weight() > 0)
Arc->AvgCallOffset /= Arc->weight();
assert(Arc->AvgCallOffset < size(Caller));
}
}
} else {
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
auto &Func = getNode(FuncId);
for (auto Caller : Func.predecessors()) {
auto Arc = findArc(Caller, FuncId);
Arc->NormalizedWeight = Arc->weight() / Func.samples();
}
assert(Arc->AvgCallOffset <= size(Caller) &&
"Avg call offset exceeds function size");
}
}
}

View File

@@ -153,7 +153,7 @@ public:
return double(Arcs.size()) / (Nodes.size()*Nodes.size());
}
void normalizeArcWeights(bool UseEdgeCounts);
void normalizeArcWeights();
template <typename L>
void printDot(char* fileName, L getLabel) const;

View File

@@ -134,7 +134,7 @@ namespace bolt {
using NodeId = CallGraph::NodeId;
using Arc = CallGraph::Arc;
using Node = CallGraph::Node;
using Node = CallGraph::Node;
void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
std::map<uint64_t, BinaryFunction> &BFs) {
@@ -310,7 +310,7 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
opts::CgUseSplitHotSize,
opts::UseEdgeCounts,
opts::CgIgnoreRecursiveCalls);
Cg.normalizeArcWeights(opts::UseEdgeCounts);
Cg.normalizeArcWeights();
}
std::vector<Cluster> Clusters;