From 244a476a2e53cbffcccb337daec23ad414f40bf4 Mon Sep 17 00:00:00 2001
From: spupyrev
Date: Wed, 18 Oct 2017 15:18:52 -0700
Subject: [PATCH] using offsets for CG

Summary:
Arc->AvgOffset can be used for function/block ordering to distinguish
between calls from the beginning of a function and calls from the end
of the function. This makes a difference for large functions.

(cherry picked from FBD6094221)
---
 bolt/Passes/BinaryFunctionCallGraph.cpp | 76 ++++++++++++++++---------
 bolt/Passes/BinaryFunctionCallGraph.h   |  4 +-
 bolt/Passes/CallGraph.cpp               | 37 ++++++------
 bolt/Passes/CallGraph.h                 |  2 +-
 bolt/Passes/ReorderFunctions.cpp        |  4 +-
 5 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/bolt/Passes/BinaryFunctionCallGraph.cpp b/bolt/Passes/BinaryFunctionCallGraph.cpp
index cb6dbd6b5471..24dc378e1e4c 100644
--- a/bolt/Passes/BinaryFunctionCallGraph.cpp
+++ b/bolt/Passes/BinaryFunctionCallGraph.cpp
@@ -89,6 +89,13 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
   BinaryFunctionCallGraph Cg;
   static constexpr auto COUNT_NO_PROFILE = BinaryBasicBlock::COUNT_NO_PROFILE;
 
+  // Compute function size
+  auto functionSize = [&](const BinaryFunction *Function) {
+    return UseFunctionHotSize && Function->isSplit()
+               ? Function->estimateHotSize(UseSplitHotSize)
+               : Function->estimateSize();
+  };
+
   // Add call graph nodes.
   auto lookupNode = [&](BinaryFunction *Function) {
     const auto Id = Cg.maybeGetNodeId(Function);
@@ -97,9 +104,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
     // because emitFunctions will emit the hot part first in the order that is
     // computed by ReorderFunctions. The cold part will be emitted with the
     // rest of the cold functions and code.
-    const auto Size = UseFunctionHotSize && Function->isSplit()
-                          ? Function->estimateHotSize(UseSplitHotSize)
-                          : Function->estimateSize();
+    const auto Size = functionSize(Function);
     // NOTE: for functions without a profile, we set the number of samples
     // to zero. This will keep these functions from appearing in the hot
     // section. This is a little weird because we wouldn't be trying to
@@ -125,14 +130,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
   for (auto &It : BFs) {
     auto *Function = &It.second;
 
-    if(Filter(*Function)) {
+    if (Filter(*Function)) {
       continue;
     }
 
     const auto *BranchData = Function->getBranchData();
     const auto SrcId = lookupNode(Function);
-    uint64_t Offset = Function->getAddress();
-    uint64_t LastInstSize = 0;
+    // Offset of the current basic block from the beginning of the function
+    uint64_t Offset = 0;
 
     auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
       if (auto *DstFunc =
@@ -145,11 +150,11 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
         return false;
       }
       const auto DstId = lookupNode(DstFunc);
-      const auto AvgDelta = UseEdgeCounts ? 0 : Offset - DstFunc->getAddress();
       const bool IsValidCount = Count != COUNT_NO_PROFILE;
       const auto AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
-      if (!IsValidCount) ++NoProfileCallsites;
-      Cg.incArcWeight(SrcId, DstId, AdjCount, AvgDelta);
+      if (!IsValidCount)
+        ++NoProfileCallsites;
+      Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
       DEBUG(
         if (opts::Verbosity > 1) {
           dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
@@ -157,6 +162,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
         });
       return true;
     }
+
     return false;
   };
 
@@ -209,8 +215,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
       DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                    << " for " << *Function << "\n");
       ++NumFallbacks;
+      const auto Size = functionSize(Function);
       for (const auto &BI : BranchData->Data) {
-        Offset = Function->getAddress() + BI.From.Offset;
+        Offset = BI.From.Offset;
+        // The computed offset may exceed the hot part of the function; hence,
+        // bound it by the size
+        if (Offset > Size)
+          Offset = Size;
+
         const auto CI = getCallInfoFromBranchData(BI, true);
         if (!CI.first && CI.second == COUNT_NO_PROFILE) // probably a branch
           continue;
@@ -225,30 +237,38 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
       if (BB->isCold() && !IncludeColdCalls)
         continue;
 
+      // Determine whether the block is included in Function's (hot) size
+      // See BinaryFunction::estimateHotSize
+      bool BBIncludedInFunctionSize = false;
+      if (UseFunctionHotSize && Function->isSplit()) {
+        if (UseSplitHotSize)
+          BBIncludedInFunctionSize = !BB->isCold();
+        else
+          BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
+      } else {
+        BBIncludedInFunctionSize = true;
+      }
+
       for (auto &Inst : *BB) {
-        if (!UseEdgeCounts) {
-          Offset += LastInstSize;
-          LastInstSize = BC.computeCodeSize(&Inst, &Inst + 1);
-        }
-
         // Find call instructions and extract target symbols from each one.
-        if (!BC.MIA->isCall(Inst))
-          continue;
+        if (BC.MIA->isCall(Inst)) {
+          const auto CallInfo = getCallInfo(BB, Inst);
 
-        const auto CallInfo = getCallInfo(BB, Inst);
-
-        if (CallInfo.empty()) {
-          ++TotalCallsites;
-          ++NotProcessed;
-          continue;
-        }
-
-        for (const auto &CI : CallInfo) {
-          ++TotalCallsites;
-          if (!recordCall(CI.first, CI.second)) {
+          if (!CallInfo.empty()) {
+            for (const auto &CI : CallInfo) {
+              ++TotalCallsites;
+              if (!recordCall(CI.first, CI.second))
+                ++NotProcessed;
+            }
+          } else {
+            ++TotalCallsites;
             ++NotProcessed;
           }
         }
+
+        // Increase Offset if needed
+        if (BBIncludedInFunctionSize) {
+          Offset += BC.computeCodeSize(&Inst, &Inst + 1);
+        }
       }
     }
   }
diff --git a/bolt/Passes/BinaryFunctionCallGraph.h b/bolt/Passes/BinaryFunctionCallGraph.h
index 513bb0ef5415..0bce5c9de92f 100644
--- a/bolt/Passes/BinaryFunctionCallGraph.h
+++ b/bolt/Passes/BinaryFunctionCallGraph.h
@@ -65,8 +65,8 @@ inline bool NoFilter(const BinaryFunction &) { return false; }
 /// graph, otherwise they are ignored.
 /// UseFunctionHotSize controls whether the hot size of a function is used when
 /// filling in the Size attribute of new Nodes.
-/// UseEdgeCounts is used to control if the AvgCallOffset attribute on Arcs is
-/// computed using the offsets of call instructions.
+/// UseEdgeCounts is used to control if the Weight attribute on Arcs is computed
+/// using the number of calls.
 BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
                                        std::map<uint64_t, BinaryFunction> &BFs,
                                        CgFilterFunction Filter = NoFilter,
diff --git a/bolt/Passes/CallGraph.cpp b/bolt/Passes/CallGraph.cpp
index 70544fe6da45..4533c23d681d 100644
--- a/bolt/Passes/CallGraph.cpp
+++ b/bolt/Passes/CallGraph.cpp
@@ -44,7 +44,7 @@ inline size_t hash_int64(int64_t k) {
   return hash_int64_fallback(k);
 #endif
 }
-  
+
 inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
 #if defined(USE_SSECRC) && defined(__SSE4_2__)
   // crc32 is commutative, so we need to perturb k1 so that (k1, k2) hashes
@@ -56,7 +56,7 @@ inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
   return (hash_int64(k1) << 1) ^ hash_int64(k2);
 #endif
 }
-  
+
 }
 
 namespace llvm {
@@ -79,36 +79,31 @@ CallGraph::NodeId CallGraph::addNode(uint32_t Size, uint64_t Samples) {
 
 const CallGraph::Arc &CallGraph::incArcWeight(NodeId Src, NodeId Dst, double W,
                                               double Offset) {
+  assert(Offset <= size(Src) && "Call offset exceeds function size");
+
   auto Res = Arcs.emplace(Src, Dst, W);
   if (!Res.second) {
     Res.first->Weight += W;
+    Res.first->AvgCallOffset += Offset * W;
     return *Res.first;
   }
-  Res.first->AvgCallOffset += Offset;
+  Res.first->AvgCallOffset = Offset * W;
   Nodes[Src].Succs.push_back(Dst);
   Nodes[Dst].Preds.push_back(Src);
   return *Res.first;
 }
 
-void CallGraph::normalizeArcWeights(bool UseEdgeCounts) {
-  // Normalize arc weights.
-  if (!UseEdgeCounts) {
-    for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
-      auto& Func = getNode(FuncId);
-      for (auto Caller : Func.predecessors()) {
-        auto Arc = findArc(Caller, FuncId);
-        Arc->NormalizedWeight = Arc->weight() / Func.samples();
+void CallGraph::normalizeArcWeights() {
+  // Normalize arc weights
+  for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
+    auto& Func = getNode(FuncId);
+    for (auto Caller : Func.predecessors()) {
+      auto Arc = findArc(Caller, FuncId);
+      Arc->NormalizedWeight = Arc->weight() / Func.samples();
+      if (Arc->weight() > 0)
         Arc->AvgCallOffset /= Arc->weight();
-        assert(Arc->AvgCallOffset < size(Caller));
-      }
-    }
-  } else {
-    for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
-      auto &Func = getNode(FuncId);
-      for (auto Caller : Func.predecessors()) {
-        auto Arc = findArc(Caller, FuncId);
-        Arc->NormalizedWeight = Arc->weight() / Func.samples();
-      }
+      assert(Arc->AvgCallOffset <= size(Caller) &&
+             "Avg call offset exceeds function size");
     }
   }
 }
diff --git a/bolt/Passes/CallGraph.h b/bolt/Passes/CallGraph.h
index 83837e55b67f..c5df85734d2e 100644
--- a/bolt/Passes/CallGraph.h
+++ b/bolt/Passes/CallGraph.h
@@ -153,7 +153,7 @@ public:
     return double(Arcs.size()) / (Nodes.size()*Nodes.size());
   }
 
-  void normalizeArcWeights(bool UseEdgeCounts);
+  void normalizeArcWeights();
 
   template <typename L>
   void printDot(char* fileName, L getLabel) const;
diff --git a/bolt/Passes/ReorderFunctions.cpp b/bolt/Passes/ReorderFunctions.cpp
index bb5e55ca752b..d90e621c7649 100644
--- a/bolt/Passes/ReorderFunctions.cpp
+++ b/bolt/Passes/ReorderFunctions.cpp
@@ -134,7 +134,7 @@ namespace bolt {
 
 using NodeId = CallGraph::NodeId;
 using Arc = CallGraph::Arc;
-using Node = CallGraph::Node; 
+using Node = CallGraph::Node;
 
 void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
                                std::map<uint64_t, BinaryFunction> &BFs) {
@@ -310,7 +310,7 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
                           opts::CgUseSplitHotSize,
                           opts::UseEdgeCounts,
                           opts::CgIgnoreRecursiveCalls);
-    Cg.normalizeArcWeights(opts::UseEdgeCounts);
+    Cg.normalizeArcWeights();
  }
 
   std::vector<Cluster> Clusters;
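
For reference, here is a minimal standalone sketch (not part of the patch; ToyArc and the helper names are illustrative only, not BOLT APIs) of the weighted-average bookkeeping that incArcWeight and normalizeArcWeights implement after this change: every recorded call adds Offset * W to the arc, and the accumulated sum is divided by the total arc weight during normalization.

#include <cassert>
#include <iostream>

// Illustrative stand-in for CallGraph's per-arc state.
struct ToyArc {
  double Weight = 0.0;         // total (weighted) number of calls on this arc
  double AvgCallOffset = 0.0;  // running sum of Offset * W until normalized
};

// Mirrors the accumulation done in incArcWeight: each call site contributes
// its offset scaled by the call count.
void incArcWeight(ToyArc &A, double W, double Offset) {
  A.Weight += W;
  A.AvgCallOffset += Offset * W;
}

// Mirrors normalizeArcWeights: turn the accumulated sum into an average,
// guarding against zero-weight arcs.
void normalize(ToyArc &A) {
  if (A.Weight > 0)
    A.AvgCallOffset /= A.Weight;
}

int main() {
  ToyArc A;
  // 90 calls from offset 0x10 (near the caller's entry) and
  // 10 calls from offset 0x400 (near the end of the caller).
  incArcWeight(A, 90, 0x10);
  incArcWeight(A, 10, 0x400);
  normalize(A);
  // Weighted average: (90*0x10 + 10*0x400) / 100 = 116.8
  std::cout << "AvgCallOffset = " << A.AvgCallOffset << "\n";
  assert(A.AvgCallOffset > 0x10 && A.AvgCallOffset < 0x400);
  return 0;
}

This is why the call offset is now taken relative to the start of the function rather than as an absolute address: the resulting average is comparable against the caller's (hot) size, which the new asserts check.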