Files
llvm/bolt/src/YAMLProfileReader.cpp
Maksim Panchenko 8729171182 [BOLT] Refactor profile-handling code
Summary:
This diff handles several issues related to profile reading and
handling:
  * Unifies interface used by 3 profile readers in ProfileReaderBase.
  * Adds automatic detection of the profile file contents.
  * Removes reader-specific fields from BinaryFunction and BinaryData.
    All the information is stored in instruction annotations.
  * Removes implicit memory dependencies in annotations on profile
    reader instance.
  * Adds lite mode support to YAML reader.
  * Moves profile reading code out of BinaryFunction.

(cherry picked from FBD21601411)
2020-05-07 23:00:29 -07:00

416 lines
13 KiB
C++

//===-- YAMLProfileReader.cpp - BOLT YAML profile de-serializer -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "Passes/MCF.h"
#include "YAMLProfileReader.h"
#include "ProfileYAMLMapping.h"
#include "Utils.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
namespace opts {

// Options defined in another translation unit.
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltOptCategory;

// -profile-ignore-hash: hidden flag, off by default. When set, the readers in
// this file stop comparing the function hash stored in the profile and use
// the basic block count to decide whether a profile matches a function.
static cl::opt<bool> IgnoreHash(
    "profile-ignore-hash",
    cl::desc("ignore hash while reading function profile"),
    cl::init(false),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltOptCategory));

} // namespace opts
namespace llvm {
namespace bolt {
bool YAMLProfileReader::isYAML(const StringRef Filename) {
auto MB = MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = MB.getError())
report_error(Filename, EC);
auto Buffer = MB.get()->getBuffer();
if (Buffer.startswith("---\n"))
return true;
return false;
}
void YAMLProfileReader::buildNameMaps(
std::map<uint64_t, BinaryFunction> &Functions) {
for (auto &YamlBF : YamlBP.Functions) {
StringRef Name = YamlBF.Name;
const auto Pos = Name.find("(*");
if (Pos != StringRef::npos)
Name = Name.substr(0, Pos);
ProfileNameToProfile[Name] = &YamlBF;
if (const auto CommonName = getLTOCommonName(Name)) {
LTOCommonNameMap[*CommonName].push_back(&YamlBF);
}
}
for (auto &BFI : Functions) {
const auto &Function = BFI.second;
for (auto Name : Function.getNames()) {
if (const auto CommonName = getLTOCommonName(Name)) {
LTOCommonNameFunctionMap[*CommonName].insert(&Function);
}
}
}
}
bool YAMLProfileReader::hasLocalsWithFileName() const {
for (const auto &KV : ProfileNameToProfile) {
const auto &FuncName = KV.getKey();
if (FuncName.count('/') == 2 && FuncName[0] != '/')
return true;
}
return false;
}
/// Apply the YAML profile record \p YamlBF to the binary function \p BF.
///
/// Sets the function execution count, per-block execution counts, call site
/// annotations and branch (edge) counts. Blocks in the profile are addressed
/// by their position in the order returned by BF.dfs().
///
/// \returns true when the profile matched the function, i.e. the hash check
/// (unless disabled by opts::IgnoreHash) and the basic block count check
/// passed and no block, call site or edge from the profile failed to map
/// onto the function. Only in that case is \p BF marked as profiled. Note
/// that counts are still written to \p BF when false is returned.
bool YAMLProfileReader::parseFunctionProfile(
BinaryFunction &BF,
const yaml::bolt::BinaryFunctionProfile &YamlBF) {
auto &BC = BF.getBinaryContext();
bool ProfileMatched = true;
// Number of profile entries that could not be mapped onto the function.
uint64_t MismatchedBlocks = 0;
uint64_t MismatchedCalls = 0;
uint64_t MismatchedEdges = 0;
// Sum of samples over entry point blocks; only used for sample profiles.
uint64_t FunctionExecutionCount = 0;
// Initial value; replaced below when the profile is a sample profile.
BF.setExecutionCount(YamlBF.ExecCount);
// A hash or block count mismatch marks the profile as unmatched but does
// not stop processing.
if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(/*UseDFS=*/true)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: function hash mismatch\n";
ProfileMatched = false;
}
if (YamlBF.NumBasicBlocks != BF.size()) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: number of basic blocks mismatch\n";
ProfileMatched = false;
}
auto DFSOrder = BF.dfs();
for (const auto &YamlBB : YamlBF.Blocks) {
// Skip profile blocks whose index does not exist in this function.
if (YamlBB.Index >= DFSOrder.size()) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: index " << YamlBB.Index
<< " is out of bounds\n";
++MismatchedBlocks;
continue;
}
auto &BB = *DFSOrder[YamlBB.Index];
// Basic samples profile (without LBR) does not have branches information
// and needs a special processing.
if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
if (!YamlBB.EventCount) {
BB.setExecutionCount(0);
continue;
}
// Scale the event count by 1000, then optionally normalize it by the
// number of non-pseudo instructions or by (number of calls + 1) in the
// block. NormalizeByInsnCount / NormalizeByCalls are set in readProfile().
// NOTE(review): the rationale for the factor 1000 is not visible here.
auto NumSamples = YamlBB.EventCount * 1000;
if (NormalizeByInsnCount && BB.getNumNonPseudos()) {
NumSamples /= BB.getNumNonPseudos();
} else if (NormalizeByCalls) {
NumSamples /= BB.getNumCalls() + 1;
}
BB.setExecutionCount(NumSamples);
if (BB.isEntryPoint())
FunctionExecutionCount += NumSamples;
// Call sites and successors are not processed for sample profiles.
continue;
}
BB.setExecutionCount(YamlBB.ExecCount);
for (const auto &YamlCSI: YamlBB.CallSites) {
// Resolve the callee through its profile id; an id that is out of range
// or has no function assigned leaves the callee symbol null.
auto *Callee = YamlCSI.DestId < YamlProfileToFunction.size() ?
YamlProfileToFunction[YamlCSI.DestId] : nullptr;
bool IsFunction = Callee ? true : false;
MCSymbol *CalleeSymbol = nullptr;
if (IsFunction) {
CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator);
}
// The call site is recorded on the function before the checks below, so
// it is kept even if it is later counted as mismatched.
BF.getAllCallSites().emplace_back(
CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds, YamlCSI.Offset);
if (YamlCSI.Offset >= BB.getOriginalSize()) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: offset " << YamlCSI.Offset
<< " out of bounds in block " << BB.getName() << '\n';
++MismatchedCalls;
continue;
}
// The profile offset is relative to the start of the block.
auto *Instr =
BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset);
if (!Instr) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset
<< " in block " << BB.getName() << '\n';
++MismatchedCalls;
continue;
}
// Only calls and indirect branches may carry call site profile data.
if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) {
if (opts::Verbosity >= 2)
errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset
<< " in block " << BB.getName() << '\n';
++MismatchedCalls;
continue;
}
// Attach annotation Name with value Count to the instruction unless one
// with that name already exists; a duplicate is ignored (with a warning
// at verbosity >= 1) and is not counted as a mismatch.
auto setAnnotation = [&](StringRef Name, uint64_t Count) {
if (BC.MIB->hasAnnotation(*Instr, Name)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: ignoring duplicate " << Name
<< " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset)
<< " in function " << BF << '\n';
return;
}
BC.MIB->addAnnotation(*Instr, Name, Count);
};
// Indirect calls/branches append one entry per profile record to the
// "CallProfile" annotation; conditional tail calls get taken and
// mispredicted counts; any other call gets a plain "Count".
if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
IndirectCallSiteProfile &CSP =
BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
*Instr, "CallProfile");
CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds);
} else if (BC.MIB->getConditionalTailCall(*Instr)) {
setAnnotation("CTCTakenCount", YamlCSI.Count);
setAnnotation("CTCMispredCount", YamlCSI.Mispreds);
} else {
setAnnotation("Count", YamlCSI.Count);
}
}
for (const auto &YamlSI : YamlBB.Successors) {
// The successor is addressed by its index in the same DFS order.
if (YamlSI.Index >= DFSOrder.size()) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: index out of bounds for profiled block\n";
++MismatchedEdges;
continue;
}
auto &SuccessorBB = *DFSOrder[YamlSI.Index];
// The indexed block must be an actual successor of BB.
if (!BB.getSuccessor(SuccessorBB.getLabel())) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: no successor for block " << BB.getName()
<< " that matches index " << YamlSI.Index << " or block "
<< SuccessorBB.getName() << '\n';
++MismatchedEdges;
continue;
}
// Counts are accumulated, so repeated records for one edge add up.
auto &BI = BB.getBranchInfo(SuccessorBB);
BI.Count += YamlSI.Count;
BI.MispredictedCount += YamlSI.Mispreds;
}
}
// If basic block profile wasn't read it should be 0.
for (auto &BB : BF) {
if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
BB.setExecutionCount(0);
}
// For sample profiles the function count is the sum of the samples of its
// entry point blocks, and edge counts are estimated by estimateEdgeCounts().
if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
BF.setExecutionCount(FunctionExecutionCount);
estimateEdgeCounts(BF);
}
// Any mismatched block, call site or edge makes the whole profile unmatched.
ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;
if (ProfileMatched)
BF.markProfiled(YamlBP.Header.Flags);
if (!ProfileMatched && opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, "
<< MismatchedCalls << " calls, and " << MismatchedEdges
<< " edges in profile did not match function " << BF << '\n';
}
return ProfileMatched;
}
/// Load and validate the YAML profile named by Filename, then build the
/// name lookup tables for the functions of \p BC.
///
/// \returns Error::success() on success. An error is returned when the file
/// cannot be opened, the YAML cannot be parsed, the header version is not 1,
/// or the header's event name list contains a ','.
Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
  auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
  if (std::error_code EC = FileOrErr.getError()) {
    errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n";
    return errorCodeToError(EC);
  }

  // Consume YAML file.
  yaml::Input Parser(FileOrErr.get()->getBuffer());
  Parser >> YamlBP;
  if (const std::error_code ParseEC = Parser.error()) {
    errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename
           << " : " << ParseEC.message() << '\n';
    return errorCodeToError(ParseEC);
  }

  // Shared shape of the validation failures below.
  auto rejectProfile = [](const char *Message) -> Error {
    return make_error<StringError>(Twine(Message), inconvertibleErrorCode());
  };

  // Sanity check.
  if (YamlBP.Header.Version != 1)
    return rejectProfile("cannot read profile : unsupported version");

  const bool HasMultipleEvents =
      YamlBP.Header.EventNames.find(',') != StringRef::npos;
  if (HasMultipleEvents)
    return rejectProfile("multiple events in profile are not supported");

  // Match profile to function based on a function name.
  buildNameMaps(BC.getBinaryFunctions());

  return Error::success();
}
/// Return true if any name of \p BF is present in the profile name map, or
/// has an LTO common name that is present in the LTO common name map.
bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
  for (StringRef SymbolName : BF.getNames()) {
    // Direct hit on the full name.
    if (ProfileNameToProfile.count(SymbolName))
      return true;

    // Otherwise try the LTO common name, when the name has one.
    const auto LTOName = getLTOCommonName(SymbolName);
    if (LTOName && LTOCommonNameMap.count(*LTOName))
      return true;
  }
  return false;
}
/// Pair every profile record with a binary function of \p BC and apply the
/// paired profiles via parseFunctionProfile().
///
/// Pairing runs in two passes over the functions (see the comment in the
/// body). Profile records left unpaired are reported with a warning, and the
/// number of records that were not applied is stored in \p BC through
/// setNumUnusedProfiledObjects(). Always returns Error::success().
Error YAMLProfileReader::readProfile(BinaryContext &BC) {
  YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);

  // With -profile-ignore-hash a profile matches on basic block count;
  // otherwise it matches on the function hash.
  auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile,
                           BinaryFunction &BF) {
    return opts::IgnoreHash
               ? Profile.NumBasicBlocks == BF.size()
               : Profile.Hash == static_cast<uint64_t>(BF.getHash());
  };

  // We have to do 2 passes since LTO introduces an ambiguity in function
  // names. The first pass assigns profiles that match 100% by name and
  // by hash. The second pass allows name ambiguity for LTO private functions.
  for (auto &Entry : BC.getBinaryFunctions()) {
    auto &Function = Entry.second;

    // Recompute hash once per function.
    if (!opts::IgnoreHash)
      Function.computeHash(/*UseDFS=*/true);

    for (auto FunctionName : Function.getNames()) {
      auto ProfileIt = ProfileNameToProfile.find(FunctionName);
      if (ProfileIt == ProfileNameToProfile.end())
        continue;
      auto &Profile = *ProfileIt->getValue();
      if (!profileMatches(Profile, Function))
        continue;
      matchProfileToFunction(Profile, Function);
    }
  }

  for (auto &Entry : BC.getBinaryFunctions()) {
    auto &Function = Entry.second;

    // Already paired in the first pass.
    if (ProfiledFunctions.count(&Function))
      continue;

    for (auto FunctionName : Function.getNames()) {
      const auto CommonName = getLTOCommonName(FunctionName);

      if (!CommonName) {
        // Not an LTO name: take the profile with this exact name as long as
        // nothing else has claimed it.
        auto ProfileIt = ProfileNameToProfile.find(FunctionName);
        if (ProfileIt == ProfileNameToProfile.end())
          continue;
        auto &Profile = *ProfileIt->getValue();
        if (Profile.Used)
          continue;
        matchProfileToFunction(Profile, Function);
        break;
      }

      auto CandidatesIt = LTOCommonNameMap.find(*CommonName);
      if (CandidatesIt == LTOCommonNameMap.end())
        continue;
      auto &Candidates = CandidatesIt->getValue();

      // Take the first unclaimed candidate that matches the function.
      bool FoundMatch = false;
      for (auto *Candidate : Candidates) {
        if (Candidate->Used || !profileMatches(*Candidate, Function))
          continue;
        matchProfileToFunction(*Candidate, Function);
        FoundMatch = true;
        break;
      }
      if (FoundMatch)
        break;

      // If there's only one function with a given name, try to
      // match it partially.
      if (Candidates.size() == 1 &&
          LTOCommonNameFunctionMap[*CommonName].size() == 1 &&
          !Candidates.front()->Used) {
        matchProfileToFunction(*Candidates.front(), Function);
        break;
      }
    }
  }

  // Report every profile record that ended up without a function.
  for (auto &Profile : YamlBP.Functions) {
    if (Profile.Used)
      continue;
    errs() << "BOLT-WARNING: profile ignored for function "
           << Profile.Name << '\n';
  }

  // Set for parseFunctionProfile().
  NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
  NormalizeByCalls = usesEvent("branches");

  uint64_t NumUnused{0};
  for (auto &Profile : YamlBP.Functions) {
    // A record counts as unused when its id is out of range (such profile
    // was ignored) or when no function was assigned to that id.
    auto *BF = Profile.Id < YamlProfileToFunction.size()
                   ? YamlProfileToFunction[Profile.Id]
                   : nullptr;
    if (!BF) {
      ++NumUnused;
      continue;
    }
    parseFunctionProfile(*BF, Profile);
  }

  BC.setNumUnusedProfiledObjects(NumUnused);

  return Error::success();
}
/// Return true if \p Name occurs anywhere inside the profile header's event
/// name string. This is a substring search, not an exact event name match.
bool YAMLProfileReader::usesEvent(StringRef Name) const {
  const auto Position = YamlBP.Header.EventNames.find(Name);
  return Position != StringRef::npos;
}
} // end namespace bolt
} // end namespace llvm