124#include <unordered_map>
132#define DEBUG_TYPE "pgo-instrumentation"
134STATISTIC(NumOfPGOInstrument,
"Number of edges instrumented.");
135STATISTIC(NumOfPGOSelectInsts,
"Number of select instruction instrumented.");
136STATISTIC(NumOfPGOMemIntrinsics,
"Number of mem intrinsics instrumented.");
139STATISTIC(NumOfPGOSplit,
"Number of critical edge splits.");
140STATISTIC(NumOfPGOFunc,
"Number of functions having valid profile counts.");
141STATISTIC(NumOfPGOMismatch,
"Number of functions having mismatch profile.");
142STATISTIC(NumOfPGOMissing,
"Number of functions without profile.");
143STATISTIC(NumOfPGOICall,
"Number of indirect call value instrumentations.");
144STATISTIC(NumOfCSPGOInstrument,
"Number of edges instrumented in CSPGO.");
146 "Number of select instruction instrumented in CSPGO.");
148 "Number of mem intrinsics instrumented in CSPGO.");
150STATISTIC(NumOfCSPGOBB,
"Number of basic-blocks in CSPGO.");
151STATISTIC(NumOfCSPGOSplit,
"Number of critical edge splits in CSPGO.");
153 "Number of functions having valid profile counts in CSPGO.");
155 "Number of functions having mismatch profile in CSPGO.");
156STATISTIC(NumOfCSPGOMissing,
"Number of functions without profile in CSPGO.");
157STATISTIC(NumCoveredBlocks,
"Number of basic blocks that were executed");
164 cl::desc(
"Specify the path of profile data file. This is"
165 "mainly for test purpose."));
169 cl::desc(
"Specify the path of profile remapping file. This is mainly for "
176 cl::desc(
"Disable Value Profiling"));
182 cl::desc(
"Max number of annotations for a single indirect "
189 cl::desc(
"Max number of preicise value annotations for a single memop"
196 cl::desc(
"Append function hash to the name of COMDAT function to avoid "
197 "function hash mismatch due to the preinliner"));
204 cl::desc(
"Use this option to turn on/off "
205 "warnings about missing profile data for "
212 cl::desc(
"Use this option to turn off/on "
213 "warnings about profile cfg mismatch."));
220 cl::desc(
"The option is used to turn on/off "
221 "warnings about hash mismatch for comdat "
222 "or weak functions."));
228 cl::desc(
"Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
234 cl::desc(
"A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
247 cl::desc(
"Use this option to turn on/off "
248 "memory intrinsic size profiling."));
253 cl::desc(
"When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
260 cl::desc(
"Force to instrument function entry basicblock."));
265 "Use this option to enable function entry coverage instrumentation."));
268 "pgo-block-coverage",
269 cl::desc(
"Use this option to enable basic block coverage instrumentation"));
273 cl::desc(
"Create a dot file of CFGs with block "
274 "coverage inference information"));
277 "pgo-temporal-instrumentation",
278 cl::desc(
"Use this option to enable temporal instrumentation"));
282 cl::desc(
"Fix function entry count in profile use."));
286 cl::desc(
"Print out the non-match BFI count if a hot raw profile count "
287 "becomes non-hot, or a cold raw profile count becomes hot. "
288 "The print is enabled under -Rpass-analysis=pgo, or "
289 "internal option -pass-remakrs-analysis=pgo."));
293 cl::desc(
"Print out mismatched BFI counts after setting profile metadata "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remakrs-analysis=pgo."));
299 cl::desc(
"Set the threshold for pgo-verify-bfi: only print out "
300 "mismatched BFI if the difference percentage is greater than "
301 "this value (in percentage)."));
305 cl::desc(
"Set the threshold for pgo-verify-bfi: skip the counts whose "
306 "profile count value is below."));
311 cl::desc(
"Trace the hash of the function with this name."));
315 cl::desc(
"Do not instrument functions smaller than this threshold."));
319 cl::desc(
"Do not instrument functions with the number of critical edges "
320 " greater than this threshold."));
341class FunctionInstrumenter final {
345 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
355 bool isValueProfilingDisabled()
const {
357 InstrumentationType == PGOInstrumentationType::CTXPROF;
360 bool shouldInstrumentEntryBB()
const {
362 InstrumentationType == PGOInstrumentationType::CTXPROF;
366 FunctionInstrumenter(
368 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
371 : M(M),
F(
F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
372 InstrumentationType(InstrumentationType) {}
383 return std::string();
388 return std::string();
400 else if (CV->
isOne())
412#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
421 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
423 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
424 if (InstrumentationType == PGOInstrumentationType::CSFDO)
425 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
427 InstrumentationType == PGOInstrumentationType::CTXPROF)
428 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
430 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
433 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
435 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
437 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
442 Triple TT(M.getTargetTriple());
443 if (TT.supportsCOMDAT()) {
445 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
447 return IRLevelVersionVariable;
457enum VisitMode { VM_counting, VM_instrument, VM_annotate };
461struct SelectInstVisitor :
public InstVisitor<SelectInstVisitor> {
464 VisitMode
Mode = VM_counting;
465 unsigned *CurCtrIdx =
nullptr;
466 unsigned TotalNumCtrs = 0;
469 PGOUseFunc *UseFunc =
nullptr;
470 bool HasSingleByteCoverage;
472 SelectInstVisitor(
Function &Func,
bool HasSingleByteCoverage)
473 :
F(
Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
475 void countSelects() {
485 void instrumentSelects(
unsigned *Ind,
unsigned TotalNC,
GlobalVariable *FNV,
487 Mode = VM_instrument;
489 TotalNumCtrs = TotalNC;
496 void annotateSelects(PGOUseFunc *UF,
unsigned *Ind) {
511 unsigned getNumOfSelectInsts()
const {
return NSIs; }
523 bool Removed =
false;
524 bool IsCritical =
false;
527 : SrcBB(Src), DestBB(Dest), Weight(
W) {}
530 std::string infoString()
const {
531 return (
Twine(Removed ?
"-" :
" ") + (InMST ?
" " :
"*") +
532 (IsCritical ?
"c" :
" ") +
" W=" +
Twine(Weight))
543 PGOBBInfo(
unsigned IX) : Group(this),
Index(IX) {}
546 std::string infoString()
const {
552template <
class Edge,
class BBInfo>
class FuncPGOInstrumentation {
560 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
564 void computeCFGHash();
565 void renameComdatFunction();
569 std::vector<std::vector<VPCandidateInfo>> ValueSites;
570 SelectInstVisitor SIVisitor;
571 std::string FuncName;
572 std::string DeprecatedFuncName;
581 const std::optional<BlockCoverageInference> BCI;
583 static std::optional<BlockCoverageInference>
584 constructBCI(
Function &Func,
bool HasSingleByteCoverage,
585 bool InstrumentFuncEntry) {
586 if (HasSingleByteCoverage)
593 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
606 void dumpInfo(
StringRef Str =
"")
const {
608 " Hash: " +
Twine(FunctionHash) +
"\t" + Str);
611 FuncPGOInstrumentation(
613 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
616 bool InstrumentFuncEntry =
true,
bool HasSingleByteCoverage =
false)
617 :
F(
Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(
Func, TLI),
618 TLI(TLI), ValueSites(IPVK_Last + 1),
619 SIVisitor(
Func, HasSingleByteCoverage),
620 MST(
F, InstrumentFuncEntry, BPI,
BFI),
621 BCI(constructBCI(
Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
623 BCI->viewBlockCoverageGraph();
625 SIVisitor.countSelects();
626 ValueSites[IPVK_MemOPSize] = VPC.
get(IPVK_MemOPSize);
628 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
629 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
631 ValueSites[IPVK_IndirectCallTarget] = VPC.
get(IPVK_IndirectCallTarget);
633 ValueSites[IPVK_VTableTarget] = VPC.
get(IPVK_VTableTarget);
635 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
636 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
643 if (!ComdatMembers.empty())
644 renameComdatFunction();
647 for (
const auto &E : MST.
allEdges()) {
650 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
652 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
665template <
class Edge,
class BBInfo>
666void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
667 std::vector<uint8_t> Indexes;
671 auto BI = findBBInfo(Succ);
675 for (
int J = 0; J < 4; J++)
676 Indexes.push_back((uint8_t)(
Index >> (J * 8)));
683 auto updateJCH = [&JCH](
uint64_t Num) {
688 updateJCH((
uint64_t)SIVisitor.getNumOfSelectInsts());
689 updateJCH((
uint64_t)ValueSites[IPVK_IndirectCallTarget].
size());
692 updateJCH(BCI->getInstrumentedBlocksHash());
702 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
705 LLVM_DEBUG(
dbgs() <<
"Function Hash Computation for " <<
F.getName() <<
":\n"
706 <<
" CRC = " << JC.
getCRC()
707 <<
", Selects = " << SIVisitor.getNumOfSelectInsts()
708 <<
", Edges = " << MST.
numEdges() <<
", ICSites = "
709 << ValueSites[IPVK_IndirectCallTarget].size()
710 <<
", Memops = " << ValueSites[IPVK_MemOPSize].size()
711 <<
", High32 CRC = " << JCH.
getCRC()
712 <<
", Hash = " << FunctionHash <<
"\n";);
715 dbgs() <<
"Funcname=" <<
F.getName() <<
", Hash=" << FunctionHash
716 <<
" in building " <<
F.getParent()->getSourceFileName() <<
"\n";
722 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
734 for (
auto &&CM :
make_range(ComdatMembers.equal_range(
C))) {
735 assert(!isa<GlobalAlias>(CM.second));
736 Function *FM = dyn_cast<Function>(CM.second);
744template <
class Edge,
class BBInfo>
745void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
748 std::string OrigName =
F.getName().str();
749 std::string NewFuncName =
751 F.setName(
Twine(NewFuncName));
753 FuncName =
Twine(FuncName +
"." +
Twine(FunctionHash)).
str();
759 if (!
F.hasComdat()) {
761 NewComdat =
M->getOrInsertComdat(
StringRef(NewFuncName));
763 F.setComdat(NewComdat);
768 Comdat *OrigComdat =
F.getComdat();
769 std::string NewComdatName =
771 NewComdat =
M->getOrInsertComdat(
StringRef(NewComdatName));
774 for (
auto &&CM :
make_range(ComdatMembers.equal_range(OrigComdat))) {
776 cast<Function>(CM.second)->setComdat(NewComdat);
782template <
class Edge,
class BBInfo>
783void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
784 std::vector<BasicBlock *> &InstrumentBBs) {
787 if (BCI->shouldInstrumentBlock(BB))
788 InstrumentBBs.push_back(&BB);
793 std::vector<Edge *> EdgeList;
795 for (
const auto &E : MST.
allEdges())
796 EdgeList.push_back(E.get());
798 for (
auto &E : EdgeList) {
801 InstrumentBBs.push_back(InstrBB);
807template <
class Edge,
class BBInfo>
808BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
809 if (E->InMST || E->Removed)
815 if (SrcBB ==
nullptr)
817 if (DestBB ==
nullptr)
832 return canInstrument(SrcBB);
834 return canInstrument(DestBB);
843 dbgs() <<
"Fail to split critical edge: not instrument this edge.\n");
848 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
850 <<
" --> " << getBBInfo(DestBB).
Index <<
"\n");
852 MST.
addEdge(SrcBB, InstrBB, 0);
854 Edge &NewEdge1 = MST.
addEdge(InstrBB, DestBB, 0);
855 NewEdge1.InMST =
true;
858 return canInstrument(InstrBB);
874 if (!isa<IntrinsicInst>(OrigCall)) {
877 std::optional<OperandBundleUse> ParentFunclet =
885 if (!BlockColors.
empty()) {
886 const ColorVector &CV = BlockColors.
find(OrigCall->getParent())->second;
887 assert(CV.
size() == 1 &&
"non-unique color for block!");
897void FunctionInstrumenter::instrument() {
904 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
905 F, TLI, ComdatMembers,
true, BPI, BFI,
906 InstrumentationType == PGOInstrumentationType::CSFDO,
909 auto Name = FuncInfo.FuncNameVar;
915 Name, PointerType::get(
M.getContext(), 0));
917 auto &EntryBB =
F.getEntryBlock();
918 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
923 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
927 std::vector<BasicBlock *> InstrumentBBs;
928 FuncInfo.getInstrumentBBs(InstrumentBBs);
929 unsigned NumCounters =
930 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
932 if (InstrumentationType == PGOInstrumentationType::CTXPROF) {
945 for (
auto &Instr : BB)
946 if (
auto *CS = dyn_cast<CallBase>(&Instr)) {
947 if ((CS->getCalledFunction() &&
948 CS->getCalledFunction()->isIntrinsic()) ||
949 dyn_cast<InlineAsm>(CS->getCalledOperand()))
956 Visit([&TotalNrCallsites](
auto *) { ++TotalNrCallsites; });
960 Visit([&](
auto *CB) {
962 Builder.CreateCall(CSIntrinsic,
963 {
Name, CFGHash, Builder.getInt32(TotalNrCallsites),
964 Builder.getInt32(CallsiteIndex++),
965 CB->getCalledOperand()});
972 auto &EntryBB =
F.getEntryBlock();
973 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
978 {NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
979 Builder.getInt32(
I)});
983 for (
auto *InstrBB : InstrumentBBs) {
985 assert(Builder.GetInsertPoint() != InstrBB->
end() &&
986 "Cannot get the Instrumentation point");
991 ? Intrinsic::instrprof_cover
992 : Intrinsic::instrprof_increment),
993 {NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
994 Builder.getInt32(
I++)});
998 FuncInfo.SIVisitor.instrumentSelects(&
I, NumCounters, FuncInfo.FuncNameVar,
999 FuncInfo.FunctionHash);
1002 if (isValueProfilingDisabled())
1005 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1012 if (
F.hasPersonalityFn() &&
1018 unsigned SiteIndex = 0;
1024 <<
" site: CallSite Index = " << SiteIndex <<
"\n");
1027 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1028 "Cannot get the Instrumentation point");
1030 Value *ToProfile =
nullptr;
1031 if (Cand.V->getType()->isIntegerTy())
1032 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1033 else if (Cand.V->getType()->isPointerTy())
1034 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1035 assert(ToProfile &&
"value profiling Value is of unexpected type");
1038 Name, PointerType::get(
M.getContext(), 0));
1044 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1045 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1054struct PGOUseEdge :
public PGOEdge {
1055 using PGOEdge::PGOEdge;
1057 std::optional<uint64_t> Count;
1063 std::string infoString()
const {
1065 return PGOEdge::infoString();
1066 return (
Twine(PGOEdge::infoString()) +
" Count=" +
Twine(*Count)).str();
1073struct PGOUseBBInfo :
public PGOBBInfo {
1074 std::optional<uint64_t> Count;
1075 int32_t UnknownCountInEdge = 0;
1076 int32_t UnknownCountOutEdge = 0;
1077 DirectEdges InEdges;
1078 DirectEdges OutEdges;
1080 PGOUseBBInfo(
unsigned IX) : PGOBBInfo(IX) {}
1086 std::string infoString()
const {
1088 return PGOBBInfo::infoString();
1089 return (
Twine(PGOBBInfo::infoString()) +
" Count=" +
Twine(*Count)).str();
1093 void addOutEdge(PGOUseEdge *E) {
1094 OutEdges.push_back(E);
1095 UnknownCountOutEdge++;
1099 void addInEdge(PGOUseEdge *E) {
1100 InEdges.push_back(E);
1101 UnknownCountInEdge++;
1110 for (
const auto &E : Edges) {
1124 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1127 bool HasSingleByteCoverage)
1128 :
F(
Func),
M(Modu),
BFI(BFIin), PSI(PSI),
1129 FuncInfo(
Func, TLI, ComdatMembers,
false, BPI, BFIin, IsCS,
1130 InstrumentFuncEntry, HasSingleByteCoverage),
1131 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(
Func, TLI) {}
1133 void handleInstrProfError(
Error Err,
uint64_t MismatchedFuncSum);
1140 void populateCounters();
1149 void annotateValueSites();
1152 void annotateValueSites(
uint32_t Kind);
1155 void annotateIrrLoopHeaderWeights();
1158 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1161 FuncFreqAttr getFuncFreqAttr()
const {
return FreqAttr; }
1170 PGOUseBBInfo &getBBInfo(
const BasicBlock *BB)
const {
1171 return FuncInfo.getBBInfo(BB);
1175 PGOUseBBInfo *findBBInfo(
const BasicBlock *BB)
const {
1176 return FuncInfo.findBBInfo(BB);
1181 void dumpInfo(
StringRef Str =
"")
const { FuncInfo.dumpInfo(Str); }
1183 uint64_t getProgramMaxCount()
const {
return ProgramMaxCount; }
1192 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1208 FuncFreqAttr FreqAttr;
1216 bool setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile);
1229 FreqAttr = FFA_Cold;
1237 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1241 for (
const auto &E : FuncInfo.MST.allEdges()) {
1246 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1247 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1248 SrcInfo.addOutEdge(E.get());
1249 DestInfo.addInEdge(E.get());
1255bool PGOUseFunc::setInstrumentedCounts(
1256 const std::vector<uint64_t> &CountFromProfile) {
1258 std::vector<BasicBlock *> InstrumentBBs;
1259 FuncInfo.getInstrumentBBs(InstrumentBBs);
1263 unsigned NumCounters =
1264 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1267 if (NumCounters != CountFromProfile.size()) {
1270 auto *FuncEntry = &*
F.begin();
1275 uint64_t CountValue = CountFromProfile[
I++];
1276 PGOUseBBInfo &
Info = getBBInfo(InstrBB);
1280 if (InstrBB == FuncEntry && CountValue == 0)
1282 Info.setBBInfoCount(CountValue);
1284 ProfileCountSize = CountFromProfile.size();
1288 auto setEdgeCount = [
this](PGOUseEdge *E,
uint64_t Value) ->
void {
1289 E->setEdgeCount(
Value);
1290 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1291 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1297 for (
const auto &E : FuncInfo.MST.allEdges()) {
1298 if (E->Removed || E->InMST)
1301 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1305 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1306 setEdgeCount(E.get(), *SrcInfo.Count);
1309 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1312 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1313 setEdgeCount(E.get(), *DestInfo.Count);
1319 setEdgeCount(E.get(), 0);
1326void PGOUseFunc::setEdgeCount(DirectEdges &Edges,
uint64_t Value) {
1327 for (
auto &E : Edges) {
1330 E->setEdgeCount(
Value);
1332 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1333 getBBInfo(E->DestBB).UnknownCountInEdge--;
1341 const char MetadataName[] =
"instr_prof_hash_mismatch";
1344 auto *Existing =
F.getMetadata(LLVMContext::MD_annotation);
1346 MDTuple *Tuple = cast<MDTuple>(Existing);
1347 for (
const auto &
N : Tuple->
operands()) {
1348 if (
N.equalsStr(MetadataName))
1357 F.setMetadata(LLVMContext::MD_annotation, MD);
1360void PGOUseFunc::handleInstrProfError(
Error Err,
uint64_t MismatchedFuncSum) {
1362 auto &Ctx =
M->getContext();
1363 auto Err = IPE.
get();
1364 bool SkipWarning =
false;
1366 << FuncInfo.FuncName <<
": ");
1367 if (Err == instrprof_error::unknown_function) {
1368 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1371 }
else if (Err == instrprof_error::hash_mismatch ||
1372 Err == instrprof_error::malformed) {
1373 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1379 LLVM_DEBUG(
dbgs() <<
"hash mismatch (hash= " << FuncInfo.FunctionHash
1380 <<
" skip=" << SkipWarning <<
")");
1390 IPE.
message() + std::string(
" ") +
F.getName().str() +
1391 std::string(
" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1392 std::string(
" up to ") + std::to_string(MismatchedFuncSum) +
1393 std::string(
" count discarded");
1405 auto &Ctx =
M->getContext();
1408 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1409 &MismatchedFuncSum);
1411 handleInstrProfError(std::move(E), MismatchedFuncSum);
1414 ProfileRecord = std::move(
Result.get());
1419 std::vector<uint64_t> &CountFromProfile = ProfileRecord.
Counts;
1421 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1425 for (
unsigned I = 0, S = CountFromProfile.size();
I < S;
I++) {
1427 ValueSum += CountFromProfile[
I];
1429 AllZeros = (ValueSum == 0);
1433 getBBInfo(
nullptr).UnknownCountOutEdge = 2;
1434 getBBInfo(
nullptr).UnknownCountInEdge = 2;
1436 if (!setInstrumentedCounts(CountFromProfile)) {
1438 dbgs() <<
"Inconsistent number of counts, skipping this function");
1440 M->getName().data(),
1441 Twine(
"Inconsistent number of counts in ") +
F.getName().str() +
1442 Twine(
": the profile may be stale or there is a function name "
1454 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1455 &MismatchedFuncSum);
1456 if (
auto Err =
Result.takeError()) {
1457 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1460 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1462 std::vector<uint64_t> &CountsFromProfile =
Result.get().Counts;
1466 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1472 InverseDependencies;
1473 for (
auto &BB :
F) {
1474 for (
auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1476 InverseDependencies[Dep].
insert(&BB);
1481 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1482 for (
auto &[BB, IsCovered] : Coverage)
1484 CoveredBlocksToProcess.push(BB);
1486 while (!CoveredBlocksToProcess.empty()) {
1487 auto *CoveredBlock = CoveredBlocksToProcess.top();
1488 assert(Coverage[CoveredBlock]);
1489 CoveredBlocksToProcess.pop();
1490 for (
auto *BB : InverseDependencies[CoveredBlock]) {
1495 CoveredBlocksToProcess.push(BB);
1503 F.setEntryCount(Coverage[&
F.getEntryBlock()] ? 10000 : 0);
1504 for (
auto &BB :
F) {
1513 Weights.
push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1514 if (Weights.
size() >= 2)
1519 unsigned NumCorruptCoverage = 0;
1524 auto IsBlockDead = [&](
const BasicBlock &BB) -> std::optional<bool> {
1525 if (
auto C =
BFI.getBlockProfileCount(&BB))
1529 LLVM_DEBUG(
dbgs() <<
"Block Coverage: (Instrumented=*, Covered=X)\n");
1530 for (
auto &BB :
F) {
1531 LLVM_DEBUG(
dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ?
"* " :
" ")
1532 << (Coverage[&BB] ?
"X " :
" ") <<
" " << BB.getName()
1538 if (Coverage[&BB] == IsBlockDead(BB).value_or(
false)) {
1540 dbgs() <<
"Found inconsistent block covearge for " << BB.getName()
1541 <<
": BCI=" << (Coverage[&BB] ?
"Covered" :
"Dead") <<
" BFI="
1542 << (IsBlockDead(BB).
value() ?
"Dead" :
"Covered") <<
"\n");
1543 ++NumCorruptCoverage;
1549 auto &Ctx =
M->getContext();
1551 M->getName().data(),
1552 Twine(
"Found inconsistent block coverage for function ") +
F.getName() +
1553 " in " +
Twine(NumCorruptCoverage) +
" blocks.",
1557 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1562void PGOUseFunc::populateCounters() {
1563 bool Changes =
true;
1564 unsigned NumPasses = 0;
1572 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1573 if (UseBBInfo ==
nullptr)
1575 if (!UseBBInfo->Count) {
1576 if (UseBBInfo->UnknownCountOutEdge == 0) {
1579 }
else if (UseBBInfo->UnknownCountInEdge == 0) {
1584 if (UseBBInfo->Count) {
1585 if (UseBBInfo->UnknownCountOutEdge == 1) {
1591 if (*UseBBInfo->Count > OutSum)
1592 Total = *UseBBInfo->Count - OutSum;
1593 setEdgeCount(UseBBInfo->OutEdges,
Total);
1596 if (UseBBInfo->UnknownCountInEdge == 1) {
1599 if (*UseBBInfo->Count > InSum)
1600 Total = *UseBBInfo->Count - InSum;
1601 setEdgeCount(UseBBInfo->InEdges,
Total);
1608 LLVM_DEBUG(
dbgs() <<
"Populate counts in " << NumPasses <<
" passes.\n");
1612 for (
auto &BB :
F) {
1613 auto BI = findBBInfo(&BB);
1616 assert(BI->Count &&
"BB count is not valid");
1621 for (
auto &BB :
F) {
1622 auto BI = findBBInfo(&BB);
1625 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1635 FuncInfo.SIVisitor.annotateSelects(
this, &CountPosition);
1636 assert(CountPosition == ProfileCountSize);
1638 LLVM_DEBUG(FuncInfo.dumpInfo(
"after reading profile."));
1642void PGOUseFunc::setBranchWeights() {
1644 LLVM_DEBUG(
dbgs() <<
"\nSetting branch weights for func " <<
F.getName()
1645 <<
" IsCS=" << IsCS <<
"\n");
1646 for (
auto &BB :
F) {
1650 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1651 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1652 isa<CallBrInst>(TI)))
1655 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1656 if (!*BBCountInfo.Count)
1660 unsigned Size = BBCountInfo.OutEdges.size();
1663 for (
unsigned s = 0; s <
Size; s++) {
1664 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1667 if (DestBB ==
nullptr)
1671 if (EdgeCount > MaxCount)
1672 MaxCount = EdgeCount;
1673 EdgeCounts[SuccNum] = EdgeCount;
1682 auto &Ctx =
M->getContext();
1684 M->getName().data(),
1685 Twine(
"Profile in ") +
F.getName().str() +
1686 Twine(
" partially ignored") +
1687 Twine(
", possibly due to the lack of a return path."),
1695 if (isa<IndirectBrInst>(Pred->getTerminator()))
1701void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1702 LLVM_DEBUG(
dbgs() <<
"\nAnnotating irreducible loop header weights.\n");
1704 for (
auto &BB :
F) {
1710 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1716void SelectInstVisitor::instrumentOneSelectInst(
SelectInst &SI) {
1720 auto *Step = Builder.CreateZExt(
SI.getCondition(), Int64Ty);
1721 auto *NormalizedFuncNameVarPtr =
1723 FuncNameVar, PointerType::get(
M->getContext(), 0));
1726 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1727 Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
1731void SelectInstVisitor::annotateOneSelectInst(
SelectInst &SI) {
1732 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1733 assert(*CurCtrIdx < CountFromProfile.size() &&
1734 "Out of bound access of counters");
1736 SCounts[0] = CountFromProfile[*CurCtrIdx];
1739 auto BI = UseFunc->findBBInfo(
SI.getParent());
1741 TotalCount = *BI->Count;
1743 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1744 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1749void SelectInstVisitor::visitSelectInst(
SelectInst &SI) {
1753 if (
SI.getCondition()->getType()->isVectorTy())
1761 instrumentOneSelectInst(SI);
1764 annotateOneSelectInst(SI);
1772 if (ValueProfKind == IPVK_MemOPSize)
1774 if (ValueProfKind == llvm::IPVK_VTableTarget)
1780void PGOUseFunc::annotateValueSites() {
1788 annotateValueSites(Kind);
1792void PGOUseFunc::annotateValueSites(
uint32_t Kind) {
1793 assert(Kind <= IPVK_Last);
1794 unsigned ValueSiteIndex = 0;
1807 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1808 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1810 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.
get(IPVK_VTableTarget);
1811 auto &ValueSites = FuncInfo.ValueSites[
Kind];
1812 if (NumValueSites != ValueSites.size()) {
1813 auto &Ctx =
M->getContext();
1815 M->getName().data(),
1816 Twine(
"Inconsistent number of value sites for ") +
1819 Twine(
"\", possibly due to the use of a stale profile."),
1825 LLVM_DEBUG(
dbgs() <<
"Read one value site profile (kind = " << Kind
1826 <<
"): Index = " << ValueSiteIndex <<
" out of "
1827 << NumValueSites <<
"\n");
1829 *M, *
I.AnnotatedInst, ProfileRecord,
1840 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1845 ComdatMembers.insert(std::make_pair(
C, &
F));
1847 if (
Comdat *
C = GV.getComdat())
1848 ComdatMembers.insert(std::make_pair(
C, &GV));
1850 if (
Comdat *
C = GA.getComdat())
1851 ComdatMembers.insert(std::make_pair(
C, &GA));
1856 if (
F.isDeclaration())
1861 unsigned NumCriticalEdges = 0;
1862 for (
auto &BB :
F) {
1871 <<
", NumCriticalEdges=" << NumCriticalEdges
1872 <<
" exceed the threshold. Skip PGO.\n");
1882 if (
F.hasFnAttribute(llvm::Attribute::Naked))
1884 if (
F.hasFnAttribute(llvm::Attribute::NoProfile))
1886 if (
F.hasFnAttribute(llvm::Attribute::SkipProfile))
1900 if (InstrumentationType == PGOInstrumentationType::FDO)
1903 Triple TT(M.getTargetTriple());
1908 Twine(
"VTable value profiling is presently not "
1909 "supported for non-ELF object formats"),
1911 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1917 auto &TLI = LookupTLI(
F);
1918 auto *BPI = LookupBPI(
F);
1919 auto *BFI = LookupBFI(
F);
1920 FunctionInstrumenter FI(M,
F, TLI, ComdatMembers, BPI, BFI,
1921 InstrumentationType);
1934 if (ProfileSampling)
1956 InstrumentationType))
1969 auto BFIEntryCount =
F.getEntryCount();
1970 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1971 "Invalid BFI Entrycount");
1975 for (
auto &BBI :
F) {
1978 if (!Func.findBBInfo(&BBI))
1981 CountValue = *Func.getBBInfo(&BBI).Count;
1982 BFICountValue = *BFICount;
1986 if (SumCount.isZero())
1990 "Incorrect sum of BFI counts");
1993 double Scale = (SumCount / SumBFICount).convertToDouble();
1994 if (Scale < 1.001 && Scale > 0.999)
1999 if (NewEntryCount == 0)
2005 << NewEntryCount <<
"\n");
2022 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2023 for (
auto &BBI :
F) {
2027 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
2034 BFICountValue = *BFICount;
2037 bool rawIsHot = CountValue >= HotCountThreshold;
2038 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2040 bool ShowCount =
false;
2041 if (rawIsHot && !BFIIsHot) {
2042 Msg =
"raw-Hot to BFI-nonHot";
2044 }
else if (rawIsCold && BFIIsHot) {
2045 Msg =
"raw-Cold to BFI-Hot";
2054 uint64_t Diff = (BFICountValue >= CountValue)
2055 ? BFICountValue - CountValue
2056 : CountValue - BFICountValue;
2064 F.getSubprogram(), &BBI);
2066 <<
" Count=" <<
ore::NV(
"Count", CountValue)
2067 <<
" BFI_Count=" <<
ore::NV(
"Count", BFICountValue);
2069 Remark <<
" (" << Msg <<
")";
2076 F.getSubprogram(), &
F.getEntryBlock())
2077 <<
"In Func " <<
ore::NV(
"Function",
F.getName())
2078 <<
": Num_of_BB=" <<
ore::NV(
"Count", BBNum)
2079 <<
", Num_of_non_zerovalue_BB=" <<
ore::NV(
"Count", NonZeroBBNum)
2080 <<
", Num_of_mis_matching_BB=" <<
ore::NV(
"Count", BBMisMatchNum);
2092 auto &Ctx = M.getContext();
2095 ProfileRemappingFileName);
2096 if (
Error E = ReaderOrErr.takeError()) {
2104 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2105 std::move(ReaderOrErr.get());
2111 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2115 if (!PGOReader->isIRLevelProfile()) {
2117 ProfileFileName.
data(),
"Not an IR level instrumentation profile"));
2120 if (PGOReader->functionEntryOnly()) {
2122 ProfileFileName.
data(),
2123 "Function entry profiles are not yet supported for optimization"));
2129 if (!
G.hasName() || !
G.hasMetadata(LLVMContext::MD_type))
2140 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2145 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2147 std::vector<Function *> HotFunctions;
2148 std::vector<Function *> ColdFunctions;
2152 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2156 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2160 auto &TLI = LookupTLI(
F);
2161 auto *BPI = LookupBPI(
F);
2162 auto *BFI = LookupBFI(
F);
2163 if (!HasSingleByteCoverage) {
2169 PGOUseFunc Func(
F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2170 InstrumentFuncEntry, HasSingleByteCoverage);
2171 if (HasSingleByteCoverage) {
2172 Func.populateCoverage(PGOReader.get());
2180 bool AllZeros =
false;
2181 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2185 if (Func.getProgramMaxCount() != 0)
2186 ColdFunctions.push_back(&
F);
2191 if (
F.hasFnAttribute(Attribute::Cold))
2192 F.removeFnAttr(Attribute::Cold);
2195 F.addFnAttr(Attribute::Hot);
2198 Func.populateCounters();
2199 Func.setBranchWeights();
2200 Func.annotateValueSites();
2201 Func.annotateIrrLoopHeaderWeights();
2202 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2203 if (FreqAttr == PGOUseFunc::FFA_Cold)
2204 ColdFunctions.push_back(&
F);
2205 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2206 HotFunctions.push_back(&
F);
2211 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2212 std::make_unique<BranchProbabilityInfo>(
F, LI);
2213 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2214 std::make_unique<BlockFrequencyInfo>(
F, *NewBPI, LI);
2218 dbgs() <<
"pgo-view-counts: " << Func.getFunc().getName() <<
"\n";
2219 NewBFI->print(
dbgs());
2229 ViewGraph(&Func,
Twine(
"PGORawCounts_") + Func.getFunc().getName());
2231 dbgs() <<
"pgo-view-raw-counts: " << Func.getFunc().getName() <<
"\n";
2258 for (
auto &
F : HotFunctions) {
2259 F->addFnAttr(Attribute::InlineHint);
2260 LLVM_DEBUG(
dbgs() <<
"Set inline attribute to function: " <<
F->getName()
2263 for (
auto &
F : ColdFunctions) {
2266 if (
F->hasFnAttribute(Attribute::Hot)) {
2267 auto &Ctx = M.getContext();
2268 std::string Msg = std::string(
"Function ") +
F->getName().str() +
2269 std::string(
" is annotated as a hot function but"
2270 " the profile is cold");
2275 F->addFnAttr(Attribute::Cold);
2276 LLVM_DEBUG(
dbgs() <<
"Set cold attribute to function: " <<
F->getName()
2283 std::string Filename, std::string RemappingFilename,
bool IsCS,
2285 : ProfileFileName(
std::
move(Filename)),
2286 ProfileRemappingFileName(
std::
move(RemappingFilename)), IsCS(IsCS),
2312 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2319 if (!
Node->getName().empty())
2320 return Node->getName().str();
2322 std::string SimpleNodeName;
2325 return SimpleNodeName;
2330 assert(MaxCount > 0 &&
"Bad max count");
2333 for (
const auto &ECI : EdgeCounts)
2346 if (BrCondStr.empty())
2358 std::string BranchProbStr;
2361 OS <<
" (total count : " << TotalCount <<
")";
2367 << BrCondStr <<
" is true with probability : " << BranchProbStr;
2386 return &
G->getFunc().front();
2409 return std::string(
G->getFunc().getName());
2417 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2419 if (BI && BI->Count)
2420 OS << *BI->Count <<
"\\l";
2428 if (!isa<SelectInst>(&
I))
2431 OS <<
"SELECT : { T = ";
2435 OS <<
"Unknown, F = Unknown }\\l";
2437 OS << TC <<
", F = " << FC <<
" }\\l";
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, PGOInstrumentationType InstrumentationType)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
const std::vector< std::unique_ptr< Edge > > & allEdges() const
size_t bbInfoSize() const
BBInfo * findBBInfo(const BasicBlock *BB) const
BBInfo & getBBInfo(const BasicBlock *BB) const
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate getPredicate() const
Return the predicate for this instruction.
StringRef getName() const
void setSelectionKind(SelectionKind Val)
SelectionKind getSelectionKind() const
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for error info classes.
virtual std::string message() const
Return the error message as a string.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class to represent profile counts.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
@ HiddenVisibility
The GV is hidden.
@ ExternalLinkage
Externally visible function.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AvailableExternallyLinkage
Available for inspection, not emission.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Base class for instruction visitors.
void visit(Iterator Start, Iterator End)
RetTy visitSelectInst(SelectInst &I)
instrprof_error get() const
std::string message() const override
Return the error message as a string.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
void update(ArrayRef< uint8_t > Data)
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
void write64le(void *P, uint64_t V)
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then clean up.
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
DOTGraphTraits(bool isSimple=false)
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
std::vector< uint64_t > Counts
CountPseudoKind getCountPseudoKind() const
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
static void setCSFlagInHash(uint64_t &FuncHash)
Instruction * AnnotatedInst