LLVM 19.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get precise counter information, these two passes need to be invoked at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
117#include <algorithm>
118#include <cassert>
119#include <cstdint>
120#include <memory>
121#include <numeric>
122#include <optional>
123#include <string>
124#include <unordered_map>
125#include <utility>
126#include <vector>
127
128using namespace llvm;
131
132#define DEBUG_TYPE "pgo-instrumentation"
133
// Pass statistics. Each counter exists in two flavours: the plain one is
// updated for non-context-sensitive instrumentation and the "CSPGO" one for
// context-sensitive instrumentation (FuncPGOInstrumentation picks between
// them based on its IsCS flag).
134STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
135STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
136STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
137STATISTIC(NumOfPGOEdge, "Number of edges.");
138STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
139STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
140STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
141STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
142STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
143STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
144STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
145STATISTIC(NumOfCSPGOSelectInsts,
146 "Number of select instruction instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOMemIntrinsics,
148 "Number of mem intrinsics instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
150STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
151STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
152STATISTIC(NumOfCSPGOFunc,
153 "Number of functions having valid profile counts in CSPGO.");
154STATISTIC(NumOfCSPGOMismatch,
155 "Number of functions having mismatch profile in CSPGO.");
156STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
157STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
158
// NOTE(review): several option definitions in this group start
// mid-statement -- the embedded line numbering skips the line that carries
// the declarator (e.g. 161, 166, 180). TODO recover those lines from the
// upstream file.

159// Command line option to specify the file to read profile from. This is
160// mainly used for testing.
// NOTE(review): the two adjacent string literals in cl::desc below
// concatenate to "This ismainly" -- a separating space is missing.
162 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
163 cl::value_desc("filename"),
164 cl::desc("Specify the path of profile data file. This is"
165 "mainly for test purpose."));
// Command line option to specify the profile remapping file to read. Like the
// option above, its description says it is mainly for test purposes.
167 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
168 cl::value_desc("filename"),
169 cl::desc("Specify the path of profile remapping file. This is mainly for "
170 "test purpose."));
171
172// Command line option to disable value profiling. The default is false:
173// i.e. value profiling is enabled by default. This is for debug purpose.
174static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
176 cl::desc("Disable Value Profiling"));
177
178// Command line option to set the maximum number of VP annotations to write to
179// the metadata for a single indirect call callsite. Defaults to 3.
181 "icp-max-annotations", cl::init(3), cl::Hidden,
182 cl::desc("Max number of annotations for a single indirect "
183 "call callsite"));
184
185// Command line option to set the maximum number of value annotations
186// to write to the metadata for a single memop intrinsic. Defaults to 4.
// NOTE(review): in the description below "preicise" is misspelled and the two
// adjacent literals concatenate to "memopintrinsic" (missing space).
188 "memop-max-annotations", cl::init(4), cl::Hidden,
189 cl::desc("Max number of preicise value annotations for a single memop"
190 "intrinsic"));
191
192// Command line option to control appending FunctionHash to the name of a COMDAT
193// function. This is to avoid the hash mismatch caused by the preinliner.
// Off by default.
195 "do-comdat-renaming", cl::init(false), cl::Hidden,
196 cl::desc("Append function hash to the name of COMDAT function to avoid "
197 "function hash mismatch due to the preinliner"));
198
// The options in this namespace block are declared without `static`, i.e.
// with external linkage -- presumably so that other translation units can
// refer to them (TODO confirm against their users).
// NOTE(review): the embedded line numbering skips lines 203, 210 and 218, so
// some definitions below are missing part of their text in this view.
199namespace llvm {
200// Command line option to enable/disable the warning about missing profile
201// information. Off by default.
202cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
204 cl::desc("Use this option to turn on/off "
205 "warnings about missing profile data for "
206 "functions."));
207
208// Command line option to enable/disable the warning about a hash mismatch in
209// the profile data. The flag is phrased negatively ("no-...") and defaults to
// false.
211 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
212 cl::desc("Use this option to turn off/on "
213 "warnings about profile cfg mismatch."));
214
215// Command line option to enable/disable the warning about a hash mismatch in
216// the profile data for Comdat functions, which often turns out to be false
217// positive due to the pre-instrumentation inline. This negatively phrased
// flag defaults to true.
219 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
220 cl::desc("The option is used to turn on/off "
221 "warnings about hash mismatch for comdat "
222 "or weak functions."));
223} // namespace llvm
224
// NOTE(review): many definitions in this group start mid-statement -- the
// embedded line numbering skips the line that carries the declarator (e.g.
// 232, 258, 262, 267, 276, 284, ...). TODO recover those lines from the
// upstream file.

225// Command line option to enable/disable select instruction instrumentation.
// On by default.
226static cl::opt<bool>
227 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
228 cl::desc("Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
230
231// Command line option to turn on CFG dot or text dump of raw profile counts
// NOTE(review): the description calls this "A boolean option", but the
// cl::values list below defines three choices (none / graph / text).
233 "pgo-view-raw-counts", cl::Hidden,
234 cl::desc("A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
242 clEnumValN(PGOVCT_Text, "text", "show in text.")));
243
244// Command line option to enable/disable memop intrinsic call.size profiling.
// On by default.
245static cl::opt<bool>
246 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
247 cl::desc("Use this option to turn on/off "
248 "memory intrinsic size profiling."));
249
250// Emit branch probability as optimization remarks. Off by default.
251static cl::opt<bool>
252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
253 cl::desc("When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
257
// Command line option to force instrumentation of the function entry basic
// block. Off by default.
259 "pgo-instrument-entry", cl::init(false), cl::Hidden,
260 cl::desc("Force to instrument function entry basicblock."));
261
// Command line option to enable function entry coverage instrumentation.
263 "pgo-function-entry-coverage", cl::Hidden,
264 cl::desc(
265 "Use this option to enable function entry coverage instrumentation."));
266
// Command line option to enable basic block coverage instrumentation.
268 "pgo-block-coverage",
269 cl::desc("Use this option to enable basic block coverage instrumentation"));
270
// Command line option to create a dot file of the CFGs annotated with block
// coverage inference information. It is consulted in the
// FuncPGOInstrumentation constructor.
271static cl::opt<bool>
272 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
273 cl::desc("Create a dot file of CFGs with block "
274 "coverage inference information"));
275
// Command line option to enable temporal instrumentation.
277 "pgo-temporal-instrumentation",
278 cl::desc("Use this option to enable temporal instrumentation"));
279
// Command line option to fix the function entry count during profile use.
// On by default.
280static cl::opt<bool>
281 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
282 cl::desc("Fix function entry count in profile use."));
283
// Command line option to report BFI counts whose hot/cold classification no
// longer matches the raw profile count. Off by default.
// NOTE(review): "-pass-remakrs-analysis" in the description looks like a typo
// for "-pass-remarks-analysis".
285 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
286 cl::desc("Print out the non-match BFI count if a hot raw profile count "
287 "becomes non-hot, or a cold raw profile count becomes hot. "
288 "The print is enabled under -Rpass-analysis=pgo, or "
289 "internal option -pass-remakrs-analysis=pgo."));
290
// Command line option to report mismatched BFI counts after profile metadata
// has been set. Off by default.
// NOTE(review): the first two literals concatenate to "...profile metadata
// The print..." (missing sentence break), and "-pass-remakrs-analysis" looks
// like a typo for "-pass-remarks-analysis".
292 "pgo-verify-bfi", cl::init(false), cl::Hidden,
293 cl::desc("Print out mismatched BFI counts after setting profile metadata "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remakrs-analysis=pgo."));
296
// Threshold for pgo-verify-bfi: minimum difference percentage that gets
// reported. Defaults to 2.
298 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
299 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
300 "mismatched BFI if the difference percentage is greater than "
301 "this value (in percentage)."));
302
// Threshold for pgo-verify-bfi: profile counts below this value are skipped.
// Defaults to 5.
304 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
305 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
306 "profile count value is below."));
307
// Command line option naming a function whose hash should be traced. The
// default value is "-"; presumably this is the PGOTraceFuncHash option that
// computeCFGHash() compares against "-" before printing (TODO confirm).
309 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
310 cl::value_desc("function name"),
311 cl::desc("Trace the hash of the function with this name."));
312
// Command line option giving the minimum function size to instrument. No
// cl::init is visible in this fragment.
314 "pgo-function-size-threshold", cl::Hidden,
315 cl::desc("Do not instrument functions smaller than this threshold."));
316
// Command line option giving the maximum number of critical edges a function
// may have and still be instrumented. Defaults to 20000.
// NOTE(review): the two literals concatenate to "critical edges  greater"
// (double space).
318 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
319 cl::desc("Do not instrument functions with the number of critical edges "
320 " greater than this threshold."));
321
322namespace llvm {
323// Command line option to turn on CFG dot dump after profile annotation.
324// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
326
327// Command line option to specify the name of the function for CFG dump
328// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
330
331// Command line option to enable vtable value profiling. Defined in
332// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
335} // namespace llvm
336
// NOTE(review): fragment of a function definition. The signature (embedded
// line 337) and the right-hand operand of the `||` (embedded line 339) are
// not visible in this view -- TODO recover them from the upstream file. What
// is visible: the result is true whenever the pgo-instrument-entry option is
// set.
338 return PGOInstrumentEntry ||
340}
341
342// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
343// profiling implicitly captures indirect call cases, but not other values.
344// Supporting other values is relatively straight-forward - just another counter
345// range within the context.
// NOTE(review): fragment of a function definition. The signature (embedded
// line 346) and the right-hand operand of the `||` (embedded line 348) are
// not visible in this view -- TODO recover them from the upstream file. What
// is visible: the result is true whenever the disable-vp option is set.
347 return DisableValueProfiling ||
349}
350
351// Return a string describing the branch condition that can be
352// used in static branch probability heuristics:
353static std::string getBranchCondString(Instruction *TI) {
354 BranchInst *BI = dyn_cast<BranchInst>(TI);
355 if (!BI || !BI->isConditional())
356 return std::string();
357
358 Value *Cond = BI->getCondition();
359 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
360 if (!CI)
361 return std::string();
362
363 std::string result;
364 raw_string_ostream OS(result);
365 OS << CI->getPredicate() << "_";
366 CI->getOperand(0)->getType()->print(OS, true);
367
368 Value *RHS = CI->getOperand(1);
369 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
370 if (CV) {
371 if (CV->isZero())
372 OS << "_Zero";
373 else if (CV->isOne())
374 OS << "_One";
375 else if (CV->isMinusOne())
376 OS << "_MinusOne";
377 else
378 OS << "_Const";
379 }
380 OS.flush();
381 return result;
382}
383
// Table of description strings for the value-profile kinds: every
// VALUE_PROF_KIND(Enumerator, Value, Descr) entry expands to just its Descr.
// NOTE(review): embedded line 386, which should supply the VALUE_PROF_KIND
// entries between the #define and the closing brace, is not visible in this
// view -- TODO recover it from the upstream file.
384static const char *ValueProfKindDescr[] = {
385#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
387};
388
389// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
390// aware this is an ir_level profile so it can set the version flag.
// Returns the newly created global variable.
// NOTE(review): the signature (embedded line 391) is not visible in this
// view; the body refers to a module `M` and a flag `IsCS`.
392 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
393 Type *IntTy64 = Type::getInt64Ty(M.getContext());
// Start from the raw version with the IR-level variant bit, then OR in
// further variant bits.
394 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
395 if (IsCS)
396 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
// NOTE(review): embedded lines 397, 399, 401, 404 and 406 are missing from
// this view. Each of the following `ProfileVersion |=` statements is
// presumably guarded by a condition on one of those lines -- TODO confirm; as
// shown here they would all execute unconditionally.
398 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
400 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
402 ProfileVersion |=
403 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
405 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
407 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
// The variable is a 64-bit integer initialised to ProfileVersion, created
// with weak linkage and then given hidden visibility.
408 auto IRLevelVersionVariable = new GlobalVariable(
409 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
410 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
411 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
412 Triple TT(M.getTargetTriple());
// On targets that support COMDAT, switch to external linkage and place the
// variable in a COMDAT named after it.
413 if (TT.supportsCOMDAT()) {
414 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
415 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
416 }
417 return IRLevelVersionVariable;
418}
419
420namespace {
421
422/// The select instruction visitor plays three roles specified
423/// by the mode. In \c VM_counting mode, it simply counts the number of
424/// select instructions. In \c VM_instrument mode, it inserts code to count
425/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
426/// it reads the profile data and annotates the select instruction with metadata.
427enum VisitMode { VM_counting, VM_instrument, VM_annotate };
// Forward declaration: SelectInstVisitor below only stores a pointer to it.
428class PGOUseFunc;
429
430/// Instruction Visitor class to visit select instructions.
431struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
432 Function &F;
433 unsigned NSIs = 0; // Number of select instructions instrumented.
434 VisitMode Mode = VM_counting; // Visiting mode.
435 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
436 unsigned TotalNumCtrs = 0; // Total number of counters
437 GlobalVariable *FuncNameVar = nullptr;
438 uint64_t FuncHash = 0;
439 PGOUseFunc *UseFunc = nullptr;
440 bool HasSingleByteCoverage;
441
442 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
443 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
444
445 void countSelects() {
446 NSIs = 0;
447 Mode = VM_counting;
448 visit(F);
449 }
450
451 // Visit the IR stream and instrument all select instructions. \p
452 // Ind is a pointer to the counter index variable; \p TotalNC
453 // is the total number of counters; \p FNV is the pointer to the
454 // PGO function name var; \p FHash is the function hash.
455 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
456 uint64_t FHash) {
457 Mode = VM_instrument;
458 CurCtrIdx = Ind;
459 TotalNumCtrs = TotalNC;
460 FuncHash = FHash;
461 FuncNameVar = FNV;
462 visit(F);
463 }
464
465 // Visit the IR stream and annotate all select instructions.
466 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
467 Mode = VM_annotate;
468 UseFunc = UF;
469 CurCtrIdx = Ind;
470 visit(F);
471 }
472
473 void instrumentOneSelectInst(SelectInst &SI);
474 void annotateOneSelectInst(SelectInst &SI);
475
476 // Visit \p SI instruction and perform tasks according to visit mode.
477 void visitSelectInst(SelectInst &SI);
478
479 // Return the number of select instructions. This needs be called after
480 // countSelects().
481 unsigned getNumOfSelectInsts() const { return NSIs; }
482};
483
484/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
485/// based instrumentation.
486/// Note that the CFG can be a multi-graph. So there might be multiple edges
487/// with the same SrcBB and DestBB.
488struct PGOEdge {
489 BasicBlock *SrcBB;
490 BasicBlock *DestBB;
491 uint64_t Weight;
492 bool InMST = false;
493 bool Removed = false;
494 bool IsCritical = false;
495
496 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
497 : SrcBB(Src), DestBB(Dest), Weight(W) {}
498
499 /// Return the information string of an edge.
500 std::string infoString() const {
501 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
502 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
503 .str();
504 }
505};
506
507/// This class stores the auxiliary information for each BB in the MST.
508struct PGOBBInfo {
509 PGOBBInfo *Group;
511 uint32_t Rank = 0;
512
513 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
514
515 /// Return the information string of this object.
516 std::string infoString() const {
517 return (Twine("Index=") + Twine(Index)).str();
518 }
519};
520
521// Per-function state shared by the instrumentation-generation and
// profile-use passes (see the file header comment). The constructor counts
// selects, collects value-profiling sites, computes the CFG hash and updates
// the pass statistics. \p Edge and \p BBInfo are the edge / per-BB info types
// used with the MST. Note the CFG can be a multi-graph.
522template <class Edge, class BBInfo> class FuncPGOInstrumentation {
523private:
524 Function &F;
525
526 // Whether this is context-sensitive instrumentation.
527 bool IsCS;
528
529 // A map that stores the Comdat group in function F.
530 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
531
// NOTE(review): embedded line 532 is missing from this view. The constructor
// initialises a member VPC(Func, TLI) between ComdatMembers and TLI, so its
// declaration presumably belongs here -- TODO confirm.
533
// Computes FunctionHash from the CFG and the profiling-site counts.
534 void computeCFGHash();
// Appends the function hash to the name of a renamable Comdat function.
535 void renameComdatFunction();
536
537public:
538 const TargetLibraryInfo &TLI;
// Value-profiling candidates, indexed by value kind (sized IPVK_Last + 1).
539 std::vector<std::vector<VPCandidateInfo>> ValueSites;
540 SelectInstVisitor SIVisitor;
// Set from getIRPGOFuncName(F) / getPGOFuncName(F) in the constructor.
541 std::string FuncName;
542 std::string DeprecatedFuncName;
// NOTE(review): has no default initialiser and is only assigned when the
// constructor's CreateGlobalVar argument is true.
543 GlobalVariable *FuncNameVar;
544
545 // CFG hash value for this function.
546 uint64_t FunctionHash = 0;
547
548 // The Minimum Spanning Tree of function CFG.
// NOTE(review): embedded line 549 (the declaration of the `MST` member used
// throughout this class) is missing from this view.
550
// Engaged only when single-byte coverage is requested (see constructBCI).
551 const std::optional<BlockCoverageInference> BCI;
552
// Build the block coverage inference for \p Func when
// \p HasSingleByteCoverage is set; otherwise return an empty optional.
553 static std::optional<BlockCoverageInference>
554 constructBCI(Function &Func, bool HasSingleByteCoverage,
555 bool InstrumentFuncEntry) {
556 if (HasSingleByteCoverage)
557 return BlockCoverageInference(Func, InstrumentFuncEntry);
558 return {};
559 }
560
561 // Collect all the BBs that will be instrumented, and store them in
562 // InstrumentBBs.
563 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
564
565 // Give an edge, find the BB that will be instrumented.
566 // Return nullptr if there is no BB to be instrumented.
567 BasicBlock *getInstrBB(Edge *E);
568
569 // Return the auxiliary BB information.
570 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
571
572 // Return the auxiliary BB information if available.
573 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
574
575 // Dump edges and BB information.
576 void dumpInfo(StringRef Str = "") const {
577 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
578 " Hash: " + Twine(FunctionHash) + "\t" + Str);
579 }
580
// Set up all per-function PGO state for \p Func. \p IsCS selects which set
// of statistics is updated; \p CreateGlobalVar controls whether the PGO
// function name variable is created; \p BPI, \p BFI and
// \p InstrumentFuncEntry are forwarded to the MST; \p HasSingleByteCoverage
// enables block coverage inference.
581 FuncPGOInstrumentation(
582 Function &Func, TargetLibraryInfo &TLI,
583 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
584 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
585 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
586 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
587 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
588 TLI(TLI), ValueSites(IPVK_Last + 1),
589 SIVisitor(Func, HasSingleByteCoverage),
590 MST(F, InstrumentFuncEntry, BPI, BFI),
591 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
592 if (BCI && PGOViewBlockCoverageGraph)
593 BCI->viewBlockCoverageGraph();
594 // This should be done before CFG hash computation.
595 SIVisitor.countSelects();
596 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
597 if (!IsCS) {
598 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
599 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
600 NumOfPGOBB += MST.bbInfoSize();
// Indirect-call (and vtable) sites are only collected in the non-CS case.
601 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
// NOTE(review): embedded line 602 is missing from this view; the next
// statement is presumably guarded by a condition on that line -- TODO confirm.
603 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
604 } else {
605 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
606 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
607 NumOfCSPGOBB += MST.bbInfoSize();
608 }
609
610 FuncName = getIRPGOFuncName(F);
611 DeprecatedFuncName = getPGOFuncName(F);
612 computeCFGHash();
613 if (!ComdatMembers.empty())
614 renameComdatFunction();
615 LLVM_DEBUG(dumpInfo("after CFGMST"));
616
// Count the live edges, and separately those outside the MST (the ones
// that get instrumented).
617 for (const auto &E : MST.allEdges()) {
618 if (E->Removed)
619 continue;
620 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
621 if (!E->InMST)
622 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
623 }
624
625 if (CreateGlobalVar)
626 FuncNameVar = createPGOFuncNameVar(F, FuncName);
627 }
628};
629
630} // end anonymous namespace
631
632// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
633// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
634// of selects, indirect calls, mem ops and edges.
// NOTE(review): the code below combines the two CRCs with a shift of 28 (not
// 32) and then clears bits 60-63, so the "lower/higher 32 bits" wording above
// is only approximate.
635template <class Edge, class BBInfo>
636void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
637 std::vector<uint8_t> Indexes;
638 JamCRC JC;
// For every successor edge in the function, append the successor's BB index
// as four bytes, least significant byte first. Successors without BB info
// are skipped.
639 for (auto &BB : F) {
640 for (BasicBlock *Succ : successors(&BB)) {
641 auto BI = findBBInfo(Succ);
642 if (BI == nullptr)
643 continue;
644 uint32_t Index = BI->Index;
645 for (int J = 0; J < 4; J++)
646 Indexes.push_back((uint8_t)(Index >> (J * 8)));
647 }
648 }
649 JC.update(Indexes);
650
651 JamCRC JCH;
652 // The higher 32 bits.
653 auto updateJCH = [&JCH](uint64_t Num) {
654 uint8_t Data[8];
// NOTE(review): embedded line 655 is missing from this view; it presumably
// serialises Num into Data before the update below -- TODO confirm. As shown,
// Data would be fed to the CRC uninitialised.
656 JCH.update(Data);
657 };
658 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
659 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
660 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
// With block coverage inference the instrumented-blocks hash is mixed in
// instead of the MST edge count.
661 if (BCI) {
662 updateJCH(BCI->getInstrumentedBlocksHash());
663 } else {
664 updateJCH((uint64_t)MST.numEdges());
665 }
666
667 // Hash format for context sensitive profile. Reserve 4 bits for other
668 // information.
669 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
670
671 // Reserve bit 60-63 for other information purpose.
672 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
673 if (IsCS)
// NOTE(review): embedded line 674 (the statement controlled by the `if`
// above) is missing from this view; as shown, the `if` would guard the debug
// output below instead -- TODO recover the line from the upstream file.
675 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
676 << " CRC = " << JC.getCRC()
677 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
678 << ", Edges = " << MST.numEdges() << ", ICSites = "
679 << ValueSites[IPVK_IndirectCallTarget].size()
680 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
681 << ", High32 CRC = " << JCH.getCRC()
682 << ", Hash = " << FunctionHash << "\n";);
683
// Optional tracing: "-" disables it; otherwise any function whose name
// contains the option value gets its hash printed.
684 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
685 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
686 << " in building " << F.getParent()->getSourceFileName() << "\n";
687}
688
689// Check if we can safely rename this Comdat function.
690static bool canRenameComdat(
691 Function &F,
692 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
693 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
694 return false;
695
696 // FIXME: Current only handle those Comdat groups that only containing one
697 // function.
698 // (1) For a Comdat group containing multiple functions, we need to have a
699 // unique postfix based on the hashes for each function. There is a
700 // non-trivial code refactoring to do this efficiently.
701 // (2) Variables can not be renamed, so we can not rename Comdat function in a
702 // group including global vars.
703 Comdat *C = F.getComdat();
704 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
705 assert(!isa<GlobalAlias>(CM.second));
706 Function *FM = dyn_cast<Function>(CM.second);
707 if (FM != &F)
708 return false;
709 }
710 return true;
711}
712
713// Append the CFGHash to the Comdat function name.
// Does nothing unless canRenameComdat() allows the rename. The function (and
// FuncName) get a ".<FunctionHash>" suffix, and the function is moved into a
// Comdat carrying the same suffix.
714template <class Edge, class BBInfo>
715void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
716 if (!canRenameComdat(F, ComdatMembers))
717 return;
718 std::string OrigName = F.getName().str();
719 std::string NewFuncName =
720 Twine(F.getName() + "." + Twine(FunctionHash)).str();
721 F.setName(Twine(NewFuncName));
// NOTE(review): embedded line 722 is missing from this view; OrigName is not
// used by any visible line, so it is presumably consumed there -- TODO
// confirm.
723 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
724 Comdat *NewComdat;
725 Module *M = F.getParent();
726 // For AvailableExternallyLinkage functions, change the linkage to
727 // LinkOnceODR and put them into comdat. This is because after renaming, there
728 // is no backup external copy available for the function.
729 if (!F.hasComdat()) {
// NOTE(review): embedded lines 730 and 732 are missing from this view; the
// linkage change described in the comment above is not among the visible
// statements -- TODO recover them from the upstream file.
731 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
733 F.setComdat(NewComdat);
734 return;
735 }
736
737 // This function belongs to a single function Comdat group.
738 Comdat *OrigComdat = F.getComdat();
739 std::string NewComdatName =
740 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
741 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
// The new Comdat inherits the selection kind of the original one.
742 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
743
744 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
745 // Must be a function.
746 cast<Function>(CM.second)->setComdat(NewComdat);
747 }
748}
749
750/// Collect all the BBs that will be instruments and add them to
751/// `InstrumentBBs`.
752template <class Edge, class BBInfo>
753void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
754 std::vector<BasicBlock *> &InstrumentBBs) {
755 if (BCI) {
756 for (auto &BB : F)
757 if (BCI->shouldInstrumentBlock(BB))
758 InstrumentBBs.push_back(&BB);
759 return;
760 }
761
762 // Use a worklist as we will update the vector during the iteration.
763 std::vector<Edge *> EdgeList;
764 EdgeList.reserve(MST.numEdges());
765 for (const auto &E : MST.allEdges())
766 EdgeList.push_back(E.get());
767
768 for (auto &E : EdgeList) {
769 BasicBlock *InstrBB = getInstrBB(E);
770 if (InstrBB)
771 InstrumentBBs.push_back(InstrBB);
772 }
773}
774
775// Given a CFG edge E to be instrumented, find which BB to place the instrumented
776// code. The function will split the critical edge if necessary.
// Returns nullptr when nothing is to be instrumented: the edge is in the MST
// or already removed, the chosen block has no insertion point, or a critical
// edge could not be split.
777template <class Edge, class BBInfo>
778BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
779 if (E->InMST || E->Removed)
780 return nullptr;
781
782 BasicBlock *SrcBB = E->SrcBB;
783 BasicBlock *DestBB = E->DestBB;
784 // For a fake edge, instrument the real BB.
785 if (SrcBB == nullptr)
786 return DestBB;
787 if (DestBB == nullptr)
788 return SrcBB;
789
790 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
791 // There are basic blocks (such as catchswitch) that cannot be instrumented.
792 // If the returned first insertion point is the end of BB, skip this BB.
793 if (BB->getFirstInsertionPt() == BB->end())
794 return nullptr;
795 return BB;
796 };
797
798 // Instrument the SrcBB if it has a single successor,
799 // otherwise, the DestBB if this is not a critical edge.
800 Instruction *TI = SrcBB->getTerminator();
801 if (TI->getNumSuccessors() <= 1)
802 return canInstrument(SrcBB);
803 if (!E->IsCritical)
804 return canInstrument(DestBB);
805
806 // Some IndirectBr critical edges cannot be split by the previous
807 // SplitIndirectBrCriticalEdges call. Bail out.
808 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
809 BasicBlock *InstrBB =
810 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
811 if (!InstrBB) {
// NOTE(review): embedded line 812 is missing from this view; the statement
// below has an unmatched closing parenthesis, so its opening (presumably a
// debug-output macro) is on that line -- TODO recover it.
813 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
814 return nullptr;
815 }
816 // For a critical edge, we have to split. Instrument the newly
817 // created BB.
818 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
819 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
820 << " --> " << getBBInfo(DestBB).Index << "\n");
821 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
822 MST.addEdge(SrcBB, InstrBB, 0);
823 // Second one: Add new edge of InstrBB->DestBB.
824 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
// Only the second new edge is marked as in the MST; the original edge is
// retired.
825 NewEdge1.InMST = true;
826 E->Removed = true;
827
828 return canInstrument(InstrBB);
829}
830
831// When generating value profiling calls on Windows routines that make use of
832// handler funclets for exception processing an operand bundle needs to be attached
833// to the called function. This routine will set \p OpBundles to contain the
834// funclet information, if any is needed, that should be placed on the generated
835// value profiling call for the value profile candidate call.
// NOTE(review): embedded lines 837-839 (function name and parameter list) are
// missing from this view; the body refers to `Cand`, `BlockColors` and
// `OpBundles`.
836static void
840 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
// Candidates whose annotated instruction is not a call need no bundle.
841 if (!OrigCall)
842 return;
843
844 if (!isa<IntrinsicInst>(OrigCall)) {
845 // The instrumentation call should belong to the same funclet as a
846 // non-intrinsic call, so just copy the operand bundle, if any exists.
847 std::optional<OperandBundleUse> ParentFunclet =
848 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
849 if (ParentFunclet)
850 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
851 } else {
852 // Intrinsics or other instructions do not get funclet information from the
853 // front-end. Need to use the BlockColors that was computed by the routine
854 // colorEHFunclets to determine whether a funclet is needed.
855 if (!BlockColors.empty()) {
// NOTE(review): the find() result is dereferenced without an end() check;
// this assumes BlockColors has an entry for the call's parent block.
856 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
857 assert(CV.size() == 1 && "non-unique color for block!");
858 Instruction *EHPad = CV.front()->getFirstNonPHI();
859 if (EHPad->isEHPad())
860 OpBundles.emplace_back("funclet", EHPad);
861 }
862 }
863}
864
865// Visit all edges and instrument the edges not in MST, and do value profiling.
866// Critical edges will be split.
// NOTE(review): this extract is missing listing lines 867-869 (start of the
// signature) and interior lines 880, 885, 901, 939, 958, 969, 978 and 1004.
// Several conditions and declarations are therefore not visible here; the
// code below is reproduced exactly as it appears in the listing.
870 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
871 bool IsCS) {
872 if (!PGOBlockCoverage) {
873 // Split indirectbr critical edges here before computing the MST rather than
874 // later in getInstrBB() to avoid invalidating it.
875 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
876 }
877
878 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
879 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
881
// Name and CFGHash are the first two operands of every instrprof intrinsic
// call emitted below.
882 auto Name = FuncInfo.FuncNameVar;
883 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
884 FuncInfo.FunctionHash);
// NOTE(review): the condition guarding this early-return block (listing line
// 885) is not visible. The block emits one llvm.instrprof.cover call at the
// top of the entry block and returns.
886 auto &EntryBB = F.getEntryBlock();
887 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
888 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
889 // i32 <index>)
890 Builder.CreateCall(
891 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
892 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
893 return;
894 }
895
// One counter per instrumented basic block plus one per select instruction.
896 std::vector<BasicBlock *> InstrumentBBs;
897 FuncInfo.getInstrumentBBs(InstrumentBBs);
898 unsigned NumCounters =
899 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
900
// NOTE(review): the condition opening this callsite-instrumentation block
// (listing line 901) is not visible.
902 auto *CSIntrinsic =
903 Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
904 // We want to count the instrumentable callsites, then instrument them. This
905 // is because the llvm.instrprof.callsite intrinsic has an argument (like
906 // the other instrprof intrinsics) capturing the total number of
907 // instrumented objects (counters, or callsites, in this case). In this
908 // case, we want that value so we can readily pass it to the compiler-rt
909 // APIs that may have to allocate memory based on the nr of callsites.
910 // The traversal logic is the same for both counting and instrumentation,
911 // just needs to be done in succession.
912 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
913 for (auto &BB : F)
914 for (auto &Instr : BB)
915 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
// Calls to intrinsics and inline asm are not visited.
916 if ((CS->getCalledFunction() &&
917 CS->getCalledFunction()->isIntrinsic()) ||
918 dyn_cast<InlineAsm>(CS->getCalledOperand()))
919 continue;
920 Visitor(CS);
921 }
922 };
923 // First, count callsites.
924 uint32_t TotalNrCallsites = 0;
925 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
926
927 // Now instrument.
928 uint32_t CallsiteIndex = 0;
929 Visit([&](auto *CB) {
930 IRBuilder<> Builder(CB);
931 Builder.CreateCall(CSIntrinsic,
932 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
933 Builder.getInt32(CallsiteIndex++),
934 CB->getCalledOperand()});
935 });
936 }
937
// I is the index of the next counter to hand out.
938 uint32_t I = 0;
// NOTE(review): the condition guarding this timestamp block (listing line 939)
// is not visible. Inside it, the timestamp takes 8 counter slots under
// PGOBlockCoverage and 1 otherwise.
940 NumCounters += PGOBlockCoverage ? 8 : 1;
941 auto &EntryBB = F.getEntryBlock();
942 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
943 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
944 // i32 <index>)
945 Builder.CreateCall(
946 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
947 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
948 I += PGOBlockCoverage ? 8 : 1;
949 }
950
// Emit one counter update at the first insertion point of each instrumented
// block.
951 for (auto *InstrBB : InstrumentBBs) {
952 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
953 assert(Builder.GetInsertPoint() != InstrBB->end() &&
954 "Cannot get the Instrumentation point");
955 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
956 // i32 <index>)
957 Builder.CreateCall(
// NOTE(review): listing line 958 (start of the callee expression, which picks
// between the cover and increment intrinsics) is not visible.
959 ? Intrinsic::instrprof_cover
960 : Intrinsic::instrprof_increment),
961 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
962 }
963
964 // Now instrument select instructions:
965 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
966 FuncInfo.FunctionHash);
// Every counter slot must have been handed out by now.
967 assert(I == NumCounters);
968
// NOTE(review): the condition for this early return (listing line 969) is not
// visible.
970 return;
971
972 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
973
974 // Intrinsic function calls do not have funclet operand bundles needed for
975 // Windows exception handling attached to them. However, if value profiling is
976 // inserted for one of these calls, then a funclet value will need to be set
977 // on the instrumentation call based on the funclet coloring.
// NOTE(review): the declaration of BlockColors (listing line 978) is not
// visible.
979 if (F.hasPersonalityFn() &&
980 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
981 BlockColors = colorEHFunclets(F);
982
983 // For each VP Kind, walk the VP candidates and instrument each one.
984 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at zero for every value kind.
985 unsigned SiteIndex = 0;
986 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
987 continue;
988
989 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
990 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
991 << " site: CallSite Index = " << SiteIndex << "\n");
992
993 IRBuilder<> Builder(Cand.InsertPt);
994 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
995 "Cannot get the Instrumentation point");
996
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
997 Value *ToProfile = nullptr;
998 if (Cand.V->getType()->isIntegerTy())
999 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1000 else if (Cand.V->getType()->isPointerTy())
1001 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1002 assert(ToProfile && "value profiling Value is of unexpected type");
1003
// NOTE(review): the declaration of OpBundles (listing line 1004) is not
// visible.
1005 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1006 Builder.CreateCall(
1007 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1008 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1009 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1010 OpBundles);
1011 }
1012 } // IPVK_First <= Kind <= IPVK_Last
1013}
1014
1015namespace {
1016
1017// This class represents a CFG edge in profile use compilation.
1018struct PGOUseEdge : public PGOEdge {
1019 using PGOEdge::PGOEdge;
1020
1021 std::optional<uint64_t> Count;
1022
1023 // Set edge count value
1024 void setEdgeCount(uint64_t Value) { Count = Value; }
1025
1026 // Return the information string for this object.
1027 std::string infoString() const {
1028 if (!Count)
1029 return PGOEdge::infoString();
1030 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1031 }
1032};
1033
1034using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1035
1036// This class stores the auxiliary information for each BB.
1037struct PGOUseBBInfo : public PGOBBInfo {
1038 std::optional<uint64_t> Count;
1039 int32_t UnknownCountInEdge = 0;
1040 int32_t UnknownCountOutEdge = 0;
1041 DirectEdges InEdges;
1042 DirectEdges OutEdges;
1043
1044 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1045
1046 // Set the profile count value for this BB.
1047 void setBBInfoCount(uint64_t Value) { Count = Value; }
1048
1049 // Return the information string of this object.
1050 std::string infoString() const {
1051 if (!Count)
1052 return PGOBBInfo::infoString();
1053 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1054 }
1055
1056 // Add an OutEdge and update the edge count.
1057 void addOutEdge(PGOUseEdge *E) {
1058 OutEdges.push_back(E);
1059 UnknownCountOutEdge++;
1060 }
1061
1062 // Add an InEdge and update the edge count.
1063 void addInEdge(PGOUseEdge *E) {
1064 InEdges.push_back(E);
1065 UnknownCountInEdge++;
1066 }
1067};
1068
1069} // end anonymous namespace
1070
1071// Sum up the count values for all the edges.
// NOTE(review): the signature (listing line 1072) is absent from this extract.
// Callers in this file invoke it as sumEdgeCount(<DirectEdges>), and the body
// iterates over a parameter named Edges.
1073 uint64_t Total = 0;
1074 for (const auto &E : Edges) {
// Removed edges and edges whose count is still unknown contribute nothing.
1075 if (E->Removed)
1076 continue;
1077 if (E->Count)
1078 Total += *E->Count;
1079 }
1080 return Total;
1081}
1082
1083namespace {
1084
// Per-function state for the profile-use side: reads the counters of one
// function from the profile, propagates them to blocks and edges, and
// annotates the IR.
// NOTE(review): listing lines 1089, 1101 and 1152 are absent from this
// extract (part of the constructor's parameter list, the tail of the
// readCounters declaration, and one private member line).
1085class PGOUseFunc {
1086public:
1087 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1088 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1090 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1091 bool HasSingleByteCoverage)
1092 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1093 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1094 InstrumentFuncEntry, HasSingleByteCoverage),
1095 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1096
// Handle an error returned by the profile reader: update statistics and,
// unless suppressed, emit a warning diagnostic.
1097 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1098
1099 // Read counts for the instrumented BB from profile.
1100 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1102
1103 // Populate the counts for all BBs.
1104 void populateCounters();
1105
1106 // Set block coverage based on profile coverage values.
1107 void populateCoverage(IndexedInstrProfReader *PGOReader);
1108
1109 // Set the branch weights based on the count values.
1110 void setBranchWeights();
1111
1112 // Annotate the value profile call sites for all value kind.
1113 void annotateValueSites();
1114
1115 // Annotate the value profile call sites for one value kind.
1116 void annotateValueSites(uint32_t Kind);
1117
1118 // Annotate the irreducible loop header weights.
1119 void annotateIrrLoopHeaderWeights();
1120
1121 // The hotness of the function from the profile count.
1122 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1123
1124 // Return the function hotness from the profile.
1125 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1126
1127 // Return the function hash.
1128 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1129
1130 // Return the profile record for this function;
1131 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1132
1133 // Return the auxiliary BB information.
1134 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1135 return FuncInfo.getBBInfo(BB);
1136 }
1137
1138 // Return the auxiliary BB information if available.
1139 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1140 return FuncInfo.findBBInfo(BB);
1141 }
1142
// Return the function this object was constructed for.
1143 Function &getFunc() const { return F; }
1144
// Forward to FuncInfo.dumpInfo with the given string.
1145 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1146
// Return the maximum function count of the program as read from the profile.
1147 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1148
1149private:
1150 Function &F;
1151 Module *M;
1153 ProfileSummaryInfo *PSI;
1154
1155 // This member stores the shared information with class PGOGenFunc.
1156 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1157
1158 // The maximum count value in the profile. This is only used in PGO use
1159 // compilation.
// NOTE(review): no in-class initializer and not set by the visible
// constructor; the visible code assigns it at the end of a successful
// readCounters().
1160 uint64_t ProgramMaxCount;
1161
1162 // Position of counter that remains to be read.
1163 uint32_t CountPosition = 0;
1164
1165 // Total size of the profile count for this function.
1166 uint32_t ProfileCountSize = 0;
1167
1168 // ProfileRecord for this function.
1169 InstrProfRecord ProfileRecord;
1170
1171 // Function hotness info derived from profile.
1172 FuncFreqAttr FreqAttr;
1173
1174 // Is to use the context sensitive profile.
1175 bool IsCS;
1176
1177 // Find the Instrumented BB and set the value. Return false on error.
1178 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1179
1180 // Set the edge counter value for the unknown edge -- there should be only
1181 // one unknown edge.
1182 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1183
1184 // Set the hot/cold inline hints based on the count values.
1185 // FIXME: This function should be removed once the functionality in
1186 // the inliner is implemented.
// Hot is decided from the entry count and checked first; cold is decided from
// the maximum count. Otherwise FreqAttr keeps its current value.
1187 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1188 if (PSI->isHotCount(EntryCount))
1189 FreqAttr = FFA_Hot;
1190 else if (PSI->isColdCount(MaxCount))
1191 FreqAttr = FFA_Cold;
1192 }
1193};
1194
1195} // end anonymous namespace
1196
1197/// Set up InEdges/OutEdges for all BBs in the MST.
// NOTE(review): the first line of the signature (listing line 1198) is absent
// from this extract; the visible caller invokes it as
// setupBBInfoEdges(FuncInfo).
1199 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1200 // This is not required when there is block coverage inference.
1201 if (FuncInfo.BCI)
1202 return;
1203 for (const auto &E : FuncInfo.MST.allEdges()) {
// Removed edges are not registered with either endpoint.
1204 if (E->Removed)
1205 continue;
1206 const BasicBlock *SrcBB = E->SrcBB;
1207 const BasicBlock *DestBB = E->DestBB;
1208 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1209 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
// Each registration also bumps the endpoint's unknown-edge counter.
1210 SrcInfo.addOutEdge(E.get());
1211 DestInfo.addInEdge(E.get());
1212 }
1213}
1214
1215// Visit all the edges and assign the count value for the instrumented
1216// edges and the BB. Return false on error.
1217bool PGOUseFunc::setInstrumentedCounts(
1218 const std::vector<uint64_t> &CountFromProfile) {
1219
1220 std::vector<BasicBlock *> InstrumentBBs;
1221 FuncInfo.getInstrumentBBs(InstrumentBBs);
1222
1223 setupBBInfoEdges(FuncInfo);
1224
1225 unsigned NumCounters =
1226 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1227 // The number of counters here should match the number of counters
1228 // in profile. Return if they mismatch.
1229 if (NumCounters != CountFromProfile.size()) {
1230 return false;
1231 }
1232 auto *FuncEntry = &*F.begin();
1233
1234 // Set the profile count to the Instrumented BBs.
1235 uint32_t I = 0;
1236 for (BasicBlock *InstrBB : InstrumentBBs) {
1237 uint64_t CountValue = CountFromProfile[I++];
1238 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1239 // If we reach here, we know that we have some nonzero count
1240 // values in this function. The entry count should not be 0.
1241 // Fix it if necessary.
1242 if (InstrBB == FuncEntry && CountValue == 0)
1243 CountValue = 1;
1244 Info.setBBInfoCount(CountValue);
1245 }
1246 ProfileCountSize = CountFromProfile.size();
1247 CountPosition = I;
1248
1249 // Set the edge count and update the count of unknown edges for BBs.
1250 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1251 E->setEdgeCount(Value);
1252 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1253 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1254 };
1255
1256 // Set the profile count the Instrumented edges. There are BBs that not in
1257 // MST but not instrumented. Need to set the edge count value so that we can
1258 // populate the profile counts later.
1259 for (const auto &E : FuncInfo.MST.allEdges()) {
1260 if (E->Removed || E->InMST)
1261 continue;
1262 const BasicBlock *SrcBB = E->SrcBB;
1263 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1264
1265 // If only one out-edge, the edge profile count should be the same as BB
1266 // profile count.
1267 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1268 setEdgeCount(E.get(), *SrcInfo.Count);
1269 else {
1270 const BasicBlock *DestBB = E->DestBB;
1271 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1272 // If only one in-edge, the edge profile count should be the same as BB
1273 // profile count.
1274 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1275 setEdgeCount(E.get(), *DestInfo.Count);
1276 }
1277 if (E->Count)
1278 continue;
1279 // E's count should have been set from profile. If not, this meenas E skips
1280 // the instrumentation. We set the count to 0.
1281 setEdgeCount(E.get(), 0);
1282 }
1283 return true;
1284}
1285
1286// Set the count value for the unknown edge. There should be one and only one
1287// unknown edge in Edges vector.
1288void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1289 for (auto &E : Edges) {
1290 if (E->Count)
1291 continue;
1292 E->setEdgeCount(Value);
1293
1294 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1295 getBBInfo(E->DestBB).UnknownCountInEdge--;
1296 return;
1297 }
1298 llvm_unreachable("Cannot find the unknown count edge");
1299}
1300
1301// Emit function metadata indicating PGO profile mismatch.
// NOTE(review): listing lines 1302 (signature) and 1304 (declaration of Names)
// are absent from this extract. The visible caller invokes this as
// annotateFunctionWithHashMismatch(F, M->getContext()).
1303 const char MetadataName[] = "instr_prof_hash_mismatch";
1305 // If this metadata already exists, ignore.
1306 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1307 if (Existing) {
// Carry over every existing annotation operand; return early if the mismatch
// marker is already among them.
1308 MDTuple *Tuple = cast<MDTuple>(Existing);
1309 for (const auto &N : Tuple->operands()) {
1310 if (N.equalsStr(MetadataName))
1311 return;
1312 Names.push_back(N.get());
1313 }
1314 }
1315
// Append the marker and replace the function's annotation metadata with the
// extended tuple.
1316 MDBuilder MDB(ctx);
1317 Names.push_back(MDB.createString(MetadataName));
1318 MDNode *MD = MDTuple::get(ctx, Names);
1319 F.setMetadata(LLVMContext::MD_annotation, MD);
1320}
1321
// Handle an error from the profile reader for this function: update the
// missing/mismatch statistics, tag the function on hash mismatch, and emit a
// warning unless it is suppressed.
// NOTE(review): listing lines 1337, 1338 and 1340 (most of the SkipWarning
// expression for the mismatch case) are absent from this extract.
1322void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1323 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1324 auto &Ctx = M->getContext();
// This local Err is the error code of IPE and shadows the function parameter.
1325 auto Err = IPE.get();
1326 bool SkipWarning = false;
1327 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1328 << FuncInfo.FuncName << ": ");
1329 if (Err == instrprof_error::unknown_function) {
1330 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1331 SkipWarning = !PGOWarnMissing;
1332 LLVM_DEBUG(dbgs() << "unknown function");
1333 } else if (Err == instrprof_error::hash_mismatch ||
1334 Err == instrprof_error::malformed) {
1335 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1336 SkipWarning =
1339 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1341 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1342 << " skip=" << SkipWarning << ")");
1343 // Emit function metadata indicating PGO profile mismatch.
1344 annotateFunctionWithHashMismatch(F, M->getContext());
1345 }
1346
1347 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1348 if (SkipWarning)
1349 return;
1350
// The warning text includes the function name, its hash and the mismatched
// count sum passed in by the caller.
1351 std::string Msg =
1352 IPE.message() + std::string(" ") + F.getName().str() +
1353 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1354 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1355 std::string(" count discarded");
1356
1357 Ctx.diagnose(
1358 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1359 });
1360}
1361
1362// Read the profile from ProfileFileName and assign the value to the
1363// instrumented BB and the edges. This function also updates ProgramMaxCount.
1364// Return true if the profile are successfully read, and false on errors.
// NOTE(review): listing lines 1366 (remaining parameters, including the one
// named PseudoKind that is written below) and 1369 (the declaration of Result
// and the start of the reader call) are absent from this extract.
1365bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1367 auto &Ctx = M->getContext();
1368 uint64_t MismatchedFuncSum = 0;
1370 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1371 &MismatchedFuncSum);
1372 if (Error E = Result.takeError()) {
1373 handleInstrProfError(std::move(E), MismatchedFuncSum);
1374 return false;
1375 }
1376 ProfileRecord = std::move(Result.get());
// Pseudo-count records are reported through PseudoKind and return success
// without any counters being assigned.
1377 PseudoKind = ProfileRecord.getCountPseudoKind();
1378 if (PseudoKind != InstrProfRecord::NotPseudo) {
1379 return true;
1380 }
1381 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1382
1383 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1384 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1385
// AllZeros is derived from the sum of all counters of this function.
1386 uint64_t ValueSum = 0;
1387 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1388 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1389 ValueSum += CountFromProfile[I];
1390 }
1391 AllZeros = (ValueSum == 0);
1392
1393 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1394
// NOTE(review): the info entry for the null block gets both unknown-edge
// counters preset to 2; presumably this keeps that pseudo node from ever
// being treated as fully resolved during propagation -- confirm.
1395 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1396 getBBInfo(nullptr).UnknownCountInEdge = 2;
1397
1398 if (!setInstrumentedCounts(CountFromProfile)) {
1399 LLVM_DEBUG(
1400 dbgs() << "Inconsistent number of counts, skipping this function");
1401 Ctx.diagnose(DiagnosticInfoPGOProfile(
1402 M->getName().data(),
1403 Twine("Inconsistent number of counts in ") + F.getName().str() +
1404 Twine(": the profile may be stale or there is a function name "
1405 "collision."),
1406 DS_Warning));
1407 return false;
1408 }
1409 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1410 return true;
1411}
1412
// Read this function's coverage counters from the profile, infer coverage of
// the non-instrumented blocks, annotate the entry count and branch weights
// accordingly, and cross-check the result against block frequency info.
// NOTE(review): listing lines 1415 (declaration of Result), 1425 (declaration
// of Coverage), 1433 (first line of the InverseDependencies declaration),
// 1473 (declaration of Weights) and 1518 (condition guarding the final call)
// are absent from this extract.
1413void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1414 uint64_t MismatchedFuncSum = 0;
1416 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1417 &MismatchedFuncSum);
1418 if (auto Err = Result.takeError()) {
1419 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1420 return;
1421 }
1422 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1423
1424 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
// Instrumented blocks consume the profile counters in function order; a
// nonzero counter means the block was covered.
1426 unsigned Index = 0;
1427 for (auto &BB : F)
1428 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1429 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1430 assert(Index == CountsFromProfile.size());
1431
1432 // For each B in InverseDependencies[A], if A is covered then B is covered.
1434 InverseDependencies;
1435 for (auto &BB : F) {
1436 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1437 // If Dep is covered then BB is covered.
1438 InverseDependencies[Dep].insert(&BB);
1439 }
1440 }
1441
1442 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1443 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1444 for (auto &[BB, IsCovered] : Coverage)
1445 if (IsCovered)
1446 CoveredBlocksToProcess.push(BB);
1447
1448 while (!CoveredBlocksToProcess.empty()) {
1449 auto *CoveredBlock = CoveredBlocksToProcess.top();
1450 assert(Coverage[CoveredBlock]);
1451 CoveredBlocksToProcess.pop();
1452 for (auto *BB : InverseDependencies[CoveredBlock]) {
1453 // If CoveredBlock is covered then BB is covered.
1454 if (Coverage[BB])
1455 continue;
1456 Coverage[BB] = true;
1457 CoveredBlocksToProcess.push(BB);
1458 }
1459 }
1460
1461 // Annotate block coverage.
1462 MDBuilder MDB(F.getContext());
1463 // We set the entry count to 10000 if the entry block is covered so that BFI
1464 // can propagate a fraction of this count to the other covered blocks.
1465 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1466 for (auto &BB : F) {
1467 // For a block A and its successor B, we set the edge weight as follows:
1468 // If A is covered and B is covered, set weight=1.
1469 // If A is covered and B is uncovered, set weight=0.
1470 // If A is uncovered, set weight=1.
1471 // This setup will allow BFI to give nonzero profile counts to only covered
1472 // blocks.
1474 for (auto *Succ : successors(&BB))
1475 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are only attached to terminators with at least two successors.
1476 if (Weights.size() >= 2)
1477 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1478 /*IsExpected=*/false);
1479 }
1480
// Recompute BFI locally from the weights just attached and count the blocks
// where it disagrees with the inferred coverage.
1481 unsigned NumCorruptCoverage = 0;
1482 DominatorTree DT(F);
1483 LoopInfo LI(DT);
1484 BranchProbabilityInfo BPI(F, LI);
1485 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether BFI assigns the block a zero profile count, or an empty
// optional when BFI has no profile count for it.
1486 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1487 if (auto C = BFI.getBlockProfileCount(&BB))
1488 return C == 0;
1489 return {};
1490 };
1491 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1492 for (auto &BB : F) {
1493 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1494 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1495 << "\n");
1496 // In some cases it is possible to find a covered block that has no covered
1497 // successors, e.g., when a block calls a function that may call exit(). In
1498 // those cases, BFI could find its successor to be covered while BCI could
1499 // find its successor to be dead.
1500 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1501 LLVM_DEBUG(
1502 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1503 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1504 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1505 ++NumCorruptCoverage;
1506 }
1507 if (Coverage[&BB])
1508 ++NumCoveredBlocks;
1509 }
// The inconsistency warning is only emitted when PGOVerifyBFI is set.
1510 if (PGOVerifyBFI && NumCorruptCoverage) {
1511 auto &Ctx = M->getContext();
1512 Ctx.diagnose(DiagnosticInfoPGOProfile(
1513 M->getName().data(),
1514 Twine("Found inconsistent block coverage for function ") + F.getName() +
1515 " in " + Twine(NumCorruptCoverage) + " blocks.",
1516 DS_Warning));
1517 }
1519 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1520}
1521
1522// Populate the counters from instrumented BBs to all BBs.
1523// In the end of this operation, all BBs should have a valid count value.
// NOTE(review): listing line 1593 (one statement between the entry-count
// fix-up and markFunctionAttributes) is absent from this extract.
1524void PGOUseFunc::populateCounters() {
// Iterate to a fixed point: each pass derives block counts from fully known
// edge sets and fills in the last unknown edge of a block with a known count.
1525 bool Changes = true;
1526 unsigned NumPasses = 0;
1527 while (Changes) {
1528 NumPasses++;
1529 Changes = false;
1530
1531 // For efficient traversal, it's better to start from the end as most
1532 // of the instrumented edges are at the end.
1533 for (auto &BB : reverse(F)) {
1534 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1535 if (UseBBInfo == nullptr)
1536 continue;
// A block whose out-edges (or else in-edges) all have counts gets their sum
// as its own count.
1537 if (!UseBBInfo->Count) {
1538 if (UseBBInfo->UnknownCountOutEdge == 0) {
1539 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1540 Changes = true;
1541 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1542 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1543 Changes = true;
1544 }
1545 }
// With a known block count and exactly one unknown edge on a side, that edge
// gets the remainder, clamped at zero.
1546 if (UseBBInfo->Count) {
1547 if (UseBBInfo->UnknownCountOutEdge == 1) {
1548 uint64_t Total = 0;
1549 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1550 // If the one of the successor block can early terminate (no-return),
1551 // we can end up with situation where out edge sum count is larger as
1552 // the source BB's count is collected by a post-dominated block.
1553 if (*UseBBInfo->Count > OutSum)
1554 Total = *UseBBInfo->Count - OutSum;
1555 setEdgeCount(UseBBInfo->OutEdges, Total);
1556 Changes = true;
1557 }
1558 if (UseBBInfo->UnknownCountInEdge == 1) {
1559 uint64_t Total = 0;
1560 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1561 if (*UseBBInfo->Count > InSum)
1562 Total = *UseBBInfo->Count - InSum;
1563 setEdgeCount(UseBBInfo->InEdges, Total);
1564 Changes = true;
1565 }
1566 }
1567 }
1568 }
1569
1570 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
// NumPasses is only read inside LLVM_DEBUG; the cast marks it as used.
1571 (void)NumPasses;
1572#ifndef NDEBUG
1573 // Assert every BB has a valid counter.
1574 for (auto &BB : F) {
1575 auto BI = findBBInfo(&BB);
1576 if (BI == nullptr)
1577 continue;
1578 assert(BI->Count && "BB count is not valid");
1579 }
1580#endif
// Compute the entry count and the largest block count of the function.
1581 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1582 uint64_t FuncMaxCount = FuncEntryCount;
1583 for (auto &BB : F) {
1584 auto BI = findBBInfo(&BB);
1585 if (BI == nullptr)
1586 continue;
1587 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1588 }
1589
1590 // Fix the obviously inconsistent entry count.
1591 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1592 FuncEntryCount = 1;
1594 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1595
1596 // Now annotate select instructions
1597 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
// After the selects are annotated, every profile counter must have been
// consumed.
1598 assert(CountPosition == ProfileCountSize);
1599
1600 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1601}
1602
1603// Assign the scaled count values to the BB with multiple out edges.
1604void PGOUseFunc::setBranchWeights() {
1605 // Generate MD_prof metadata for every branch instruction.
1606 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1607 << " IsCS=" << IsCS << "\n");
1608 for (auto &BB : F) {
1609 Instruction *TI = BB.getTerminator();
1610 if (TI->getNumSuccessors() < 2)
1611 continue;
1612 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1613 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1614 isa<CallBrInst>(TI)))
1615 continue;
1616
1617 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1618 if (!*BBCountInfo.Count)
1619 continue;
1620
1621 // We have a non-zero Branch BB.
1622 unsigned Size = BBCountInfo.OutEdges.size();
1623 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1624 uint64_t MaxCount = 0;
1625 for (unsigned s = 0; s < Size; s++) {
1626 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1627 const BasicBlock *SrcBB = E->SrcBB;
1628 const BasicBlock *DestBB = E->DestBB;
1629 if (DestBB == nullptr)
1630 continue;
1631 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1632 uint64_t EdgeCount = *E->Count;
1633 if (EdgeCount > MaxCount)
1634 MaxCount = EdgeCount;
1635 EdgeCounts[SuccNum] = EdgeCount;
1636 }
1637
1638 if (MaxCount)
1639 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1640 else {
1641 // A zero MaxCount can come about when we have a BB with a positive
1642 // count, and whose successor blocks all have 0 count. This can happen
1643 // when there is no exit block and the code exits via a noreturn function.
1644 auto &Ctx = M->getContext();
1645 Ctx.diagnose(DiagnosticInfoPGOProfile(
1646 M->getName().data(),
1647 Twine("Profile in ") + F.getName().str() +
1648 Twine(" partially ignored") +
1649 Twine(", possibly due to the lack of a return path."),
1650 DS_Warning));
1651 }
1652 }
1653}
1654
// NOTE(review): the signature (listing line 1655) is absent from this extract;
// the visible caller invokes this as isIndirectBrTarget(&BB).
// Returns true when at least one predecessor of BB ends in an indirectbr.
1656 for (BasicBlock *Pred : predecessors(BB)) {
1657 if (isa<IndirectBrInst>(Pred->getTerminator()))
1658 return true;
1659 }
1660 return false;
1661}
1662
1663void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1664 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1665 // Find irr loop headers
1666 for (auto &BB : F) {
1667 // As a heuristic also annotate indrectbr targets as they have a high chance
1668 // to become an irreducible loop header after the indirectbr tail
1669 // duplication.
1670 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1671 Instruction *TI = BB.getTerminator();
1672 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1673 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1674 }
1675 }
1676}
1677
1678void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1679 Module *M = F.getParent();
1680 IRBuilder<> Builder(&SI);
1681 Type *Int64Ty = Builder.getInt64Ty();
1682 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1683 Builder.CreateCall(
1684 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1685 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1686 Builder.getInt32(*CurCtrIdx), Step});
1687 ++(*CurCtrIdx);
1688}
1689
1690void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1691 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1692 assert(*CurCtrIdx < CountFromProfile.size() &&
1693 "Out of bound access of counters");
1694 uint64_t SCounts[2];
1695 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1696 ++(*CurCtrIdx);
1697 uint64_t TotalCount = 0;
1698 auto BI = UseFunc->findBBInfo(SI.getParent());
1699 if (BI != nullptr)
1700 TotalCount = *BI->Count;
1701 // False Count
1702 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1703 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1704 if (MaxCount)
1705 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1706}
1707
1708void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1709 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1710 return;
1711 // FIXME: do not handle this yet.
1712 if (SI.getCondition()->getType()->isVectorTy())
1713 return;
1714
1715 switch (Mode) {
1716 case VM_counting:
1717 NSIs++;
1718 return;
1719 case VM_instrument:
1720 instrumentOneSelectInst(SI);
1721 return;
1722 case VM_annotate:
1723 annotateOneSelectInst(SI);
1724 return;
1725 }
1726
1727 llvm_unreachable("Unknown visiting mode");
1728}
1729
1730// Traverse all valuesites and annotate the instructions for all value kind.
1731void PGOUseFunc::annotateValueSites() {
// NOTE(review): the condition for this early return (listing line 1732) is
// absent from this extract.
1733 return;
1734
1735 // Create the PGOFuncName meta data.
1736 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1737
// Annotate each value kind in turn, from IPVK_First through IPVK_Last.
1738 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1739 annotateValueSites(Kind);
1740}
1741
1742// Annotate the instructions for a specific value kind.
// NOTE(review): listing line 1767 (the remainder of the annotateValueSite
// argument list) is absent from this extract.
1743void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1744 assert(Kind <= IPVK_Last);
1745 unsigned ValueSiteIndex = 0;
1746 auto &ValueSites = FuncInfo.ValueSites[Kind];
1747 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
// The profile must record exactly as many sites of this kind as the function
// has; otherwise warn and annotate nothing for this kind.
1748 if (NumValueSites != ValueSites.size()) {
1749 auto &Ctx = M->getContext();
1750 Ctx.diagnose(DiagnosticInfoPGOProfile(
1751 M->getName().data(),
1752 Twine("Inconsistent number of value sites for ") +
1753 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1754 F.getName().str() +
1755 Twine("\", possibly due to the use of a stale profile."),
1756 DS_Warning));
1757 return;
1758 }
1759
// Sites are annotated in order; ValueSiteIndex is the running site index
// passed to annotateValueSite.
1760 for (VPCandidateInfo &I : ValueSites) {
1761 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1762 << "): Index = " << ValueSiteIndex << " out of "
1763 << NumValueSites << "\n");
1764 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1765 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1766 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1768 ValueSiteIndex++;
1769 }
1770}
1771
1772// Collect the set of members for each Comdat in module M and store
1773// in ComdatMembers.
// NOTE(review): the first line of the signature (listing line 1774) is absent
// from this extract; the visible caller invokes this as
// collectComdatMembers(M, ComdatMembers).
1775 Module &M,
1776 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
// Nothing is collected unless DoComdatRenaming is set.
1777 if (!DoComdatRenaming)
1778 return;
// Functions, global variables and aliases that have a comdat are each
// recorded under that comdat.
1779 for (Function &F : M)
1780 if (Comdat *C = F.getComdat())
1781 ComdatMembers.insert(std::make_pair(C, &F));
1782 for (GlobalVariable &GV : M.globals())
1783 if (Comdat *C = GV.getComdat())
1784 ComdatMembers.insert(std::make_pair(C, &GV));
1785 for (GlobalAlias &GA : M.aliases())
1786 if (Comdat *C = GA.getComdat())
1787 ComdatMembers.insert(std::make_pair(C, &GA));
1788}
1789
1790// Return true if we should not find instrumentation data for this function
1791static bool skipPGOUse(const Function &F) {
1792 if (F.isDeclaration())
1793 return true;
1794 // If there are too many critical edges, PGO might cause
1795 // compiler time problem. Skip PGO if the number of
1796 // critical edges execeed the threshold.
1797 unsigned NumCriticalEdges = 0;
1798 for (auto &BB : F) {
1799 const Instruction *TI = BB.getTerminator();
1800 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1801 if (isCriticalEdge(TI, I))
1802 NumCriticalEdges++;
1803 }
1804 }
1805 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1806 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1807 << ", NumCriticalEdges=" << NumCriticalEdges
1808 << " exceed the threshold. Skip PGO.\n");
1809 return true;
1810 }
1811 return false;
1812}
1813
1814// Return true if we should not instrument this function
1815static bool skipPGOGen(const Function &F) {
1816 if (skipPGOUse(F))
1817 return true;
1818 if (F.hasFnAttribute(llvm::Attribute::Naked))
1819 return true;
1820 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1821 return true;
1822 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1823 return true;
1824 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1825 return true;
1826 return false;
1827}
1828
1830 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1832 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1833 // For the context-sensitive instrumentation, we should have a separated pass
1834 // (before LTO/ThinLTO linking) to create these variables.
1836 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1837
1838 Triple TT(M.getTargetTriple());
1839 LLVMContext &Ctx = M.getContext();
1840 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1842 M.getName().data(),
1843 Twine("VTable value profiling is presently not "
1844 "supported for non-ELF object formats"),
1845 DS_Warning));
1846 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1847 collectComdatMembers(M, ComdatMembers);
1848
1849 for (auto &F : M) {
1850 if (skipPGOGen(F))
1851 continue;
1852 auto &TLI = LookupTLI(F);
1853 auto *BPI = LookupBPI(F);
1854 auto *BFI = LookupBFI(F);
1855 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1856 }
1857 return true;
1858}
1859
1862 createProfileFileNameVar(M, CSInstrName);
1863 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1864 // will be retained.
1869 return PA;
1870}
1871
1874 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1875 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1877 };
1878 auto LookupBPI = [&FAM](Function &F) {
1880 };
1881 auto LookupBFI = [&FAM](Function &F) {
1883 };
1884
1885 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1886 return PreservedAnalyses::all();
1887
1888 return PreservedAnalyses::none();
1889}
1890
1891// Using the ratio b/w sums of profile count values and BFI count values to
1892// adjust the func entry count.
1893static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1894 BranchProbabilityInfo &NBPI) {
1895 Function &F = Func.getFunc();
1896 BlockFrequencyInfo NBFI(F, NBPI, LI);
1897#ifndef NDEBUG
1898 auto BFIEntryCount = F.getEntryCount();
1899 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1900 "Invalid BFI Entrycount");
1901#endif
1902 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1903 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1904 for (auto &BBI : F) {
1905 uint64_t CountValue = 0;
1906 uint64_t BFICountValue = 0;
1907 if (!Func.findBBInfo(&BBI))
1908 continue;
1909 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1910 CountValue = *Func.getBBInfo(&BBI).Count;
1911 BFICountValue = *BFICount;
1912 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1913 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1914 }
1915 if (SumCount.isZero())
1916 return;
1917
1918 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1919 "Incorrect sum of BFI counts");
1920 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1921 return;
1922 double Scale = (SumCount / SumBFICount).convertToDouble();
1923 if (Scale < 1.001 && Scale > 0.999)
1924 return;
1925
1926 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1927 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1928 if (NewEntryCount == 0)
1929 NewEntryCount = 1;
1930 if (NewEntryCount != FuncEntryCount) {
1931 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1932 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1933 << ", entry_count " << FuncEntryCount << " --> "
1934 << NewEntryCount << "\n");
1935 }
1936}
1937
1938// Compare the profile count values with BFI count values, and print out
1939// the non-matching ones.
1940static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1942 uint64_t HotCountThreshold,
1944 Function &F = Func.getFunc();
1945 BlockFrequencyInfo NBFI(F, NBPI, LI);
1946 // bool PrintFunc = false;
1947 bool HotBBOnly = PGOVerifyHotBFI;
1948 StringRef Msg;
1950
1951 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1952 for (auto &BBI : F) {
1953 uint64_t CountValue = 0;
1954 uint64_t BFICountValue = 0;
1955
1956 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1957
1958 BBNum++;
1959 if (CountValue)
1960 NonZeroBBNum++;
1961 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1962 if (BFICount)
1963 BFICountValue = *BFICount;
1964
1965 if (HotBBOnly) {
1966 bool rawIsHot = CountValue >= HotCountThreshold;
1967 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1968 bool rawIsCold = CountValue <= ColdCountThreshold;
1969 bool ShowCount = false;
1970 if (rawIsHot && !BFIIsHot) {
1971 Msg = "raw-Hot to BFI-nonHot";
1972 ShowCount = true;
1973 } else if (rawIsCold && BFIIsHot) {
1974 Msg = "raw-Cold to BFI-Hot";
1975 ShowCount = true;
1976 }
1977 if (!ShowCount)
1978 continue;
1979 } else {
1980 if ((CountValue < PGOVerifyBFICutoff) &&
1981 (BFICountValue < PGOVerifyBFICutoff))
1982 continue;
1983 uint64_t Diff = (BFICountValue >= CountValue)
1984 ? BFICountValue - CountValue
1985 : CountValue - BFICountValue;
1986 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1987 continue;
1988 }
1989 BBMisMatchNum++;
1990
1991 ORE.emit([&]() {
1993 F.getSubprogram(), &BBI);
1994 Remark << "BB " << ore::NV("Block", BBI.getName())
1995 << " Count=" << ore::NV("Count", CountValue)
1996 << " BFI_Count=" << ore::NV("Count", BFICountValue);
1997 if (!Msg.empty())
1998 Remark << " (" << Msg << ")";
1999 return Remark;
2000 });
2001 }
2002 if (BBMisMatchNum)
2003 ORE.emit([&]() {
2004 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2005 F.getSubprogram(), &F.getEntryBlock())
2006 << "In Func " << ore::NV("Function", F.getName())
2007 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2008 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2009 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2010 });
2011}
2012
2014 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2015 vfs::FileSystem &FS,
2016 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2019 ProfileSummaryInfo *PSI, bool IsCS) {
2020 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2021 auto &Ctx = M.getContext();
2022 // Read the counter array from file.
2023 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2024 ProfileRemappingFileName);
2025 if (Error E = ReaderOrErr.takeError()) {
2026 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2027 Ctx.diagnose(
2028 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2029 });
2030 return false;
2031 }
2032
2033 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2034 std::move(ReaderOrErr.get());
2035 if (!PGOReader) {
2036 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2037 StringRef("Cannot get PGOReader")));
2038 return false;
2039 }
2040 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2041 return false;
2042
2043 // TODO: might need to change the warning once the clang option is finalized.
2044 if (!PGOReader->isIRLevelProfile()) {
2045 Ctx.diagnose(DiagnosticInfoPGOProfile(
2046 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2047 return false;
2048 }
2049 if (PGOReader->functionEntryOnly()) {
2050 Ctx.diagnose(DiagnosticInfoPGOProfile(
2051 ProfileFileName.data(),
2052 "Function entry profiles are not yet supported for optimization"));
2053 return false;
2054 }
2055
2056 // Add the profile summary (read from the header of the indexed summary) here
2057 // so that we can use it below when reading counters (which checks if the
2058 // function should be marked with a cold or inlinehint attribute).
2059 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2062 PSI->refresh();
2063
2064 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2065 collectComdatMembers(M, ComdatMembers);
2066 std::vector<Function *> HotFunctions;
2067 std::vector<Function *> ColdFunctions;
2068
2069 // If the profile marked as always instrument the entry BB, do the
2070 // same. Note this can be overwritten by the internal option in CFGMST.h
2071 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2072 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2073 InstrumentFuncEntry = PGOInstrumentEntry;
2074 InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2075
2076 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2077 for (auto &F : M) {
2078 if (skipPGOUse(F))
2079 continue;
2080 auto &TLI = LookupTLI(F);
2081 auto *BPI = LookupBPI(F);
2082 auto *BFI = LookupBFI(F);
2083 if (!HasSingleByteCoverage) {
2084 // Split indirectbr critical edges here before computing the MST rather
2085 // than later in getInstrBB() to avoid invalidating it.
2086 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2087 BFI);
2088 }
2089 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2090 InstrumentFuncEntry, HasSingleByteCoverage);
2091 if (HasSingleByteCoverage) {
2092 Func.populateCoverage(PGOReader.get());
2093 continue;
2094 }
2095 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2096 // it means the profile for the function is unrepresentative and this
2097 // function is actually hot / warm. We will reset the function hot / cold
2098 // attribute and drop all the profile counters.
2100 bool AllZeros = false;
2101 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2102 continue;
2103 if (AllZeros) {
2104 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2105 if (Func.getProgramMaxCount() != 0)
2106 ColdFunctions.push_back(&F);
2107 continue;
2108 }
2109 if (PseudoKind != InstrProfRecord::NotPseudo) {
2110 // Clear function attribute cold.
2111 if (F.hasFnAttribute(Attribute::Cold))
2112 F.removeFnAttr(Attribute::Cold);
2113 // Set function attribute as hot.
2114 if (PseudoKind == InstrProfRecord::PseudoHot)
2115 F.addFnAttr(Attribute::Hot);
2116 continue;
2117 }
2118 Func.populateCounters();
2119 Func.setBranchWeights();
2120 Func.annotateValueSites();
2121 Func.annotateIrrLoopHeaderWeights();
2122 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2123 if (FreqAttr == PGOUseFunc::FFA_Cold)
2124 ColdFunctions.push_back(&F);
2125 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2126 HotFunctions.push_back(&F);
2127 if (PGOViewCounts != PGOVCT_None &&
2128 (ViewBlockFreqFuncName.empty() ||
2129 F.getName() == ViewBlockFreqFuncName)) {
2131 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2132 std::make_unique<BranchProbabilityInfo>(F, LI);
2133 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2134 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2136 NewBFI->view();
2137 else if (PGOViewCounts == PGOVCT_Text) {
2138 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2139 NewBFI->print(dbgs());
2140 }
2141 }
2143 (ViewBlockFreqFuncName.empty() ||
2144 F.getName() == ViewBlockFreqFuncName)) {
2146 if (ViewBlockFreqFuncName.empty())
2147 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2148 else
2149 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2150 else if (PGOViewRawCounts == PGOVCT_Text) {
2151 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2152 Func.dumpInfo();
2153 }
2154 }
2155
2158 BranchProbabilityInfo NBPI(F, LI);
2159
2160 // Fix func entry count.
2161 if (PGOFixEntryCount)
2162 fixFuncEntryCount(Func, LI, NBPI);
2163
2164 // Verify BlockFrequency information.
2165 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2166 if (PGOVerifyHotBFI) {
2167 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2169 }
2170 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2171 }
2172 }
2173
2174 // Set function hotness attribute from the profile.
2175 // We have to apply these attributes at the end because their presence
2176 // can affect the BranchProbabilityInfo of any callers, resulting in an
2177 // inconsistent MST between prof-gen and prof-use.
2178 for (auto &F : HotFunctions) {
2179 F->addFnAttr(Attribute::InlineHint);
2180 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2181 << "\n");
2182 }
2183 for (auto &F : ColdFunctions) {
2184 // Only set when there is no Attribute::Hot set by the user. For Hot
2185 // attribute, user's annotation has the precedence over the profile.
2186 if (F->hasFnAttribute(Attribute::Hot)) {
2187 auto &Ctx = M.getContext();
2188 std::string Msg = std::string("Function ") + F->getName().str() +
2189 std::string(" is annotated as a hot function but"
2190 " the profile is cold");
2191 Ctx.diagnose(
2192 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2193 continue;
2194 }
2195 F->addFnAttr(Attribute::Cold);
2196 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2197 << "\n");
2198 }
2199 return true;
2200}
2201
2203 std::string Filename, std::string RemappingFilename, bool IsCS,
2205 : ProfileFileName(std::move(Filename)),
2206 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2207 FS(std::move(VFS)) {
2208 if (!PGOTestProfileFile.empty())
2209 ProfileFileName = PGOTestProfileFile;
2210 if (!PGOTestProfileRemappingFile.empty())
2211 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2212 if (!FS)
2214}
2215
2218
2219 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2220 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2222 };
2223 auto LookupBPI = [&FAM](Function &F) {
2225 };
2226 auto LookupBFI = [&FAM](Function &F) {
2228 };
2229
2230 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2231
2232 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2233 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2234 return PreservedAnalyses::all();
2235
2236 return PreservedAnalyses::none();
2237}
2238
2239static std::string getSimpleNodeName(const BasicBlock *Node) {
2240 if (!Node->getName().empty())
2241 return Node->getName().str();
2242
2243 std::string SimpleNodeName;
2244 raw_string_ostream OS(SimpleNodeName);
2245 Node->printAsOperand(OS, false);
2246 return OS.str();
2247}
2248
2250 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2251 assert(MaxCount > 0 && "Bad max count");
2252 uint64_t Scale = calculateCountScale(MaxCount);
2254 for (const auto &ECI : EdgeCounts)
2255 Weights.push_back(scaleBranchCount(ECI, Scale));
2256
2257 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2258 : Weights) {
2259 dbgs() << W << " ";
2260 } dbgs() << "\n";);
2261
2262 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2263
2264 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2266 std::string BrCondStr = getBranchCondString(TI);
2267 if (BrCondStr.empty())
2268 return;
2269
2270 uint64_t WSum =
2271 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2272 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2273 uint64_t TotalCount =
2274 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2275 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2276 Scale = calculateCountScale(WSum);
2277 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2278 scaleBranchCount(WSum, Scale));
2279 std::string BranchProbStr;
2280 raw_string_ostream OS(BranchProbStr);
2281 OS << BP;
2282 OS << " (total count : " << TotalCount << ")";
2283 OS.flush();
2284 Function *F = TI->getParent()->getParent();
2286 ORE.emit([&]() {
2287 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2288 << BrCondStr << " is true with probability : " << BranchProbStr;
2289 });
2290 }
2291}
2292
2293namespace llvm {
2294
2296 MDBuilder MDB(M->getContext());
2297 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2298 MDB.createIrrLoopHeaderWeight(Count));
2299}
2300
2301template <> struct GraphTraits<PGOUseFunc *> {
2302 using NodeRef = const BasicBlock *;
2305
2306 static NodeRef getEntryNode(const PGOUseFunc *G) {
2307 return &G->getFunc().front();
2308 }
2309
2311 return succ_begin(N);
2312 }
2313
2314 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2315
2316 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2317 return nodes_iterator(G->getFunc().begin());
2318 }
2319
2320 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2321 return nodes_iterator(G->getFunc().end());
2322 }
2323};
2324
2325template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2326 explicit DOTGraphTraits(bool isSimple = false)
2328
2329 static std::string getGraphName(const PGOUseFunc *G) {
2330 return std::string(G->getFunc().getName());
2331 }
2332
2333 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2334 std::string Result;
2335 raw_string_ostream OS(Result);
2336
2337 OS << getSimpleNodeName(Node) << ":\\l";
2338 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2339 OS << "Count : ";
2340 if (BI && BI->Count)
2341 OS << *BI->Count << "\\l";
2342 else
2343 OS << "Unknown\\l";
2344
2345 if (!PGOInstrSelect)
2346 return Result;
2347
2348 for (const Instruction &I : *Node) {
2349 if (!isa<SelectInst>(&I))
2350 continue;
2351 // Display scaled counts for SELECT instruction:
2352 OS << "SELECT : { T = ";
2353 uint64_t TC, FC;
2354 bool HasProf = extractBranchWeights(I, TC, FC);
2355 if (!HasProf)
2356 OS << "Unknown, F = Unknown }\\l";
2357 else
2358 OS << TC << ", F = " << FC << " }\\l";
2359 }
2360 return Result;
2361 }
2362};
2363
2364} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:982
Class for arbitrary precision integers.
Definition: APInt.h:77
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:47
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Value * getCalledOperand() const
Definition: InstrTypes.h:1743
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:279
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:530
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:67
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:51
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:55
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:52
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:54
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:491
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:631
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:412
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:250
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:812
const BasicBlock * getParent() const
Definition: Instruction.h:152
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1067
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
Tuple of metadata.
Definition: Metadata.h:1470
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1474
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:203
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:374
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1404
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:363
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:462
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1272
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:267
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then clean up.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1464
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1487
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:272
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:827
std::vector< uint64_t > Counts
Definition: InstrProf.h:828
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:938
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1057
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1038