LLVM 23.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotates the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/StringSet.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
64#include "llvm/Analysis/CFG.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Comdat.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
76#include "llvm/IR/Dominators.h"
78#include "llvm/IR/Function.h"
79#include "llvm/IR/GlobalAlias.h"
80#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/InstVisitor.h"
84#include "llvm/IR/InstrTypes.h"
85#include "llvm/IR/Instruction.h"
88#include "llvm/IR/Intrinsics.h"
89#include "llvm/IR/LLVMContext.h"
90#include "llvm/IR/MDBuilder.h"
91#include "llvm/IR/Module.h"
92#include "llvm/IR/PassManager.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
100#include "llvm/Support/CRC.h"
101#include "llvm/Support/Casting.h"
105#include "llvm/Support/Debug.h"
106#include "llvm/Support/Error.h"
118#include <algorithm>
119#include <cassert>
120#include <cstdint>
121#include <memory>
122#include <numeric>
123#include <optional>
124#include <stack>
125#include <string>
126#include <unordered_map>
127#include <utility>
128#include <vector>
129
130using namespace llvm;
133
134#define DEBUG_TYPE "pgo-instrumentation"
135
136STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
137STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
138STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
139STATISTIC(NumOfPGOEdge, "Number of edges.");
140STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
141STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
142STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
143STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
144STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
145STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
146STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOSelectInsts,
148 "Number of select instruction instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOMemIntrinsics,
150 "Number of mem intrinsics instrumented in CSPGO.");
151STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
152STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
153STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
154STATISTIC(NumOfCSPGOFunc,
155 "Number of functions having valid profile counts in CSPGO.");
156STATISTIC(NumOfCSPGOMismatch,
157 "Number of functions having mismatch profile in CSPGO.");
158STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
159STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
160
161// Command line option to specify the file to read profile from. This is
162// mainly used for testing.
164 "pgo-test-profile-file", cl::init(""), cl::Hidden,
165 cl::value_desc("filename"),
166 cl::desc("Specify the path of profile data file. This is "
167 "mainly for test purpose."));
169 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
170 cl::value_desc("filename"),
171 cl::desc("Specify the path of profile remapping file. This is mainly for "
172 "test purpose."));
173
174// Command line option to disable value profiling. The default is false:
175// i.e. value profiling is enabled by default. This is for debug purpose.
176static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
178 cl::desc("Disable Value Profiling"));
179
180// Command line option to set the maximum number of VP annotations to write to
181// the metadata for a single indirect call callsite.
183 "icp-max-annotations", cl::init(3), cl::Hidden,
184 cl::desc("Max number of annotations for a single indirect "
185 "call callsite"));
186
187// Command line option to set the maximum number of value annotations
188// to write to the metadata for a single memop intrinsic.
190 "memop-max-annotations", cl::init(4), cl::Hidden,
191 cl::desc("Max number of precise value annotations for a single memop"
192 "intrinsic"));
193
194// Command line option to control appending FunctionHash to the name of a COMDAT
195// function. This is to avoid the hash mismatch caused by the preinliner.
197 "do-comdat-renaming", cl::init(false), cl::Hidden,
198 cl::desc("Append function hash to the name of COMDAT function to avoid "
199 "function hash mismatch due to the preinliner"));
200
201namespace llvm {
202// Command line option to enable/disable the warning about missing profile
203// information.
204cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
206 cl::desc("Use this option to turn on/off "
207 "warnings about missing profile data for "
208 "functions."));
209
210// Command line option to enable/disable the warning about a hash mismatch in
211// the profile data.
213 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
214 cl::desc("Use this option to turn off/on "
215 "warnings about profile cfg mismatch."));
216
217// Command line option to enable/disable the warning about a hash mismatch in
218// the profile data for Comdat functions, which often turns out to be false
219// positive due to the pre-instrumentation inline.
221 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
222 cl::desc("The option is used to turn on/off "
223 "warnings about hash mismatch for comdat "
224 "or weak functions."));
225
226// Command line option to enable/disable select instruction instrumentation.
227static cl::opt<bool>
228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229 cl::desc("Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245// Command line option to enable/disable memop intrinsic call.size profiling.
246static cl::opt<bool>
247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248 cl::desc("Use this option to turn on/off "
249 "memory intrinsic size profiling."));
250
251// Emit branch probability as optimization remarks.
252static cl::opt<bool>
253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254 cl::desc("When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
263static cl::opt<bool>
264 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
266 cl::desc("Force to instrument loop entries."));
267
269 "pgo-function-entry-coverage", cl::Hidden,
270 cl::desc(
271 "Use this option to enable function entry coverage instrumentation."));
272
274 "pgo-block-coverage",
275 cl::desc("Use this option to enable basic block coverage instrumentation"));
276
277static cl::opt<bool>
278 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
279 cl::desc("Create a dot file of CFGs with block "
280 "coverage inference information"));
281
283 "pgo-temporal-instrumentation",
284 cl::desc("Use this option to enable temporal instrumentation"));
285
286static cl::opt<bool>
287 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
288 cl::desc("Fix function entry count in profile use."));
289
291 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
292 cl::desc("Print out the non-match BFI count if a hot raw profile count "
293 "becomes non-hot, or a cold raw profile count becomes hot. "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remarks-analysis=pgo."));
296
298 "pgo-verify-bfi", cl::init(false), cl::Hidden,
299 cl::desc("Print out mismatched BFI counts after setting profile metadata "
300 "The print is enabled under -Rpass-analysis=pgo, or "
301 "internal option -pass-remarks-analysis=pgo."));
302
304 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
305 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
306 "mismatched BFI if the difference percentage is greater than "
307 "this value (in percentage)."));
308
310 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
311 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
312 "profile count value is below."));
313
315 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
316 cl::value_desc("function name"),
317 cl::desc("Trace the hash of the function with this name."));
318
320 "pgo-function-size-threshold", cl::Hidden,
321 cl::desc("Do not instrument functions smaller than this threshold."));
322
324 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
325 cl::desc("Do not instrument functions with the number of critical edges "
326 " greater than this threshold."));
327
329 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
330 cl::desc("For cold function instrumentation, skip instrumenting functions "
331 "whose entry count is above the given value."));
332
334 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
335 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
336 "unprofiled) functions as cold."));
337
339 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
340 cl::desc("Enable cold function only instrumentation."));
341
343 "ctx-prof-skip-callsite-instr", cl::Hidden,
344 cl::desc("Do not instrument callsites to functions in this list. Intended "
345 "for testing."));
346
348
349// Command line option to turn on CFG dot dump after profile annotation.
350// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
352
353// Command line option to specify the name of the function for CFG dump
354// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
356
357// Command line option to enable vtable value profiling. Defined in
358// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
363} // namespace llvm
364
365namespace {
// Drives PGO instrumentation of a single function F in module M. The class
// keeps references to the module, the function and the Comdat membership map,
// optional analysis results (BPI, BFI, LI; each may be null) and the
// instrumentation flavor. instrument() is its only public operation.
// NOTE(review): the constructor below initializes a TLI member and uses
// parameters M, F, TLI and InstrumentationType whose declarations are not
// visible in this listing -- confirm against the full source.
366class FunctionInstrumenter final {
367 Module &M;
368 Function &F;
370 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
371 BranchProbabilityInfo *const BPI;
372 BlockFrequencyInfo *const BFI;
373 LoopInfo *const LI;
374
375 const PGOInstrumentationType InstrumentationType;
376
  // Value profiling is disabled when -disable-vp is given, for CTXPROF
  // instrumentation, or when the module's target triple is a GPU.
377 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
378 // Ctx profiling implicitly captures indirect call cases, but not other
379 // values. Supporting other values is relatively straight-forward - just
380 // another counter range within the context.
381 bool isValueProfilingDisabled() const {
382 return DisableValueProfiling ||
383 InstrumentationType == PGOInstrumentationType::CTXPROF ||
384 M.getTargetTriple().isGPU();
385 }
386
  // The entry basic block is instrumented when PGOInstrumentEntry is set or
  // whenever CTXPROF instrumentation is requested.
387 bool shouldInstrumentEntryBB() const {
388 return PGOInstrumentEntry ||
389 InstrumentationType == PGOInstrumentationType::CTXPROF;
390 }
391
  // Mirrors the -pgo-instrument-loop-entries option.
392 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
393
394public:
  // Stores the arguments in the corresponding members; no other work is done
  // at construction time. BPI, BFI and LI default to nullptr.
395 FunctionInstrumenter(
397 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
398 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
399 LoopInfo *LI = nullptr,
401 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
402 LI(LI), InstrumentationType(InstrumentationType) {}
403
  // Instruments F; defined out of line.
404 void instrument();
405};
406} // namespace
407
408// Return a string describing the branch condition that can be
409// used in static branch probability heuristics. The result has the form
//   <predicate>_<type of operand 0>[_Zero|_One|_MinusOne|_Const]
// where the trailing suffix is only appended when CV is non-null. An empty
// string is returned when BI or CI is null.
// NOTE(review): the statements that define BI, CI and CV are not visible in
// this listing; presumably they are casts of TI, Cond and RHS respectively --
// confirm against the full source.
410static std::string getBranchCondString(Instruction *TI) {
412 if (!BI)
413 return std::string();
414
415 Value *Cond = BI->getCondition();
417 if (!CI)
418 return std::string();
419
420 std::string result;
421 raw_string_ostream OS(result);
  // Predicate first, then the type of the first compare operand.
422 OS << CI->getPredicate() << "_";
423 CI->getOperand(0)->getType()->print(OS, true);
424
  // Classify the second operand via CV: zero, one, minus one or any other
  // value.
425 Value *RHS = CI->getOperand(1);
427 if (CV) {
428 if (CV->isZero())
429 OS << "_Zero";
430 else if (CV->isOne())
431 OS << "_One";
432 else if (CV->isMinusOne())
433 OS << "_MinusOne";
434 else
435 OS << "_Const";
436 }
437 return result;
438}
439
// Table of description strings: every VALUE_PROF_KIND expansion contributes
// its Descr argument as one array element.
// NOTE(review): the line that actually expands VALUE_PROF_KIND (presumably an
// #include of a .inc file) is not visible in this listing.
440static const char *ValueProfKindDescr[] = {
441#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
443};
444
445// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
446// aware this is an ir_level profile so it can set the version flag.
447static GlobalVariable *
449 PGOInstrumentationType InstrumentationType) {
451 Type *IntTy64 = Type::getInt64Ty(M.getContext());
453 if (InstrumentationType == PGOInstrumentationType::CSFDO)
454 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
455 if (PGOInstrumentEntry ||
456 InstrumentationType == PGOInstrumentationType::CTXPROF)
457 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
459 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
461 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
463 ProfileVersion |=
466 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
468 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
469 auto IRLevelVersionVariable = new GlobalVariable(
470 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
471 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
472 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
473
474 Triple TT(M.getTargetTriple());
475 if (TT.supportsCOMDAT()) {
476 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
477 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
478 }
479 return IRLevelVersionVariable;
480}
481
482namespace {
483
484/// The select instruction visitor plays three roles specified
485/// by the mode. In \c VM_counting mode, it simply counts the number of
486/// select instructions. In \c VM_instrument mode, it inserts code to count
487/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
488/// it reads the profile data and annotates the select instruction with
/// metadata.
enum VisitMode {
  VM_counting,   // Only count the select instructions.
  VM_instrument, // Insert code counting how often TrueValue is taken.
  VM_annotate    // Read profile data and attach metadata to the selects.
};
490class PGOUseFunc;
491
492/// Instruction Visitor class to visit select instructions.
493struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
494 Function &F;
495 unsigned NSIs = 0; // Number of select instructions instrumented.
496 VisitMode Mode = VM_counting; // Visiting mode.
497 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
498 unsigned TotalNumCtrs = 0; // Total number of counters
499 GlobalValue *FuncNameVar = nullptr;
500 uint64_t FuncHash = 0;
501 PGOUseFunc *UseFunc = nullptr;
502 bool HasSingleByteCoverage;
503
504 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
505 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
506
507 void countSelects() {
508 NSIs = 0;
509 Mode = VM_counting;
510 visit(F);
511 }
512
513 // Visit the IR stream and instrument all select instructions. \p
514 // Ind is a pointer to the counter index variable; \p TotalNC
515 // is the total number of counters; \p FNV is the pointer to the
516 // PGO function name var; \p FHash is the function hash.
517 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
518 uint64_t FHash) {
519 Mode = VM_instrument;
520 CurCtrIdx = Ind;
521 TotalNumCtrs = TotalNC;
522 FuncHash = FHash;
523 FuncNameVar = FNV;
524 visit(F);
525 }
526
527 // Visit the IR stream and annotate all select instructions.
528 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
529 Mode = VM_annotate;
530 UseFunc = UF;
531 CurCtrIdx = Ind;
532 visit(F);
533 }
534
535 void instrumentOneSelectInst(SelectInst &SI);
536 void annotateOneSelectInst(SelectInst &SI);
537
538 // Visit \p SI instruction and perform tasks according to visit mode.
539 void visitSelectInst(SelectInst &SI);
540
541 // Return the number of select instructions. This needs be called after
542 // countSelects().
543 unsigned getNumOfSelectInsts() const { return NSIs; }
544};
545
546/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
547/// based instrumentation.
548/// Note that the CFG can be a multi-graph. So there might be multiple edges
549/// with the same SrcBB and DestBB.
550struct PGOEdge {
551 BasicBlock *SrcBB;
552 BasicBlock *DestBB;
553 uint64_t Weight;
554 bool InMST = false;
555 bool Removed = false;
556 bool IsCritical = false;
557
558 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
559 : SrcBB(Src), DestBB(Dest), Weight(W) {}
560
561 /// Return the information string of an edge.
562 std::string infoString() const {
563 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
564 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
565 .str();
566 }
567};
568
569/// This class stores the auxiliary information for each BB in the MST.
570struct PGOBBInfo {
571 PGOBBInfo *Group;
572 uint32_t Index;
573 uint32_t Rank = 0;
574
575 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
576
577 /// Return the information string of this object.
578 std::string infoString() const {
579 return (Twine("Index=") + Twine(Index)).str();
580 }
581};
582
583// Per-function state used by PGO instrumentation. The constructor builds the
// spanning tree (MST) for F, counts select instructions, collects the
// value-profiling sites, computes the CFG hash and updates the statistics
// counters. Note the CFG can be a multi-graph.
584template <class Edge, class BBInfo> class FuncPGOInstrumentation {
585private:
586 Function &F;
587
588 // Whether this is context-sensitive instrumentation.
589 bool IsCS;
590
591 // A map that stores the Comdat group in function F.
592 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
593
594 ValueProfileCollector VPC;
595
596 void computeCFGHash();
597 void renameComdatFunction();
598
599public:
600 const TargetLibraryInfo &TLI;
  // Value-profiling candidates; sized IPVK_Last + 1 by the constructor and
  // indexed by value kind.
601 std::vector<std::vector<VPCandidateInfo>> ValueSites;
602 SelectInstVisitor SIVisitor;
603 std::string FuncName;
604 std::string DeprecatedFuncName;
  // Only assigned by the constructor when CreateGlobalVar is true.
605 GlobalVariable *FuncNameVar;
606
607 // CFG hash value for this function.
608 uint64_t FunctionHash = 0;
609
610 // The Minimum Spanning Tree of function CFG.
611 CFGMST<Edge, BBInfo> MST;
612
  // Engaged only when single-byte coverage is requested (see constructBCI).
613 const std::optional<BlockCoverageInference> BCI;
614
  // Build the block coverage inference for Func when HasSingleByteCoverage is
  // set; otherwise return an empty optional.
615 static std::optional<BlockCoverageInference>
616 constructBCI(Function &Func, bool HasSingleByteCoverage,
617 bool InstrumentFuncEntry) {
618 if (HasSingleByteCoverage)
619 return BlockCoverageInference(Func, InstrumentFuncEntry);
620 return {};
621 }
622
623 // Collect all the BBs that will be instrumented, and store them in
624 // InstrumentBBs.
625 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
626
627 // Given an edge, find the BB that will be instrumented.
628 // Return nullptr if there is no BB to be instrumented.
  // NOTE(review): the member declaration these two comment lines describe is
  // not visible in this listing.
630
631 // Return the auxiliary BB information.
632 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
633
634 // Return the auxiliary BB information if available.
635 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
636
637 // Dump edges and BB information.
638 void dumpInfo(StringRef Str = "") const {
639 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
640 " Hash: " + Twine(FunctionHash) + "\t" + Str);
641 }
642
  // Set up all per-function state for Func. BPI, BFI and LI are forwarded to
  // the MST and may be null. IsCS selects which set of statistics counters is
  // updated. CreateGlobalVar controls whether the PGO function name variable
  // is created.
643 FuncPGOInstrumentation(
644 Function &Func, TargetLibraryInfo &TLI,
645 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
646 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
647 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
648 bool IsCS = false, bool InstrumentFuncEntry = true,
649 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
650 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
651 TLI(TLI), ValueSites(IPVK_Last + 1),
652 SIVisitor(Func, HasSingleByteCoverage),
653 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
654 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
655 if (BCI && PGOViewBlockCoverageGraph)
656 BCI->viewBlockCoverageGraph();
657 // This should be done before CFG hash computation.
658 SIVisitor.countSelects();
659 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
660 if (!IsCS) {
661 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
662 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
663 NumOfPGOBB += MST.bbInfoSize();
664 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
  // NOTE(review): one source line is missing from this listing right here;
  // the statement below may be guarded by a condition -- confirm against the
  // full source.
666 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
667 } else {
668 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
669 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
670 NumOfCSPGOBB += MST.bbInfoSize();
671 }
672
673 FuncName = getIRPGOFuncName(F);
674 DeprecatedFuncName = getPGOFuncName(F);
675 computeCFGHash();
  // Renaming uses FunctionHash, so it has to follow computeCFGHash().
676 if (!ComdatMembers.empty())
677 renameComdatFunction();
678 LLVM_DEBUG(dumpInfo("after CFGMST"));
679
  // Statistics: every non-removed edge counts as an edge; those outside the
  // MST additionally count as instrumented.
680 for (const auto &E : MST.allEdges()) {
681 if (E->Removed)
682 continue;
683 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
684 if (!E->InMST)
685 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
686 }
687
688 if (CreateGlobalVar)
689 FuncNameVar = createPGOFuncNameVar(F, FuncName);
690 }
691};
692
693} // end anonymous namespace
694
695// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
696// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
697// of selects, indirect calls, mem ops and edges.
// NOTE(review): the visible code combines the two CRCs as (JCH << 28) + JC,
// so the two fields overlap rather than forming a clean 32/32-bit split. When
// BCI is engaged, the instrumented-blocks hash is fed to JCH in place of the
// edge count.
698template <class Edge, class BBInfo>
699void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
700 std::vector<uint8_t> Indexes;
701 JamCRC JC;
  // Serialize the Index of every successor that has BBInfo, least significant
  // byte first; successors without BBInfo are skipped.
702 for (auto &BB : F) {
703 for (BasicBlock *Succ : successors(&BB)) {
704 auto BI = findBBInfo(Succ);
705 if (BI == nullptr)
706 continue;
707 uint32_t Index = BI->Index;
708 for (int J = 0; J < 4; J++)
709 Indexes.push_back((uint8_t)(Index >> (J * 8)));
710 }
711 }
712 JC.update(Indexes);
713
714 JamCRC JCH;
715 // The higher 32 bits.
  // NOTE(review): the statement that stores Num into Data is not visible in
  // this listing.
716 auto updateJCH = [&JCH](uint64_t Num) {
717 uint8_t Data[8];
719 JCH.update(Data);
720 };
721 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
722 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
723 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
724 if (BCI) {
725 updateJCH(BCI->getInstrumentedBlocksHash());
726 } else {
727 updateJCH((uint64_t)MST.numEdges());
728 }
729
730 // Hash format for context sensitive profile. Reserve 4 bits for other
731 // information.
732 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
733
734 // Reserve bit 60-63 for other information purpose.
  // NOTE(review): the statements originally following the comment above and
  // the `if (IsCS)` below are not visible in this listing; as shown, the `if`
  // appears to govern the debug output, which is probably an extraction
  // artifact.
736 if (IsCS)
738 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
739 << " CRC = " << JC.getCRC()
740 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
741 << ", Edges = " << MST.numEdges() << ", ICSites = "
742 << ValueSites[IPVK_IndirectCallTarget].size()
743 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
744 << ", High32 CRC = " << JCH.getCRC()
745 << ", Hash = " << FunctionHash << "\n";);
746
  // Optional tracing: "-" (the option's default) disables it; otherwise every
  // function whose name contains the given string is reported.
747 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
748 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
749 << " in building " << F.getParent()->getSourceFileName() << "\n";
750}
751
752// Check if we can safely rename this Comdat function.
753static bool canRenameComdat(
754 Function &F,
755 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
756 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
757 return false;
758
759 // FIXME: Current only handle those Comdat groups that only containing one
760 // function.
761 // (1) For a Comdat group containing multiple functions, we need to have a
762 // unique postfix based on the hashes for each function. There is a
763 // non-trivial code refactoring to do this efficiently.
764 // (2) Variables can not be renamed, so we can not rename Comdat function in a
765 // group including global vars.
766 Comdat *C = F.getComdat();
767 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
768 assert(!isa<GlobalAlias>(CM.second));
769 Function *FM = dyn_cast<Function>(CM.second);
770 if (FM != &F)
771 return false;
772 }
773 return true;
774}
775
776// Append the CFGHash to the Comdat function name.
// Does nothing unless canRenameComdat() allows it. Otherwise F and FuncName
// both get the suffix ".<FunctionHash>", and F is placed in a Comdat whose
// name carries the same suffix: a newly created one when F had none, or a
// renamed copy of the original Comdat (same selection kind) otherwise.
// NOTE(review): three source lines are missing from this listing (after the
// setName call and inside the !F.hasComdat() branch); OrigName has no visible
// use here -- confirm against the full source.
777template <class Edge, class BBInfo>
778void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
779 if (!canRenameComdat(F, ComdatMembers))
780 return;
781 std::string OrigName = F.getName().str();
782 std::string NewFuncName =
783 Twine(F.getName() + "." + Twine(FunctionHash)).str();
784 F.setName(Twine(NewFuncName));
786 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
787 Comdat *NewComdat;
788 Module *M = F.getParent();
789 // For AvailableExternallyLinkage functions, change the linkage to
790 // LinkOnceODR and put them into comdat. This is because after renaming, there
791 // is no backup external copy available for the function.
792 if (!F.hasComdat()) {
794 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
796 F.setComdat(NewComdat);
797 return;
798 }
799
800 // This function belongs to a single function Comdat group.
801 Comdat *OrigComdat = F.getComdat();
802 std::string NewComdatName =
803 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
804 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
805 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
806
  // Move every recorded member of the original Comdat over to the new one.
807 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
808 // Must be a function.
809 cast<Function>(CM.second)->setComdat(NewComdat);
810 }
811}
812
813/// Collect all the BBs that will be instruments and add them to
814/// `InstrumentBBs`.
815template <class Edge, class BBInfo>
816void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
817 std::vector<BasicBlock *> &InstrumentBBs) {
818 if (BCI) {
819 for (auto &BB : F)
820 if (BCI->shouldInstrumentBlock(BB))
821 InstrumentBBs.push_back(&BB);
822 return;
823 }
824
825 // Use a worklist as we will update the vector during the iteration.
826 std::vector<Edge *> EdgeList;
827 EdgeList.reserve(MST.numEdges());
828 for (const auto &E : MST.allEdges())
829 EdgeList.push_back(E.get());
830
831 for (auto &E : EdgeList) {
832 BasicBlock *InstrBB = getInstrBB(E);
833 if (InstrBB)
834 InstrumentBBs.push_back(InstrBB);
835 }
836}
837
838// Given a CFG edge E to be instrumented, find which BB to place the
839// instrumented code. The function will split the critical edge if necessary.
// Returns nullptr when E is in the MST or already removed, when the chosen
// block has no valid insertion point, or when a critical edge cannot be split.
840template <class Edge, class BBInfo>
841BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
842 if (E->InMST || E->Removed)
843 return nullptr;
844
845 BasicBlock *SrcBB = E->SrcBB;
846 BasicBlock *DestBB = E->DestBB;
847 // For a fake edge, instrument the real BB.
848 if (SrcBB == nullptr)
849 return DestBB;
850 if (DestBB == nullptr)
851 return SrcBB;
852
  // Returns BB itself when it can be instrumented, nullptr otherwise.
853 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
854 // There are basic blocks (such as catchswitch) cannot be instrumented.
855 // If the returned first insertion point is the end of BB, skip this BB.
856 if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
857 return nullptr;
858 return BB;
859 };
860
861 // Instrument the SrcBB if it has a single successor,
862 // otherwise, the DestBB if this is not a critical edge.
863 Instruction *TI = SrcBB->getTerminator();
864 if (TI->getNumSuccessors() <= 1)
865 return canInstrument(SrcBB);
866 if (!E->IsCritical)
867 return canInstrument(DestBB);
868
869 // Some IndirectBr critical edges cannot be split by the previous
870 // SplitIndirectBrCriticalEdges call. Bail out.
871 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
872 BasicBlock *InstrBB =
873 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
874 if (!InstrBB) {
  // NOTE(review): the line that opens this debug statement is not visible in
  // this listing.
876 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
877 return nullptr;
878 }
879 // For a critical edge, we have to split. Instrument the newly
880 // created BB.
881 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
882 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
883 << " --> " << getBBInfo(DestBB).Index << "\n");
884 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
885 MST.addEdge(SrcBB, InstrBB, 0);
886 // Second one: Add new edge of InstrBB->DestBB.
887 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
888 NewEdge1.InMST = true;
  // The original edge is replaced by the two new ones.
889 E->Removed = true;
890
891 return canInstrument(InstrBB);
892}
893
894// When generating value profiling calls on Windows routines that make use of
895// handler funclets for exception processing, an operand bundle needs to be
896// attached to the called function. This routine will set \p OpBundles to
897// contain the funclet information, if any is needed, that should be placed on
898// the generated value profiling call for the value profile candidate call.
899static void
903 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
904 if (!OrigCall)
905 return;
906
907 if (!isa<IntrinsicInst>(OrigCall)) {
908 // The instrumentation call should belong to the same funclet as a
909 // non-intrinsic call, so just copy the operand bundle, if any exists.
910 std::optional<OperandBundleUse> ParentFunclet =
911 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
912 if (ParentFunclet)
913 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
914 } else {
915 // Intrinsics or other instructions do not get funclet information from the
916 // front-end. Need to use the BlockColors that was computed by the routine
917 // colorEHFunclets to determine whether a funclet is needed.
918 if (!BlockColors.empty()) {
919 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
920 assert(CV.size() == 1 && "non-unique color for block!");
922 if (EHPadIt->isEHPad())
923 OpBundles.emplace_back("funclet", &*EHPadIt);
924 }
925 }
926}
927
928// Visit all edges and instrument the edges not in MST, and do value profiling.
929// Critical edges will be split.
//
// Overview of the visible steps:
//   1. Optionally split indirectbr critical edges (skipped under
//      PGOBlockCoverage).
//   2. Build FuncInfo and the name/hash operands shared by all intrinsics.
//   3. For contextual profiling, count and then instrument callsites.
//   4. Emit one counter intrinsic per block returned by getInstrumentBBs(),
//      then instrument select instructions.
//   5. Unless value profiling is disabled, emit a value-profile call for
//      every candidate of every value kind.
930void FunctionInstrumenter::instrument() {
931 if (!PGOBlockCoverage) {
932 // Split indirectbr critical edges here before computing the MST rather than
933 // later in getInstrBB() to avoid invalidating it.
934 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
935 }
936
937 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
938 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
939 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
940 InstrumentationType == PGOInstrumentationType::CSFDO,
941 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
// NOTE(review): listing line 942 is missing; the end of the FuncInfo
// constructor call is not visible here.
943
// Contextual profiling uses the function itself as the "name" operand; the
// other modes use the name variable held by FuncInfo.
944 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
945 auto *const CFGHash =
946 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
947 // Make sure that pointer to global is passed in with zero addrspace
948 // This is relevant during GPU profiling
949 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
950 Name, PointerType::get(M.getContext(), 0));
// NOTE(review): listing line 951 is missing; the condition that opens the
// block closed at 960 is not visible. The block emits a single
// llvm.instrprof.cover at the entry block's insertion point and returns.
952 auto &EntryBB = F.getEntryBlock();
953 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
954 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
955 // i32 <index>)
956 Builder.CreateIntrinsic(
957 Intrinsic::instrprof_cover,
958 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
959 return;
960 }
961
// One counter per instrumented block plus one per counted select.
962 std::vector<BasicBlock *> InstrumentBBs;
963 FuncInfo.getInstrumentBBs(InstrumentBBs);
964 unsigned NumCounters =
965 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
966
967 if (IsCtxProf) {
// Callee names listed in CtxPGOSkipCallsiteInstrument are not instrumented.
968 StringSet<> SkipCSInstr(llvm::from_range, CtxPGOSkipCallsiteInstrument);
969
970 auto *CSIntrinsic =
971 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
972 // We want to count the instrumentable callsites, then instrument them. This
973 // is because the llvm.instrprof.callsite intrinsic has an argument (like
974 // the other instrprof intrinsics) capturing the total number of
975 // instrumented objects (counters, or callsites, in this case). In this
976 // case, we want that value so we can readily pass it to the compiler-rt
977 // APIs that may have to allocate memory based on the nr of callsites.
978 // The traversal logic is the same for both counting and instrumentation,
979 // just needs to be done in succession.
980 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
981 for (auto &BB : F)
982 for (auto &Instr : BB)
983 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
// NOTE(review): listing line 984 is missing; the condition guarding the
// `continue` below is not visible here.
985 continue;
986 if (CS->getCalledFunction() &&
987 SkipCSInstr.contains(CS->getCalledFunction()->getName()))
988 continue;
989 Visitor(CS);
990 }
991 };
992 // First, count callsites.
993 uint32_t TotalNumCallsites = 0;
994 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
995
996 // Now instrument.
997 uint32_t CallsiteIndex = 0;
998 Visit([&](auto *CB) {
// The callsite intrinsic is inserted immediately before the call it describes.
999 IRBuilder<> Builder(CB);
1000 Builder.CreateCall(CSIntrinsic,
1001 {Name, CFGHash, Builder.getInt32(TotalNumCallsites),
1002 Builder.getInt32(CallsiteIndex++),
1003 CB->getCalledOperand()});
1004 });
1005 }
1006
// I is the running counter index shared by the timestamp, block and select
// instrumentation below.
1007 uint32_t I = 0;
// NOTE(review): listing line 1008 is missing; the condition that opens the
// block closed at 1019 is not visible. Inside it, NumCounters and I both
// advance by 8 under PGOBlockCoverage and by 1 otherwise, and a timestamp
// intrinsic is emitted at the entry block.
1009 NumCounters += PGOBlockCoverage ? 8 : 1;
1010 auto &EntryBB = F.getEntryBlock();
1011 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1012 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1013 // i32 <index>)
1014 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
1015 {NormalizedNamePtr, CFGHash,
1016 Builder.getInt32(NumCounters),
1017 Builder.getInt32(I)});
1018 I += PGOBlockCoverage ? 8 : 1;
1019 }
1020
// Emit one cover (block coverage mode) or increment intrinsic per block.
1021 for (auto *InstrBB : InstrumentBBs) {
1022 IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
1023 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1024 "Cannot get the Instrumentation point");
1025 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1026 // i32 <index>)
1027 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover
1028 : Intrinsic::instrprof_increment,
1029 {NormalizedNamePtr, CFGHash,
1030 Builder.getInt32(NumCounters),
1031 Builder.getInt32(I++)});
1032 }
1033
1034 // Now instrument select instructions:
// The visitor receives &I and is expected to advance it once per select, so
// that every counter index has been consumed when the assert runs.
1035 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
1036 FuncInfo.FunctionHash);
1037 assert(I == NumCounters);
1038
1039 if (isValueProfilingDisabled())
1040 return;
1041
1042 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1043
1044 // Intrinsic function calls do not have funclet operand bundles needed for
1045 // Windows exception handling attached to them. However, if value profiling is
1046 // inserted for one of these calls, then a funclet value will need to be set
1047 // on the instrumentation call based on the funclet coloring.
1048 DenseMap<BasicBlock *, ColorVector> BlockColors;
1049 if (F.hasPersonalityFn() &&
1050 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1051 BlockColors = colorEHFunclets(F);
1052
1053 // For each VP Kind, walk the VP candidates and instrument each one.
1054 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at 0 for every value kind.
1055 unsigned SiteIndex = 0;
1056 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1057 continue;
1058
1059 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1060 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1061 << " site: CallSite Index = " << SiteIndex << "\n");
1062
1063 IRBuilder<> Builder(Cand.InsertPt);
1064 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1065 "Cannot get the Instrumentation point");
1066
// The profiled value is passed as i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
1067 Value *ToProfile = nullptr;
1068 if (Cand.V->getType()->isIntegerTy())
1069 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1070 else if (Cand.V->getType()->isPointerTy())
1071 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1072 assert(ToProfile && "value profiling Value is of unexpected type");
1073
// NOTE(review): this shadows the function-scope NormalizedNamePtr, which is
// computed from the same operands.
1074 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1075 Name, PointerType::get(M.getContext(), 0));
1076
// NOTE(review): listing line 1077 is missing; the declaration of OpBundles
// is not visible here.
1078 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1079 Builder.CreateCall(
// NOTE(review): listing line 1080 is missing; the start of the callee
// expression for the value-profile intrinsic is not visible here.
1081 Intrinsic::instrprof_value_profile),
1082 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1083 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1084 OpBundles);
1085 }
1086 } // IPVK_First <= Kind <= IPVK_Last
1087}
1088
1089namespace {
1090
1091// This class represents a CFG edge in profile use compilation.
1092struct PGOUseEdge : public PGOEdge {
1093 using PGOEdge::PGOEdge;
1094
1095 std::optional<uint64_t> Count;
1096
1097 // Set edge count value
1098 void setEdgeCount(uint64_t Value) { Count = Value; }
1099
1100 // Return the information string for this object.
1101 std::string infoString() const {
1102 if (!Count)
1103 return PGOEdge::infoString();
1104 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1105 }
1106};
1107
// List of PGOUseEdge pointers with inline storage for two entries; used for
// the per-block InEdges/OutEdges lists.
1108using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1109
1110// This class stores the auxiliary information for each BB.
1111struct PGOUseBBInfo : public PGOBBInfo {
1112 std::optional<uint64_t> Count;
1113 int32_t UnknownCountInEdge = 0;
1114 int32_t UnknownCountOutEdge = 0;
1115 DirectEdges InEdges;
1116 DirectEdges OutEdges;
1117
1118 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1119
1120 // Set the profile count value for this BB.
1121 void setBBInfoCount(uint64_t Value) { Count = Value; }
1122
1123 // Return the information string of this object.
1124 std::string infoString() const {
1125 if (!Count)
1126 return PGOBBInfo::infoString();
1127 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1128 }
1129
1130 // Add an OutEdge and update the edge count.
1131 void addOutEdge(PGOUseEdge *E) {
1132 OutEdges.push_back(E);
1133 UnknownCountOutEdge++;
1134 }
1135
1136 // Add an InEdge and update the edge count.
1137 void addInEdge(PGOUseEdge *E) {
1138 InEdges.push_back(E);
1139 UnknownCountInEdge++;
1140 }
1141};
1142
1143} // end anonymous namespace
1144
1145// Sum up the count values for all the edges.
// Removed edges and edges whose Count is still unset contribute nothing.
// NOTE(review): listing line 1146 is missing, so the signature is not visible
// here; callers in this file pass a block's OutEdges or InEdges list and use
// the result as a uint64_t.
1147 uint64_t Total = 0;
1148 for (const auto &E : Edges) {
1149 if (E->Removed)
1150 continue;
1151 if (E->Count)
1152 Total += *E->Count;
1153 }
1154 return Total;
1155}
1156
1157namespace {
1158
// Per-function driver for profile use: owns the function's MST/BB information
// (FuncInfo), the profile record read for the function, and the state needed
// to turn counter values into block counts, branch weights and value-site
// annotations.
1159class PGOUseFunc {
1160public:
// The `false` passed to FuncInfo fills the slot that instrument() labels
// /*CreateGlobalVar=*/ in its own FuncPGOInstrumentation construction.
1161 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1162 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1163 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1164 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1165 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1166 bool HasSingleByteCoverage)
1167 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1168 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1169 InstrumentFuncEntry, InstrumentLoopEntries,
1170 HasSingleByteCoverage),
1171 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1172
// Translate a profile-reading error into statistics updates and, unless
// suppressed, a warning diagnostic. \p MismatchedFuncSum is reported in the
// warning text.
1173 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1174
1175 /// Get the profile record, assign it to \p ProfileRecord, handle errors if
1176 /// necessary, and assign \p ProgramMaxCount. \returns true if there are no
1177 /// errors.
1178 bool getRecord(IndexedInstrProfReader *PGOReader);
1179
1180 // Read counts for the instrumented BB from profile.
// NOTE(review): listing line 1182 is missing; the remaining parameter(s) of
// readCounters are not visible here.
1181 bool readCounters(bool &AllZeros,
1183
1184 // Populate the counts for all BBs.
1185 void populateCounters();
1186
1187 // Set block coverage based on profile coverage values.
1188 void populateCoverage();
1189
1190 // Set the branch weights based on the count values.
1191 void setBranchWeights();
1192
1193 // Annotate the value profile call sites for all value kind.
1194 void annotateValueSites();
1195
1196 // Annotate the value profile call sites for one value kind.
1197 void annotateValueSites(uint32_t Kind);
1198
1199 // Annotate the irreducible loop header weights.
1200 void annotateIrrLoopHeaderWeights();
1201
1202 // The hotness of the function from the profile count.
1203 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1204
1205 // Return the function hotness from the profile.
1206 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1207
1208 // Return the function hash.
1209 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1210
1211 // Return the profile record for this function;
1212 NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; }
1213
1214 // Return the auxiliary BB information.
1215 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1216 return FuncInfo.getBBInfo(BB);
1217 }
1218
1219 // Return the auxiliary BB information if available.
1220 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1221 return FuncInfo.findBBInfo(BB);
1222 }
1223
1224 Function &getFunc() const { return F; }
1225
1226 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1227
// NOTE(review): ProgramMaxCount has no initializer and is only assigned by a
// successful getRecord(); calling this before then reads an indeterminate
// value.
1228 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1229
1230private:
1231 Function &F;
1232 Module *M;
1233 BlockFrequencyInfo *BFI;
1234 ProfileSummaryInfo *PSI;
1235
1236 // This member stores the shared information with class PGOGenFunc.
1237 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1238
1239 // The maximum count value in the profile. This is only used in PGO use
1240 // compilation.
1241 uint64_t ProgramMaxCount;
1242
1243 // Position of counter that remains to be read.
1244 uint32_t CountPosition = 0;
1245
1246 // Total size of the profile count for this function.
1247 uint32_t ProfileCountSize = 0;
1248
1249 // ProfileRecord for this function.
1250 NamedInstrProfRecord ProfileRecord;
1251
1252 // Function hotness info derived from profile.
1253 FuncFreqAttr FreqAttr;
1254
1255 // Is to use the context sensitive profile.
1256 bool IsCS;
1257
1258 ValueProfileCollector VPC;
1259
1260 // Find the Instrumented BB and set the value. Return false on error.
1261 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1262
1263 // Set the edge counter value for the unknown edge -- there should be only
1264 // one unknown edge.
1265 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1266
1267 // Set the hot/cold inline hints based on the count values.
1268 // FIXME: This function should be removed once the functionality in
1269 // the inliner is implemented.
// Hot takes precedence: the cold check only runs when the entry count is not
// hot. FreqAttr keeps its previous value when neither test passes.
1270 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1271 if (PSI->isHotCount(EntryCount))
1272 FreqAttr = FFA_Hot;
1273 else if (PSI->isColdCount(MaxCount))
1274 FreqAttr = FFA_Cold;
1275 }
1276};
1277
1278} // end anonymous namespace
1279
1280/// Set up InEdges/OutEdges for all BBs in the MST.
/// Every non-removed edge is appended to its source block's OutEdges and its
/// destination block's InEdges, which also bumps the matching unknown-edge
/// counters (see PGOUseBBInfo::addOutEdge / addInEdge).
// NOTE(review): listing line 1281 is missing; the return type and function
// name are not visible here. The name is taken from the call in
// PGOUseFunc::setInstrumentedCounts.
1282 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1283 // This is not required when there is block coverage inference.
1284 if (FuncInfo.BCI)
1285 return;
1286 for (const auto &E : FuncInfo.MST.allEdges()) {
1287 if (E->Removed)
1288 continue;
1289 const BasicBlock *SrcBB = E->SrcBB;
1290 const BasicBlock *DestBB = E->DestBB;
1291 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1292 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1293 SrcInfo.addOutEdge(E.get());
1294 DestInfo.addInEdge(E.get());
1295 }
1296}
1297
1298// Visit all the edges and assign the count value for the instrumented
1299// edges and the BB. Return false on error.
1300bool PGOUseFunc::setInstrumentedCounts(
1301 const std::vector<uint64_t> &CountFromProfile) {
1302
1303 std::vector<BasicBlock *> InstrumentBBs;
1304 FuncInfo.getInstrumentBBs(InstrumentBBs);
1305
1306 setupBBInfoEdges(FuncInfo);
1307
1308 unsigned NumCounters =
1309 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1310 // The number of counters here should match the number of counters
1311 // in profile. Return if they mismatch.
1312 if (NumCounters != CountFromProfile.size()) {
1313 return false;
1314 }
1315 auto *FuncEntry = &*F.begin();
1316
1317 // Set the profile count to the Instrumented BBs.
1318 uint32_t I = 0;
1319 for (BasicBlock *InstrBB : InstrumentBBs) {
1320 uint64_t CountValue = CountFromProfile[I++];
1321 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1322 // If we reach here, we know that we have some nonzero count
1323 // values in this function. The entry count should not be 0.
1324 // Fix it if necessary.
1325 if (InstrBB == FuncEntry && CountValue == 0)
1326 CountValue = 1;
1327 Info.setBBInfoCount(CountValue);
1328 }
1329 ProfileCountSize = CountFromProfile.size();
1330 CountPosition = I;
1331
1332 // Set the edge count and update the count of unknown edges for BBs.
1333 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1334 E->setEdgeCount(Value);
1335 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1336 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1337 };
1338
1339 // Set the profile count the Instrumented edges. There are BBs that not in
1340 // MST but not instrumented. Need to set the edge count value so that we can
1341 // populate the profile counts later.
1342 for (const auto &E : FuncInfo.MST.allEdges()) {
1343 if (E->Removed || E->InMST)
1344 continue;
1345 const BasicBlock *SrcBB = E->SrcBB;
1346 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1347
1348 // If only one out-edge, the edge profile count should be the same as BB
1349 // profile count.
1350 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1351 setEdgeCount(E.get(), *SrcInfo.Count);
1352 else {
1353 const BasicBlock *DestBB = E->DestBB;
1354 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1355 // If only one in-edge, the edge profile count should be the same as BB
1356 // profile count.
1357 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1358 setEdgeCount(E.get(), *DestInfo.Count);
1359 }
1360 if (E->Count)
1361 continue;
1362 // E's count should have been set from profile. If not, this meenas E skips
1363 // the instrumentation. We set the count to 0.
1364 setEdgeCount(E.get(), 0);
1365 }
1366 return true;
1367}
1368
1369// Set the count value for the unknown edge. There should be one and only one
1370// unknown edge in Edges vector.
1371void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1372 for (auto &E : Edges) {
1373 if (E->Count)
1374 continue;
1375 E->setEdgeCount(Value);
1376
1377 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1378 getBBInfo(E->DestBB).UnknownCountInEdge--;
1379 return;
1380 }
1381 llvm_unreachable("Cannot find the unknown count edge");
1382}
1383
1384// Emit function metadata indicating PGO profile mismatch.
// The marker string is appended to the function's MD_annotation tuple; the
// operands already present are kept, and nothing changes if the marker is
// already there.
// NOTE(review): listing line 1385 is missing; the signature is not visible
// here. The name and the (F, ctx) arguments are taken from the call in
// PGOUseFunc::handleInstrProfError.
1386 const char MetadataName[] = "instr_prof_hash_mismatch";
// NOTE(review): listing line 1387 is missing; the declaration of Names is not
// visible here.
1388 // If this metadata already exists, ignore.
1389 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1390 if (Existing) {
1391 MDTuple *Tuple = cast<MDTuple>(Existing);
1392 for (const auto &N : Tuple->operands()) {
1393 if (N.equalsStr(MetadataName))
1394 return;
// Carry over every existing annotation operand.
1395 Names.push_back(N.get());
1396 }
1397 }
1398
1399 MDBuilder MDB(ctx);
1400 Names.push_back(MDB.createString(MetadataName));
1401 MDNode *MD = MDTuple::get(ctx, Names);
1402 F.setMetadata(LLVMContext::MD_annotation, MD);
1403}
1404
// Handle an error from reading this function's profile record: update the
// missing/mismatch statistics, tag the function with hash-mismatch metadata
// where applicable, and emit a warning diagnostic unless it is suppressed.
// MismatchedFuncSum is only used in the warning text.
1405void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1406 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1407 auto &Ctx = M->getContext();
// Note: this local Err (the instrprof error code) shadows the Error parameter
// of the enclosing function.
1408 auto Err = IPE.get();
1409 bool SkipWarning = false;
1410 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1411 << FuncInfo.FuncName << ": ");
1412 if (Err == instrprof_error::unknown_function) {
1413 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
// Missing-function warnings are only shown when PGOWarnMissing is set.
1414 SkipWarning = !PGOWarnMissing;
1415 LLVM_DEBUG(dbgs() << "unknown function");
1416 } else if (Err == instrprof_error::hash_mismatch ||
1417 Err == instrprof_error::malformed) {
1418 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1419 SkipWarning =
// NOTE(review): listing lines 1420, 1421 and 1423 are missing; only part of
// the SkipWarning condition (the comdat / weak-linkage test) is visible here.
1422 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1424 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1425 << " skip=" << SkipWarning << ")");
1426 // Emit function metadata indicating PGO profile mismatch.
1427 annotateFunctionWithHashMismatch(F, M->getContext());
1428 }
1429
1430 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1431 if (SkipWarning)
1432 return;
1433
1434 std::string Msg =
1435 IPE.message() + std::string(" ") + F.getName().str() +
1436 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1437 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1438 std::string(" count discarded");
1439
1440 Ctx.diagnose(
1441 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1442 });
1443}
1444
1445bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1446 uint64_t MismatchedFuncSum = 0;
1447 auto Result = PGOReader->getInstrProfRecord(
1448 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1449 &MismatchedFuncSum);
1450 if (Error E = Result.takeError()) {
1451 handleInstrProfError(std::move(E), MismatchedFuncSum);
1452 return false;
1453 }
1454 ProfileRecord = std::move(Result.get());
1455 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1456 return true;
1457}
1458
1459// Read the profile from ProfileFileName and assign the value to the
1460// instrumented BB and the edges. Return true if the profile is successfully
1461// read, and false on errors.
// AllZeros is set to whether all counters in the record sum to zero; it is
// left untouched when the record is a pseudo-count record (early return).
// NOTE(review): listing line 1463 is missing; the remaining parameter(s),
// including the declaration of PseudoKind, are not visible here.
1462bool PGOUseFunc::readCounters(bool &AllZeros,
1464 auto &Ctx = M->getContext();
1465 PseudoKind = ProfileRecord.getCountPseudoKind();
// Pseudo-count records are not read as real counters; report success and let
// the caller act on PseudoKind.
1466 if (PseudoKind != InstrProfRecord::NotPseudo) {
1467 return true;
1468 }
1469 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1470
1471 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1472 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1473
1474 uint64_t ValueSum = 0;
1475 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1476 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1477 ValueSum += CountFromProfile[I];
1478 }
1479 AllZeros = (ValueSum == 0);
1480
1481 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1482
// The BB info keyed by nullptr gets both unknown-edge counters forced to 2.
// NOTE(review): presumably this keeps the 0/1 tests in populateCounters() from
// firing for that entry until edges have been resolved — confirm.
1483 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1484 getBBInfo(nullptr).UnknownCountInEdge = 2;
1485
1486 if (!setInstrumentedCounts(CountFromProfile)) {
1487 LLVM_DEBUG(
1488 dbgs() << "Inconsistent number of counts, skipping this function");
1489 Ctx.diagnose(DiagnosticInfoPGOProfile(
1490 M->getName().data(),
1491 Twine("Inconsistent number of counts in ") + F.getName().str() +
1492 Twine(": the profile may be stale or there is a function name "
1493 "collision."),
1494 DS_Warning));
1495 return false;
1496 }
1497 return true;
1498}
1499
// Turn block-coverage counters into branch weights: read one coverage value
// per instrumented block, infer coverage of the remaining blocks through the
// BCI dependency relation, then set the entry count and 0/1 branch weights
// and cross-check the result against a freshly computed BFI.
1500void PGOUseFunc::populateCoverage() {
1501 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1502
1503 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts;
1504 DenseMap<const BasicBlock *, bool> Coverage;
1505 unsigned Index = 0;
// NOTE(review): CountsFromProfile is indexed without a bounds check; only the
// assert below verifies that the profile holds exactly one value per
// instrumented block.
1506 for (auto &BB : F)
1507 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1508 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1509 assert(Index == CountsFromProfile.size());
1510
1511 // For each B in InverseDependencies[A], if A is covered then B is covered.
1512 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1513 InverseDependencies;
1514 for (auto &BB : F) {
1515 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1516 // If Dep is covered then BB is covered.
1517 InverseDependencies[Dep].insert(&BB);
1518 }
1519 }
1520
1521 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1522 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1523 for (auto &[BB, IsCovered] : Coverage)
1524 if (IsCovered)
1525 CoveredBlocksToProcess.push(BB);
1526
1527 while (!CoveredBlocksToProcess.empty()) {
1528 auto *CoveredBlock = CoveredBlocksToProcess.top();
1529 assert(Coverage[CoveredBlock]);
1530 CoveredBlocksToProcess.pop();
1531 for (auto *BB : InverseDependencies[CoveredBlock]) {
1532 // If CoveredBlock is covered then BB is covered.
// operator[] default-inserts `false` for blocks not seen before, so
// uninstrumented blocks enter the map here.
1533 bool &Cov = Coverage[BB];
1534 if (Cov)
1535 continue;
1536 Cov = true;
1537 CoveredBlocksToProcess.push(BB);
1538 }
1539 }
1540
1541 // Annotate block coverage.
1542 MDBuilder MDB(F.getContext());
1543 // We set the entry count to 10000 if the entry block is covered so that BFI
1544 // can propagate a fraction of this count to the other covered blocks.
1545 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1546 for (auto &BB : F) {
1547 // For a block A and its successor B, we set the edge weight as follows:
1548 // If A is covered and B is covered, set weight=1.
1549 // If A is covered and B is uncovered, set weight=0.
1550 // If A is uncovered, set weight=1.
1551 // This setup will allow BFI to give nonzero profile counts to only covered
1552 // blocks.
1553 SmallVector<uint32_t, 4> Weights;
1554 for (auto *Succ : successors(&BB))
1555 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are only attached to terminators with at least two successors.
1556 if (Weights.size() >= 2)
1557 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1558 /*IsExpected=*/false);
1559 }
1560
// Recompute DT/LI/BPI/BFI locally so that the check below sees the weights
// that were just attached.
1561 unsigned NumCorruptCoverage = 0;
1562 DominatorTree DT(F);
1563 LoopInfo LI(DT);
1564 BranchProbabilityInfo BPI(F, LI);
1565 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether BFI gives the block a zero profile count, or an empty
// optional when BFI has no profile count for it.
1566 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1567 if (auto C = BFI.getBlockProfileCount(&BB))
1568 return C == 0;
1569 return {};
1570 };
1571 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1572 for (auto &BB : F) {
1573 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1574 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1575 << "\n");
1576 // In some cases it is possible to find a covered block that has no covered
1577 // successors, e.g., when a block calls a function that may call exit(). In
1578 // those cases, BFI could find its successor to be covered while BCI could
1579 // find its successor to be dead.
1580 const bool &Cov = Coverage[&BB];
// A block is inconsistent when BCI says "covered" and BFI says "dead", or the
// reverse. A missing BFI count is treated as "not dead".
// NOTE(review): when IsBlockDead() is empty and Cov is false this branch is
// taken, and the debug output below calls .value() on the empty optional —
// confirm whether that can happen in practice.
1581 if (Cov == IsBlockDead(BB).value_or(false)) {
1582 LLVM_DEBUG(
1583 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1584 << ": BCI=" << (Cov ? "Covered" : "Dead") << " BFI="
1585 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1586 ++NumCorruptCoverage;
1587 }
1588 if (Cov)
1589 ++NumCoveredBlocks;
1590 }
1591 if (PGOVerifyBFI && NumCorruptCoverage) {
1592 auto &Ctx = M->getContext();
1593 Ctx.diagnose(DiagnosticInfoPGOProfile(
1594 M->getName().data(),
1595 Twine("Found inconsistent block coverage for function ") + F.getName() +
1596 " in " + Twine(NumCorruptCoverage) + " blocks.",
1597 DS_Warning));
1598 }
// NOTE(review): listing line 1599 is missing; a condition guarding the call
// below may not be visible here.
1600 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1601}
1602
1603// Populate the counters from instrumented BBs to all BBs.
1604// In the end of this operation, all BBs should have a valid count value.
//
// Works as a fixed-point iteration: a block count is derived once all edges on
// one side are known, and a block with a known count and exactly one unknown
// edge on a side determines that edge. Iteration stops when a full sweep
// makes no change.
1605void PGOUseFunc::populateCounters() {
1606 bool Changes = true;
1607 unsigned NumPasses = 0;
1608 while (Changes) {
1609 NumPasses++;
1610 Changes = false;
1611
1612 // For efficient traversal, it's better to start from the end as most
1613 // of the instrumented edges are at the end.
1614 for (auto &BB : reverse(F)) {
1615 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
// Blocks without BB info are skipped.
1616 if (UseBBInfo == nullptr)
1617 continue;
1618 if (!UseBBInfo->Count) {
// Block count = sum of its out-edges (or in-edges) once all are known.
1619 if (UseBBInfo->UnknownCountOutEdge == 0) {
1620 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1621 Changes = true;
1622 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1623 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1624 Changes = true;
1625 }
1626 }
1627 if (UseBBInfo->Count) {
// The single remaining unknown edge gets the block count minus the known
// edges, clamped at zero.
1628 if (UseBBInfo->UnknownCountOutEdge == 1) {
1629 uint64_t Total = 0;
1630 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1631 // If the one of the successor block can early terminate (no-return),
1632 // we can end up with situation where out edge sum count is larger as
1633 // the source BB's count is collected by a post-dominated block.
1634 if (*UseBBInfo->Count > OutSum)
1635 Total = *UseBBInfo->Count - OutSum;
1636 setEdgeCount(UseBBInfo->OutEdges, Total);
1637 Changes = true;
1638 }
1639 if (UseBBInfo->UnknownCountInEdge == 1) {
1640 uint64_t Total = 0;
1641 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1642 if (*UseBBInfo->Count > InSum)
1643 Total = *UseBBInfo->Count - InSum;
1644 setEdgeCount(UseBBInfo->InEdges, Total);
1645 Changes = true;
1646 }
1647 }
1648 }
1649 }
1650
1651 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
// Keeps NumPasses "used" when LLVM_DEBUG compiles to nothing.
1652 (void)NumPasses;
1653#ifndef NDEBUG
1654 // Assert every BB has a valid counter.
1655 for (auto &BB : F) {
1656 auto BI = findBBInfo(&BB);
1657 if (BI == nullptr)
1658 continue;
1659 assert(BI->Count && "BB count is not valid");
1660 }
1661#endif
1662 // Now annotate select instructions. This may fixup impossible block counts.
1663 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1664 assert(CountPosition == ProfileCountSize);
1665
// The function's max count is the largest block count, seeded with the entry
// block's count.
1666 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1667 uint64_t FuncMaxCount = FuncEntryCount;
1668 for (auto &BB : F) {
1669 auto BI = findBBInfo(&BB);
1670 if (BI == nullptr)
1671 continue;
1672 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1673 }
1674
1675 // Fix the obviously inconsistent entry count.
1676 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1677 FuncEntryCount = 1;
// NOTE(review): listing line 1678 is missing; a statement between the entry
// count fix-up and the call below is not visible here.
1679 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1680
1681 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1682}
1683
1684// Assign the scaled count values to the BB with multiple out edges.
// For each eligible terminator, the per-successor edge counts are gathered and
// passed to setProfMetadata() together with the largest of them. A warning is
// emitted instead when every edge count is zero.
1685void PGOUseFunc::setBranchWeights() {
1686 // Generate MD_prof metadata for every branch instruction.
1687 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1688 << " IsCS=" << IsCS << "\n");
1689 for (auto &BB : F) {
1690 Instruction *TI = BB.getTerminator();
1691 if (TI->getNumSuccessors() < 2)
1692 continue;
// NOTE(review): listing line 1694 is missing; part of the list of accepted
// terminator kinds is not visible here.
1693 if (!(isa<CondBrInst>(TI) || isa<SwitchInst>(TI) ||
1695 isa<CallBrInst>(TI)))
1696 continue;
1697
1698 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
// Count is dereferenced unconditionally, so it is assumed to be populated
// already; blocks with a zero count are skipped.
1699 if (!*BBCountInfo.Count)
1700 continue;
1701
1702 // We have a non-zero Branch BB.
1703
1704 // SuccessorCount can be greater than OutEdgesCount, because
1705 // removed edges don't appear in OutEdges.
1706 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1707 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1708 assert(OutEdgesCount <= SuccessorCount);
1709
// EdgeCounts is indexed by successor number; successors without a matching
// out-edge keep a count of 0.
1710 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1711 uint64_t MaxCount = 0;
1712 for (unsigned It = 0; It < OutEdgesCount; It++) {
1713 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1714 const BasicBlock *SrcBB = E->SrcBB;
1715 const BasicBlock *DestBB = E->DestBB;
// Edges with a null destination do not map to a successor slot.
1716 if (DestBB == nullptr)
1717 continue;
1718 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1719 uint64_t EdgeCount = *E->Count;
1720 if (EdgeCount > MaxCount)
1721 MaxCount = EdgeCount;
1722 EdgeCounts[SuccNum] = EdgeCount;
1723 }
1724
1725 if (MaxCount)
1726 setProfMetadata(TI, EdgeCounts, MaxCount);
1727 else {
1728 // A zero MaxCount can come about when we have a BB with a positive
1729 // count, and whose successor blocks all have 0 count. This can happen
1730 // when there is no exit block and the code exits via a noreturn function.
1731 auto &Ctx = M->getContext();
1732 Ctx.diagnose(DiagnosticInfoPGOProfile(
1733 M->getName().data(),
1734 Twine("Profile in ") + F.getName().str() +
1735 Twine(" partially ignored") +
1736 Twine(", possibly due to the lack of a return path."),
1737 DS_Warning));
1738 }
1739 }
1740}
1741
// Returns true when at least one predecessor of BB ends in an indirectbr.
// NOTE(review): listing line 1742 is missing; the signature is not visible
// here. The name is taken from the call in
// PGOUseFunc::annotateIrrLoopHeaderWeights.
1743 for (BasicBlock *Pred : predecessors(BB)) {
1744 if (isa<IndirectBrInst>(Pred->getTerminator()))
1745 return true;
1746 }
1747 return false;
1748}
1749
1750void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1751 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1752 // Find irr loop headers
1753 for (auto &BB : F) {
1754 // As a heuristic also annotate indrectbr targets as they have a high chance
1755 // to become an irreducible loop header after the indirectbr tail
1756 // duplication.
1757 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1758 Instruction *TI = BB.getTerminator();
1759 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1760 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1761 }
1762 }
1763}
1764
// Insert an llvm.instrprof.increment.step call before SI. The step is the
// select condition zero-extended to i64. Uses counter slot *CurCtrIdx and then
// advances that index.
1765void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1766 Module *M = F.getParent();
1767 IRBuilder<> Builder(&SI);
1768 Type *Int64Ty = Builder.getInt64Ty();
1769 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1770 auto *NormalizedFuncNameVarPtr =
// NOTE(review): listing line 1771 is missing; the call that takes the two
// arguments below is not visible here. The pointer type passed is in address
// space 0.
1772 FuncNameVar, PointerType::get(M->getContext(), 0));
1773 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step,
1774 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1775 Builder.getInt32(TotalNumCtrs),
1776 Builder.getInt32(*CurCtrIdx), Step});
1777 ++(*CurCtrIdx);
1778}
1779
1780void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1781 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1782 assert(*CurCtrIdx < CountFromProfile.size() &&
1783 "Out of bound access of counters");
1784 uint64_t SCounts[2];
1785 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1786 ++(*CurCtrIdx);
1787 uint64_t TotalCount = 0;
1788 auto BI = UseFunc->findBBInfo(SI.getParent());
1789 if (BI != nullptr) {
1790 TotalCount = *BI->Count;
1791
1792 // Fix the block count if it is impossible.
1793 if (TotalCount < SCounts[0])
1794 BI->Count = SCounts[0];
1795 }
1796 // False Count
1797 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1798 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1799 if (MaxCount)
1800 setProfMetadata(&SI, SCounts, MaxCount);
1801}
1802
1803void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1804 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1805 return;
1806 // FIXME: do not handle this yet.
1807 if (SI.getCondition()->getType()->isVectorTy())
1808 return;
1809
1810 switch (Mode) {
1811 case VM_counting:
1812 NSIs++;
1813 return;
1814 case VM_instrument:
1815 instrumentOneSelectInst(SI);
1816 return;
1817 case VM_annotate:
1818 annotateOneSelectInst(SI);
1819 return;
1820 }
1821
1822 llvm_unreachable("Unknown visiting mode");
1823}
1824
1826 if (ValueProfKind == IPVK_MemOPSize)
1828 if (ValueProfKind == llvm::IPVK_VTableTarget)
1830 return MaxNumAnnotations;
1831}
1832
1833// Traverse all valuesites and annotate the instructions for all value kind.
1834void PGOUseFunc::annotateValueSites() {
1836 return;
1837
1838 // Create the PGOFuncName meta data.
1839 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1840
1841 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1842 annotateValueSites(Kind);
1843}
1844
1845// Annotate the instructions for a specific value kind.
1846void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1847 assert(Kind <= IPVK_Last);
1848 unsigned ValueSiteIndex = 0;
1849
1850 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1851
1852 // Since there isn't a reliable or fast way for profile reader to tell if a
1853 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1854 // value profile collector over the function IR to find the instrumented sites
1855 // iff function profile records shows the number of instrumented vtable sites
1856 // is not zero. Function cfg already takes the number of instrumented
1857 // indirect call sites into account so it doesn't hash the number of
1858 // instrumented vtables; as a side effect it makes it easier to enable
1859 // profiling and profile use in two steps if needed.
1860 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1861 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1862 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1864 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1865 auto &ValueSites = FuncInfo.ValueSites[Kind];
1866 if (NumValueSites != ValueSites.size()) {
1867 auto &Ctx = M->getContext();
1868 Ctx.diagnose(DiagnosticInfoPGOProfile(
1869 M->getName().data(),
1870 Twine("Inconsistent number of value sites for ") +
1871 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1872 F.getName().str() +
1873 Twine("\", possibly due to the use of a stale profile."),
1874 DS_Warning));
1875 return;
1876 }
1877
1878 for (VPCandidateInfo &I : ValueSites) {
1879 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1880 << "): Index = " << ValueSiteIndex << " out of "
1881 << NumValueSites << "\n");
1883 *M, *I.AnnotatedInst, ProfileRecord,
1884 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1885 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1886 ValueSiteIndex++;
1887 }
1888}
1889
1890// Collect the set of members for each Comdat in module M and store
1891// in ComdatMembers.
1893 Module &M,
1894 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1895 if (!DoComdatRenaming)
1896 return;
1897 for (Function &F : M)
1898 if (Comdat *C = F.getComdat())
1899 ComdatMembers.insert(std::make_pair(C, &F));
1900 for (GlobalVariable &GV : M.globals())
1901 if (Comdat *C = GV.getComdat())
1902 ComdatMembers.insert(std::make_pair(C, &GV));
1903 for (GlobalAlias &GA : M.aliases())
1904 if (Comdat *C = GA.getComdat())
1905 ComdatMembers.insert(std::make_pair(C, &GA));
1906}
1907
1908// Return true if we should not find instrumentation data for this function
1909static bool skipPGOUse(const Function &F) {
1910 if (F.isDeclaration())
1911 return true;
1912 // If there are too many critical edges, PGO might cause
1913 // compiler time problem. Skip PGO if the number of
1914 // critical edges execeed the threshold.
1915 unsigned NumCriticalEdges = 0;
1916 for (auto &BB : F) {
1917 const Instruction *TI = BB.getTerminator();
1918 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1919 if (isCriticalEdge(TI, I))
1920 NumCriticalEdges++;
1921 }
1922 }
1923 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1924 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1925 << ", NumCriticalEdges=" << NumCriticalEdges
1926 << " exceed the threshold. Skip PGO.\n");
1927 return true;
1928 }
1929 return false;
1930}
1931
1932// Return true if we should not instrument this function
1933static bool skipPGOGen(const Function &F) {
1934 if (skipPGOUse(F))
1935 return true;
1936 if (F.hasFnAttribute(llvm::Attribute::Naked))
1937 return true;
1938 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1939 return true;
1940 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1941 return true;
1942 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1943 return true;
1945 if (auto EntryCount = F.getEntryCount())
1946 return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
1947 return !PGOTreatUnknownAsCold;
1948 }
1949 return false;
1950}
1951
1953 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1956 function_ref<LoopInfo *(Function &)> LookupLI,
1957 PGOInstrumentationType InstrumentationType) {
1958 // For the context-sensitive instrumentation, we should have a separated pass
1959 // (before LTO/ThinLTO linking) to create these variables.
1960 if (InstrumentationType == PGOInstrumentationType::FDO)
1961 createIRLevelProfileFlagVar(M, InstrumentationType);
1962
1963 Triple TT(M.getTargetTriple());
1964 LLVMContext &Ctx = M.getContext();
1965 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1967 M.getName().data(),
1968 Twine("VTable value profiling is presently not "
1969 "supported for non-ELF object formats"),
1970 DS_Warning));
1971 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1972 collectComdatMembers(M, ComdatMembers);
1973
1974 for (auto &F : M) {
1975 if (skipPGOGen(F))
1976 continue;
1977 TargetLibraryInfo &TLI = LookupTLI(F);
1978 BranchProbabilityInfo *BPI = LookupBPI(F);
1979 BlockFrequencyInfo *BFI = LookupBFI(F);
1980 LoopInfo *LI = LookupLI(F);
1981 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
1982 InstrumentationType);
1983 FI.instrument();
1984 }
1985 return true;
1986}
1987
1988PreservedAnalyses
1990 createProfileFileNameVar(M, CSInstrName);
1991 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1992 // will be retained.
1995 if (ProfileSampling)
2000 return PA;
2001}
2002
2005 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2006 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2007 return FAM.getResult<TargetLibraryAnalysis>(F);
2008 };
2009 auto LookupBPI = [&FAM](Function &F) {
2010 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2011 };
2012 auto LookupBFI = [&FAM](Function &F) {
2013 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2014 };
2015 auto LookupLI = [&FAM](Function &F) {
2016 return &FAM.getResult<LoopAnalysis>(F);
2017 };
2018
2019 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2020 InstrumentationType))
2021 return PreservedAnalyses::all();
2022
2023 return PreservedAnalyses::none();
2024}
2025
2026// Using the ratio b/w sums of profile count values and BFI count values to
2027// adjust the func entry count.
2028static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2029 BranchProbabilityInfo &NBPI) {
2030 Function &F = Func.getFunc();
2031 BlockFrequencyInfo NBFI(F, NBPI, LI);
2032#ifndef NDEBUG
2033 auto BFIEntryCount = F.getEntryCount();
2034 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
2035 "Invalid BFI Entrycount");
2036#endif
2037 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
2038 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
2039 for (auto &BBI : F) {
2040 uint64_t CountValue = 0;
2041 uint64_t BFICountValue = 0;
2042 if (!Func.findBBInfo(&BBI))
2043 continue;
2044 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2045 CountValue = *Func.getBBInfo(&BBI).Count;
2046 BFICountValue = *BFICount;
2047 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
2048 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
2049 }
2050 if (SumCount.isZero())
2051 return;
2052
2053 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2054 "Incorrect sum of BFI counts");
2055 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
2056 return;
2057 double Scale = (SumCount / SumBFICount).convertToDouble();
2058 if (Scale < 1.001 && Scale > 0.999)
2059 return;
2060
2061 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
2062 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2063 if (NewEntryCount == 0)
2064 NewEntryCount = 1;
2065 if (NewEntryCount != FuncEntryCount) {
2066 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
2067 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2068 << ", entry_count " << FuncEntryCount << " --> "
2069 << NewEntryCount << "\n");
2070 }
2071}
2072
2073// Compare the profile count values with BFI count values, and print out
2074// the non-matching ones.
2075static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2077 uint64_t HotCountThreshold,
2079 Function &F = Func.getFunc();
2080 BlockFrequencyInfo NBFI(F, NBPI, LI);
2081 // bool PrintFunc = false;
2082 bool HotBBOnly = PGOVerifyHotBFI;
2083 StringRef Msg;
2085
2086 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2087 for (auto &BBI : F) {
2088 PGOUseBBInfo *BBInfo = Func.findBBInfo(&BBI);
2089 if (!BBInfo)
2090 continue;
2091
2092 uint64_t CountValue = BBInfo->Count.value_or(CountValue);
2093 uint64_t BFICountValue = 0;
2094
2095 BBNum++;
2096 if (CountValue)
2097 NonZeroBBNum++;
2098 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2099 if (BFICount)
2100 BFICountValue = *BFICount;
2101
2102 if (HotBBOnly) {
2103 bool rawIsHot = CountValue >= HotCountThreshold;
2104 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2105 bool rawIsCold = CountValue <= ColdCountThreshold;
2106 bool ShowCount = false;
2107 if (rawIsHot && !BFIIsHot) {
2108 Msg = "raw-Hot to BFI-nonHot";
2109 ShowCount = true;
2110 } else if (rawIsCold && BFIIsHot) {
2111 Msg = "raw-Cold to BFI-Hot";
2112 ShowCount = true;
2113 }
2114 if (!ShowCount)
2115 continue;
2116 } else {
2117 if ((CountValue < PGOVerifyBFICutoff) &&
2118 (BFICountValue < PGOVerifyBFICutoff))
2119 continue;
2120 uint64_t Diff = (BFICountValue >= CountValue)
2121 ? BFICountValue - CountValue
2122 : CountValue - BFICountValue;
2123 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2124 continue;
2125 }
2126 BBMisMatchNum++;
2127
2128 ORE.emit([&]() {
2130 F.getSubprogram(), &BBI);
2131 Remark << "BB " << ore::NV("Block", BBI.getName())
2132 << " Count=" << ore::NV("Count", CountValue)
2133 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2134 if (!Msg.empty())
2135 Remark << " (" << Msg << ")";
2136 return Remark;
2137 });
2138 }
2139 if (BBMisMatchNum)
2140 ORE.emit([&]() {
2141 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2142 F.getSubprogram(), &F.getEntryBlock())
2143 << "In Func " << ore::NV("Function", F.getName())
2144 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2145 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2146 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2147 });
2148}
2149
2151 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2152 vfs::FileSystem &FS,
2153 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2156 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2157 bool IsCS) {
2158 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2159 auto &Ctx = M.getContext();
2160 // Read the counter array from file.
2161 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2162 ProfileRemappingFileName);
2163 if (Error E = ReaderOrErr.takeError()) {
2164 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2165 Ctx.diagnose(
2166 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2167 });
2168 return false;
2169 }
2170
2171 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2172 std::move(ReaderOrErr.get());
2173 if (!PGOReader) {
2174 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2175 StringRef("Cannot get PGOReader")));
2176 return false;
2177 }
2178 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2179 return false;
2180
2181 // TODO: might need to change the warning once the clang option is finalized.
2182 if (!PGOReader->isIRLevelProfile()) {
2183 Ctx.diagnose(DiagnosticInfoPGOProfile(
2184 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2185 return false;
2186 }
2187 if (PGOReader->functionEntryOnly()) {
2188 Ctx.diagnose(DiagnosticInfoPGOProfile(
2189 ProfileFileName.data(),
2190 "Function entry profiles are not yet supported for optimization"));
2191 return false;
2192 }
2193
2195 for (GlobalVariable &G : M.globals()) {
2196 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2197 continue;
2198
2199 // Create the PGOFuncName meta data.
2200 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2201 }
2202 }
2203
2204 // Add the profile summary (read from the header of the indexed summary) here
2205 // so that we can use it below when reading counters (which checks if the
2206 // function should be marked with a cold or inlinehint attribute).
2207 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2210 PSI->refresh();
2211
2212 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2213 collectComdatMembers(M, ComdatMembers);
2214 std::vector<Function *> HotFunctions;
2215 std::vector<Function *> ColdFunctions;
2216
2217 // If the profile marked as always instrument the entry BB, do the
2218 // same. Note this can be overwritten by the internal option in CFGMST.h
2219 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2220 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2221 InstrumentFuncEntry = PGOInstrumentEntry;
2222 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2223 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
2224 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2225
2226 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2227 for (auto &F : M) {
2228 if (skipPGOUse(F))
2229 continue;
2230 TargetLibraryInfo &TLI = LookupTLI(F);
2231 BranchProbabilityInfo *BPI = LookupBPI(F);
2232 BlockFrequencyInfo *BFI = LookupBFI(F);
2233 LoopInfo *LI = LookupLI(F);
2234 if (!HasSingleByteCoverage) {
2235 // Split indirectbr critical edges here before computing the MST rather
2236 // than later in getInstrBB() to avoid invalidating it.
2237 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2238 BFI);
2239 }
2240 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2241 InstrumentFuncEntry, InstrumentLoopEntries,
2242 HasSingleByteCoverage);
2243 if (!Func.getRecord(PGOReader.get()))
2244 continue;
2245 if (HasSingleByteCoverage) {
2246 Func.populateCoverage();
2247 continue;
2248 }
2249 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2250 // it means the profile for the function is unrepresentative and this
2251 // function is actually hot / warm. We will reset the function hot / cold
2252 // attribute and drop all the profile counters.
2254 bool AllZeros = false;
2255 if (!Func.readCounters(AllZeros, PseudoKind))
2256 continue;
2257 if (AllZeros) {
2258 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2259 if (Func.getProgramMaxCount() != 0)
2260 ColdFunctions.push_back(&F);
2261 continue;
2262 }
2263 if (PseudoKind != InstrProfRecord::NotPseudo) {
2264 // Clear function attribute cold.
2265 if (F.hasFnAttribute(Attribute::Cold))
2266 F.removeFnAttr(Attribute::Cold);
2267 // Set function attribute as hot.
2268 if (PseudoKind == InstrProfRecord::PseudoHot)
2269 F.addFnAttr(Attribute::Hot);
2270 continue;
2271 }
2272 Func.populateCounters();
2273 Func.setBranchWeights();
2274 Func.annotateValueSites();
2275 Func.annotateIrrLoopHeaderWeights();
2276 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2277 if (FreqAttr == PGOUseFunc::FFA_Cold)
2278 ColdFunctions.push_back(&F);
2279 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2280 HotFunctions.push_back(&F);
2281 if (PGOViewCounts != PGOVCT_None &&
2282 (ViewBlockFreqFuncName.empty() ||
2283 F.getName() == ViewBlockFreqFuncName)) {
2285 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2286 std::make_unique<BranchProbabilityInfo>(F, LI);
2287 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2288 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2290 NewBFI->view();
2291 else if (PGOViewCounts == PGOVCT_Text) {
2292 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2293 NewBFI->print(dbgs());
2294 }
2295 }
2297 (ViewBlockFreqFuncName.empty() ||
2298 F.getName() == ViewBlockFreqFuncName)) {
2300 if (ViewBlockFreqFuncName.empty())
2301 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2302 else
2303 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2304 else if (PGOViewRawCounts == PGOVCT_Text) {
2305 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2306 Func.dumpInfo();
2307 }
2308 }
2309
2312 BranchProbabilityInfo NBPI(F, LI);
2313
2314 // Fix func entry count.
2315 if (PGOFixEntryCount)
2316 fixFuncEntryCount(Func, LI, NBPI);
2317
2318 // Verify BlockFrequency information.
2319 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2320 if (PGOVerifyHotBFI) {
2321 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2323 }
2324 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2325 }
2326 }
2327
2328 // Set function hotness attribute from the profile.
2329 // We have to apply these attributes at the end because their presence
2330 // can affect the BranchProbabilityInfo of any callers, resulting in an
2331 // inconsistent MST between prof-gen and prof-use.
2332 for (auto &F : HotFunctions) {
2333 F->addFnAttr(Attribute::InlineHint);
2334 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2335 << "\n");
2336 }
2337 for (auto &F : ColdFunctions) {
2338 // Only set when there is no Attribute::Hot set by the user. For Hot
2339 // attribute, user's annotation has the precedence over the profile.
2340 if (F->hasFnAttribute(Attribute::Hot)) {
2341 auto &Ctx = M.getContext();
2342 std::string Msg = std::string("Function ") + F->getName().str() +
2343 std::string(" is annotated as a hot function but"
2344 " the profile is cold");
2345 Ctx.diagnose(
2346 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2347 continue;
2348 }
2349 F->addFnAttr(Attribute::Cold);
2350 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2351 << "\n");
2352 }
2353 return true;
2354}
2355
2357 std::string Filename, std::string RemappingFilename, bool IsCS,
2359 : ProfileFileName(std::move(Filename)),
2360 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2361 FS(std::move(VFS)) {
2362 if (!PGOTestProfileFile.empty())
2363 ProfileFileName = PGOTestProfileFile;
2365 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2366 if (!FS)
2368}
2369
2372
2373 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2374 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2375 return FAM.getResult<TargetLibraryAnalysis>(F);
2376 };
2377 auto LookupBPI = [&FAM](Function &F) {
2378 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2379 };
2380 auto LookupBFI = [&FAM](Function &F) {
2381 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2382 };
2383 auto LookupLI = [&FAM](Function &F) {
2384 return &FAM.getResult<LoopAnalysis>(F);
2385 };
2386
2387 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2388 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2389 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2390 IsCS))
2391 return PreservedAnalyses::all();
2392
2393 return PreservedAnalyses::none();
2394}
2395
2396static std::string getSimpleNodeName(const BasicBlock *Node) {
2397 if (!Node->getName().empty())
2398 return Node->getName().str();
2399
2400 std::string SimpleNodeName;
2401 raw_string_ostream OS(SimpleNodeName);
2402 Node->printAsOperand(OS, false);
2403 return SimpleNodeName;
2404}
2405
2407 uint64_t MaxCount) {
2408 auto Weights = downscaleWeights(EdgeCounts, MaxCount);
2409
2410 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2411 : Weights) {
2412 dbgs() << W << " ";
2413 } dbgs() << "\n";);
2414
2415 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2416
2417 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2419 std::string BrCondStr = getBranchCondString(TI);
2420 if (BrCondStr.empty())
2421 return;
2422
2423 uint64_t WSum =
2424 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2425 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2426 uint64_t TotalCount =
2427 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2428 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2429 uint64_t Scale = calculateCountScale(WSum);
2430 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2431 scaleBranchCount(WSum, Scale));
2432 std::string BranchProbStr;
2433 raw_string_ostream OS(BranchProbStr);
2434 OS << BP;
2435 OS << " (total count : " << TotalCount << ")";
2436 Function *F = TI->getParent()->getParent();
2438 ORE.emit([&]() {
2439 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2440 << BrCondStr << " is true with probability : " << BranchProbStr;
2441 });
2442 }
2443}
2444
2445namespace llvm {
2446
2448 MDBuilder MDB(M->getContext());
2449 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2451}
2452
2453template <> struct GraphTraits<PGOUseFunc *> {
2454 using NodeRef = const BasicBlock *;
2457
2458 static NodeRef getEntryNode(const PGOUseFunc *G) {
2459 return &G->getFunc().front();
2460 }
2461
2463 return succ_begin(N);
2464 }
2465
2466 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2467
2468 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2469 return nodes_iterator(G->getFunc().begin());
2470 }
2471
2472 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2473 return nodes_iterator(G->getFunc().end());
2474 }
2475};
2476
2477template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2478 explicit DOTGraphTraits(bool isSimple = false)
2480
2481 static std::string getGraphName(const PGOUseFunc *G) {
2482 return std::string(G->getFunc().getName());
2483 }
2484
2485 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2486 std::string Result;
2487 raw_string_ostream OS(Result);
2488
2489 OS << getSimpleNodeName(Node) << ":\\l";
2490 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2491 OS << "Count : ";
2492 if (BI && BI->Count)
2493 OS << *BI->Count << "\\l";
2494 else
2495 OS << "Unknown\\l";
2496
2497 if (!PGOInstrSelect)
2498 return Result;
2499
2500 for (const Instruction &I : *Node) {
2501 if (!isa<SelectInst>(&I))
2502 continue;
2503 // Display scaled counts for SELECT instruction:
2504 OS << "SELECT : { T = ";
2505 uint64_t TC, FC;
2506 bool HasProf = extractBranchWeights(I, TC, FC);
2507 if (!HasProf)
2508 OS << "Unknown, F = Unknown }\\l";
2509 else
2510 OS << TC << ", F = " << FC << " }\\l";
2511 }
2512 return Result;
2513 }
2514};
2515
2516} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INSTR_PROF_QUOTE(x)
#define VARIANT_MASK_CSIR_PROF
#define VARIANT_MASK_DBG_CORRELATE
#define INSTR_PROF_RAW_VERSION
#define INSTR_PROF_RAW_VERSION_VAR
#define VARIANT_MASK_TEMPORAL_PROF
#define VARIANT_MASK_IR_PROF
#define VARIANT_MASK_BYTE_COVERAGE
#define VARIANT_MASK_INSTR_ENTRY
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY
#define VARIANT_MASK_INSTR_LOOP_ENTRIES
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static constexpr StringLiteral Filename
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of precise value annotations for a single memop" "intrinsic"))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
ValueProfileCollector::CandidateInfo VPCandidateInfo
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static bool isIndirectBrTarget(BasicBlock *BB)
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is " "mainly for test purpose."))
static std::string getBranchCondString(Instruction *TI)
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
StringSet - A set-like wrapper for the StringMap.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1138
Class for arbitrary precision integers.
Definition APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition Analysis.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI bool isIrrLoopHeader(const BasicBlock *BB)
Returns true if BB is an irreducible loop header block.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition CFGMST.h:304
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition CFGMST.h:341
size_t numEdges() const
Definition CFGMST.h:347
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
LLVM_ABI StringRef getName() const
Definition Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
SelectionKind getSelectionKind() const
Definition Comdat.h:47
Conditional Branch instruction.
Value * getCondition() const
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool empty() const
Definition DenseMap.h:113
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Class to represent profile counts.
Definition Function.h:299
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:676
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition GlobalValue.h:57
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
This instruction compares its operands according to the predicate given to the constructor.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Expected< NamedInstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Base class for instruction visitors.
Definition InstVisitor.h:78
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
Definition InstrProf.h:472
std::string message() const override
Return the error message as a string.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition CRC.h:53
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Definition CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
LLVM_ABI MDString * createString(StringRef Str)
Return the given string as metadata.
Definition MDBuilder.cpp:21
LLVM_ABI MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Metadata node.
Definition Metadata.h:1080
Tuple of metadata.
Definition Metadata.h:1500
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Value * getOperand(unsigned i) const
Definition User.h:207
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
LLVM_ABI void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
void write64le(void *P, uint64_t V)
Definition Endian.h:478
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
LLVM_ABI void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
LLVM_ABI void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition CFG.cpp:90
LLVM_ABI std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:1013
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FuncHash
Definition InstrProf.h:78
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
LLVM_ABI DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
LLVM_ABI void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
FunctionAddr NumCounters
Definition InstrProf.h:91
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr NumValueSites[IPVK_Last+1]
Definition InstrProf.h:93
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Function::ProfileCount ProfileCount
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
InstrProfValueKind
Definition InstrProf.h:319
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition CFG.cpp:106
cl::opt< bool > PGOInstrumentColdFunctionOnly
cl::list< std::string > CtxPGOSkipCallsiteInstrument("ctx-prof-skip-callsite-instr", cl::Hidden, cl::desc("Do not instrument callsites to functions in this list. Intended " "for testing."))
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
TinyPtrVector< BasicBlock * > ColorVector
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
Instruction::const_succ_iterator const_succ_iterator
Definition CFG.h:139
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI SmallVector< uint32_t > downscaleWeights(ArrayRef< uint64_t > Weights, std::optional< uint64_t > KnownMaxCount=std::nullopt)
Downscale the given weights, preserving the ratio.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
#define N
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DefaultDOTGraphTraits(bool simple=false)
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
pointer_iterator< Function::const_iterator > nodes_iterator
std::vector< uint64_t > Counts
Definition InstrProf.h:903
CountPseudoKind getCountPseudoKind() const
Definition InstrProf.h:1001
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition InstrProf.h:1105
static void setCSFlagInHash(uint64_t &FuncHash)
Definition InstrProf.h:1086
static constexpr uint64_t FUNC_HASH_MASK
Definition InstrProf.h:1069