LLVM 22.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The algorithm is based on the fact that for each node (except for the
15// entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of an edge on the spanning tree can be
17// derived from the counts of the edges not on the spanning tree. Knuth proves
18// this method instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get precise counter information, these two passes need to be invoked at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in
36// FunctionInstrumenter::instrument(). For pass PGOInstrumentationUse, the real
37// work is done in class PGOUseFunc; the profile is opened at module level and
38// passed to each PGOUseFunc instance. The shared code for PGOInstrumentationGen
39// and PGOInstrumentationUse is put in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// PGOBBInfo contains auxiliary information for each BB. These two classes are
43// used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are
44// derived from PGOEdge and PGOBBInfo, respectively. They contain extra data
45// structures used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/StringSet.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
64#include "llvm/Analysis/CFG.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Comdat.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
76#include "llvm/IR/Dominators.h"
78#include "llvm/IR/Function.h"
79#include "llvm/IR/GlobalAlias.h"
80#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/InstVisitor.h"
84#include "llvm/IR/InstrTypes.h"
85#include "llvm/IR/Instruction.h"
88#include "llvm/IR/Intrinsics.h"
89#include "llvm/IR/LLVMContext.h"
90#include "llvm/IR/MDBuilder.h"
91#include "llvm/IR/Module.h"
92#include "llvm/IR/PassManager.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
100#include "llvm/Support/CRC.h"
101#include "llvm/Support/Casting.h"
105#include "llvm/Support/Debug.h"
106#include "llvm/Support/Error.h"
118#include <algorithm>
119#include <cassert>
120#include <cstdint>
121#include <memory>
122#include <numeric>
123#include <optional>
124#include <stack>
125#include <string>
126#include <unordered_map>
127#include <utility>
128#include <vector>
129
130using namespace llvm;
133
134#define DEBUG_TYPE "pgo-instrumentation"
135
136STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
137STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
138STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
139STATISTIC(NumOfPGOEdge, "Number of edges.");
140STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
141STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
142STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
143STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
144STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
145STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
146STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOSelectInsts,
148 "Number of select instruction instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOMemIntrinsics,
150 "Number of mem intrinsics instrumented in CSPGO.");
151STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
152STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
153STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
154STATISTIC(NumOfCSPGOFunc,
155 "Number of functions having valid profile counts in CSPGO.");
156STATISTIC(NumOfCSPGOMismatch,
157 "Number of functions having mismatch profile in CSPGO.");
158STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
159STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
160
161// Command line option to specify the file to read profile from. This is
162// mainly used for testing.
164 "pgo-test-profile-file", cl::init(""), cl::Hidden,
165 cl::value_desc("filename"),
166 cl::desc("Specify the path of profile data file. This is "
167 "mainly for test purpose."));
169 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
170 cl::value_desc("filename"),
171 cl::desc("Specify the path of profile remapping file. This is mainly for "
172 "test purpose."));
173
174// Command line option to disable value profiling. The default is false:
175// i.e. value profiling is enabled by default. This is for debug purpose.
176static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
178 cl::desc("Disable Value Profiling"));
179
180// Command line option to set the maximum number of VP annotations to write to
181// the metadata for a single indirect call callsite.
183 "icp-max-annotations", cl::init(3), cl::Hidden,
184 cl::desc("Max number of annotations for a single indirect "
185 "call callsite"));
186
187// Command line option to set the maximum number of value annotations
188// to write to the metadata for a single memop intrinsic.
190 "memop-max-annotations", cl::init(4), cl::Hidden,
191 cl::desc("Max number of precise value annotations for a single memop"
192 "intrinsic"));
193
194// Command line option to control appending FunctionHash to the name of a COMDAT
195// function. This is to avoid the hash mismatch caused by the preinliner.
197 "do-comdat-renaming", cl::init(false), cl::Hidden,
198 cl::desc("Append function hash to the name of COMDAT function to avoid "
199 "function hash mismatch due to the preinliner"));
200
201namespace llvm {
202// Command line option to enable/disable the warning about missing profile
203// information.
204cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
206 cl::desc("Use this option to turn on/off "
207 "warnings about missing profile data for "
208 "functions."));
209
210// Command line option to enable/disable the warning about a hash mismatch in
211// the profile data.
213 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
214 cl::desc("Use this option to turn off/on "
215 "warnings about profile cfg mismatch."));
216
217// Command line option to enable/disable the warning about a hash mismatch in
218// the profile data for Comdat functions, which often turns out to be false
219// positive due to the pre-instrumentation inline.
221 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
222 cl::desc("The option is used to turn on/off "
223 "warnings about hash mismatch for comdat "
224 "or weak functions."));
225
226// Command line option to enable/disable select instruction instrumentation.
227static cl::opt<bool>
228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229 cl::desc("Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245// Command line option to enable/disable memop intrinsic call.size profiling.
246static cl::opt<bool>
247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248 cl::desc("Use this option to turn on/off "
249 "memory intrinsic size profiling."));
250
251// Emit branch probability as optimization remarks.
252static cl::opt<bool>
253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254 cl::desc("When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
263static cl::opt<bool>
264 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
266 cl::desc("Force to instrument loop entries."));
267
269 "pgo-function-entry-coverage", cl::Hidden,
270 cl::desc(
271 "Use this option to enable function entry coverage instrumentation."));
272
274 "pgo-block-coverage",
275 cl::desc("Use this option to enable basic block coverage instrumentation"));
276
277static cl::opt<bool>
278 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
279 cl::desc("Create a dot file of CFGs with block "
280 "coverage inference information"));
281
283 "pgo-temporal-instrumentation",
284 cl::desc("Use this option to enable temporal instrumentation"));
285
286static cl::opt<bool>
287 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
288 cl::desc("Fix function entry count in profile use."));
289
291 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
292 cl::desc("Print out the non-match BFI count if a hot raw profile count "
293 "becomes non-hot, or a cold raw profile count becomes hot. "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remarks-analysis=pgo."));
296
298 "pgo-verify-bfi", cl::init(false), cl::Hidden,
299 cl::desc("Print out mismatched BFI counts after setting profile metadata "
300 "The print is enabled under -Rpass-analysis=pgo, or "
301 "internal option -pass-remarks-analysis=pgo."));
302
304 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
305 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
306 "mismatched BFI if the difference percentage is greater than "
307 "this value (in percentage)."));
308
310 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
311 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
312 "profile count value is below."));
313
315 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
316 cl::value_desc("function name"),
317 cl::desc("Trace the hash of the function with this name."));
318
320 "pgo-function-size-threshold", cl::Hidden,
321 cl::desc("Do not instrument functions smaller than this threshold."));
322
324 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
325 cl::desc("Do not instrument functions with the number of critical edges "
326 " greater than this threshold."));
327
329 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
330 cl::desc("For cold function instrumentation, skip instrumenting functions "
331 "whose entry count is above the given value."));
332
334 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
335 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
336 "unprofiled) functions as cold."));
337
339 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
340 cl::desc("Enable cold function only instrumentation."));
341
343 "ctx-prof-skip-callsite-instr", cl::Hidden,
344 cl::desc("Do not instrument callsites to functions in this list. Intended "
345 "for testing."));
346
348
349// Command line option to turn on CFG dot dump after profile annotation.
350// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
352
353// Command line option to specify the name of the function for CFG dump
354// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
356
357// Command line option to enable vtable value profiling. Defined in
358// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
363} // namespace llvm
364
365namespace {
366class FunctionInstrumenter final {
367 Module &M;
368 Function &F;
370 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
371 BranchProbabilityInfo *const BPI;
372 BlockFrequencyInfo *const BFI;
373 LoopInfo *const LI;
374
375 const PGOInstrumentationType InstrumentationType;
376
377 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
378 // Ctx profiling implicitly captures indirect call cases, but not other
379 // values. Supporting other values is relatively straight-forward - just
380 // another counter range within the context.
381 bool isValueProfilingDisabled() const {
382 return DisableValueProfiling ||
383 InstrumentationType == PGOInstrumentationType::CTXPROF;
384 }
385
386 bool shouldInstrumentEntryBB() const {
387 return PGOInstrumentEntry ||
388 InstrumentationType == PGOInstrumentationType::CTXPROF;
389 }
390
391 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
392
393public:
394 FunctionInstrumenter(
396 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
397 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
398 LoopInfo *LI = nullptr,
400 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
401 LI(LI), InstrumentationType(InstrumentationType) {}
402
403 void instrument();
404};
405} // namespace
406
407// Return a string describing the branch condition that can be
408// used in static branch probability heuristics:
409static std::string getBranchCondString(Instruction *TI) {
411 if (!BI || !BI->isConditional())
412 return std::string();
413
414 Value *Cond = BI->getCondition();
416 if (!CI)
417 return std::string();
418
419 std::string result;
420 raw_string_ostream OS(result);
421 OS << CI->getPredicate() << "_";
422 CI->getOperand(0)->getType()->print(OS, true);
423
424 Value *RHS = CI->getOperand(1);
426 if (CV) {
427 if (CV->isZero())
428 OS << "_Zero";
429 else if (CV->isOne())
430 OS << "_One";
431 else if (CV->isMinusOne())
432 OS << "_MinusOne";
433 else
434 OS << "_Const";
435 }
436 return result;
437}
438
439static const char *ValueProfKindDescr[] = {
440#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
442};
443
444// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
445// aware this is an ir_level profile so it can set the version flag.
//
// The variable is a constant 64-bit integer holding the profile version with
// the variant bits selected below OR-ed in. It is created weak and hidden;
// on targets that support COMDAT it is switched to external linkage and
// placed in a COMDAT named after the variable. Returns the new variable.
//
// NOTE(review): several lines of this function are absent from this listing
// (the function name and first parameter, the definitions of VarName and
// ProfileVersion, and the conditions guarding most of the `|=` statements).
// Confirm against the upstream file before relying on the text below.
446static GlobalVariable *
448 PGOInstrumentationType InstrumentationType) {
450 Type *IntTy64 = Type::getInt64Ty(M.getContext());
// Context-sensitive instrumentation is flagged in the version word.
452 if (InstrumentationType == PGOInstrumentationType::CSFDO)
453 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
// Entry instrumentation is flagged when requested on the command line or when
// doing contextual (CTXPROF) instrumentation.
454 if (PGOInstrumentEntry ||
455 InstrumentationType == PGOInstrumentationType::CTXPROF)
456 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
// NOTE(review): the `if` conditions for the following variant bits are not
// visible here.
458 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
460 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
462 ProfileVersion |=
465 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
467 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
// Constant (isConstant = true), weak, initialised to the version word.
468 auto IRLevelVersionVariable = new GlobalVariable(
469 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
470 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
471 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
// NOTE(review): the visibility chosen for GPU profiling targets is on a line
// missing from this listing.
472 if (isGPUProfTarget(M))
473 IRLevelVersionVariable->setVisibility(
475
476 Triple TT(M.getTargetTriple());
477 if (TT.supportsCOMDAT()) {
478 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
479 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
480 }
481 return IRLevelVersionVariable;
482}
483
484namespace {
485
486/// The select instruction visitor plays three roles specified
487/// by the mode. In \c VM_counting mode, it simply counts the number of
488/// select instructions. In \c VM_instrument mode, it inserts code to count
489/// the number times TrueValue of select is taken. In \c VM_annotate mode,
490/// it reads the profile data and annotate the select instruction with metadata.
enum VisitMode {
  VM_counting,   // Only count the select instructions.
  VM_instrument, // Insert code counting how often TrueValue is taken.
  VM_annotate    // Annotate selects with metadata read from the profile.
};
492class PGOUseFunc;
493
494/// Instruction Visitor class to visit select instructions.
495struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
496 Function &F;
497 unsigned NSIs = 0; // Number of select instructions instrumented.
498 VisitMode Mode = VM_counting; // Visiting mode.
499 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
500 unsigned TotalNumCtrs = 0; // Total number of counters
501 GlobalValue *FuncNameVar = nullptr;
502 uint64_t FuncHash = 0;
503 PGOUseFunc *UseFunc = nullptr;
504 bool HasSingleByteCoverage;
505
506 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
507 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
508
509 void countSelects() {
510 NSIs = 0;
511 Mode = VM_counting;
512 visit(F);
513 }
514
515 // Visit the IR stream and instrument all select instructions. \p
516 // Ind is a pointer to the counter index variable; \p TotalNC
517 // is the total number of counters; \p FNV is the pointer to the
518 // PGO function name var; \p FHash is the function hash.
519 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
520 uint64_t FHash) {
521 Mode = VM_instrument;
522 CurCtrIdx = Ind;
523 TotalNumCtrs = TotalNC;
524 FuncHash = FHash;
525 FuncNameVar = FNV;
526 visit(F);
527 }
528
529 // Visit the IR stream and annotate all select instructions.
530 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
531 Mode = VM_annotate;
532 UseFunc = UF;
533 CurCtrIdx = Ind;
534 visit(F);
535 }
536
537 void instrumentOneSelectInst(SelectInst &SI);
538 void annotateOneSelectInst(SelectInst &SI);
539
540 // Visit \p SI instruction and perform tasks according to visit mode.
541 void visitSelectInst(SelectInst &SI);
542
543 // Return the number of select instructions. This needs be called after
544 // countSelects().
545 unsigned getNumOfSelectInsts() const { return NSIs; }
546};
547
548/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
549/// based instrumentation.
550/// Note that the CFG can be a multi-graph. So there might be multiple edges
551/// with the same SrcBB and DestBB.
552struct PGOEdge {
553 BasicBlock *SrcBB;
554 BasicBlock *DestBB;
555 uint64_t Weight;
556 bool InMST = false;
557 bool Removed = false;
558 bool IsCritical = false;
559
560 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
561 : SrcBB(Src), DestBB(Dest), Weight(W) {}
562
563 /// Return the information string of an edge.
564 std::string infoString() const {
565 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
566 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
567 .str();
568 }
569};
570
/// Auxiliary per-basic-block record kept for each BB in the MST.
struct PGOBBInfo {
  /// Group this block belongs to; a new record starts out as its own group.
  PGOBBInfo *Group;
  /// Index assigned to the block at construction.
  uint32_t Index;
  uint32_t Rank = 0;

  PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}

  /// Describe this record as text, in the form "Index=<index>".
  std::string infoString() const {
    return "Index=" + std::to_string(Index);
  }
};
584
585// Code shared by PGO instrumentation generation and profile use for a single
// function. Construction builds the MST over the function's CFG, counts the
// select instructions, collects the value-profiling sites, computes the CFG
// hash, optionally renames a COMDAT function, updates the statistics and,
// on request, creates the PGO function-name variable. Edge and BBInfo are the
// edge and basic-block record types stored in the MST.
586template <class Edge, class BBInfo> class FuncPGOInstrumentation {
587private:
588 Function &F;
589
590 // Whether this is context-sensitive instrumentation.
591 bool IsCS;
592
593 // A map that stores the Comdat group in function F.
594 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
595
596 ValueProfileCollector VPC;
597
// Computes FunctionHash; called from the constructor.
598 void computeCFGHash();
// Appends the hash to the name of a renamable COMDAT function; called from
// the constructor only when ComdatMembers is non-empty.
599 void renameComdatFunction();
600
601public:
602 const TargetLibraryInfo &TLI;
// Value-profiling candidates, one vector per value kind (IPVK_Last + 1
// entries).
603 std::vector<std::vector<VPCandidateInfo>> ValueSites;
604 SelectInstVisitor SIVisitor;
605 std::string FuncName;
606 std::string DeprecatedFuncName;
// Only assigned by the constructor when CreateGlobalVar is true; it has no
// initializer otherwise.
607 GlobalVariable *FuncNameVar;
608
609 // CFG hash value for this function.
610 uint64_t FunctionHash = 0;
611
612 // The Minimum Spanning Tree of function CFG.
613 CFGMST<Edge, BBInfo> MST;
614
// Engaged only when single byte coverage is requested (see constructBCI).
615 const std::optional<BlockCoverageInference> BCI;
616
// Build the block coverage inference for Func when HasSingleByteCoverage is
// set; otherwise return an empty optional.
617 static std::optional<BlockCoverageInference>
618 constructBCI(Function &Func, bool HasSingleByteCoverage,
619 bool InstrumentFuncEntry) {
620 if (HasSingleByteCoverage)
621 return BlockCoverageInference(Func, InstrumentFuncEntry);
622 return {};
623 }
624
625 // Collect all the BBs that will be instrumented, and store them in
626 // InstrumentBBs.
627 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
628
629 // Give an edge, find the BB that will be instrumented.
630 // Return nullptr if there is no BB to be instrumented.
// NOTE(review): the declaration this comment documents is missing from this
// listing; the out-of-class definition has the signature
// `BasicBlock *getInstrBB(Edge *E)`.
632
633 // Return the auxiliary BB information.
634 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
635
636 // Return the auxiliary BB information if available.
637 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
638
639 // Dump edges and BB information.
640 void dumpInfo(StringRef Str = "") const {
641 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
642 " Hash: " + Twine(FunctionHash) + "\t" + Str);
643 }
644
// Set up all per-function state. BPI, BFI and LI are forwarded to the MST
// and may be null. CreateGlobalVar requests creation of the PGO function
// name variable at the end of construction.
645 FuncPGOInstrumentation(
646 Function &Func, TargetLibraryInfo &TLI,
647 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
648 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
649 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
650 bool IsCS = false, bool InstrumentFuncEntry = true,
651 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
652 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
653 TLI(TLI), ValueSites(IPVK_Last + 1),
654 SIVisitor(Func, HasSingleByteCoverage),
655 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
656 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
657 if (BCI && PGOViewBlockCoverageGraph)
658 BCI->viewBlockCoverageGraph();
659 // This should be done before CFG hash computation.
660 SIVisitor.countSelects();
661 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
// Indirect-call (and vtable) sites are collected only for the
// non-context-sensitive case; statistics go to the matching counter set.
662 if (!IsCS) {
663 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
664 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
665 NumOfPGOBB += MST.bbInfoSize();
666 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
// NOTE(review): one line is missing from this listing here; presumably a
// condition guarding the vtable collection below - confirm against upstream.
668 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
669 } else {
670 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
671 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
672 NumOfCSPGOBB += MST.bbInfoSize();
673 }
674
675 FuncName = getIRPGOFuncName(F);
676 DeprecatedFuncName = getPGOFuncName(F);
677 computeCFGHash();
678 if (!ComdatMembers.empty())
679 renameComdatFunction();
680 LLVM_DEBUG(dumpInfo("after CFGMST"));
681
// Count the live edges, and among them those outside the MST (the ones that
// get instrumented), for the statistics.
682 for (const auto &E : MST.allEdges()) {
683 if (E->Removed)
684 continue;
685 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
686 if (!E->InMST)
687 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
688 }
689
690 if (CreateGlobalVar)
691 FuncNameVar = createPGOFuncNameVar(F, FuncName);
692 }
693};
694
695} // end anonymous namespace
696
697// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
698// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
699// of selects, indirect calls, mem ops and edges.
700template <class Edge, class BBInfo>
701void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
702 std::vector<uint8_t> Indexes;
703 JamCRC JC;
704 for (auto &BB : F) {
705 for (BasicBlock *Succ : successors(&BB)) {
706 auto BI = findBBInfo(Succ);
707 if (BI == nullptr)
708 continue;
709 uint32_t Index = BI->Index;
710 for (int J = 0; J < 4; J++)
711 Indexes.push_back((uint8_t)(Index >> (J * 8)));
712 }
713 }
714 JC.update(Indexes);
715
716 JamCRC JCH;
717 // The higher 32 bits.
718 auto updateJCH = [&JCH](uint64_t Num) {
719 uint8_t Data[8];
721 JCH.update(Data);
722 };
723 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
724 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
725 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
726 if (BCI) {
727 updateJCH(BCI->getInstrumentedBlocksHash());
728 } else {
729 updateJCH((uint64_t)MST.numEdges());
730 }
731
732 // Hash format for context sensitive profile. Reserve 4 bits for other
733 // information.
734 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
735
736 // Reserve bit 60-63 for other information purpose.
737 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
738 if (IsCS)
740 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
741 << " CRC = " << JC.getCRC()
742 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
743 << ", Edges = " << MST.numEdges() << ", ICSites = "
744 << ValueSites[IPVK_IndirectCallTarget].size()
745 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
746 << ", High32 CRC = " << JCH.getCRC()
747 << ", Hash = " << FunctionHash << "\n";);
748
749 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
750 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
751 << " in building " << F.getParent()->getSourceFileName() << "\n";
752}
753
754// Check if we can safely rename this Comdat function.
755static bool canRenameComdat(
756 Function &F,
757 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
758 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
759 return false;
760
761 // FIXME: Current only handle those Comdat groups that only containing one
762 // function.
763 // (1) For a Comdat group containing multiple functions, we need to have a
764 // unique postfix based on the hashes for each function. There is a
765 // non-trivial code refactoring to do this efficiently.
766 // (2) Variables can not be renamed, so we can not rename Comdat function in a
767 // group including global vars.
768 Comdat *C = F.getComdat();
769 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
770 assert(!isa<GlobalAlias>(CM.second));
771 Function *FM = dyn_cast<Function>(CM.second);
772 if (FM != &F)
773 return false;
774 }
775 return true;
776}
777
778// Append the CFGHash to the Comdat function name.
//
// Does nothing unless canRenameComdat() allows it. Otherwise F is renamed to
// "<name>.<FunctionHash>", FuncName receives the same suffix, and F is placed
// in a Comdat carrying the new name: a fresh one when F had none, or a
// renamed copy of its original group (same selection kind) otherwise.
779template <class Edge, class BBInfo>
780void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
781 if (!canRenameComdat(F, ComdatMembers))
782 return;
783 std::string OrigName = F.getName().str();
784 std::string NewFuncName =
785 Twine(F.getName() + "." + Twine(FunctionHash)).str();
786 F.setName(Twine(NewFuncName));
// NOTE(review): one line is missing from this listing here; OrigName is not
// used by any visible line, so it is presumably consumed there - confirm.
788 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
789 Comdat *NewComdat;
790 Module *M = F.getParent();
791 // For AvailableExternallyLinkage functions, change the linkage to
792 // LinkOnceODR and put them into comdat. This is because after renaming, there
793 // is no backup external copy available for the function.
794 if (!F.hasComdat()) {
// NOTE(review): two lines of this branch are missing from this listing; the
// linkage change described above is not among the visible statements.
796 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
798 F.setComdat(NewComdat);
799 return;
800 }
801
802 // This function belongs to a single function Comdat group.
803 Comdat *OrigComdat = F.getComdat();
804 std::string NewComdatName =
805 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
806 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
807 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
808
// Move every recorded member of the original group into the renamed group.
809 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
810 // Must be a function.
811 cast<Function>(CM.second)->setComdat(NewComdat);
812 }
813}
814
815/// Collect all the BBs that will be instruments and add them to
816/// `InstrumentBBs`.
817template <class Edge, class BBInfo>
818void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
819 std::vector<BasicBlock *> &InstrumentBBs) {
820 if (BCI) {
821 for (auto &BB : F)
822 if (BCI->shouldInstrumentBlock(BB))
823 InstrumentBBs.push_back(&BB);
824 return;
825 }
826
827 // Use a worklist as we will update the vector during the iteration.
828 std::vector<Edge *> EdgeList;
829 EdgeList.reserve(MST.numEdges());
830 for (const auto &E : MST.allEdges())
831 EdgeList.push_back(E.get());
832
833 for (auto &E : EdgeList) {
834 BasicBlock *InstrBB = getInstrBB(E);
835 if (InstrBB)
836 InstrumentBBs.push_back(InstrBB);
837 }
838}
839
840// Given a CFG E to be instrumented, find which BB to place the instrumented
841// code. The function will split the critical edge if necessary.
842template <class Edge, class BBInfo>
843BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
844 if (E->InMST || E->Removed)
845 return nullptr;
846
847 BasicBlock *SrcBB = E->SrcBB;
848 BasicBlock *DestBB = E->DestBB;
849 // For a fake edge, instrument the real BB.
850 if (SrcBB == nullptr)
851 return DestBB;
852 if (DestBB == nullptr)
853 return SrcBB;
854
855 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
856 // There are basic blocks (such as catchswitch) cannot be instrumented.
857 // If the returned first insertion point is the end of BB, skip this BB.
858 if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
859 return nullptr;
860 return BB;
861 };
862
863 // Instrument the SrcBB if it has a single successor,
864 // otherwise, the DestBB if this is not a critical edge.
865 Instruction *TI = SrcBB->getTerminator();
866 if (TI->getNumSuccessors() <= 1)
867 return canInstrument(SrcBB);
868 if (!E->IsCritical)
869 return canInstrument(DestBB);
870
871 // Some IndirectBr critical edges cannot be split by the previous
872 // SplitIndirectBrCriticalEdges call. Bail out.
873 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
874 BasicBlock *InstrBB =
875 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
876 if (!InstrBB) {
878 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
879 return nullptr;
880 }
881 // For a critical edge, we have to split. Instrument the newly
882 // created BB.
883 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
884 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
885 << " --> " << getBBInfo(DestBB).Index << "\n");
886 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
887 MST.addEdge(SrcBB, InstrBB, 0);
888 // Second one: Add new edge of InstrBB->DestBB.
889 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
890 NewEdge1.InMST = true;
891 E->Removed = true;
892
893 return canInstrument(InstrBB);
894}
895
896// When generating value profiling calls on Windows routines that make use of
897// handler funclets for exception processing an operand bundle needs to be attached
898// to the called function. This routine will set \p OpBundles to contain the
899// funclet information, if any is needed, that should be placed on the generated
900// value profiling call for the value profile candidate call.
//
// NOTE(review): the function name and parameter list are missing from this
// listing. The body uses Cand (the value-profile candidate), BlockColors (a
// block -> ColorVector map) and OpBundles (the output) - confirm their
// declared types against upstream.
901static void
// Nothing to do when the annotated instruction is not a call.
905 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
906 if (!OrigCall)
907 return;
908
909 if (!isa<IntrinsicInst>(OrigCall)) {
910 // The instrumentation call should belong to the same funclet as a
911 // non-intrinsic call, so just copy the operand bundle, if any exists.
912 std::optional<OperandBundleUse> ParentFunclet =
913 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
914 if (ParentFunclet)
915 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
916 } else {
917 // Intrinsics or other instructions do not get funclet information from the
918 // front-end. Need to use the BlockColors that was computed by the routine
919 // colorEHFunclets to determine whether a funclet is needed.
920 if (!BlockColors.empty()) {
921 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
922 assert(CV.size() == 1 && "non-unique color for block!");
// NOTE(review): the definition of EHPadIt is missing from this listing;
// presumably it is derived from the block's single color CV - confirm.
924 if (EHPadIt->isEHPad())
925 OpBundles.emplace_back("funclet", &*EHPadIt);
926 }
927 }
928}
929
930// Visit all edge and instrument the edges not in MST, and do value profiling.
931// Critical edges will be split.
// Order of work visible below: split indirectbr critical edges (unless block
// coverage is on), build FuncInfo, emit counter intrinsics for the selected
// blocks and for select instructions, then emit value-profiling calls.
932void FunctionInstrumenter::instrument() {
933 if (!PGOBlockCoverage) {
934 // Split indirectbr critical edges here before computing the MST rather than
935 // later in getInstrBB() to avoid invalidating it.
936 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
937 }
938
939 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
940 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
941 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
942 InstrumentationType == PGOInstrumentationType::CSFDO,
943 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
// NOTE(review): listing line 944 is absent from this extract; it presumably
// carries the final constructor argument and the closing ");".
945
// In ctx-prof mode the function itself is used as the name operand; otherwise
// the name variable held by FuncInfo is used.
946 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
947 auto *const CFGHash =
948 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
949 // Make sure that pointer to global is passed in with zero addrspace
950 // This is relevant during GPU profiling
951 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
952 Name, PointerType::get(M.getContext(), 0));
// NOTE(review): listing line 953 is absent; judging by the "return;" and the
// closing brace at 961-962 it presumably opens a conditional block that emits
// a single cover probe in the entry block -- confirm against upstream.
954 auto &EntryBB = F.getEntryBlock();
955 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
956 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
957 // i32 <index>)
958 Builder.CreateIntrinsic(
959 Intrinsic::instrprof_cover,
960 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
961 return;
962 }
963
// One counter per instrumented block plus one per counted select instruction.
964 std::vector<BasicBlock *> InstrumentBBs;
965 FuncInfo.getInstrumentBBs(InstrumentBBs);
966 unsigned NumCounters =
967 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
968
969 if (IsCtxProf) {
970 StringSet<> SkipCSInstr(llvm::from_range, CtxPGOSkipCallsiteInstrument);
971
972 auto *CSIntrinsic =
973 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
974 // We want to count the instrumentable callsites, then instrument them. This
975 // is because the llvm.instrprof.callsite intrinsic has an argument (like
976 // the other instrprof intrinsics) capturing the total number of
977 // instrumented objects (counters, or callsites, in this case). In this
978 // case, we want that value so we can readily pass it to the compiler-rt
979 // APIs that may have to allocate memory based on the nr of callsites.
980 // The traversal logic is the same for both counting and instrumentation,
981 // just needs to be done in succession.
982 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
983 for (auto &BB : F)
984 for (auto &Instr : BB)
985 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
// NOTE(review): listing line 986 is absent; it presumably holds the condition
// guarding the "continue" on the next line.
987 continue;
// Direct calls to functions named in CtxPGOSkipCallsiteInstrument are skipped.
988 if (CS->getCalledFunction() &&
989 SkipCSInstr.contains(CS->getCalledFunction()->getName()))
990 continue;
991 Visitor(CS);
992 }
993 };
994 // First, count callsites.
995 uint32_t TotalNumCallsites = 0;
996 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
997
998 // Now instrument.
999 uint32_t CallsiteIndex = 0;
1000 Visit([&](auto *CB) {
1001 IRBuilder<> Builder(CB);
1002 Builder.CreateCall(CSIntrinsic,
1003 {Name, CFGHash, Builder.getInt32(TotalNumCallsites),
1004 Builder.getInt32(CallsiteIndex++),
1005 CB->getCalledOperand()});
1006 });
1007 }
1008
// I is the running counter index shared by the timestamp probe, the block
// counters and the select counters; it must end up equal to NumCounters.
1009 uint32_t I = 0;
// NOTE(review): listing line 1010 is absent; it presumably opens the
// conditional block that is closed at 1021.
1011 NumCounters += PGOBlockCoverage ? 8 : 1;
1012 auto &EntryBB = F.getEntryBlock();
1013 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1014 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1015 // i32 <index>)
1016 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
1017 {NormalizedNamePtr, CFGHash,
1018 Builder.getInt32(NumCounters),
1019 Builder.getInt32(I)});
1020 I += PGOBlockCoverage ? 8 : 1;
1021 }
1022
1023 for (auto *InstrBB : InstrumentBBs) {
1024 IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
1025 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1026 "Cannot get the Instrumentation point");
1027 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1028 // i32 <index>)
// With block coverage enabled the cover intrinsic is emitted instead.
1029 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover
1030 : Intrinsic::instrprof_increment,
1031 {NormalizedNamePtr, CFGHash,
1032 Builder.getInt32(NumCounters),
1033 Builder.getInt32(I++)});
1034 }
1035
1036 // Now instrument select instructions:
1037 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
1038 FuncInfo.FunctionHash);
1039 assert(I == NumCounters);
1040
1041 if (isValueProfilingDisabled())
1042 return;
1043
1044 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1045
1046 // Intrinsic function calls do not have funclet operand bundles needed for
1047 // Windows exception handling attached to them. However, if value profiling is
1048 // inserted for one of these calls, then a funclet value will need to be set
1049 // on the instrumentation call based on the funclet coloring.
1050 DenseMap<BasicBlock *, ColorVector> BlockColors;
1051 if (F.hasPersonalityFn() &&
1052 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1053 BlockColors = colorEHFunclets(F);
1054
1055 // For each VP Kind, walk the VP candidates and instrument each one.
1056 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at zero for every value kind.
1057 unsigned SiteIndex = 0;
1058 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1059 continue;
1060
1061 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1062 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1063 << " site: CallSite Index = " << SiteIndex << "\n");
1064
1065 IRBuilder<> Builder(Cand.InsertPt);
1066 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1067 "Cannot get the Instrumentation point");
1068
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
1069 Value *ToProfile = nullptr;
1070 if (Cand.V->getType()->isIntegerTy())
1071 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1072 else if (Cand.V->getType()->isPointerTy())
1073 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1074 assert(ToProfile && "value profiling Value is of unexpected type");
1075
1076 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1077 Name, PointerType::get(M.getContext(), 0));
1078
// NOTE(review): listing line 1079 is absent; it presumably declares OpBundles.
1080 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1081 Builder.CreateCall(
// NOTE(review): listing line 1082 is absent; it presumably starts the callee
// expression that ends with the intrinsic ID on the next line.
1083 Intrinsic::instrprof_value_profile),
1084 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1085 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1086 OpBundles);
1087 }
1088 } // IPVK_First <= Kind <= IPVK_Last
1089}
1090
1091namespace {
1092
1093// This class represents a CFG edge in profile use compilation.
1094struct PGOUseEdge : public PGOEdge {
1095 using PGOEdge::PGOEdge;
1096
1097 std::optional<uint64_t> Count;
1098
1099 // Set edge count value
1100 void setEdgeCount(uint64_t Value) { Count = Value; }
1101
1102 // Return the information string for this object.
1103 std::string infoString() const {
1104 if (!Count)
1105 return PGOEdge::infoString();
1106 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1107 }
1108};
1109
1110using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1111
1112// This class stores the auxiliary information for each BB.
1113struct PGOUseBBInfo : public PGOBBInfo {
1114 std::optional<uint64_t> Count;
1115 int32_t UnknownCountInEdge = 0;
1116 int32_t UnknownCountOutEdge = 0;
1117 DirectEdges InEdges;
1118 DirectEdges OutEdges;
1119
1120 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1121
1122 // Set the profile count value for this BB.
1123 void setBBInfoCount(uint64_t Value) { Count = Value; }
1124
1125 // Return the information string of this object.
1126 std::string infoString() const {
1127 if (!Count)
1128 return PGOBBInfo::infoString();
1129 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1130 }
1131
1132 // Add an OutEdge and update the edge count.
1133 void addOutEdge(PGOUseEdge *E) {
1134 OutEdges.push_back(E);
1135 UnknownCountOutEdge++;
1136 }
1137
1138 // Add an InEdge and update the edge count.
1139 void addInEdge(PGOUseEdge *E) {
1140 InEdges.push_back(E);
1141 UnknownCountInEdge++;
1142 }
1143};
1144
1145} // end anonymous namespace
1146
1147// Sum up the count values for all the edges.
// NOTE(review): listing line 1148 is absent from this extract; it presumably
// holds the function signature that declares Edges -- restore from upstream.
1149 uint64_t Total = 0;
1150 for (const auto &E : Edges) {
// Removed edges are skipped, and edges without a count contribute nothing.
1151 if (E->Removed)
1152 continue;
1153 if (E->Count)
1154 Total += *E->Count;
1155 }
1156 return Total;
1157}
1158
1159namespace {
1160
// Per-function state for the profile-use side: holds the function, the profile
// record read for it, and declares the steps that read counters, populate
// counts and annotate the IR.
1161class PGOUseFunc {
1162public:
1163 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1164 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1165 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1166 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1167 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1168 bool HasSingleByteCoverage)
1169 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1170 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1171 InstrumentFuncEntry, InstrumentLoopEntries,
1172 HasSingleByteCoverage),
1173 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1174
1175 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1176
1177 /// Get the profile record, assign it to \p ProfileRecord, handle errors if
1178 /// necessary, and assign \p ProgramMaxCount. \returns true if there are no
1179 /// errors.
1180 bool getRecord(IndexedInstrProfReader *PGOReader);
1181
1182 // Read counts for the instrumented BB from profile.
1183 bool readCounters(bool &AllZeros,
// NOTE(review): listing line 1184 is absent from this extract; it presumably
// holds the remaining parameter(s) and the closing ");" of this declaration.
1185
1186 // Populate the counts for all BBs.
1187 void populateCounters();
1188
1189 // Set block coverage based on profile coverage values.
1190 void populateCoverage();
1191
1192 // Set the branch weights based on the count values.
1193 void setBranchWeights();
1194
1195 // Annotate the value profile call sites for all value kind.
1196 void annotateValueSites();
1197
1198 // Annotate the value profile call sites for one value kind.
1199 void annotateValueSites(uint32_t Kind);
1200
1201 // Annotate the irreducible loop header weights.
1202 void annotateIrrLoopHeaderWeights();
1203
1204 // The hotness of the function from the profile count.
1205 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1206
1207 // Return the function hotness from the profile.
1208 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1209
1210 // Return the function hash.
1211 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1212
1213 // Return the profile record for this function;
1214 NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; }
1215
1216 // Return the auxiliary BB information.
1217 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1218 return FuncInfo.getBBInfo(BB);
1219 }
1220
1221 // Return the auxiliary BB information if available.
1222 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1223 return FuncInfo.findBBInfo(BB);
1224 }
1225
1226 Function &getFunc() const { return F; }
1227
1228 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1229
1230 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1231
1232private:
1233 Function &F;
1234 Module *M;
1235 BlockFrequencyInfo *BFI;
1236 ProfileSummaryInfo *PSI;
1237
1238 // This member stores the shared information with class PGOGenFunc.
1239 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1240
1241 // The maximum count value in the profile. This is only used in PGO use
1242 // compilation.
// Not initialized by the constructor above; getRecord() assigns it on success.
1243 uint64_t ProgramMaxCount;
1244
1245 // Position of counter that remains to be read.
1246 uint32_t CountPosition = 0;
1247
1248 // Total size of the profile count for this function.
1249 uint32_t ProfileCountSize = 0;
1250
1251 // ProfileRecord for this function.
1252 NamedInstrProfRecord ProfileRecord;
1253
1254 // Function hotness info derived from profile.
1255 FuncFreqAttr FreqAttr;
1256
1257 // Is to use the context sensitive profile.
1258 bool IsCS;
1259
1260 ValueProfileCollector VPC;
1261
1262 // Find the Instrumented BB and set the value. Return false on error.
1263 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1264
1265 // Set the edge counter value for the unknown edge -- there should be only
1266 // one unknown edge.
1267 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1268
1269 // Set the hot/cold inline hints based on the count values.
1270 // FIXME: This function should be removed once the functionality in
1271 // the inliner is implemented.
// The hot check on the entry count takes precedence; the cold check on the
// max count is consulted only when the entry count is not hot. FreqAttr is
// left unchanged when neither applies.
1272 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1273 if (PSI->isHotCount(EntryCount))
1274 FreqAttr = FFA_Hot;
1275 else if (PSI->isColdCount(MaxCount))
1276 FreqAttr = FFA_Cold;
1277 }
1278};
1279
1280} // end anonymous namespace
1281
1282/// Set up InEdges/OutEdges for all BBs in the MST.
// NOTE(review): listing line 1283 is absent from this extract; it presumably
// holds the return type and function name that precede this parameter.
1284 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1285 // This is not required when there is block coverage inference.
1286 if (FuncInfo.BCI)
1287 return;
// Every non-removed edge is registered as an out-edge of its source block and
// as an in-edge of its destination block.
1288 for (const auto &E : FuncInfo.MST.allEdges()) {
1289 if (E->Removed)
1290 continue;
1291 const BasicBlock *SrcBB = E->SrcBB;
1292 const BasicBlock *DestBB = E->DestBB;
1293 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1294 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1295 SrcInfo.addOutEdge(E.get());
1296 DestInfo.addInEdge(E.get());
1297 }
1298}
1299
1300// Visit all the edges and assign the count value for the instrumented
1301// edges and the BB. Return false on error.
1302bool PGOUseFunc::setInstrumentedCounts(
1303 const std::vector<uint64_t> &CountFromProfile) {
1304
1305 std::vector<BasicBlock *> InstrumentBBs;
1306 FuncInfo.getInstrumentBBs(InstrumentBBs);
1307
1308 setupBBInfoEdges(FuncInfo);
1309
1310 unsigned NumCounters =
1311 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1312 // The number of counters here should match the number of counters
1313 // in profile. Return if they mismatch.
1314 if (NumCounters != CountFromProfile.size()) {
1315 return false;
1316 }
1317 auto *FuncEntry = &*F.begin();
1318
1319 // Set the profile count to the Instrumented BBs.
1320 uint32_t I = 0;
1321 for (BasicBlock *InstrBB : InstrumentBBs) {
1322 uint64_t CountValue = CountFromProfile[I++];
1323 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1324 // If we reach here, we know that we have some nonzero count
1325 // values in this function. The entry count should not be 0.
1326 // Fix it if necessary.
1327 if (InstrBB == FuncEntry && CountValue == 0)
1328 CountValue = 1;
1329 Info.setBBInfoCount(CountValue);
1330 }
1331 ProfileCountSize = CountFromProfile.size();
1332 CountPosition = I;
1333
1334 // Set the edge count and update the count of unknown edges for BBs.
1335 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1336 E->setEdgeCount(Value);
1337 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1338 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1339 };
1340
1341 // Set the profile count the Instrumented edges. There are BBs that not in
1342 // MST but not instrumented. Need to set the edge count value so that we can
1343 // populate the profile counts later.
1344 for (const auto &E : FuncInfo.MST.allEdges()) {
1345 if (E->Removed || E->InMST)
1346 continue;
1347 const BasicBlock *SrcBB = E->SrcBB;
1348 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1349
1350 // If only one out-edge, the edge profile count should be the same as BB
1351 // profile count.
1352 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1353 setEdgeCount(E.get(), *SrcInfo.Count);
1354 else {
1355 const BasicBlock *DestBB = E->DestBB;
1356 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1357 // If only one in-edge, the edge profile count should be the same as BB
1358 // profile count.
1359 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1360 setEdgeCount(E.get(), *DestInfo.Count);
1361 }
1362 if (E->Count)
1363 continue;
1364 // E's count should have been set from profile. If not, this meenas E skips
1365 // the instrumentation. We set the count to 0.
1366 setEdgeCount(E.get(), 0);
1367 }
1368 return true;
1369}
1370
1371// Set the count value for the unknown edge. There should be one and only one
1372// unknown edge in Edges vector.
1373void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1374 for (auto &E : Edges) {
1375 if (E->Count)
1376 continue;
1377 E->setEdgeCount(Value);
1378
1379 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1380 getBBInfo(E->DestBB).UnknownCountInEdge--;
1381 return;
1382 }
1383 llvm_unreachable("Cannot find the unknown count edge");
1384}
1385
1386// Emit function metadata indicating PGO profile mismatch.
// NOTE(review): listing line 1387 is absent from this extract; it presumably
// holds the function signature that declares F and ctx.
1388 const char MetadataName[] = "instr_prof_hash_mismatch";
// NOTE(review): listing line 1389 is absent; it presumably declares Names, the
// operand list that is filled in below.
1390 // If this metadata already exists, ignore.
1391 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1392 if (Existing) {
1393 MDTuple *Tuple = cast<MDTuple>(Existing);
// Existing annotation operands are carried over so that the new tuple keeps
// them; finding the marker already present ends the function early.
1394 for (const auto &N : Tuple->operands()) {
1395 if (N.equalsStr(MetadataName))
1396 return;
1397 Names.push_back(N.get());
1398 }
1399 }
1400
// Append the marker string and replace the function's annotation metadata.
1401 MDBuilder MDB(ctx);
1402 Names.push_back(MDB.createString(MetadataName));
1403 MDNode *MD = MDTuple::get(ctx, Names);
1404 F.setMetadata(LLVMContext::MD_annotation, MD);
1405}
1406
// Handle an InstrProfError raised while reading this function's profile:
// update the missing/mismatch statistics, tag the function on a hash mismatch
// or malformed record, and emit a warning diagnostic unless it is suppressed.
1407void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1408 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1409 auto &Ctx = M->getContext();
// This Err is the error code taken from IPE; it shadows the Error parameter.
1410 auto Err = IPE.get();
1411 bool SkipWarning = false;
1412 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1413 << FuncInfo.FuncName << ": ");
1414 if (Err == instrprof_error::unknown_function) {
1415 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1416 SkipWarning = !PGOWarnMissing;
1417 LLVM_DEBUG(dbgs() << "unknown function");
1418 } else if (Err == instrprof_error::hash_mismatch ||
1419 Err == instrprof_error::malformed) {
1420 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1421 SkipWarning =
// NOTE(review): listing lines 1422-1423 are absent from this extract; they
// presumably begin the SkipWarning expression continued on the next line.
1424 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
// NOTE(review): listing line 1425 is absent; it presumably completes the
// SkipWarning expression.
1426 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1427 << " skip=" << SkipWarning << ")");
1428 // Emit function metadata indicating PGO profile mismatch.
1429 annotateFunctionWithHashMismatch(F, M->getContext());
1430 }
1431
1432 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1433 if (SkipWarning)
1434 return;
1435
// The warning names the function, its hash and the discarded count sum.
1436 std::string Msg =
1437 IPE.message() + std::string(" ") + F.getName().str() +
1438 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1439 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1440 std::string(" count discarded");
1441
1442 Ctx.diagnose(
1443 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1444 });
1445}
1446
1447bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1448 uint64_t MismatchedFuncSum = 0;
1449 auto Result = PGOReader->getInstrProfRecord(
1450 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1451 &MismatchedFuncSum);
1452 if (Error E = Result.takeError()) {
1453 handleInstrProfError(std::move(E), MismatchedFuncSum);
1454 return false;
1455 }
1456 ProfileRecord = std::move(Result.get());
1457 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1458 return true;
1459}
1460
1461// Read the profile from ProfileFileName and assign the value to the
1462// instrumented BB and the edges. Return true if the profile are successfully
1463// read, and false on errors.
1464bool PGOUseFunc::readCounters(bool &AllZeros,
// NOTE(review): listing line 1465 is absent from this extract; it presumably
// declares the PseudoKind out-parameter and opens the function body.
1466 auto &Ctx = M->getContext();
// A pseudo-count record is reported through PseudoKind and counts as success;
// no counters are assigned in that case.
1467 PseudoKind = ProfileRecord.getCountPseudoKind();
1468 if (PseudoKind != InstrProfRecord::NotPseudo) {
1469 return true;
1470 }
1471 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1472
1473 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1474 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1475
// AllZeros is derived from the plain sum of all counters in the record.
1476 uint64_t ValueSum = 0;
1477 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1478 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1479 ValueSum += CountFromProfile[I];
1480 }
1481 AllZeros = (ValueSum == 0);
1482
1483 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1484
// NOTE(review): the BB info keyed by nullptr is presumably the fake
// entry/exit node; pre-setting its unknown counts to 2 presumably keeps it
// from ever looking fully resolved -- confirm against upstream.
1485 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1486 getBBInfo(nullptr).UnknownCountInEdge = 2;
1487
1488 if (!setInstrumentedCounts(CountFromProfile)) {
1489 LLVM_DEBUG(
1490 dbgs() << "Inconsistent number of counts, skipping this function");
1491 Ctx.diagnose(DiagnosticInfoPGOProfile(
1492 M->getName().data(),
1493 Twine("Inconsistent number of counts in ") + F.getName().str() +
1494 Twine(": the profile may be stale or there is a function name "
1495 "collision."),
1496 DS_Warning));
1497 return false;
1498 }
1499 return true;
1500}
1501
// Turn the per-block coverage counters of the profile into IR annotations:
// read coverage for instrumented blocks, infer it for the rest through the BCI
// dependencies, then encode it as an entry count plus 0/1 branch weights.
1502void PGOUseFunc::populateCoverage() {
1503 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1504
1505 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts;
1506 DenseMap<const BasicBlock *, bool> Coverage;
// Counters are consumed in function block order, one per instrumented block;
// a nonzero counter means the block is covered.
1507 unsigned Index = 0;
1508 for (auto &BB : F)
1509 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1510 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1511 assert(Index == CountsFromProfile.size());
1512
1513 // For each B in InverseDependencies[A], if A is covered then B is covered.
1514 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1515 InverseDependencies;
1516 for (auto &BB : F) {
1517 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1518 // If Dep is covered then BB is covered.
1519 InverseDependencies[Dep].insert(&BB);
1520 }
1521 }
1522
1523 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1524 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1525 for (auto &[BB, IsCovered] : Coverage)
1526 if (IsCovered)
1527 CoveredBlocksToProcess.push(BB);
1528
1529 while (!CoveredBlocksToProcess.empty()) {
1530 auto *CoveredBlock = CoveredBlocksToProcess.top();
1531 assert(Coverage[CoveredBlock]);
1532 CoveredBlocksToProcess.pop();
1533 for (auto *BB : InverseDependencies[CoveredBlock]) {
1534 // If CoveredBlock is covered then BB is covered.
// Coverage[BB] inserts a false entry for blocks not seen before, so each
// block is pushed at most once.
1535 bool &Cov = Coverage[BB];
1536 if (Cov)
1537 continue;
1538 Cov = true;
1539 CoveredBlocksToProcess.push(BB);
1540 }
1541 }
1542
1543 // Annotate block coverage.
1544 MDBuilder MDB(F.getContext());
1545 // We set the entry count to 10000 if the entry block is covered so that BFI
1546 // can propagate a fraction of this count to the other covered blocks.
1547 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1548 for (auto &BB : F) {
1549 // For a block A and its successor B, we set the edge weight as follows:
1550 // If A is covered and B is covered, set weight=1.
1551 // If A is covered and B is uncovered, set weight=0.
1552 // If A is uncovered, set weight=1.
1553 // This setup will allow BFI to give nonzero profile counts to only covered
1554 // blocks.
1555 SmallVector<uint32_t, 4> Weights;
1556 for (auto *Succ : successors(&BB))
1557 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are attached only to terminators with at least two successors.
1558 if (Weights.size() >= 2)
1559 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1560 /*IsExpected=*/false);
1561 }
1562
// Recompute BFI locally from the weights just attached and compare its view
// of dead blocks against the inferred coverage.
1563 unsigned NumCorruptCoverage = 0;
1564 DominatorTree DT(F);
1565 LoopInfo LI(DT);
1566 BranchProbabilityInfo BPI(F, LI);
1567 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns an empty optional when BFI has no profile count for the block.
1568 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1569 if (auto C = BFI.getBlockProfileCount(&BB))
1570 return C == 0;
1571 return {};
1572 };
1573 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1574 for (auto &BB : F) {
1575 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1576 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1577 << "\n");
1578 // In some cases it is possible to find a covered block that has no covered
1579 // successors, e.g., when a block calls a function that may call exit(). In
1580 // those cases, BFI could find its successor to be covered while BCI could
1581 // find its successor to be dead.
// A block counts as inconsistent when it is covered yet dead per BFI, or
// uncovered yet not dead; a missing BFI count is treated as "not dead".
1582 const bool &Cov = Coverage[&BB];
1583 if (Cov == IsBlockDead(BB).value_or(false)) {
1584 LLVM_DEBUG(
1585 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1586 << ": BCI=" << (Cov ? "Covered" : "Dead") << " BFI="
1587 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1588 ++NumCorruptCoverage;
1589 }
1590 if (Cov)
1591 ++NumCoveredBlocks;
1592 }
1593 if (PGOVerifyBFI && NumCorruptCoverage) {
1594 auto &Ctx = M->getContext();
1595 Ctx.diagnose(DiagnosticInfoPGOProfile(
1596 M->getName().data(),
1597 Twine("Found inconsistent block coverage for function ") + F.getName() +
1598 " in " + Twine(NumCorruptCoverage) + " blocks.",
1599 DS_Warning));
1600 }
// NOTE(review): listing line 1601 is absent from this extract; it presumably
// holds a condition guarding the graph-view call below -- confirm upstream.
1602 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1603}
1604
1605// Populate the counters from instrumented BBs to all BBs.
1606// In the end of this operation, all BBs should have a valid count value.
1607void PGOUseFunc::populateCounters() {
// Iterate to a fixed point: each pass derives BB counts from fully known edge
// sets and the last unknown edge of a BB from the BB's count.
1608 bool Changes = true;
1609 unsigned NumPasses = 0;
1610 while (Changes) {
1611 NumPasses++;
1612 Changes = false;
1613
1614 // For efficient traversal, it's better to start from the end as most
1615 // of the instrumented edges are at the end.
1616 for (auto &BB : reverse(F)) {
1617 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1618 if (UseBBInfo == nullptr)
1619 continue;
// Unknown BB count: it equals the sum of the out-edges (or, failing that,
// of the in-edges) once none of them is unknown.
1620 if (!UseBBInfo->Count) {
1621 if (UseBBInfo->UnknownCountOutEdge == 0) {
1622 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1623 Changes = true;
1624 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1625 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1626 Changes = true;
1627 }
1628 }
// Known BB count: a single remaining unknown edge gets the difference between
// the BB count and the known edges, clamped at zero.
1629 if (UseBBInfo->Count) {
1630 if (UseBBInfo->UnknownCountOutEdge == 1) {
1631 uint64_t Total = 0;
1632 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1633 // If the one of the successor block can early terminate (no-return),
1634 // we can end up with situation where out edge sum count is larger as
1635 // the source BB's count is collected by a post-dominated block.
1636 if (*UseBBInfo->Count > OutSum)
1637 Total = *UseBBInfo->Count - OutSum;
1638 setEdgeCount(UseBBInfo->OutEdges, Total);
1639 Changes = true;
1640 }
1641 if (UseBBInfo->UnknownCountInEdge == 1) {
1642 uint64_t Total = 0;
1643 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1644 if (*UseBBInfo->Count > InSum)
1645 Total = *UseBBInfo->Count - InSum;
1646 setEdgeCount(UseBBInfo->InEdges, Total);
1647 Changes = true;
1648 }
1649 }
1650 }
1651 }
1652
1653 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
// Keeps NumPasses "used" when LLVM_DEBUG compiles to nothing.
1654 (void)NumPasses;
1655#ifndef NDEBUG
1656 // Assert every BB has a valid counter.
1657 for (auto &BB : F) {
1658 auto BI = findBBInfo(&BB);
1659 if (BI == nullptr)
1660 continue;
1661 assert(BI->Count && "BB count is not valid");
1662 }
1663#endif
1664 // Now annotate select instructions. This may fixup impossible block counts.
1665 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1666 assert(CountPosition == ProfileCountSize);
1667
// The function's max count is the largest BB count, starting from the entry.
1668 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1669 uint64_t FuncMaxCount = FuncEntryCount;
1670 for (auto &BB : F) {
1671 auto BI = findBBInfo(&BB);
1672 if (BI == nullptr)
1673 continue;
1674 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1675 }
1676
1677 // Fix the obviously inconsistent entry count.
1678 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1679 FuncEntryCount = 1;
// NOTE(review): listing line 1680 is absent from this extract; it presumably
// records FuncEntryCount on the function -- confirm against upstream.
1681 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1682
1683 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1684}
1685
1686// Assign the scaled count values to the BB with multiple out edges.
1687void PGOUseFunc::setBranchWeights() {
1688 // Generate MD_prof metadata for every branch instruction.
1689 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1690 << " IsCS=" << IsCS << "\n");
1691 for (auto &BB : F) {
1692 Instruction *TI = BB.getTerminator();
// Terminators with fewer than two successors carry no weights.
1693 if (TI->getNumSuccessors() < 2)
1694 continue;
1695 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
// NOTE(review): listing line 1696 is absent from this extract; it presumably
// lists further terminator kinds accepted by this check.
1697 isa<CallBrInst>(TI)))
1698 continue;
1699
// Blocks whose count is zero are skipped.
1700 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1701 if (!*BBCountInfo.Count)
1702 continue;
1703
1704 // We have a non-zero Branch BB.
1705
1706 // SuccessorCount can be greater than OutEdgesCount, because
1707 // removed edges don't appear in OutEdges.
1708 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1709 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1710 assert(OutEdgesCount <= SuccessorCount);
1711
// EdgeCounts is indexed by successor number; slots without a matching
// out-edge keep their initial 0.
1712 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1713 uint64_t MaxCount = 0;
1714 for (unsigned It = 0; It < OutEdgesCount; It++) {
1715 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1716 const BasicBlock *SrcBB = E->SrcBB;
1717 const BasicBlock *DestBB = E->DestBB;
// Edges with a null destination do not map to a successor slot.
1718 if (DestBB == nullptr)
1719 continue;
1720 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1721 uint64_t EdgeCount = *E->Count;
1722 if (EdgeCount > MaxCount)
1723 MaxCount = EdgeCount;
1724 EdgeCounts[SuccNum] = EdgeCount;
1725 }
1726
1727 if (MaxCount)
1728 setProfMetadata(TI, EdgeCounts, MaxCount);
1729 else {
1730 // A zero MaxCount can come about when we have a BB with a positive
1731 // count, and whose successor blocks all have 0 count. This can happen
1732 // when there is no exit block and the code exits via a noreturn function.
1733 auto &Ctx = M->getContext();
1734 Ctx.diagnose(DiagnosticInfoPGOProfile(
1735 M->getName().data(),
1736 Twine("Profile in ") + F.getName().str() +
1737 Twine(" partially ignored") +
1738 Twine(", possibly due to the lack of a return path."),
1739 DS_Warning));
1740 }
1741 }
1742}
1743
// NOTE(review): listing line 1744 is absent from this extract; it presumably
// holds the signature of the helper that declares BB (it is called below as
// isIndirectBrTarget(&BB)) -- restore from upstream.
// Returns true when at least one predecessor of BB ends in an indirectbr.
1745 for (BasicBlock *Pred : predecessors(BB)) {
1746 if (isa<IndirectBrInst>(Pred->getTerminator()))
1747 return true;
1748 }
1749 return false;
1750}
1751
1752void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1753 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1754 // Find irr loop headers
1755 for (auto &BB : F) {
1756 // As a heuristic also annotate indrectbr targets as they have a high chance
1757 // to become an irreducible loop header after the indirectbr tail
1758 // duplication.
1759 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1760 Instruction *TI = BB.getTerminator();
1761 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1762 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1763 }
1764 }
1765}
1766
// Emit an llvm.instrprof.increment.step call in front of SI and advance the
// shared counter index. The step operand is the select condition
// zero-extended to i64.
1767void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1768 Module *M = F.getParent();
1769 IRBuilder<> Builder(&SI);
1770 Type *Int64Ty = Builder.getInt64Ty();
1771 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1772 auto *NormalizedFuncNameVarPtr =
// NOTE(review): listing line 1773 is absent from this extract; it presumably
// names the cast helper whose arguments follow on the next line.
1774 FuncNameVar, PointerType::get(M->getContext(), 0));
1775 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step,
1776 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1777 Builder.getInt32(TotalNumCtrs),
1778 Builder.getInt32(*CurCtrIdx), Step});
// Each instrumented select consumes one counter slot.
1779 ++(*CurCtrIdx);
1780}
1781
1782void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1783 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1784 assert(*CurCtrIdx < CountFromProfile.size() &&
1785 "Out of bound access of counters");
1786 uint64_t SCounts[2];
1787 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1788 ++(*CurCtrIdx);
1789 uint64_t TotalCount = 0;
1790 auto BI = UseFunc->findBBInfo(SI.getParent());
1791 if (BI != nullptr) {
1792 TotalCount = *BI->Count;
1793
1794 // Fix the block count if it is impossible.
1795 if (TotalCount < SCounts[0])
1796 BI->Count = SCounts[0];
1797 }
1798 // False Count
1799 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1800 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1801 if (MaxCount)
1802 setProfMetadata(&SI, SCounts, MaxCount);
1803}
1804
1805void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1806 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1807 return;
1808 // FIXME: do not handle this yet.
1809 if (SI.getCondition()->getType()->isVectorTy())
1810 return;
1811
1812 switch (Mode) {
1813 case VM_counting:
1814 NSIs++;
1815 return;
1816 case VM_instrument:
1817 instrumentOneSelectInst(SI);
1818 return;
1819 case VM_annotate:
1820 annotateOneSelectInst(SI);
1821 return;
1822 }
1823
1824 llvm_unreachable("Unknown visiting mode");
1825}
1826
// Picks the annotation limit for a value-profile kind: IPVK_MemOPSize and
// IPVK_VTableTarget are special-cased, every other kind falls back to
// MaxNumAnnotations.
// NOTE(review): the signature line and the two per-kind return statements are
// missing from this listing; confirm them against the upstream file.
1828 if (ValueProfKind == IPVK_MemOPSize)
1830 if (ValueProfKind == llvm::IPVK_VTableTarget)
1832 return MaxNumAnnotations;
1833}
1834
1835// Traverse all value sites and annotate the instructions for every value
// kind. Also creates the PGOFuncName metadata for the function first.
1836void PGOUseFunc::annotateValueSites() {
// NOTE(review): the condition guarding this early return is missing from this
// listing.
1838 return;
1839
1840 // Create the PGOFuncName meta data.
1841 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1842
// Delegate to the per-kind overload for each kind in [IPVK_First, IPVK_Last].
1843 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1844 annotateValueSites(Kind);
1845}
1846
1847// Annotate the instructions for a specific value kind.
// Emits a warning and annotates nothing when the number of value sites in the
// profile record differs from the number of sites found in the function.
1848void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1849 assert(Kind <= IPVK_Last);
1850 unsigned ValueSiteIndex = 0;
1851
1852 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1853
1854 // Since there isn't a reliable or fast way for profile reader to tell if a
1855 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1856 // value profile collector over the function IR to find the instrumented sites
1857 // iff function profile records shows the number of instrumented vtable sites
1858 // is not zero. Function cfg already takes the number of instrumented
1859 // indirect call sites into account so it doesn't hash the number of
1860 // instrumented vtables; as a side effect it makes it easier to enable
1861 // profiling and profile use in two steps if needed.
1862 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
// NOTE(review): the last operand of this condition (listing line 1865) is
// missing from this listing.
1863 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1864 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1866 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1867 auto &ValueSites = FuncInfo.ValueSites[Kind];
// A site-count mismatch is reported as a warning and this kind is skipped.
1868 if (NumValueSites != ValueSites.size()) {
1869 auto &Ctx = M->getContext();
1870 Ctx.diagnose(DiagnosticInfoPGOProfile(
1871 M->getName().data(),
1872 Twine("Inconsistent number of value sites for ") +
1873 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1874 F.getName().str() +
1875 Twine("\", possibly due to the use of a stale profile."),
1876 DS_Warning));
1877 return;
1878 }
1879
// Sites are visited in collection order; ValueSiteIndex tracks the position
// passed along with each site's annotated instruction.
1880 for (VPCandidateInfo &I : ValueSites) {
1881 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1882 << "): Index = " << ValueSiteIndex << " out of "
1883 << NumValueSites << "\n");
// NOTE(review): the callee name for the call below (listing line 1884) is
// missing from this listing.
1885 *M, *I.AnnotatedInst, ProfileRecord,
1886 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1887 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1888 ValueSiteIndex++;
1889 }
1890}
1891
1892// Collect the set of members for each Comdat in module M and store
1893// in ComdatMembers.
// Functions, global variables and aliases that belong to a comdat are each
// inserted as a (Comdat, GlobalValue) pair. Nothing is collected when
// DoComdatRenaming is off.
// NOTE(review): the line with the function's return type and name is missing
// from this listing.
1895 Module &M,
1896 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1897 if (!DoComdatRenaming)
1898 return;
1899 for (Function &F : M)
1900 if (Comdat *C = F.getComdat())
1901 ComdatMembers.insert(std::make_pair(C, &F));
1902 for (GlobalVariable &GV : M.globals())
1903 if (Comdat *C = GV.getComdat())
1904 ComdatMembers.insert(std::make_pair(C, &GV));
1905 for (GlobalAlias &GA : M.aliases())
1906 if (Comdat *C = GA.getComdat())
1907 ComdatMembers.insert(std::make_pair(C, &GA));
1908}
1909
1910// Return true if we should not find instrumentation data for this function
1911static bool skipPGOUse(const Function &F) {
1912 if (F.isDeclaration())
1913 return true;
1914 // If there are too many critical edges, PGO might cause
1915 // compiler time problem. Skip PGO if the number of
1916 // critical edges execeed the threshold.
1917 unsigned NumCriticalEdges = 0;
1918 for (auto &BB : F) {
1919 const Instruction *TI = BB.getTerminator();
1920 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1921 if (isCriticalEdge(TI, I))
1922 NumCriticalEdges++;
1923 }
1924 }
1925 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1926 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1927 << ", NumCriticalEdges=" << NumCriticalEdges
1928 << " exceed the threshold. Skip PGO.\n");
1929 return true;
1930 }
1931 return false;
1932}
1933
1934// Return true if we should not instrument this function
// Everything skipPGOUse rejects is rejected here too, as are functions marked
// Naked, NoProfile or SkipProfile and functions with fewer instructions than
// PGOFunctionSizeThreshold.
1935static bool skipPGOGen(const Function &F) {
1936 if (skipPGOUse(F))
1937 return true;
1938 if (F.hasFnAttribute(llvm::Attribute::Naked))
1939 return true;
1940 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1941 return true;
1942 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1943 return true;
1944 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1945 return true;
// NOTE(review): the condition opening the block below (listing line 1946) is
// missing from this listing. Inside it, a function with an entry count is
// skipped when that count exceeds PGOColdInstrumentEntryThreshold; one
// without an entry count is skipped unless PGOTreatUnknownAsCold is set.
1947 if (auto EntryCount = F.getEntryCount())
1948 return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
1949 return !PGOTreatUnknownAsCold;
1950 }
1951 return false;
1952}
1953
// Instruments every function of M that skipPGOGen does not reject, using the
// per-function analyses supplied by the lookup callbacks. Always returns true.
// NOTE(review): the line with the return type and name, and the LookupBPI /
// LookupBFI parameter lines, are missing from this listing.
1955 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1958 function_ref<LoopInfo *(Function &)> LookupLI,
1959 PGOInstrumentationType InstrumentationType) {
1960 // For the context-sensitive instrumentation, we should have a separated pass
1961 // (before LTO/ThinLTO linking) to create these variables.
1962 if (InstrumentationType == PGOInstrumentationType::FDO)
1963 createIRLevelProfileFlagVar(M, InstrumentationType);
1964
1965 Triple TT(M.getTargetTriple());
1966 LLVMContext &Ctx = M.getContext();
// VTable value profiling on a non-ELF target only produces a warning.
// NOTE(review): the line that starts the diagnostic call (listing line 1968)
// is missing from this listing.
1967 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1969 M.getName().data(),
1970 Twine("VTable value profiling is presently not "
1971 "supported for non-ELF object formats"),
1972 DS_Warning));
1973 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1974 collectComdatMembers(M, ComdatMembers);
1975
// One FunctionInstrumenter is built and run per instrumented function.
1976 for (auto &F : M) {
1977 if (skipPGOGen(F))
1978 continue;
1979 TargetLibraryInfo &TLI = LookupTLI(F);
1980 BranchProbabilityInfo *BPI = LookupBPI(F);
1981 BlockFrequencyInfo *BFI = LookupBFI(F);
1982 LoopInfo *LI = LookupLI(F);
1983 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
1984 InstrumentationType);
1985 FI.instrument();
1986 }
1987 return true;
1988}
1989
// Pass entry point that creates the profile file name variable from
// CSInstrName and returns a PreservedAnalyses value.
// NOTE(review): most of this definition (its name and parameter line, the
// statements at listing lines 1995-1996 and 1998-2001) is missing from this
// listing; confirm the details against the upstream file.
1990PreservedAnalyses
1992 createProfileFileNameVar(M, CSInstrName);
1993 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1994 // will be retained.
1997 if (ProfileSampling)
2002 return PA;
2003}
2004
// Pass entry point: builds per-function analysis lookups from the function
// analysis manager and hands them to InstrumentAllFunctions. Reports all
// analyses preserved only when that call returns false.
// NOTE(review): the signature lines of this definition are missing from this
// listing.
2007 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
// Each lookup fetches its analysis result for a function through FAM.
2008 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2009 return FAM.getResult<TargetLibraryAnalysis>(F);
2010 };
2011 auto LookupBPI = [&FAM](Function &F) {
2012 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2013 };
2014 auto LookupBFI = [&FAM](Function &F) {
2015 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2016 };
2017 auto LookupLI = [&FAM](Function &F) {
2018 return &FAM.getResult<LoopAnalysis>(F);
2019 };
2020
2021 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2022 InstrumentationType))
2023 return PreservedAnalyses::all();
2024
2025 return PreservedAnalyses::none();
2026}
2027
2028// Using the ratio b/w sums of profile count values and BFI count values to
2029// adjust the func entry count.
2030static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2031 BranchProbabilityInfo &NBPI) {
2032 Function &F = Func.getFunc();
2033 BlockFrequencyInfo NBFI(F, NBPI, LI);
2034#ifndef NDEBUG
2035 auto BFIEntryCount = F.getEntryCount();
2036 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
2037 "Invalid BFI Entrycount");
2038#endif
2039 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
2040 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
2041 for (auto &BBI : F) {
2042 uint64_t CountValue = 0;
2043 uint64_t BFICountValue = 0;
2044 if (!Func.findBBInfo(&BBI))
2045 continue;
2046 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2047 CountValue = *Func.getBBInfo(&BBI).Count;
2048 BFICountValue = *BFICount;
2049 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
2050 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
2051 }
2052 if (SumCount.isZero())
2053 return;
2054
2055 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2056 "Incorrect sum of BFI counts");
2057 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
2058 return;
2059 double Scale = (SumCount / SumBFICount).convertToDouble();
2060 if (Scale < 1.001 && Scale > 0.999)
2061 return;
2062
2063 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
2064 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2065 if (NewEntryCount == 0)
2066 NewEntryCount = 1;
2067 if (NewEntryCount != FuncEntryCount) {
2068 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
2069 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2070 << ", entry_count " << FuncEntryCount << " --> "
2071 << NewEntryCount << "\n");
2072 }
2073}
2074
2075// Compare the profile count values with BFI count values, and print out
2076// the non-matching ones.
// A fresh BlockFrequencyInfo is built from NBPI and LI. One remark is emitted
// per mismatching block, followed by a per-function summary remark when at
// least one block mismatched.
// NOTE(review): several lines of this definition (part of the parameter list,
// the declaration of ORE, and the construction of Remark) are missing from
// this listing.
2077static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2079 uint64_t HotCountThreshold,
2081 Function &F = Func.getFunc();
2082 BlockFrequencyInfo NBFI(F, NBPI, LI);
2083 // bool PrintFunc = false;
2084 bool HotBBOnly = PGOVerifyHotBFI;
2085 StringRef Msg;
2087
2088 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2089 for (auto &BBI : F) {
2090 PGOUseBBInfo *BBInfo = Func.findBBInfo(&BBI);
2091 if (!BBInfo)
2092 continue;
2093
// NOTE(review): CountValue appears in its own initializer, so when Count is
// empty the fallback reads an uninitialized variable. A literal 0 fallback
// looks intended -- confirm and fix.
2094 uint64_t CountValue = BBInfo->Count.value_or(CountValue);
2095 uint64_t BFICountValue = 0;
2096
2097 BBNum++;
2098 if (CountValue)
2099 NonZeroBBNum++;
2100 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2101 if (BFICount)
2102 BFICountValue = *BFICount;
2103
// Hot-only mode reports just the hot/cold classification flips between the
// raw profile count and the BFI count.
2104 if (HotBBOnly) {
2105 bool rawIsHot = CountValue >= HotCountThreshold;
2106 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2107 bool rawIsCold = CountValue <= ColdCountThreshold;
2108 bool ShowCount = false;
2109 if (rawIsHot && !BFIIsHot) {
2110 Msg = "raw-Hot to BFI-nonHot";
2111 ShowCount = true;
2112 } else if (rawIsCold && BFIIsHot) {
2113 Msg = "raw-Cold to BFI-Hot";
2114 ShowCount = true;
2115 }
2116 if (!ShowCount)
2117 continue;
2118 } else {
// Otherwise skip blocks where both counts are below the cutoff, and blocks
// whose absolute difference is within PGOVerifyBFIRatio percent of the
// profile count (integer arithmetic).
2119 if ((CountValue < PGOVerifyBFICutoff) &&
2120 (BFICountValue < PGOVerifyBFICutoff))
2121 continue;
2122 uint64_t Diff = (BFICountValue >= CountValue)
2123 ? BFICountValue - CountValue
2124 : CountValue - BFICountValue;
2125 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2126 continue;
2127 }
2128 BBMisMatchNum++;
2129
2130 ORE.emit([&]() {
2132 F.getSubprogram(), &BBI);
2133 Remark << "BB " << ore::NV("Block", BBI.getName())
2134 << " Count=" << ore::NV("Count", CountValue)
2135 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2136 if (!Msg.empty())
2137 Remark << " (" << Msg << ")";
2138 return Remark;
2139 });
2140 }
// Per-function summary, emitted only when at least one block mismatched.
2141 if (BBMisMatchNum)
2142 ORE.emit([&]() {
2143 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2144 F.getSubprogram(), &F.getEntryBlock())
2145 << "In Func " << ore::NV("Function", F.getName())
2146 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2147 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2148 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2149 });
2150}
2151
// Reads the indexed profile named by ProfileFileName and uses it to annotate
// the functions of M. Returns false (after emitting a diagnostic in most
// cases) when the reader cannot be created, when IsCS is set but the profile
// has no context-sensitive IR-level data, when the profile is not an IR-level
// profile, or when it only contains function-entry data. Returns true
// otherwise.
// NOTE(review): a number of lines of this definition are missing from this
// listing (the return type/name line, the LookupBPI/LookupBFI parameters, and
// several conditions and statements further down); the notes below mark the
// visible gaps.
2153 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2154 vfs::FileSystem &FS,
2155 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2158 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2159 bool IsCS) {
2160 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2161 auto &Ctx = M.getContext();
2162 // Read the counter array from file.
2163 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2164 ProfileRemappingFileName);
2165 if (Error E = ReaderOrErr.takeError()) {
2166 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2167 Ctx.diagnose(
2168 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2169 });
2170 return false;
2171 }
2172
2173 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2174 std::move(ReaderOrErr.get());
2175 if (!PGOReader) {
2176 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2177 StringRef("Cannot get PGOReader")));
2178 return false;
2179 }
// A context-sensitive use pass has nothing to do without CS data; this case
// returns without a diagnostic.
2180 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2181 return false;
2182
2183 // TODO: might need to change the warning once the clang option is finalized.
2184 if (!PGOReader->isIRLevelProfile()) {
2185 Ctx.diagnose(DiagnosticInfoPGOProfile(
2186 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2187 return false;
2188 }
2189 if (PGOReader->functionEntryOnly()) {
2190 Ctx.diagnose(DiagnosticInfoPGOProfile(
2191 ProfileFileName.data(),
2192 "Function entry profiles are not yet supported for optimization"));
2193 return false;
2194 }
2195
// NOTE(review): the condition opening the block below (listing line 2196) is
// missing from this listing.
2197 for (GlobalVariable &G : M.globals()) {
2198 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2199 continue;
2200
2201 // Create the PGOFuncName meta data.
2202 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2203 }
2204 }
2205
2206 // Add the profile summary (read from the header of the indexed summary) here
2207 // so that we can use it below when reading counters (which checks if the
2208 // function should be marked with a cold or inlinehint attribute).
// NOTE(review): the remaining arguments of setProfileSummary (listing lines
// 2210-2211) are missing from this listing.
2209 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2212 PSI->refresh();
2213
2214 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2215 collectComdatMembers(M, ComdatMembers);
2216 std::vector<Function *> HotFunctions;
2217 std::vector<Function *> ColdFunctions;
2218
2219 // If the profile marked as always instrument the entry BB, do the
2220 // same. Note this can be overwritten by the internal option in CFGMST.h
2221 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2222 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2223 InstrumentFuncEntry = PGOInstrumentEntry;
2224 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2225 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
2226 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2227
2228 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2229 for (auto &F : M) {
2230 if (skipPGOUse(F))
2231 continue;
2232 TargetLibraryInfo &TLI = LookupTLI(F);
2233 BranchProbabilityInfo *BPI = LookupBPI(F);
2234 BlockFrequencyInfo *BFI = LookupBFI(F);
2235 LoopInfo *LI = LookupLI(F);
2236 if (!HasSingleByteCoverage) {
2237 // Split indirectbr critical edges here before computing the MST rather
2238 // than later in getInstrBB() to avoid invalidating it.
2239 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2240 BFI);
2241 }
2242 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2243 InstrumentFuncEntry, InstrumentLoopEntries,
2244 HasSingleByteCoverage);
// Functions with no usable profile record are left untouched.
2245 if (!Func.getRecord(PGOReader.get()))
2246 continue;
2247 if (HasSingleByteCoverage) {
2248 Func.populateCoverage();
2249 continue;
2250 }
2251 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2252 // it means the profile for the function is unrepresentative and this
2253 // function is actually hot / warm. We will reset the function hot / cold
2254 // attribute and drop all the profile counters.
// NOTE(review): the declaration of PseudoKind (listing line 2255) is missing
// from this listing.
2256 bool AllZeros = false;
2257 if (!Func.readCounters(AllZeros, PseudoKind))
2258 continue;
// An all-zero profile gets a zero entry count, and the function is queued as
// cold when the program-wide max count is non-zero.
2259 if (AllZeros) {
2260 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2261 if (Func.getProgramMaxCount() != 0)
2262 ColdFunctions.push_back(&F);
2263 continue;
2264 }
2265 if (PseudoKind != InstrProfRecord::NotPseudo) {
2266 // Clear function attribute cold.
2267 if (F.hasFnAttribute(Attribute::Cold))
2268 F.removeFnAttr(Attribute::Cold);
2269 // Set function attribute as hot.
2270 if (PseudoKind == InstrProfRecord::PseudoHot)
2271 F.addFnAttr(Attribute::Hot);
2272 continue;
2273 }
2274 Func.populateCounters();
2275 Func.setBranchWeights();
2276 Func.annotateValueSites();
2277 Func.annotateIrrLoopHeaderWeights();
// Hot/cold attributes are only queued here; they are applied after the loop.
2278 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2279 if (FreqAttr == PGOUseFunc::FFA_Cold)
2280 ColdFunctions.push_back(&F);
2281 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2282 HotFunctions.push_back(&F);
2283 if (PGOViewCounts != PGOVCT_None &&
2284 (ViewBlockFreqFuncName.empty() ||
2285 F.getName() == ViewBlockFreqFuncName)) {
// NOTE(review): listing lines 2286 and 2291 (including the condition selecting
// the graph view) are missing from this listing.
2287 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2288 std::make_unique<BranchProbabilityInfo>(F, LI);
2289 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2290 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2292 NewBFI->view();
2293 else if (PGOViewCounts == PGOVCT_Text) {
2294 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2295 NewBFI->print(dbgs());
2296 }
2297 }
// NOTE(review): the start of the condition below (listing line 2298) and the
// graph-mode check at listing line 2301 are missing from this listing.
2299 (ViewBlockFreqFuncName.empty() ||
2300 F.getName() == ViewBlockFreqFuncName)) {
2302 if (ViewBlockFreqFuncName.empty())
2303 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2304 else
2305 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2306 else if (PGOViewRawCounts == PGOVCT_Text) {
2307 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2308 Func.dumpInfo();
2309 }
2310 }
2311
// NOTE(review): the condition opening the block below (listing lines
// 2312-2313) is missing from this listing.
2314 BranchProbabilityInfo NBPI(F, LI);
2315
2316 // Fix func entry count.
2317 if (PGOFixEntryCount)
2318 fixFuncEntryCount(Func, LI, NBPI);
2319
2320 // Verify BlockFrequency information.
2321 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2322 if (PGOVerifyHotBFI) {
2323 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2325 }
2326 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2327 }
2328 }
2329
2330 // Set function hotness attribute from the profile.
2331 // We have to apply these attributes at the end because their presence
2332 // can affect the BranchProbabilityInfo of any callers, resulting in an
2333 // inconsistent MST between prof-gen and prof-use.
2334 for (auto &F : HotFunctions) {
2335 F->addFnAttr(Attribute::InlineHint);
2336 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2337 << "\n");
2338 }
2339 for (auto &F : ColdFunctions) {
2340 // Only set when there is no Attribute::Hot set by the user. For Hot
2341 // attribute, user's annotation has the precedence over the profile.
2342 if (F->hasFnAttribute(Attribute::Hot)) {
2343 auto &Ctx = M.getContext();
2344 std::string Msg = std::string("Function ") + F->getName().str() +
2345 std::string(" is annotated as a hot function but"
2346 " the profile is cold");
2347 Ctx.diagnose(
2348 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2349 continue;
2350 }
2351 F->addFnAttr(Attribute::Cold);
2352 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2353 << "\n");
2354 }
2355 return true;
2356}
2357
// Constructor: stores the profile file name, the remapping file name, the
// context-sensitive flag and the virtual file system. A non-empty
// PGOTestProfileFile overrides the profile file name.
// NOTE(review): several lines are missing from this listing: the constructor
// name line, the VFS parameter, the condition guarding the remapping-file
// override, and the statement executed when FS is null.
2359 std::string Filename, std::string RemappingFilename, bool IsCS,
2361 : ProfileFileName(std::move(Filename)),
2362 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2363 FS(std::move(VFS)) {
2364 if (!PGOTestProfileFile.empty())
2365 ProfileFileName = PGOTestProfileFile;
2367 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2368 if (!FS)
2370}
2371
2374
// Pass entry point: builds per-function analysis lookups from the function
// analysis manager, fetches the module's ProfileSummaryInfo, and calls
// annotateAllFunctions. Reports all analyses preserved only when that call
// returns false.
// NOTE(review): the signature lines of this definition are missing from this
// listing.
2375 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
// Each lookup fetches its analysis result for a function through FAM.
2376 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2377 return FAM.getResult<TargetLibraryAnalysis>(F);
2378 };
2379 auto LookupBPI = [&FAM](Function &F) {
2380 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2381 };
2382 auto LookupBFI = [&FAM](Function &F) {
2383 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2384 };
2385 auto LookupLI = [&FAM](Function &F) {
2386 return &FAM.getResult<LoopAnalysis>(F);
2387 };
2388
2389 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2390 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2391 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2392 IsCS))
2393 return PreservedAnalyses::all();
2394
2395 return PreservedAnalyses::none();
2396}
2397
2398static std::string getSimpleNodeName(const BasicBlock *Node) {
2399 if (!Node->getName().empty())
2400 return Node->getName().str();
2401
2402 std::string SimpleNodeName;
2403 raw_string_ostream OS(SimpleNodeName);
2404 Node->printAsOperand(OS, false);
2405 return SimpleNodeName;
2406}
2407
// Downscales EdgeCounts against MaxCount into branch weights, runs the
// misexpect check on them, and attaches them to TI as non-"expected" branch
// weights. The trailing section builds a branch-probability string and emits
// an optimization remark.
// NOTE(review): the first signature line, the condition opening the remark
// section (listing line 2420) and the declaration of ORE (listing line 2439)
// are missing from this listing.
2409 uint64_t MaxCount) {
2410 auto Weights = downscaleWeights(EdgeCounts, MaxCount);
2411
2412 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2413 : Weights) {
2414 dbgs() << W << " ";
2415 } dbgs() << "\n";);
2416
2417 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2418
2419 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2421 std::string BrCondStr = getBranchCondString(TI);
// No remark is emitted when no condition string could be produced.
2422 if (BrCondStr.empty())
2423 return;
2424
// WSum is the sum of the scaled weights; TotalCount is the sum of the raw
// edge counts, reported alongside the probability.
2425 uint64_t WSum =
2426 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2427 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2428 uint64_t TotalCount =
2429 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2430 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2431 uint64_t Scale = calculateCountScale(WSum);
// The reported probability is that of the first weight over the weight sum.
2432 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2433 scaleBranchCount(WSum, Scale));
2434 std::string BranchProbStr;
2435 raw_string_ostream OS(BranchProbStr);
2436 OS << BP;
2437 OS << " (total count : " << TotalCount << ")";
2438 Function *F = TI->getParent()->getParent();
2440 ORE.emit([&]() {
2441 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2442 << BrCondStr << " is true with probability : " << BranchProbStr;
2443 });
2444 }
2445}
2446
// Definitions placed in namespace llvm: a helper that sets MD_irr_loop
// metadata on a terminator, and the GraphTraits / DOTGraphTraits
// specializations for PGOUseFunc that the graph output of raw counts uses.
// NOTE(review): several lines inside this namespace are missing from this
// listing (the helper's signature and metadata argument, two GraphTraits type
// aliases, the child_begin signature, and the DOTGraphTraits constructor
// initializer).
2447namespace llvm {
2448
// Attaches MD_irr_loop metadata to TI using an MDBuilder from M's context.
2450 MDBuilder MDB(M->getContext());
2451 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2453}
2454
// Graph view of a PGOUseFunc: nodes are the function's basic blocks, children
// are CFG successors, and the entry node is the function's first block.
2455template <> struct GraphTraits<PGOUseFunc *> {
2456 using NodeRef = const BasicBlock *;
2459
2460 static NodeRef getEntryNode(const PGOUseFunc *G) {
2461 return &G->getFunc().front();
2462 }
2463
2465 return succ_begin(N);
2466 }
2467
2468 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2469
2470 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2471 return nodes_iterator(G->getFunc().begin());
2472 }
2473
2474 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2475 return nodes_iterator(G->getFunc().end());
2476 }
2477};
2478
// DOT rendering for a PGOUseFunc: the graph is named after the function and
// each node label shows the block name, its profile count, and (when select
// instrumentation is enabled) the branch weights of each select in the block.
2479template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2480 explicit DOTGraphTraits(bool isSimple = false)
2482
2483 static std::string getGraphName(const PGOUseFunc *G) {
2484 return std::string(G->getFunc().getName());
2485 }
2486
2487 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2488 std::string Result;
2489 raw_string_ostream OS(Result);
2490
2491 OS << getSimpleNodeName(Node) << ":\\l";
2492 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2493 OS << "Count : ";
// Blocks without PGO info, or without a count, are labelled "Unknown".
2494 if (BI && BI->Count)
2495 OS << *BI->Count << "\\l";
2496 else
2497 OS << "Unknown\\l";
2498
2499 if (!PGOInstrSelect)
2500 return Result;
2501
2502 for (const Instruction &I : *Node) {
2503 if (!isa<SelectInst>(&I))
2504 continue;
2505 // Display scaled counts for SELECT instruction:
2506 OS << "SELECT : { T = ";
2507 uint64_t TC, FC;
2508 bool HasProf = extractBranchWeights(I, TC, FC);
2509 if (!HasProf)
2510 OS << "Unknown, F = Unknown }\\l";
2511 else
2512 OS << TC << ", F = " << FC << " }\\l";
2513 }
2514 return Result;
2515 }
2516};
2517
2518} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INSTR_PROF_QUOTE(x)
#define VARIANT_MASK_CSIR_PROF
#define VARIANT_MASK_DBG_CORRELATE
#define INSTR_PROF_RAW_VERSION
#define INSTR_PROF_RAW_VERSION_VAR
#define VARIANT_MASK_TEMPORAL_PROF
#define VARIANT_MASK_IR_PROF
#define VARIANT_MASK_BYTE_COVERAGE
#define VARIANT_MASK_INSTR_ENTRY
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY
#define VARIANT_MASK_INSTR_LOOP_ENTRIES
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
Machine Check Debug Module
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of precise value annotations for a single memop" "intrinsic"))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
ValueProfileCollector::CandidateInfo VPCandidateInfo
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static bool isIndirectBrTarget(BasicBlock *BB)
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is " "mainly for test purpose."))
static std::string getBranchCondString(Instruction *TI)
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
StringSet - A set-like wrapper for the StringMap.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1079
Class for arbitrary precision integers.
Definition APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition Analysis.h:50
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition CFGMST.h:304
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition CFGMST.h:341
size_t numEdges() const
Definition CFGMST.h:347
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
LLVM_ABI StringRef getName() const
Definition Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
SelectionKind getSelectionKind() const
Definition Comdat.h:47
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Class to represent profile counts.
Definition Function.h:297
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:597
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
@ ProtectedVisibility
The GV is protected.
Definition GlobalValue.h:70
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition GlobalValue.h:57
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
This instruction compares its operands according to the predicate given to the constructor.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Expected< NamedInstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Base class for instruction visitors.
Definition InstVisitor.h:78
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
Definition InstrProf.h:465
std::string message() const override
Return the error message as a string.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition CRC.h:53
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Definition CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LLVM_ABI MDString * createString(StringRef Str)
Return the given string as metadata.
Definition MDBuilder.cpp:21
LLVM_ABI MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Metadata node.
Definition Metadata.h:1078
Tuple of metadata.
Definition Metadata.h:1497
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1526
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Value * getOperand(unsigned i) const
Definition User.h:232
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
void write64le(void *P, uint64_t V)
Definition Endian.h:478
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
LLVM_ABI void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
LLVM_ABI std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
LLVM_ABI cl::opt< bool > DebugInfoCorrelate
LLVM_ABI void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition CFG.cpp:80
LLVM_ABI std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FuncHash
Definition InstrProf.h:78
LLVM_ABI DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
LLVM_ABI void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
FunctionAddr NumCounters
Definition InstrProf.h:91
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr NumValueSites[IPVK_Last+1]
Definition InstrProf.h:93
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Function::ProfileCount ProfileCount
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
InstrProfValueKind
Definition InstrProf.h:312
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition CFG.cpp:96
cl::opt< bool > PGOInstrumentColdFunctionOnly
cl::list< std::string > CtxPGOSkipCallsiteInstrument("ctx-prof-skip-callsite-instr", cl::Hidden, cl::desc("Do not instrument callsites to functions in this list. Intended " "for testing."))
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
TinyPtrVector< BasicBlock * > ColorVector
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
auto predecessors(const MachineBasicBlock *BB)
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition CFG.h:244
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI SmallVector< uint32_t > downscaleWeights(ArrayRef< uint64_t > Weights, std::optional< uint64_t > KnownMaxCount=std::nullopt)
Downscale the given weights, preserving the ratio.
LLVM_ABI bool isGPUProfTarget(const Module &M)
Determines whether the module targets a GPU eligible for PGO instrumentation.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DefaultDOTGraphTraits(bool simple=false)
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
pointer_iterator< Function::const_iterator > nodes_iterator
std::vector< uint64_t > Counts
Definition InstrProf.h:896
CountPseudoKind getCountPseudoKind() const
Definition InstrProf.h:994
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition InstrProf.h:1096
static void setCSFlagInHash(uint64_t &FuncHash)
Definition InstrProf.h:1077