LLVM 20.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
117#include <algorithm>
118#include <cassert>
119#include <cstdint>
120#include <memory>
121#include <numeric>
122#include <optional>
123#include <stack>
124#include <string>
125#include <unordered_map>
126#include <utility>
127#include <vector>
128
129using namespace llvm;
132
133#define DEBUG_TYPE "pgo-instrumentation"
134
// Counters declared with the STATISTIC macro. Most NumOfPGO* counters have a
// NumOfCSPGO* counterpart whose description marks it as the CSPGO variant;
// code in this file picks between the two based on whether the
// instrumentation is context-sensitive (IsCS).
135STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138STATISTIC(NumOfPGOEdge, "Number of edges.");
139STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
// No CSPGO counterpart is declared for the indirect call counter.
144STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
145STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOSelectInsts,
147 "Number of select instruction instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOMemIntrinsics,
149 "Number of mem intrinsics instrumented in CSPGO.");
150STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153STATISTIC(NumOfCSPGOFunc,
154 "Number of functions having valid profile counts in CSPGO.");
155STATISTIC(NumOfCSPGOMismatch,
156 "Number of functions having mismatch profile in CSPGO.");
157STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
// Block coverage counter; it has no CSPGO counterpart.
158STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160// Command line option to specify the file to read profile from. This is
161// mainly used for testing.
// The description is built from adjacent string literals, so the first
// literal must end with a space to keep the words apart in the help text.
163 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
164 cl::value_desc("filename"),
165 cl::desc("Specify the path of profile data file. This is "
166 "mainly for test purpose."));
168 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169 cl::value_desc("filename"),
170 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 "test purpose."));
172
173// Command line option to disable value profiling. The default is false:
174// i.e. value profiling is enabled by default. This is for debug purpose.
175static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
177 cl::desc("Disable Value Profiling"));
178
179// Command line option to set the maximum number of VP annotations to write to
180// the metadata for a single indirect call callsite.
182 "icp-max-annotations", cl::init(3), cl::Hidden,
183 cl::desc("Max number of annotations for a single indirect "
184 "call callsite"));
185
186// Command line option to set the maximum number of value annotations
187// to write to the metadata for a single memop intrinsic.
// The description is built from adjacent string literals, so the first
// literal must end with a space to keep "memop" and "intrinsic" apart.
189 "memop-max-annotations", cl::init(4), cl::Hidden,
190 cl::desc("Max number of precise value annotations for a single memop "
191 "intrinsic"));
192
193// Command line option to control appending FunctionHash to the name of a COMDAT
194// function. This is to avoid the hash mismatch caused by the preinliner.
196 "do-comdat-renaming", cl::init(false), cl::Hidden,
197 cl::desc("Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
199
200namespace llvm {
201// Command line option to enable/disable the warning about missing profile
202// information.
203cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
205 cl::desc("Use this option to turn on/off "
206 "warnings about missing profile data for "
207 "functions."));
208
209// Command line option to enable/disable the warning about a hash mismatch in
210// the profile data.
212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213 cl::desc("Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
215
216// Command line option to enable/disable the warning about a hash mismatch in
217// the profile data for Comdat functions, which often turns out to be false
218// positive due to the pre-instrumentation inline.
220 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221 cl::desc("The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
224} // namespace llvm
225
226// Command line option to enable/disable select instruction instrumentation.
227static cl::opt<bool>
228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229 cl::desc("Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245// Command line option to enable/disable memop intrinsic call.size profiling.
246static cl::opt<bool>
247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248 cl::desc("Use this option to turn on/off "
249 "memory intrinsic size profiling."));
250
251// Emit branch probability as optimization remarks.
252static cl::opt<bool>
253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254 cl::desc("When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
264 "pgo-function-entry-coverage", cl::Hidden,
265 cl::desc(
266 "Use this option to enable function entry coverage instrumentation."));
267
269 "pgo-block-coverage",
270 cl::desc("Use this option to enable basic block coverage instrumentation"));
271
272static cl::opt<bool>
273 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
274 cl::desc("Create a dot file of CFGs with block "
275 "coverage inference information"));
276
278 "pgo-temporal-instrumentation",
279 cl::desc("Use this option to enable temporal instrumentation"));
280
281static cl::opt<bool>
282 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
283 cl::desc("Fix function entry count in profile use."));
284
// Command line option to report counts whose hot/cold classification differs
// between the raw profile and BFI (see the description string below).
286 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
287 cl::desc("Print out the non-match BFI count if a hot raw profile count "
288 "becomes non-hot, or a cold raw profile count becomes hot. "
289 "The print is enabled under -Rpass-analysis=pgo, or "
290 "internal option -pass-remarks-analysis=pgo."));
291
// Command line option to report BFI counts that do not match the profile
// after the profile metadata has been set (see the description string below).
293 "pgo-verify-bfi", cl::init(false), cl::Hidden,
294 cl::desc("Print out mismatched BFI counts after setting profile metadata. "
295 "The print is enabled under -Rpass-analysis=pgo, or "
296 "internal option -pass-remarks-analysis=pgo."));
297
299 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
300 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
301 "mismatched BFI if the difference percentage is greater than "
302 "this value (in percentage)."));
303
305 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
306 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
307 "profile count value is below."));
308
310 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
311 cl::value_desc("function name"),
312 cl::desc("Trace the hash of the function with this name."));
313
315 "pgo-function-size-threshold", cl::Hidden,
316 cl::desc("Do not instrument functions smaller than this threshold."));
317
// Command line option limiting instrumentation by critical edge count; the
// default threshold is 20000.
319 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
320 cl::desc("Do not instrument functions with the number of critical edges "
321 "greater than this threshold."));
322
324
325namespace llvm {
326// Command line option to turn on CFG dot dump after profile annotation.
327// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
329
330// Command line option to specify the name of the function for CFG dump
331// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
333
334// Command line option to enable vtable value profiling. Defined in
335// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
339} // namespace llvm
340
342 return PGOInstrumentEntry ||
344}
345
346// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
347// profiling implicitly captures indirect call cases, but not other values.
348// Supporting other values is relatively straight-forward - just another counter
349// range within the context.
351 return DisableValueProfiling ||
353}
354
355// Return a string describing the branch condition that can be
356// used in static branch probability heuristics:
357static std::string getBranchCondString(Instruction *TI) {
358 BranchInst *BI = dyn_cast<BranchInst>(TI);
359 if (!BI || !BI->isConditional())
360 return std::string();
361
362 Value *Cond = BI->getCondition();
363 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
364 if (!CI)
365 return std::string();
366
367 std::string result;
368 raw_string_ostream OS(result);
369 OS << CI->getPredicate() << "_";
370 CI->getOperand(0)->getType()->print(OS, true);
371
372 Value *RHS = CI->getOperand(1);
373 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
374 if (CV) {
375 if (CV->isZero())
376 OS << "_Zero";
377 else if (CV->isOne())
378 OS << "_One";
379 else if (CV->isMinusOne())
380 OS << "_MinusOne";
381 else
382 OS << "_Const";
383 }
384 OS.flush();
385 return result;
386}
387
388static const char *ValueProfKindDescr[] = {
389#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
391};
392
393// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
394// aware this is an ir_level profile so it can set the version flag.
396 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
397 Type *IntTy64 = Type::getInt64Ty(M.getContext());
398 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
399 if (IsCS)
400 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
402 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
404 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
406 ProfileVersion |=
407 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
409 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
411 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
412 auto IRLevelVersionVariable = new GlobalVariable(
413 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
414 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
415 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
416 Triple TT(M.getTargetTriple());
417 if (TT.supportsCOMDAT()) {
418 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
419 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
420 }
421 return IRLevelVersionVariable;
422}
423
424namespace {
425
426/// The select instruction visitor plays three roles specified
427/// by the mode. In \c VM_counting mode, it simply counts the number of
428/// select instructions. In \c VM_instrument mode, it inserts code to count
429/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
430/// it reads the profile data and annotates the select instruction with metadata.
431enum VisitMode { VM_counting, VM_instrument, VM_annotate };
// Forward declaration: SelectInstVisitor below only stores a pointer to it.
432class PGOUseFunc;
433
434/// Instruction Visitor class to visit select instructions.
435struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
436 Function &F;
437 unsigned NSIs = 0; // Number of select instructions instrumented.
438 VisitMode Mode = VM_counting; // Visiting mode.
439 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
440 unsigned TotalNumCtrs = 0; // Total number of counters
441 GlobalVariable *FuncNameVar = nullptr;
442 uint64_t FuncHash = 0;
443 PGOUseFunc *UseFunc = nullptr;
444 bool HasSingleByteCoverage;
445
446 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
447 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
448
449 void countSelects() {
450 NSIs = 0;
451 Mode = VM_counting;
452 visit(F);
453 }
454
455 // Visit the IR stream and instrument all select instructions. \p
456 // Ind is a pointer to the counter index variable; \p TotalNC
457 // is the total number of counters; \p FNV is the pointer to the
458 // PGO function name var; \p FHash is the function hash.
459 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
460 uint64_t FHash) {
461 Mode = VM_instrument;
462 CurCtrIdx = Ind;
463 TotalNumCtrs = TotalNC;
464 FuncHash = FHash;
465 FuncNameVar = FNV;
466 visit(F);
467 }
468
469 // Visit the IR stream and annotate all select instructions.
470 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
471 Mode = VM_annotate;
472 UseFunc = UF;
473 CurCtrIdx = Ind;
474 visit(F);
475 }
476
477 void instrumentOneSelectInst(SelectInst &SI);
478 void annotateOneSelectInst(SelectInst &SI);
479
480 // Visit \p SI instruction and perform tasks according to visit mode.
481 void visitSelectInst(SelectInst &SI);
482
483 // Return the number of select instructions. This needs be called after
484 // countSelects().
485 unsigned getNumOfSelectInsts() const { return NSIs; }
486};
487
488/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
489/// based instrumentation.
490/// Note that the CFG can be a multi-graph. So there might be multiple edges
491/// with the same SrcBB and DestBB.
492struct PGOEdge {
493 BasicBlock *SrcBB;
494 BasicBlock *DestBB;
495 uint64_t Weight;
496 bool InMST = false;
497 bool Removed = false;
498 bool IsCritical = false;
499
500 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
501 : SrcBB(Src), DestBB(Dest), Weight(W) {}
502
503 /// Return the information string of an edge.
504 std::string infoString() const {
505 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
506 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
507 .str();
508 }
509};
510
511/// This class stores the auxiliary information for each BB in the MST.
512struct PGOBBInfo {
 // Group this BB belongs to. The constructor points it at the object itself,
 // so every BB starts out as its own group.
513 PGOBBInfo *Group;
 // Starts at 0. Presumably rank bookkeeping used when groups are merged while
 // the MST is built -- confirm against CFGMST.h.
515 uint32_t Rank = 0;
516
 // Create the info for a BB; \p IX is stored as its Index.
517 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
518
519 /// Return the information string of this object, in the form "Index=<N>".
520 std::string infoString() const {
521 return (Twine("Index=") + Twine(Index)).str();
522 }
523};
524
525// Per-function state shared by PGO instrumentation generation and use.
// The constructor counts select instructions, collects value profiling
// sites, computes the function names and the CFG hash, optionally renames a
// Comdat function, and updates the pass statistics. Edge and BBInfo are the
// edge and per-BB info types handed back by the MST member.
526template <class Edge, class BBInfo> class FuncPGOInstrumentation {
527private:
528 Function &F;
529
530 // Whether this is context-sensitive instrumentation.
531 bool IsCS;
532
533 // A map that stores the Comdat group in function F.
534 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
535
537
 // Compute FunctionHash from the CFG and the instrumentation site counts.
538 void computeCFGHash();
 // Append the function hash to the name of a renamable Comdat function.
539 void renameComdatFunction();
540
541public:
542 const TargetLibraryInfo &TLI;
 // Value profiling candidates, indexed by value kind (IPVK_*); the
 // constructor sizes it to IPVK_Last + 1.
543 std::vector<std::vector<VPCandidateInfo>> ValueSites;
544 SelectInstVisitor SIVisitor;
 // Result of getIRPGOFuncName(F); renameComdatFunction() may append the hash.
545 std::string FuncName;
 // Result of getPGOFuncName(F).
546 std::string DeprecatedFuncName;
 // NOTE(review): has no default initializer and is only assigned by the
 // constructor when CreateGlobalVar is true.
547 GlobalVariable *FuncNameVar;
548
549 // CFG hash value for this function.
550 uint64_t FunctionHash = 0;
551
552 // The Minimum Spanning Tree of function CFG.
554
 // Engaged only when the constructor is given HasSingleByteCoverage == true.
555 const std::optional<BlockCoverageInference> BCI;
556
 // Build the BlockCoverageInference for Func when HasSingleByteCoverage is
 // set; otherwise return an empty optional.
557 static std::optional<BlockCoverageInference>
558 constructBCI(Function &Func, bool HasSingleByteCoverage,
559 bool InstrumentFuncEntry) {
560 if (HasSingleByteCoverage)
561 return BlockCoverageInference(Func, InstrumentFuncEntry);
562 return {};
563 }
564
565 // Collect all the BBs that will be instrumented, and store them in
566 // InstrumentBBs.
567 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
568
569 // Given an edge, find the BB that will be instrumented.
570 // Return nullptr if there is no BB to be instrumented.
571 BasicBlock *getInstrBB(Edge *E);
572
573 // Return the auxiliary BB information.
574 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
575
576 // Return the auxiliary BB information if available.
577 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
578
579 // Dump edges and BB information.
 // The output goes to dbgs(); Str is appended to the dump header after the
 // function name and hash.
580 void dumpInfo(StringRef Str = "") const {
581 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
582 " Hash: " + Twine(FunctionHash) + "\t" + Str);
583 }
584
 // Build the instrumentation state for Func.
 //  ComdatMembers        - Comdat membership map; when it is non-empty the
 //                         function is considered for Comdat renaming.
 //  CreateGlobalVar      - when true, create the PGO function name variable
 //                         and store it in FuncNameVar.
 //  BPI, BFI             - forwarded to the MST constructor.
 //  IsCS                 - selects the CSPGO statistics counters.
 //  InstrumentFuncEntry  - forwarded to the MST and to constructBCI().
 //  HasSingleByteCoverage - forwarded to SIVisitor and to constructBCI().
585 FuncPGOInstrumentation(
586 Function &Func, TargetLibraryInfo &TLI,
587 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
588 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
589 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
590 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
591 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
592 TLI(TLI), ValueSites(IPVK_Last + 1),
593 SIVisitor(Func, HasSingleByteCoverage),
594 MST(F, InstrumentFuncEntry, BPI, BFI),
595 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
596 if (BCI && PGOViewBlockCoverageGraph)
597 BCI->viewBlockCoverageGraph();
598 // This should be done before CFG hash computation.
 // (computeCFGHash() folds the select count and the value site counts into
 // the hash.)
599 SIVisitor.countSelects();
600 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
601 if (!IsCS) {
602 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
603 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
604 NumOfPGOBB += MST.bbInfoSize();
605 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
607 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
608 } else {
609 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
610 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
611 NumOfCSPGOBB += MST.bbInfoSize();
612 }
613
614 FuncName = getIRPGOFuncName(F);
615 DeprecatedFuncName = getPGOFuncName(F);
616 computeCFGHash();
617 if (!ComdatMembers.empty())
618 renameComdatFunction();
619 LLVM_DEBUG(dumpInfo("after CFGMST"));
620
 // Statistics: count every edge that is not removed, and separately those
 // outside the MST (the ones that get instrumented).
621 for (const auto &E : MST.allEdges()) {
622 if (E->Removed)
623 continue;
624 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
625 if (!E->InMST)
626 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
627 }
628
629 if (CreateGlobalVar)
630 FuncNameVar = createPGOFuncNameVar(F, FuncName);
631 }
632};
633
634} // end anonymous namespace
635
636// Compute Hash value for the CFG. Two CRCs are combined: JC is the CRC of the
637// BB index of every CFG successor, and JCH is the CRC of the numbers of
638// selects, indirect calls, mem ops and edges (or, with block coverage, the
// instrumented blocks hash in place of the edge count).
// NOTE: JCH is shifted left by 28 bits (not 32) before JC is added, so the
// two parts overlap in bits 28-31 of the result.
639template <class Edge, class BBInfo>
640void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
641 std::vector<uint8_t> Indexes;
642 JamCRC JC;
643 for (auto &BB : F) {
644 for (BasicBlock *Succ : successors(&BB)) {
645 auto BI = findBBInfo(Succ);
 // Successors without BB info do not contribute to the hash.
646 if (BI == nullptr)
647 continue;
648 uint32_t Index = BI->Index;
 // Append the 32-bit index one byte at a time, least significant first.
649 for (int J = 0; J < 4; J++)
650 Indexes.push_back((uint8_t)(Index >> (J * 8)));
651 }
652 }
653 JC.update(Indexes);
654
655 JamCRC JCH;
656 // The higher part of the hash.
 // updateJCH feeds one 64-bit number into JCH through an 8-byte buffer.
657 auto updateJCH = [&JCH](uint64_t Num) {
658 uint8_t Data[8];
660 JCH.update(Data);
661 };
662 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
663 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
664 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
665 if (BCI) {
666 updateJCH(BCI->getInstrumentedBlocksHash());
667 } else {
668 updateJCH((uint64_t)MST.numEdges());
669 }
670
671 // Hash format for context sensitive profile. Reserve 4 bits for other
672 // information.
673 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
674
675 // Reserve bit 60-63 for other information purpose.
676 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
677 if (IsCS)
679 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
680 << " CRC = " << JC.getCRC()
681 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
682 << ", Edges = " << MST.numEdges() << ", ICSites = "
683 << ValueSites[IPVK_IndirectCallTarget].size()
684 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
685 << ", High32 CRC = " << JCH.getCRC()
686 << ", Hash = " << FunctionHash << "\n";);
687
 // Optional tracing: "-" is the option's default value and disables it;
 // otherwise any function whose name contains the given text is reported.
688 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
689 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
690 << " in building " << F.getParent()->getSourceFileName() << "\n";
691}
692
693// Check if we can safely rename this Comdat function.
694static bool canRenameComdat(
695 Function &F,
696 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
697 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
698 return false;
699
700 // FIXME: Current only handle those Comdat groups that only containing one
701 // function.
702 // (1) For a Comdat group containing multiple functions, we need to have a
703 // unique postfix based on the hashes for each function. There is a
704 // non-trivial code refactoring to do this efficiently.
705 // (2) Variables can not be renamed, so we can not rename Comdat function in a
706 // group including global vars.
707 Comdat *C = F.getComdat();
708 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
709 assert(!isa<GlobalAlias>(CM.second));
710 Function *FM = dyn_cast<Function>(CM.second);
711 if (FM != &F)
712 return false;
713 }
714 return true;
715}
716
717// Append the CFGHash to the Comdat function name.
// Does nothing unless canRenameComdat() accepts F. Otherwise ".<FunctionHash>"
// is appended to both the IR name of F and FuncName, and F is placed in a
// Comdat whose name carries the same suffix.
718template <class Edge, class BBInfo>
719void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
720 if (!canRenameComdat(F, ComdatMembers))
721 return;
722 std::string OrigName = F.getName().str();
723 std::string NewFuncName =
724 Twine(F.getName() + "." + Twine(FunctionHash)).str();
725 F.setName(Twine(NewFuncName));
727 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
728 Comdat *NewComdat;
729 Module *M = F.getParent();
730 // For AvailableExternallyLinkage functions, change the linkage to
731 // LinkOnceODR and put them into comdat. This is because after renaming, there
732 // is no backup external copy available for the function.
 // The Comdat used here is named after the new function name.
733 if (!F.hasComdat()) {
735 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
737 F.setComdat(NewComdat);
738 return;
739 }
740
741 // This function belongs to a single function Comdat group.
 // Create "<original comdat name>.<FunctionHash>" with the same selection
 // kind as the original Comdat.
742 Comdat *OrigComdat = F.getComdat();
743 std::string NewComdatName =
744 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
745 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
746 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
747
 // Move every member recorded for the original Comdat into the new one.
748 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
749 // Must be a function.
750 cast<Function>(CM.second)->setComdat(NewComdat);
751 }
752}
753
754/// Collect all the BBs that will be instruments and add them to
755/// `InstrumentBBs`.
756template <class Edge, class BBInfo>
757void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
758 std::vector<BasicBlock *> &InstrumentBBs) {
759 if (BCI) {
760 for (auto &BB : F)
761 if (BCI->shouldInstrumentBlock(BB))
762 InstrumentBBs.push_back(&BB);
763 return;
764 }
765
766 // Use a worklist as we will update the vector during the iteration.
767 std::vector<Edge *> EdgeList;
768 EdgeList.reserve(MST.numEdges());
769 for (const auto &E : MST.allEdges())
770 EdgeList.push_back(E.get());
771
772 for (auto &E : EdgeList) {
773 BasicBlock *InstrBB = getInstrBB(E);
774 if (InstrBB)
775 InstrumentBBs.push_back(InstrBB);
776 }
777}
778
779// Given a CFG edge E to be instrumented, find which BB to place the
780// instrumented code. The function will split the critical edge if necessary.
// Returns nullptr when E is in the MST or already removed, when the chosen
// BB has no insertion point, or when a critical edge cannot be split.
781template <class Edge, class BBInfo>
782BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
783 if (E->InMST || E->Removed)
784 return nullptr;
785
786 BasicBlock *SrcBB = E->SrcBB;
787 BasicBlock *DestBB = E->DestBB;
788 // For a fake edge, instrument the real BB.
 // (A fake edge is one with a null source or destination.)
789 if (SrcBB == nullptr)
790 return DestBB;
791 if (DestBB == nullptr)
792 return SrcBB;
793
 // Returns BB if it can hold instrumentation, nullptr otherwise.
794 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
795 // Some basic blocks (such as catchswitch) cannot be instrumented.
796 // If the returned first insertion point is the end of BB, skip this BB.
797 if (BB->getFirstInsertionPt() == BB->end())
798 return nullptr;
799 return BB;
800 };
801
802 // Instrument the SrcBB if it has a single successor,
803 // otherwise, the DestBB if this is not a critical edge.
804 Instruction *TI = SrcBB->getTerminator();
805 if (TI->getNumSuccessors() <= 1)
806 return canInstrument(SrcBB);
807 if (!E->IsCritical)
808 return canInstrument(DestBB);
809
810 // Some IndirectBr critical edges cannot be split by the previous
811 // SplitIndirectBrCriticalEdges call. Bail out.
 // SplitCriticalEdge is never attempted on an indirectbr terminator.
812 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
813 BasicBlock *InstrBB =
814 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
815 if (!InstrBB) {
817 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
818 return nullptr;
819 }
820 // For a critical edge, we have to split. Instrument the newly
821 // created BB.
822 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
823 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
824 << " --> " << getBBInfo(DestBB).Index << "\n");
825 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
826 MST.addEdge(SrcBB, InstrBB, 0);
827 // Second one: Add new edge of InstrBB->DestBB.
 // Only this second edge is marked as in the MST; the original edge E is
 // then marked removed.
828 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
829 NewEdge1.InMST = true;
830 E->Removed = true;
831
832 return canInstrument(InstrBB);
833}
834
835// When generating value profiling calls on Windows routines that make use of
836// handler funclets for exception processing an operand bundle needs to attached
837// to the called function. This routine will set \p OpBundles to contain the
838// funclet information, if any is needed, that should be placed on the generated
839// value profiling call for the value profile candidate call.
840static void
844 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
845 if (!OrigCall)
846 return;
847
848 if (!isa<IntrinsicInst>(OrigCall)) {
849 // The instrumentation call should belong to the same funclet as a
850 // non-intrinsic call, so just copy the operand bundle, if any exists.
851 std::optional<OperandBundleUse> ParentFunclet =
852 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
853 if (ParentFunclet)
854 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
855 } else {
856 // Intrinsics or other instructions do not get funclet information from the
857 // front-end. Need to use the BlockColors that was computed by the routine
858 // colorEHFunclets to determine whether a funclet is needed.
859 if (!BlockColors.empty()) {
860 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
861 assert(CV.size() == 1 && "non-unique color for block!");
862 Instruction *EHPad = CV.front()->getFirstNonPHI();
863 if (EHPad->isEHPad())
864 OpBundles.emplace_back("funclet", EHPad);
865 }
866 }
867}
868
869// Visit all edge and instrument the edges not in MST, and do value profiling.
870// Critical edges will be split.
// NOTE(review): this extract is missing listing lines 871-873 (function name
// and leading parameters), 884, 889, 905, 943, 962, 973, 982 and 1008. Each
// gap is marked below; the conditions that open several of the braced regions
// are among the missing lines, so the block cannot be read as complete code.
874 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
875 bool IsCS) {
876 if (!PGOBlockCoverage) {
877 // Split indirectbr critical edges here before computing the MST rather than
878 // later in getInstrBB() to avoid invalidating it.
879 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
880 }
881
882 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
883 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
// (listing line 884 missing: the remaining constructor argument(s).)
885
886 auto Name = FuncInfo.FuncNameVar;
887 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
888 FuncInfo.FunctionHash);
// (listing line 889 missing: presumably the condition guarding this
// early-return region, which emits a single cover intrinsic in the entry
// block -- confirm.)
890 auto &EntryBB = F.getEntryBlock();
891 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
892 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
893 // i32 <index>)
894 Builder.CreateCall(
895 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
896 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
897 return;
898 }
899
// One counter per instrumented block plus one per counted select.
900 std::vector<BasicBlock *> InstrumentBBs;
901 FuncInfo.getInstrumentBBs(InstrumentBBs);
902 unsigned NumCounters =
903 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
904
// (listing line 905 missing: presumably the condition that enables the
// callsite instrumentation region below -- confirm.)
906 auto *CSIntrinsic =
907 Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
908 // We want to count the instrumentable callsites, then instrument them. This
909 // is because the llvm.instrprof.callsite intrinsic has an argument (like
910 // the other instrprof intrinsics) capturing the total number of
911 // instrumented objects (counters, or callsites, in this case). In this
912 // case, we want that value so we can readily pass it to the compiler-rt
913 // APIs that may have to allocate memory based on the nr of callsites.
914 // The traversal logic is the same for both counting and instrumentation,
915 // just needs to be done in succession.
916 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
917 for (auto &BB : F)
918 for (auto &Instr : BB)
919 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
// Calls to intrinsics and inline asm are not visited.
920 if ((CS->getCalledFunction() &&
921 CS->getCalledFunction()->isIntrinsic()) ||
922 dyn_cast<InlineAsm>(CS->getCalledOperand()))
923 continue;
924 Visitor(CS);
925 }
926 };
927 // First, count callsites.
928 uint32_t TotalNrCallsites = 0;
929 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
930
931 // Now instrument.
932 uint32_t CallsiteIndex = 0;
933 Visit([&](auto *CB) {
// The intrinsic call is inserted immediately before the visited call.
934 IRBuilder<> Builder(CB);
935 Builder.CreateCall(CSIntrinsic,
936 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
937 Builder.getInt32(CallsiteIndex++),
938 CB->getCalledOperand()});
939 });
940 }
941
// I is the index of the next counter to hand out.
942 uint32_t I = 0;
// (listing line 943 missing: presumably the condition that enables the
// timestamp region below -- confirm.)
// The timestamp takes 8 counter slots under block coverage and 1 otherwise;
// the same amount is added to NumCounters and to I.
944 NumCounters += PGOBlockCoverage ? 8 : 1;
945 auto &EntryBB = F.getEntryBlock();
946 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
947 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
948 // i32 <index>)
949 Builder.CreateCall(
950 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
951 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
952 I += PGOBlockCoverage ? 8 : 1;
953 }
954
955 for (auto *InstrBB : InstrumentBBs) {
956 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
957 assert(Builder.GetInsertPoint() != InstrBB->end() &&
958 "Cannot get the Instrumentation point");
959 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
960 // i32 <index>)
961 Builder.CreateCall(
// (listing line 962 missing: the start of the callee expression that selects
// between the cover and increment intrinsics.)
963 ? Intrinsic::instrprof_cover
964 : Intrinsic::instrprof_increment),
965 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
966 }
967
968 // Now instrument select instructions:
969 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
970 FuncInfo.FunctionHash);
// Every counter slot must have been handed out exactly once.
971 assert(I == NumCounters);
972
// (listing line 973 missing: the condition guarding this early return;
// everything after it is value-profiling instrumentation.)
974 return;
975
976 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
977
978 // Intrinsic function calls do not have funclet operand bundles needed for
979 // Windows exception handling attached to them. However, if value profiling is
980 // inserted for one of these calls, then a funclet value will need to be set
981 // on the instrumentation call based on the funclet coloring.
// (listing line 982 missing: presumably the declaration of BlockColors --
// confirm.)
983 if (F.hasPersonalityFn() &&
984 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
985 BlockColors = colorEHFunclets(F);
986
987 // For each VP Kind, walk the VP candidates and instrument each one.
988 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at 0 for every value kind.
989 unsigned SiteIndex = 0;
990 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
991 continue;
992
993 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
994 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
995 << " site: CallSite Index = " << SiteIndex << "\n");
996
997 IRBuilder<> Builder(Cand.InsertPt);
998 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
999 "Cannot get the Instrumentation point");
1000
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
1001 Value *ToProfile = nullptr;
1002 if (Cand.V->getType()->isIntegerTy())
1003 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1004 else if (Cand.V->getType()->isPointerTy())
1005 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1006 assert(ToProfile && "value profiling Value is of unexpected type");
1007
// (listing line 1008 missing: presumably the declaration of OpBundles --
// confirm.)
1009 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1010 Builder.CreateCall(
1011 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1012 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1013 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1014 OpBundles);
1015 }
1016 } // IPVK_First <= Kind <= IPVK_Last
1017}
1018
1019namespace {
1020
1021// This class represents a CFG edge in profile use compilation.
1022struct PGOUseEdge : public PGOEdge {
1023 using PGOEdge::PGOEdge;
1024
1025 std::optional<uint64_t> Count;
1026
1027 // Set edge count value
1028 void setEdgeCount(uint64_t Value) { Count = Value; }
1029
1030 // Return the information string for this object.
1031 std::string infoString() const {
1032 if (!Count)
1033 return PGOEdge::infoString();
1034 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1035 }
1036};
1037
1038using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1039
1040// This class stores the auxiliary information for each BB.
1041struct PGOUseBBInfo : public PGOBBInfo {
1042 std::optional<uint64_t> Count;
1043 int32_t UnknownCountInEdge = 0;
1044 int32_t UnknownCountOutEdge = 0;
1045 DirectEdges InEdges;
1046 DirectEdges OutEdges;
1047
1048 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1049
1050 // Set the profile count value for this BB.
1051 void setBBInfoCount(uint64_t Value) { Count = Value; }
1052
1053 // Return the information string of this object.
1054 std::string infoString() const {
1055 if (!Count)
1056 return PGOBBInfo::infoString();
1057 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1058 }
1059
1060 // Add an OutEdge and update the edge count.
1061 void addOutEdge(PGOUseEdge *E) {
1062 OutEdges.push_back(E);
1063 UnknownCountOutEdge++;
1064 }
1065
1066 // Add an InEdge and update the edge count.
1067 void addInEdge(PGOUseEdge *E) {
1068 InEdges.push_back(E);
1069 UnknownCountInEdge++;
1070 }
1071};
1072
1073} // end anonymous namespace
1074
1075// Sum up the count values for all the edges.
// NOTE(review): listing line 1076 (the function signature) is absent from this
// extract. Call sites in this file pass a DirectEdges value and use the result
// as a uint64_t.
// Edges marked Removed and edges without a known count contribute nothing.
1077 uint64_t Total = 0;
1078 for (const auto &E : Edges) {
1079 if (E->Removed)
1080 continue;
1081 if (E->Count)
1082 Total += *E->Count;
1083 }
1084 return Total;
1085}
1086
1087namespace {
1088
// Per-function driver of the profile-use compilation: it reads the profile
// counters, propagates them to blocks and edges, and annotates the IR.
// NOTE(review): listing lines 1093, 1105, 1156 and 1181 are absent from this
// extract; each gap is marked below.
1089class PGOUseFunc {
1090public:
1091 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1092 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
// (listing line 1093 missing: presumably the parameters named BPI and BFIin
// that the initializer list below uses -- confirm.)
1094 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1095 bool HasSingleByteCoverage)
1096 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1097 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1098 InstrumentFuncEntry, HasSingleByteCoverage),
1099 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1100
// Report a profile-reading error for this function; MismatchedFuncSum is
// included in the warning text.
1101 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1102
1103 // Read counts for the instrumented BB from profile.
1104 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
// (listing line 1105 missing: the remaining parameter(s) of readCounters.)
1106
1107 // Populate the counts for all BBs.
1108 void populateCounters();
1109
1110 // Set block coverage based on profile coverage values.
1111 void populateCoverage(IndexedInstrProfReader *PGOReader);
1112
1113 // Set the branch weights based on the count values.
1114 void setBranchWeights();
1115
1116 // Annotate the value profile call sites for all value kind.
1117 void annotateValueSites();
1118
1119 // Annotate the value profile call sites for one value kind.
1120 void annotateValueSites(uint32_t Kind);
1121
1122 // Annotate the irreducible loop header weights.
1123 void annotateIrrLoopHeaderWeights();
1124
1125 // The hotness of the function from the profile count.
1126 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1127
1128 // Return the function hotness from the profile.
1129 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1130
1131 // Return the function hash.
1132 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1133
1134 // Return the profile record for this function;
1135 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1136
1137 // Return the auxiliary BB information.
1138 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1139 return FuncInfo.getBBInfo(BB);
1140 }
1141
1142 // Return the auxiliary BB information if available.
1143 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1144 return FuncInfo.findBBInfo(BB);
1145 }
1146
1147 Function &getFunc() const { return F; }
1148
1149 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1150
1151 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1152
1153private:
1154 Function &F;
1155 Module *M;
// (listing line 1156 missing: presumably the declaration of the BFI member
// initialized by the constructor -- confirm.)
1157 ProfileSummaryInfo *PSI;
1158
1159 // This member stores the shared information with class PGOGenFunc.
1160 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1161
1162 // The maximum count value in the profile. This is only used in PGO use
1163 // compilation.
// Not initialized by the constructor; readCounters() assigns it only on its
// success path.
1164 uint64_t ProgramMaxCount;
1165
1166 // Position of counter that remains to be read.
1167 uint32_t CountPosition = 0;
1168
1169 // Total size of the profile count for this function.
1170 uint32_t ProfileCountSize = 0;
1171
1172 // ProfileRecord for this function.
1173 InstrProfRecord ProfileRecord;
1174
1175 // Function hotness info derived from profile.
1176 FuncFreqAttr FreqAttr;
1177
1178 // Is to use the context sensitive profile.
1179 bool IsCS;
1180
// (listing line 1181 missing: presumably the declaration of the VPC member
// initialized by the constructor -- confirm.)
1182
1183 // Find the Instrumented BB and set the value. Return false on error.
1184 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1185
1186 // Set the edge counter value for the unknown edge -- there should be only
1187 // one unknown edge.
1188 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1189
1190 // Set the hot/cold inline hints based on the count values.
1191 // FIXME: This function should be removed once the functionality in
1192 // the inliner is implemented.
// Hot is decided from the entry count and takes precedence; cold is decided
// from the maximum count. Otherwise FreqAttr is left unchanged.
1193 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1194 if (PSI->isHotCount(EntryCount))
1195 FreqAttr = FFA_Hot;
1196 else if (PSI->isColdCount(MaxCount))
1197 FreqAttr = FFA_Cold;
1198 }
1199};
1200
1201} // end anonymous namespace
1202
1203/// Set up InEdges/OutEdges for all BBs in the MST.
// NOTE(review): listing line 1204 (return type and function name) is absent
// from this extract; the name is taken from the call in
// PGOUseFunc::setInstrumentedCounts.
1205 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1206 // This is not required when there is block coverage inference.
1207 if (FuncInfo.BCI)
1208 return;
// Each non-removed edge is registered as an out-edge of its source block and
// as an in-edge of its destination block.
1209 for (const auto &E : FuncInfo.MST.allEdges()) {
1210 if (E->Removed)
1211 continue;
1212 const BasicBlock *SrcBB = E->SrcBB;
1213 const BasicBlock *DestBB = E->DestBB;
1214 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1215 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1216 SrcInfo.addOutEdge(E.get());
1217 DestInfo.addInEdge(E.get());
1218 }
1219}
1220
1221// Visit all the edges and assign the count value for the instrumented
1222// edges and the BB. Return false on error.
1223bool PGOUseFunc::setInstrumentedCounts(
1224 const std::vector<uint64_t> &CountFromProfile) {
1225
1226 std::vector<BasicBlock *> InstrumentBBs;
1227 FuncInfo.getInstrumentBBs(InstrumentBBs);
1228
1229 setupBBInfoEdges(FuncInfo);
1230
1231 unsigned NumCounters =
1232 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1233 // The number of counters here should match the number of counters
1234 // in profile. Return if they mismatch.
1235 if (NumCounters != CountFromProfile.size()) {
1236 return false;
1237 }
1238 auto *FuncEntry = &*F.begin();
1239
1240 // Set the profile count to the Instrumented BBs.
1241 uint32_t I = 0;
1242 for (BasicBlock *InstrBB : InstrumentBBs) {
1243 uint64_t CountValue = CountFromProfile[I++];
1244 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1245 // If we reach here, we know that we have some nonzero count
1246 // values in this function. The entry count should not be 0.
1247 // Fix it if necessary.
1248 if (InstrBB == FuncEntry && CountValue == 0)
1249 CountValue = 1;
1250 Info.setBBInfoCount(CountValue);
1251 }
1252 ProfileCountSize = CountFromProfile.size();
1253 CountPosition = I;
1254
1255 // Set the edge count and update the count of unknown edges for BBs.
1256 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1257 E->setEdgeCount(Value);
1258 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1259 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1260 };
1261
1262 // Set the profile count the Instrumented edges. There are BBs that not in
1263 // MST but not instrumented. Need to set the edge count value so that we can
1264 // populate the profile counts later.
1265 for (const auto &E : FuncInfo.MST.allEdges()) {
1266 if (E->Removed || E->InMST)
1267 continue;
1268 const BasicBlock *SrcBB = E->SrcBB;
1269 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1270
1271 // If only one out-edge, the edge profile count should be the same as BB
1272 // profile count.
1273 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1274 setEdgeCount(E.get(), *SrcInfo.Count);
1275 else {
1276 const BasicBlock *DestBB = E->DestBB;
1277 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1278 // If only one in-edge, the edge profile count should be the same as BB
1279 // profile count.
1280 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1281 setEdgeCount(E.get(), *DestInfo.Count);
1282 }
1283 if (E->Count)
1284 continue;
1285 // E's count should have been set from profile. If not, this meenas E skips
1286 // the instrumentation. We set the count to 0.
1287 setEdgeCount(E.get(), 0);
1288 }
1289 return true;
1290}
1291
1292// Set the count value for the unknown edge. There should be one and only one
1293// unknown edge in Edges vector.
1294void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1295 for (auto &E : Edges) {
1296 if (E->Count)
1297 continue;
1298 E->setEdgeCount(Value);
1299
1300 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1301 getBBInfo(E->DestBB).UnknownCountInEdge--;
1302 return;
1303 }
1304 llvm_unreachable("Cannot find the unknown count edge");
1305}
1306
1307// Emit function metadata indicating PGO profile mismatch.
// NOTE(review): listing lines 1308 (the function signature) and 1310
// (presumably the declaration of Names) are absent from this extract. The
// function name is taken from the call in PGOUseFunc::handleInstrProfError,
// which passes the function and its LLVMContext.
1309 const char MetadataName[] = "instr_prof_hash_mismatch";
1311 // If this metadata already exists, ignore.
// Existing annotation operands are copied into Names so that they are kept
// when the annotation node is replaced below.
1312 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1313 if (Existing) {
1314 MDTuple *Tuple = cast<MDTuple>(Existing);
1315 for (const auto &N : Tuple->operands()) {
1316 if (N.equalsStr(MetadataName))
1317 return;
1318 Names.push_back(N.get());
1319 }
1320 }
1321
1322 MDBuilder MDB(ctx);
1323 Names.push_back(MDB.createString(MetadataName));
1324 MDNode *MD = MDTuple::get(ctx, Names);
1325 F.setMetadata(LLVMContext::MD_annotation, MD);
1326}
1327
// Handle an error returned while reading this function's profile record:
// update the missing/mismatch statistics, tag the function on a hash mismatch
// or malformed record, and emit a warning unless it is suppressed.
// NOTE(review): listing lines 1343, 1344 and 1346 (parts of the SkipWarning
// expression) are absent from this extract.
1328void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1329 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1330 auto &Ctx = M->getContext();
// This Err is the instrprof_error code; it shadows the Error parameter.
1331 auto Err = IPE.get();
1332 bool SkipWarning = false;
1333 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1334 << FuncInfo.FuncName << ": ");
1335 if (Err == instrprof_error::unknown_function) {
1336 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1337 SkipWarning = !PGOWarnMissing;
1338 LLVM_DEBUG(dbgs() << "unknown function");
1339 } else if (Err == instrprof_error::hash_mismatch ||
1340 Err == instrprof_error::malformed) {
1341 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1342 SkipWarning =
// (listing lines 1343-1344 missing: the leading operands of this expression.)
1345 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
// (listing line 1346 missing: the end of this expression.)
1347 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1348 << " skip=" << SkipWarning << ")");
1349 // Emit function metadata indicating PGO profile mismatch.
1350 annotateFunctionWithHashMismatch(F, M->getContext());
1351 }
1352
1353 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1354 if (SkipWarning)
1355 return;
1356
1357 std::string Msg =
1358 IPE.message() + std::string(" ") + F.getName().str() +
1359 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1360 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1361 std::string(" count discarded");
1362
1363 Ctx.diagnose(
1364 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1365 });
1366}
1367
1368// Read the profile from ProfileFileName and assign the value to the
1369// instrumented BB and the edges. This function also updates ProgramMaxCount.
1370// Return true if the profile are successfully read, and false on errors.
// AllZeros is set to whether all counters sum to zero. It is written only when
// a non-pseudo record was read; on the error and pseudo-kind returns it is
// left untouched.
// NOTE(review): listing lines 1372 (the rest of the parameter list, presumably
// declaring PseudoKind) and 1375 (the start of the statement that declares
// Result) are absent from this extract.
1371bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1373 auto &Ctx = M->getContext();
1374 uint64_t MismatchedFuncSum = 0;
1376 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1377 &MismatchedFuncSum);
1378 if (Error E = Result.takeError()) {
1379 handleInstrProfError(std::move(E), MismatchedFuncSum);
1380 return false;
1381 }
1382 ProfileRecord = std::move(Result.get());
// Pseudo records are reported as success, but no counts are assigned here.
1383 PseudoKind = ProfileRecord.getCountPseudoKind();
1384 if (PseudoKind != InstrProfRecord::NotPseudo) {
1385 return true;
1386 }
1387 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1388
1389 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1390 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1391
// NOTE(review): ValueSum is an unsigned 64-bit sum and could wrap for very
// large counts; a wrap to exactly 0 would misreport AllZeros.
1392 uint64_t ValueSum = 0;
1393 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1394 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1395 ValueSum += CountFromProfile[I];
1396 }
1397 AllZeros = (ValueSum == 0);
1398
1399 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1400
// NOTE(review): the info keyed by nullptr presumably belongs to the fake
// entry/exit node of the CFG; seeding its unknown-edge counters with 2
// presumably keeps the single-unknown-edge rule in populateCounters from
// firing on it -- confirm.
1401 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1402 getBBInfo(nullptr).UnknownCountInEdge = 2;
1403
1404 if (!setInstrumentedCounts(CountFromProfile)) {
1405 LLVM_DEBUG(
1406 dbgs() << "Inconsistent number of counts, skipping this function");
1407 Ctx.diagnose(DiagnosticInfoPGOProfile(
1408 M->getName().data(),
1409 Twine("Inconsistent number of counts in ") + F.getName().str() +
1410 Twine(": the profile may be stale or there is a function name "
1411 "collision."),
1412 DS_Warning));
1413 return false;
1414 }
1415 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1416 return true;
1417}
1418
// Read the coverage counters of this function from the profile, infer the
// coverage of the non-instrumented blocks, and annotate the IR with an entry
// count and 0/1 branch weights that reflect the result.
// NOTE(review): listing lines 1421 (start of the statement declaring Result),
// 1431 (presumably the declaration of Coverage), 1439 (the type of
// InverseDependencies), 1479 (presumably the declaration of Weights) and 1524
// (presumably the condition guarding the final call) are absent from this
// extract.
1419void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1420 uint64_t MismatchedFuncSum = 0;
1422 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1423 &MismatchedFuncSum);
1424 if (auto Err = Result.takeError()) {
1425 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1426 return;
1427 }
1428 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1429
1430 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
// Counters are consumed in function block order, one per instrumented block.
// NOTE(review): the index is bounds-checked only by the assert after the loop,
// so a record with too few counters reads out of range in release builds.
1432 unsigned Index = 0;
1433 for (auto &BB : F)
1434 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1435 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1436 assert(Index == CountsFromProfile.size());
1437
1438 // For each B in InverseDependencies[A], if A is covered then B is covered.
1440 InverseDependencies;
1441 for (auto &BB : F) {
1442 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1443 // If Dep is covered then BB is covered.
1444 InverseDependencies[Dep].insert(&BB);
1445 }
1446 }
1447
1448 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1449 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1450 for (auto &[BB, IsCovered] : Coverage)
1451 if (IsCovered)
1452 CoveredBlocksToProcess.push(BB);
1453
1454 while (!CoveredBlocksToProcess.empty()) {
1455 auto *CoveredBlock = CoveredBlocksToProcess.top();
1456 assert(Coverage[CoveredBlock]);
1457 CoveredBlocksToProcess.pop();
1458 for (auto *BB : InverseDependencies[CoveredBlock]) {
1459 // If CoveredBlock is covered then BB is covered.
1460 if (Coverage[BB])
1461 continue;
1462 Coverage[BB] = true;
1463 CoveredBlocksToProcess.push(BB);
1464 }
1465 }
1466
1467 // Annotate block coverage.
1468 MDBuilder MDB(F.getContext());
1469 // We set the entry count to 10000 if the entry block is covered so that BFI
1470 // can propagate a fraction of this count to the other covered blocks.
1471 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1472 for (auto &BB : F) {
1473 // For a block A and its successor B, we set the edge weight as follows:
1474 // If A is covered and B is covered, set weight=1.
1475 // If A is covered and B is uncovered, set weight=0.
1476 // If A is uncovered, set weight=1.
1477 // This setup will allow BFI to give nonzero profile counts to only covered
1478 // blocks.
1480 for (auto *Succ : successors(&BB))
1481 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are attached only to terminators with at least two successors.
1482 if (Weights.size() >= 2)
1483 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1484 /*IsExpected=*/false);
1485 }
1486
// Block frequencies are recomputed from the annotations written above so that
// they can be cross-checked against the inferred coverage.
1487 unsigned NumCorruptCoverage = 0;
1488 DominatorTree DT(F);
1489 LoopInfo LI(DT);
1490 BranchProbabilityInfo BPI(F, LI);
1491 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether the recomputed count of BB is zero, or an empty optional
// when no count is available.
1492 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1493 if (auto C = BFI.getBlockProfileCount(&BB))
1494 return C == 0;
1495 return {};
1496 };
1497 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1498 for (auto &BB : F) {
1499 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1500 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1501 << "\n");
1502 // In some cases it is possible to find a covered block that has no covered
1503 // successors, e.g., when a block calls a function that may call exit(). In
1504 // those cases, BFI could find its successor to be covered while BCI could
1505 // find its successor to be dead.
// NOTE(review): when IsBlockDead(BB) is empty and the block is uncovered, this
// condition is true and the debug output below calls .value() on an empty
// optional. Whether that case is reachable depends on BFI always having a
// count here -- confirm.
1506 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1507 LLVM_DEBUG(
1508 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1509 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1510 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1511 ++NumCorruptCoverage;
1512 }
1513 if (Coverage[&BB])
1514 ++NumCoveredBlocks;
1515 }
1516 if (PGOVerifyBFI && NumCorruptCoverage) {
1517 auto &Ctx = M->getContext();
1518 Ctx.diagnose(DiagnosticInfoPGOProfile(
1519 M->getName().data(),
1520 Twine("Found inconsistent block coverage for function ") + F.getName() +
1521 " in " + Twine(NumCorruptCoverage) + " blocks.",
1522 DS_Warning));
1523 }
1525 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1526}
1527
1528// Populate the counters from instrumented BBs to all BBs.
1529// In the end of this operation, all BBs should have a valid count value.
// The loop below repeats until a full sweep over the blocks changes nothing.
// NOTE(review): listing line 1599 is absent from this extract; it sits between
// the entry-count fix-up and the markFunctionAttributes call.
1530void PGOUseFunc::populateCounters() {
1531 bool Changes = true;
1532 unsigned NumPasses = 0;
1533 while (Changes) {
1534 NumPasses++;
1535 Changes = false;
1536
1537 // For efficient traversal, it's better to start from the end as most
1538 // of the instrumented edges are at the end.
1539 for (auto &BB : reverse(F)) {
1540 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1541 if (UseBBInfo == nullptr)
1542 continue;
// Rule 1: a block whose out-edges (or else in-edges) are all known gets the
// sum of those edge counts.
1543 if (!UseBBInfo->Count) {
1544 if (UseBBInfo->UnknownCountOutEdge == 0) {
1545 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1546 Changes = true;
1547 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1548 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1549 Changes = true;
1550 }
1551 }
// Rule 2: a block with a known count and exactly one unknown edge on a side
// gives that edge the remainder, clamped at zero.
1552 if (UseBBInfo->Count) {
1553 if (UseBBInfo->UnknownCountOutEdge == 1) {
1554 uint64_t Total = 0;
1555 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1556 // If the one of the successor block can early terminate (no-return),
1557 // we can end up with situation where out edge sum count is larger as
1558 // the source BB's count is collected by a post-dominated block.
1559 if (*UseBBInfo->Count > OutSum)
1560 Total = *UseBBInfo->Count - OutSum;
1561 setEdgeCount(UseBBInfo->OutEdges, Total);
1562 Changes = true;
1563 }
1564 if (UseBBInfo->UnknownCountInEdge == 1) {
1565 uint64_t Total = 0;
1566 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1567 if (*UseBBInfo->Count > InSum)
1568 Total = *UseBBInfo->Count - InSum;
1569 setEdgeCount(UseBBInfo->InEdges, Total);
1570 Changes = true;
1571 }
1572 }
1573 }
1574 }
1575
1576 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
// Keeps NumPasses referenced when LLVM_DEBUG does not use it.
1577 (void)NumPasses;
1578#ifndef NDEBUG
1579 // Assert every BB has a valid counter.
1580 for (auto &BB : F) {
1581 auto BI = findBBInfo(&BB);
1582 if (BI == nullptr)
1583 continue;
1584 assert(BI->Count && "BB count is not valid");
1585 }
1586#endif
// From here on the optional counts are dereferenced without a check, relying
// on the invariant asserted above.
1587 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1588 uint64_t FuncMaxCount = FuncEntryCount;
1589 for (auto &BB : F) {
1590 auto BI = findBBInfo(&BB);
1591 if (BI == nullptr)
1592 continue;
1593 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1594 }
1595
1596 // Fix the obviously inconsistent entry count.
1597 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1598 FuncEntryCount = 1;
1600 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1601
1602 // Now annotate select instructions
1603 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
// All counters of the profile record must have been consumed by now.
1604 assert(CountPosition == ProfileCountSize);
1605
1606 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1607}
1608
1609// Assign the scaled count values to the BB with multiple out edges.
1610void PGOUseFunc::setBranchWeights() {
1611 // Generate MD_prof metadata for every branch instruction.
1612 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1613 << " IsCS=" << IsCS << "\n");
1614 for (auto &BB : F) {
1615 Instruction *TI = BB.getTerminator();
1616 if (TI->getNumSuccessors() < 2)
1617 continue;
1618 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1619 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1620 isa<CallBrInst>(TI)))
1621 continue;
1622
1623 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1624 if (!*BBCountInfo.Count)
1625 continue;
1626
1627 // We have a non-zero Branch BB.
1628 unsigned Size = BBCountInfo.OutEdges.size();
1629 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1630 uint64_t MaxCount = 0;
1631 for (unsigned s = 0; s < Size; s++) {
1632 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1633 const BasicBlock *SrcBB = E->SrcBB;
1634 const BasicBlock *DestBB = E->DestBB;
1635 if (DestBB == nullptr)
1636 continue;
1637 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1638 uint64_t EdgeCount = *E->Count;
1639 if (EdgeCount > MaxCount)
1640 MaxCount = EdgeCount;
1641 EdgeCounts[SuccNum] = EdgeCount;
1642 }
1643
1644 if (MaxCount)
1645 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1646 else {
1647 // A zero MaxCount can come about when we have a BB with a positive
1648 // count, and whose successor blocks all have 0 count. This can happen
1649 // when there is no exit block and the code exits via a noreturn function.
1650 auto &Ctx = M->getContext();
1651 Ctx.diagnose(DiagnosticInfoPGOProfile(
1652 M->getName().data(),
1653 Twine("Profile in ") + F.getName().str() +
1654 Twine(" partially ignored") +
1655 Twine(", possibly due to the lack of a return path."),
1656 DS_Warning));
1657 }
1658 }
1659}
1660
// NOTE(review): listing line 1661 (the function signature) is absent from this
// extract. The name isIndirectBrTarget is inferred from the call in
// PGOUseFunc::annotateIrrLoopHeaderWeights -- confirm.
// Returns true when at least one predecessor of BB ends in an indirectbr.
1662 for (BasicBlock *Pred : predecessors(BB)) {
1663 if (isa<IndirectBrInst>(Pred->getTerminator()))
1664 return true;
1665 }
1666 return false;
1667}
1668
1669void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1670 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1671 // Find irr loop headers
1672 for (auto &BB : F) {
1673 // As a heuristic also annotate indrectbr targets as they have a high chance
1674 // to become an irreducible loop header after the indirectbr tail
1675 // duplication.
1676 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1677 Instruction *TI = BB.getTerminator();
1678 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1679 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1680 }
1681 }
1682}
1683
1684void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1685 Module *M = F.getParent();
1686 IRBuilder<> Builder(&SI);
1687 Type *Int64Ty = Builder.getInt64Ty();
1688 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1689 Builder.CreateCall(
1690 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1691 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1692 Builder.getInt32(*CurCtrIdx), Step});
1693 ++(*CurCtrIdx);
1694}
1695
1696void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1697 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1698 assert(*CurCtrIdx < CountFromProfile.size() &&
1699 "Out of bound access of counters");
1700 uint64_t SCounts[2];
1701 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1702 ++(*CurCtrIdx);
1703 uint64_t TotalCount = 0;
1704 auto BI = UseFunc->findBBInfo(SI.getParent());
1705 if (BI != nullptr)
1706 TotalCount = *BI->Count;
1707 // False Count
1708 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1709 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1710 if (MaxCount)
1711 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1712}
1713
1714void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1715 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1716 return;
1717 // FIXME: do not handle this yet.
1718 if (SI.getCondition()->getType()->isVectorTy())
1719 return;
1720
1721 switch (Mode) {
1722 case VM_counting:
1723 NSIs++;
1724 return;
1725 case VM_instrument:
1726 instrumentOneSelectInst(SI);
1727 return;
1728 case VM_annotate:
1729 annotateOneSelectInst(SI);
1730 return;
1731 }
1732
1733 llvm_unreachable("Unknown visiting mode");
1734}
1735
// NOTE(review): listing lines 1736 (the function signature), 1738 and 1740
// (the statements controlled by the two conditions below) are absent from this
// extract, so the per-kind results cannot be read here. The name
// getMaxNumAnnotations is inferred from the call in
// PGOUseFunc::annotateValueSites(uint32_t) -- confirm.
1737 if (ValueProfKind == IPVK_MemOPSize)
1739 if (ValueProfKind == llvm::IPVK_VTableTarget)
// All other value kinds use MaxNumAnnotations.
1741 return MaxNumAnnotations;
1742}
1743
1744// Traverse all valuesites and annotate the instructions for all value kind.
// NOTE(review): listing line 1746 (the condition guarding the early return
// below) is absent from this extract.
1745void PGOUseFunc::annotateValueSites() {
1747 return;
1748
1749 // Create the PGOFuncName meta data.
1750 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1751
1752 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1753 annotateValueSites(Kind);
1754}
1755
1756// Annotate the instructions for a specific value kind.
// Nothing is annotated, and a warning is emitted, when the number of value
// sites in the profile record differs from the number found in the IR.
// NOTE(review): listing lines 1774 (the end of the vtable condition) and 1793
// (the start of the annotation call inside the loop) are absent from this
// extract.
1757void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1758 assert(Kind <= IPVK_Last);
1759 unsigned ValueSiteIndex = 0;
1760
1761 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1762
1763 // Since there isn't a reliable or fast way for profile reader to tell if a
1764 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1765 // value profile collector over the function IR to find the instrumented sites
1766 // iff function profile records shows the number of instrumented vtable sites
1767 // is not zero. Function cfg already takes the number of instrumented
1768 // indirect call sites into account so it doesn't hash the number of
1769 // instrumented vtables; as a side effect it makes it easier to enable
1770 // profiling and profile use in two steps if needed.
1771 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1772 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1773 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1775 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1776 auto &ValueSites = FuncInfo.ValueSites[Kind];
1777 if (NumValueSites != ValueSites.size()) {
1778 auto &Ctx = M->getContext();
1779 Ctx.diagnose(DiagnosticInfoPGOProfile(
1780 M->getName().data(),
1781 Twine("Inconsistent number of value sites for ") +
1782 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1783 F.getName().str() +
1784 Twine("\", possibly due to the use of a stale profile."),
1785 DS_Warning));
1786 return;
1787 }
1788
// Sites are matched to profile data by position: the i-th candidate in the IR
// is annotated with the i-th value site of the record.
1789 for (VPCandidateInfo &I : ValueSites) {
1790 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1791 << "): Index = " << ValueSiteIndex << " out of "
1792 << NumValueSites << "\n");
1794 *M, *I.AnnotatedInst, ProfileRecord,
1795 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1796 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1797 ValueSiteIndex++;
1798 }
1799}
1800
1801// Collect the set of members for each Comdat in module M and store
1802// in ComdatMembers.
1804 Module &M,
1805 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1806 if (!DoComdatRenaming)
1807 return;
1808 for (Function &F : M)
1809 if (Comdat *C = F.getComdat())
1810 ComdatMembers.insert(std::make_pair(C, &F));
1811 for (GlobalVariable &GV : M.globals())
1812 if (Comdat *C = GV.getComdat())
1813 ComdatMembers.insert(std::make_pair(C, &GV));
1814 for (GlobalAlias &GA : M.aliases())
1815 if (Comdat *C = GA.getComdat())
1816 ComdatMembers.insert(std::make_pair(C, &GA));
1817}
1818
1819// Return true if we should not find instrumentation data for this function
1820static bool skipPGOUse(const Function &F) {
1821 if (F.isDeclaration())
1822 return true;
1823 // If there are too many critical edges, PGO might cause
1824 // compiler time problem. Skip PGO if the number of
1825 // critical edges execeed the threshold.
1826 unsigned NumCriticalEdges = 0;
1827 for (auto &BB : F) {
1828 const Instruction *TI = BB.getTerminator();
1829 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1830 if (isCriticalEdge(TI, I))
1831 NumCriticalEdges++;
1832 }
1833 }
1834 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1835 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1836 << ", NumCriticalEdges=" << NumCriticalEdges
1837 << " exceed the threshold. Skip PGO.\n");
1838 return true;
1839 }
1840 return false;
1841}
1842
1843// Return true if we should not instrument this function
1844static bool skipPGOGen(const Function &F) {
1845 if (skipPGOUse(F))
1846 return true;
1847 if (F.hasFnAttribute(llvm::Attribute::Naked))
1848 return true;
1849 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1850 return true;
1851 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1852 return true;
1853 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1854 return true;
1855 return false;
1856}
1857
1859 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1861 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1862 // For the context-sensitve instrumentation, we should have a separated pass
1863 // (before LTO/ThinLTO linking) to create these variables.
1865 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1866
1867 Triple TT(M.getTargetTriple());
1868 LLVMContext &Ctx = M.getContext();
1869 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1871 M.getName().data(),
1872 Twine("VTable value profiling is presently not "
1873 "supported for non-ELF object formats"),
1874 DS_Warning));
1875 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1876 collectComdatMembers(M, ComdatMembers);
1877
1878 for (auto &F : M) {
1879 if (skipPGOGen(F))
1880 continue;
1881 auto &TLI = LookupTLI(F);
1882 auto *BPI = LookupBPI(F);
1883 auto *BFI = LookupBFI(F);
1884 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1885 }
1886 return true;
1887}
1888
1891 createProfileFileNameVar(M, CSInstrName);
1892 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1893 // will be retained.
1895 if (ProfileSampling)
1900 return PA;
1901}
1902
1905 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1906 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1908 };
1909 auto LookupBPI = [&FAM](Function &F) {
1911 };
1912 auto LookupBFI = [&FAM](Function &F) {
1914 };
1915
1916 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1917 return PreservedAnalyses::all();
1918
1919 return PreservedAnalyses::none();
1920}
1921
1922// Using the ratio b/w sums of profile count values and BFI count values to
1923// adjust the func entry count.
1924static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1925 BranchProbabilityInfo &NBPI) {
1926 Function &F = Func.getFunc();
1927 BlockFrequencyInfo NBFI(F, NBPI, LI);
1928#ifndef NDEBUG
1929 auto BFIEntryCount = F.getEntryCount();
1930 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1931 "Invalid BFI Entrycount");
1932#endif
1933 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1934 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1935 for (auto &BBI : F) {
1936 uint64_t CountValue = 0;
1937 uint64_t BFICountValue = 0;
1938 if (!Func.findBBInfo(&BBI))
1939 continue;
1940 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1941 CountValue = *Func.getBBInfo(&BBI).Count;
1942 BFICountValue = *BFICount;
1943 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1944 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1945 }
1946 if (SumCount.isZero())
1947 return;
1948
1949 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1950 "Incorrect sum of BFI counts");
1951 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1952 return;
1953 double Scale = (SumCount / SumBFICount).convertToDouble();
1954 if (Scale < 1.001 && Scale > 0.999)
1955 return;
1956
1957 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1958 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1959 if (NewEntryCount == 0)
1960 NewEntryCount = 1;
1961 if (NewEntryCount != FuncEntryCount) {
1962 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1963 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1964 << ", entry_count " << FuncEntryCount << " --> "
1965 << NewEntryCount << "\n");
1966 }
1967}
1968
1969// Compare the profile count values with BFI count values, and print out
1970// the non-matching ones.
1971static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1973 uint64_t HotCountThreshold,
1975 Function &F = Func.getFunc();
1976 BlockFrequencyInfo NBFI(F, NBPI, LI);
1977 // bool PrintFunc = false;
1978 bool HotBBOnly = PGOVerifyHotBFI;
1979 StringRef Msg;
1981
1982 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1983 for (auto &BBI : F) {
1984 uint64_t CountValue = 0;
1985 uint64_t BFICountValue = 0;
1986
1987 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1988
1989 BBNum++;
1990 if (CountValue)
1991 NonZeroBBNum++;
1992 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1993 if (BFICount)
1994 BFICountValue = *BFICount;
1995
1996 if (HotBBOnly) {
1997 bool rawIsHot = CountValue >= HotCountThreshold;
1998 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1999 bool rawIsCold = CountValue <= ColdCountThreshold;
2000 bool ShowCount = false;
2001 if (rawIsHot && !BFIIsHot) {
2002 Msg = "raw-Hot to BFI-nonHot";
2003 ShowCount = true;
2004 } else if (rawIsCold && BFIIsHot) {
2005 Msg = "raw-Cold to BFI-Hot";
2006 ShowCount = true;
2007 }
2008 if (!ShowCount)
2009 continue;
2010 } else {
2011 if ((CountValue < PGOVerifyBFICutoff) &&
2012 (BFICountValue < PGOVerifyBFICutoff))
2013 continue;
2014 uint64_t Diff = (BFICountValue >= CountValue)
2015 ? BFICountValue - CountValue
2016 : CountValue - BFICountValue;
2017 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2018 continue;
2019 }
2020 BBMisMatchNum++;
2021
2022 ORE.emit([&]() {
2024 F.getSubprogram(), &BBI);
2025 Remark << "BB " << ore::NV("Block", BBI.getName())
2026 << " Count=" << ore::NV("Count", CountValue)
2027 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2028 if (!Msg.empty())
2029 Remark << " (" << Msg << ")";
2030 return Remark;
2031 });
2032 }
2033 if (BBMisMatchNum)
2034 ORE.emit([&]() {
2035 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2036 F.getSubprogram(), &F.getEntryBlock())
2037 << "In Func " << ore::NV("Function", F.getName())
2038 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2039 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2040 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2041 });
2042}
2043
2045 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2046 vfs::FileSystem &FS,
2047 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2050 ProfileSummaryInfo *PSI, bool IsCS) {
2051 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2052 auto &Ctx = M.getContext();
2053 // Read the counter array from file.
2054 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2055 ProfileRemappingFileName);
2056 if (Error E = ReaderOrErr.takeError()) {
2057 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2058 Ctx.diagnose(
2059 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2060 });
2061 return false;
2062 }
2063
2064 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2065 std::move(ReaderOrErr.get());
2066 if (!PGOReader) {
2067 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2068 StringRef("Cannot get PGOReader")));
2069 return false;
2070 }
2071 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2072 return false;
2073
2074 // TODO: might need to change the warning once the clang option is finalized.
2075 if (!PGOReader->isIRLevelProfile()) {
2076 Ctx.diagnose(DiagnosticInfoPGOProfile(
2077 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2078 return false;
2079 }
2080 if (PGOReader->functionEntryOnly()) {
2081 Ctx.diagnose(DiagnosticInfoPGOProfile(
2082 ProfileFileName.data(),
2083 "Function entry profiles are not yet supported for optimization"));
2084 return false;
2085 }
2086
2088 for (GlobalVariable &G : M.globals()) {
2089 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2090 continue;
2091
2092 // Create the PGOFuncName meta data.
2093 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2094 }
2095 }
2096
2097 // Add the profile summary (read from the header of the indexed summary) here
2098 // so that we can use it below when reading counters (which checks if the
2099 // function should be marked with a cold or inlinehint attribute).
2100 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2103 PSI->refresh();
2104
2105 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2106 collectComdatMembers(M, ComdatMembers);
2107 std::vector<Function *> HotFunctions;
2108 std::vector<Function *> ColdFunctions;
2109
2110 // If the profile marked as always instrument the entry BB, do the
2111 // same. Note this can be overwritten by the internal option in CFGMST.h
2112 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2113 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2114 InstrumentFuncEntry = PGOInstrumentEntry;
2115 InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2116
2117 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2118 for (auto &F : M) {
2119 if (skipPGOUse(F))
2120 continue;
2121 auto &TLI = LookupTLI(F);
2122 auto *BPI = LookupBPI(F);
2123 auto *BFI = LookupBFI(F);
2124 if (!HasSingleByteCoverage) {
2125 // Split indirectbr critical edges here before computing the MST rather
2126 // than later in getInstrBB() to avoid invalidating it.
2127 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2128 BFI);
2129 }
2130 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2131 InstrumentFuncEntry, HasSingleByteCoverage);
2132 if (HasSingleByteCoverage) {
2133 Func.populateCoverage(PGOReader.get());
2134 continue;
2135 }
2136 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2137 // it means the profile for the function is unrepresentative and this
2138 // function is actually hot / warm. We will reset the function hot / cold
2139 // attribute and drop all the profile counters.
2141 bool AllZeros = false;
2142 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2143 continue;
2144 if (AllZeros) {
2145 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2146 if (Func.getProgramMaxCount() != 0)
2147 ColdFunctions.push_back(&F);
2148 continue;
2149 }
2150 if (PseudoKind != InstrProfRecord::NotPseudo) {
2151 // Clear function attribute cold.
2152 if (F.hasFnAttribute(Attribute::Cold))
2153 F.removeFnAttr(Attribute::Cold);
2154 // Set function attribute as hot.
2155 if (PseudoKind == InstrProfRecord::PseudoHot)
2156 F.addFnAttr(Attribute::Hot);
2157 continue;
2158 }
2159 Func.populateCounters();
2160 Func.setBranchWeights();
2161 Func.annotateValueSites();
2162 Func.annotateIrrLoopHeaderWeights();
2163 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2164 if (FreqAttr == PGOUseFunc::FFA_Cold)
2165 ColdFunctions.push_back(&F);
2166 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2167 HotFunctions.push_back(&F);
2168 if (PGOViewCounts != PGOVCT_None &&
2169 (ViewBlockFreqFuncName.empty() ||
2170 F.getName() == ViewBlockFreqFuncName)) {
2172 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2173 std::make_unique<BranchProbabilityInfo>(F, LI);
2174 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2175 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2177 NewBFI->view();
2178 else if (PGOViewCounts == PGOVCT_Text) {
2179 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2180 NewBFI->print(dbgs());
2181 }
2182 }
2184 (ViewBlockFreqFuncName.empty() ||
2185 F.getName() == ViewBlockFreqFuncName)) {
2187 if (ViewBlockFreqFuncName.empty())
2188 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2189 else
2190 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2191 else if (PGOViewRawCounts == PGOVCT_Text) {
2192 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2193 Func.dumpInfo();
2194 }
2195 }
2196
2199 BranchProbabilityInfo NBPI(F, LI);
2200
2201 // Fix func entry count.
2202 if (PGOFixEntryCount)
2203 fixFuncEntryCount(Func, LI, NBPI);
2204
2205 // Verify BlockFrequency information.
2206 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2207 if (PGOVerifyHotBFI) {
2208 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2210 }
2211 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2212 }
2213 }
2214
2215 // Set function hotness attribute from the profile.
2216 // We have to apply these attributes at the end because their presence
2217 // can affect the BranchProbabilityInfo of any callers, resulting in an
2218 // inconsistent MST between prof-gen and prof-use.
2219 for (auto &F : HotFunctions) {
2220 F->addFnAttr(Attribute::InlineHint);
2221 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2222 << "\n");
2223 }
2224 for (auto &F : ColdFunctions) {
2225 // Only set when there is no Attribute::Hot set by the user. For Hot
2226 // attribute, user's annotation has the precedence over the profile.
2227 if (F->hasFnAttribute(Attribute::Hot)) {
2228 auto &Ctx = M.getContext();
2229 std::string Msg = std::string("Function ") + F->getName().str() +
2230 std::string(" is annotated as a hot function but"
2231 " the profile is cold");
2232 Ctx.diagnose(
2233 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2234 continue;
2235 }
2236 F->addFnAttr(Attribute::Cold);
2237 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2238 << "\n");
2239 }
2240 return true;
2241}
2242
2244 std::string Filename, std::string RemappingFilename, bool IsCS,
2246 : ProfileFileName(std::move(Filename)),
2247 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2248 FS(std::move(VFS)) {
2249 if (!PGOTestProfileFile.empty())
2250 ProfileFileName = PGOTestProfileFile;
2251 if (!PGOTestProfileRemappingFile.empty())
2252 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2253 if (!FS)
2255}
2256
2259
2260 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2261 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2263 };
2264 auto LookupBPI = [&FAM](Function &F) {
2266 };
2267 auto LookupBFI = [&FAM](Function &F) {
2269 };
2270
2271 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2272 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2273 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2274 return PreservedAnalyses::all();
2275
2276 return PreservedAnalyses::none();
2277}
2278
2279static std::string getSimpleNodeName(const BasicBlock *Node) {
2280 if (!Node->getName().empty())
2281 return Node->getName().str();
2282
2283 std::string SimpleNodeName;
2284 raw_string_ostream OS(SimpleNodeName);
2285 Node->printAsOperand(OS, false);
2286 return SimpleNodeName;
2287}
2288
2290 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2291 assert(MaxCount > 0 && "Bad max count");
2292 uint64_t Scale = calculateCountScale(MaxCount);
2294 for (const auto &ECI : EdgeCounts)
2295 Weights.push_back(scaleBranchCount(ECI, Scale));
2296
2297 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2298 : Weights) {
2299 dbgs() << W << " ";
2300 } dbgs() << "\n";);
2301
2302 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2303
2304 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2306 std::string BrCondStr = getBranchCondString(TI);
2307 if (BrCondStr.empty())
2308 return;
2309
2310 uint64_t WSum =
2311 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2312 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2313 uint64_t TotalCount =
2314 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2315 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2316 Scale = calculateCountScale(WSum);
2317 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2318 scaleBranchCount(WSum, Scale));
2319 std::string BranchProbStr;
2320 raw_string_ostream OS(BranchProbStr);
2321 OS << BP;
2322 OS << " (total count : " << TotalCount << ")";
2323 OS.flush();
2324 Function *F = TI->getParent()->getParent();
2326 ORE.emit([&]() {
2327 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2328 << BrCondStr << " is true with probability : " << BranchProbStr;
2329 });
2330 }
2331}
2332
2333namespace llvm {
2334
2336 MDBuilder MDB(M->getContext());
2337 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2338 MDB.createIrrLoopHeaderWeight(Count));
2339}
2340
2341template <> struct GraphTraits<PGOUseFunc *> {
2342 using NodeRef = const BasicBlock *;
2345
2346 static NodeRef getEntryNode(const PGOUseFunc *G) {
2347 return &G->getFunc().front();
2348 }
2349
2351 return succ_begin(N);
2352 }
2353
2354 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2355
2356 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2357 return nodes_iterator(G->getFunc().begin());
2358 }
2359
2360 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2361 return nodes_iterator(G->getFunc().end());
2362 }
2363};
2364
2365template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2366 explicit DOTGraphTraits(bool isSimple = false)
2368
2369 static std::string getGraphName(const PGOUseFunc *G) {
2370 return std::string(G->getFunc().getName());
2371 }
2372
2373 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2374 std::string Result;
2375 raw_string_ostream OS(Result);
2376
2377 OS << getSimpleNodeName(Node) << ":\\l";
2378 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2379 OS << "Count : ";
2380 if (BI && BI->Count)
2381 OS << *BI->Count << "\\l";
2382 else
2383 OS << "Unknown\\l";
2384
2385 if (!PGOInstrSelect)
2386 return Result;
2387
2388 for (const Instruction &I : *Node) {
2389 if (!isa<SelectInst>(&I))
2390 continue;
2391 // Display scaled counts for SELECT instruction:
2392 OS << "SELECT : { T = ";
2393 uint64_t TC, FC;
2394 bool HasProf = extractBranchWeights(I, TC, FC);
2395 if (!HasProf)
2396 OS << "Unknown, F = Unknown }\\l";
2397 else
2398 OS << TC << ", F = " << FC << " }\\l";
2399 }
2400 return Result;
2401 }
2402};
2403
2404} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:988
Class for arbitrary precision integers.
Definition: APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:289
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:544
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2044
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:488
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:409
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1067
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
Tuple of metadata.
Definition: Metadata.h:1470
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:204
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:379
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1408
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:368
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1412
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:467
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1282
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:267
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then clean up.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1464
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1487
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:823
std::vector< uint64_t > Counts
Definition: InstrProf.h:824
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:921
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1023
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1004