LLVM 19.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, These two passes need to invoke at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44// class of PGOEdge and BBInfo, respectively. They contains extra data structure
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
116#include <algorithm>
117#include <cassert>
118#include <cstdint>
119#include <memory>
120#include <numeric>
121#include <optional>
122#include <string>
123#include <unordered_map>
124#include <utility>
125#include <vector>
126
127using namespace llvm;
130
131#define DEBUG_TYPE "pgo-instrumentation"
132
133STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
134STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
135STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
136STATISTIC(NumOfPGOEdge, "Number of edges.");
137STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
138STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
139STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
140STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
141STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
142STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
143STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
144STATISTIC(NumOfCSPGOSelectInsts,
145 "Number of select instruction instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOMemIntrinsics,
147 "Number of mem intrinsics instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
149STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
150STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
151STATISTIC(NumOfCSPGOFunc,
152 "Number of functions having valid profile counts in CSPGO.");
153STATISTIC(NumOfCSPGOMismatch,
154 "Number of functions having mismatch profile in CSPGO.");
155STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
156STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
157
158// Command line option to specify the file to read profile from. This is
159// mainly used for testing.
161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
162 cl::value_desc("filename"),
163 cl::desc("Specify the path of profile data file. This is"
164 "mainly for test purpose."));
166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
167 cl::value_desc("filename"),
168 cl::desc("Specify the path of profile remapping file. This is mainly for "
169 "test purpose."));
170
171// Command line option to disable value profiling. The default is false:
172// i.e. value profiling is enabled by default. This is for debug purpose.
173static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
175 cl::desc("Disable Value Profiling"));
176
177// Command line option to set the maximum number of VP annotations to write to
178// the metadata for a single indirect call callsite.
180 "icp-max-annotations", cl::init(3), cl::Hidden,
181 cl::desc("Max number of annotations for a single indirect "
182 "call callsite"));
183
184// Command line option to set the maximum number of value annotations
185// to write to the metadata for a single memop intrinsic.
187 "memop-max-annotations", cl::init(4), cl::Hidden,
188 cl::desc("Max number of preicise value annotations for a single memop"
189 "intrinsic"));
190
191// Command line option to control appending FunctionHash to the name of a COMDAT
192// function. This is to avoid the hash mismatch caused by the preinliner.
194 "do-comdat-renaming", cl::init(false), cl::Hidden,
195 cl::desc("Append function hash to the name of COMDAT function to avoid "
196 "function hash mismatch due to the preinliner"));
197
198namespace llvm {
199// Command line option to enable/disable the warning about missing profile
200// information.
201cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
203 cl::desc("Use this option to turn on/off "
204 "warnings about missing profile data for "
205 "functions."));
206
207// Command line option to enable/disable the warning about a hash mismatch in
208// the profile data.
210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
211 cl::desc("Use this option to turn off/on "
212 "warnings about profile cfg mismatch."));
213
214// Command line option to enable/disable the warning about a hash mismatch in
215// the profile data for Comdat functions, which often turns out to be false
216// positive due to the pre-instrumentation inline.
218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
219 cl::desc("The option is used to turn on/off "
220 "warnings about hash mismatch for comdat "
221 "or weak functions."));
222} // namespace llvm
223
224// Command line option to enable/disable select instruction instrumentation.
225static cl::opt<bool>
226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
227 cl::desc("Use this option to turn on/off SELECT "
228 "instruction instrumentation. "));
229
230// Command line option to turn on CFG dot or text dump of raw profile counts
232 "pgo-view-raw-counts", cl::Hidden,
233 cl::desc("A boolean option to show CFG dag or text "
234 "with raw profile counts from "
235 "profile data. See also option "
236 "-pgo-view-counts. To limit graph "
237 "display to only one function, use "
238 "filtering option -view-bfi-func-name."),
239 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
240 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
241 clEnumValN(PGOVCT_Text, "text", "show in text.")));
242
243// Command line option to enable/disable memop intrinsic call.size profiling.
244static cl::opt<bool>
245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
246 cl::desc("Use this option to turn on/off "
247 "memory intrinsic size profiling."));
248
249// Emit branch probability as optimization remarks.
250static cl::opt<bool>
251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
252 cl::desc("When this option is on, the annotated "
253 "branch probability will be emitted as "
254 "optimization remarks: -{Rpass|"
255 "pass-remarks}=pgo-instrumentation"));
256
258 "pgo-instrument-entry", cl::init(false), cl::Hidden,
259 cl::desc("Force to instrument function entry basicblock."));
260
262 "pgo-function-entry-coverage", cl::Hidden,
263 cl::desc(
264 "Use this option to enable function entry coverage instrumentation."));
265
267 "pgo-block-coverage",
268 cl::desc("Use this option to enable basic block coverage instrumentation"));
269
270static cl::opt<bool>
271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
272 cl::desc("Create a dot file of CFGs with block "
273 "coverage inference information"));
274
276 "pgo-temporal-instrumentation",
277 cl::desc("Use this option to enable temporal instrumentation"));
278
279static cl::opt<bool>
280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
281 cl::desc("Fix function entry count in profile use."));
282
284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
285 cl::desc("Print out the non-match BFI count if a hot raw profile count "
286 "becomes non-hot, or a cold raw profile count becomes hot. "
287 "The print is enabled under -Rpass-analysis=pgo, or "
288 "internal option -pass-remakrs-analysis=pgo."));
289
291 "pgo-verify-bfi", cl::init(false), cl::Hidden,
292 cl::desc("Print out mismatched BFI counts after setting profile metadata "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remakrs-analysis=pgo."));
295
297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
298 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
299 "mismatched BFI if the difference percentage is greater than "
300 "this value (in percentage)."));
301
303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
305 "profile count value is below."));
306
308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
309 cl::value_desc("function name"),
310 cl::desc("Trace the hash of the function with this name."));
311
313 "pgo-function-size-threshold", cl::Hidden,
314 cl::desc("Do not instrument functions smaller than this threshold."));
315
317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
318 cl::desc("Do not instrument functions with the number of critical edges "
319 " greater than this threshold."));
320
321namespace llvm {
322// Command line option to turn on CFG dot dump after profile annotation.
323// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
325
326// Command line option to specify the name of the function for CFG dump
327// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
329
330// Command line option to enable vtable value profiling. Defined in
331// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
334} // namespace llvm
335
336// Return a string describing the branch condition that can be
337// used in static branch probability heuristics:
338static std::string getBranchCondString(Instruction *TI) {
339 BranchInst *BI = dyn_cast<BranchInst>(TI);
340 if (!BI || !BI->isConditional())
341 return std::string();
342
343 Value *Cond = BI->getCondition();
344 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
345 if (!CI)
346 return std::string();
347
348 std::string result;
349 raw_string_ostream OS(result);
350 OS << CI->getPredicate() << "_";
351 CI->getOperand(0)->getType()->print(OS, true);
352
353 Value *RHS = CI->getOperand(1);
354 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
355 if (CV) {
356 if (CV->isZero())
357 OS << "_Zero";
358 else if (CV->isOne())
359 OS << "_One";
360 else if (CV->isMinusOne())
361 OS << "_MinusOne";
362 else
363 OS << "_Const";
364 }
365 OS.flush();
366 return result;
367}
368
369static const char *ValueProfKindDescr[] = {
370#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
372};
373
374// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
375// aware this is an ir_level profile so it can set the version flag.
377 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
378 Type *IntTy64 = Type::getInt64Ty(M.getContext());
379 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
380 if (IsCS)
381 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
383 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
385 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
387 ProfileVersion |=
388 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
390 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
392 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
393 auto IRLevelVersionVariable = new GlobalVariable(
394 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
395 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
396 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
397 Triple TT(M.getTargetTriple());
398 if (TT.supportsCOMDAT()) {
399 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
400 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
401 }
402 return IRLevelVersionVariable;
403}
404
405namespace {
406
407/// The select instruction visitor plays three roles specified
408/// by the mode. In \c VM_counting mode, it simply counts the number of
409/// select instructions. In \c VM_instrument mode, it inserts code to count
410/// the number times TrueValue of select is taken. In \c VM_annotate mode,
411/// it reads the profile data and annotate the select instruction with metadata.
412enum VisitMode { VM_counting, VM_instrument, VM_annotate };
413class PGOUseFunc;
414
415/// Instruction Visitor class to visit select instructions.
416struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
417 Function &F;
418 unsigned NSIs = 0; // Number of select instructions instrumented.
419 VisitMode Mode = VM_counting; // Visiting mode.
420 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
421 unsigned TotalNumCtrs = 0; // Total number of counters
422 GlobalVariable *FuncNameVar = nullptr;
423 uint64_t FuncHash = 0;
424 PGOUseFunc *UseFunc = nullptr;
425 bool HasSingleByteCoverage;
426
427 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
428 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
429
430 void countSelects() {
431 NSIs = 0;
432 Mode = VM_counting;
433 visit(F);
434 }
435
436 // Visit the IR stream and instrument all select instructions. \p
437 // Ind is a pointer to the counter index variable; \p TotalNC
438 // is the total number of counters; \p FNV is the pointer to the
439 // PGO function name var; \p FHash is the function hash.
440 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
441 uint64_t FHash) {
442 Mode = VM_instrument;
443 CurCtrIdx = Ind;
444 TotalNumCtrs = TotalNC;
445 FuncHash = FHash;
446 FuncNameVar = FNV;
447 visit(F);
448 }
449
450 // Visit the IR stream and annotate all select instructions.
451 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
452 Mode = VM_annotate;
453 UseFunc = UF;
454 CurCtrIdx = Ind;
455 visit(F);
456 }
457
458 void instrumentOneSelectInst(SelectInst &SI);
459 void annotateOneSelectInst(SelectInst &SI);
460
461 // Visit \p SI instruction and perform tasks according to visit mode.
462 void visitSelectInst(SelectInst &SI);
463
464 // Return the number of select instructions. This needs be called after
465 // countSelects().
466 unsigned getNumOfSelectInsts() const { return NSIs; }
467};
468
469/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
470/// based instrumentation.
471/// Note that the CFG can be a multi-graph. So there might be multiple edges
472/// with the same SrcBB and DestBB.
473struct PGOEdge {
474 BasicBlock *SrcBB;
475 BasicBlock *DestBB;
476 uint64_t Weight;
477 bool InMST = false;
478 bool Removed = false;
479 bool IsCritical = false;
480
481 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
482 : SrcBB(Src), DestBB(Dest), Weight(W) {}
483
484 /// Return the information string of an edge.
485 std::string infoString() const {
486 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
487 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
488 .str();
489 }
490};
491
492/// This class stores the auxiliary information for each BB in the MST.
493struct PGOBBInfo {
494 PGOBBInfo *Group;
496 uint32_t Rank = 0;
497
498 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
499
500 /// Return the information string of this object.
501 std::string infoString() const {
502 return (Twine("Index=") + Twine(Index)).str();
503 }
504};
505
506// This class implements the CFG edges. Note the CFG can be a multi-graph.
507template <class Edge, class BBInfo> class FuncPGOInstrumentation {
508private:
509 Function &F;
510
511 // Is this is context-sensitive instrumentation.
512 bool IsCS;
513
514 // A map that stores the Comdat group in function F.
515 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
516
518
519 void computeCFGHash();
520 void renameComdatFunction();
521
522public:
523 const TargetLibraryInfo &TLI;
524 std::vector<std::vector<VPCandidateInfo>> ValueSites;
525 SelectInstVisitor SIVisitor;
526 std::string FuncName;
527 std::string DeprecatedFuncName;
528 GlobalVariable *FuncNameVar;
529
530 // CFG hash value for this function.
531 uint64_t FunctionHash = 0;
532
533 // The Minimum Spanning Tree of function CFG.
535
536 const std::optional<BlockCoverageInference> BCI;
537
538 static std::optional<BlockCoverageInference>
539 constructBCI(Function &Func, bool HasSingleByteCoverage,
540 bool InstrumentFuncEntry) {
541 if (HasSingleByteCoverage)
542 return BlockCoverageInference(Func, InstrumentFuncEntry);
543 return {};
544 }
545
546 // Collect all the BBs that will be instrumented, and store them in
547 // InstrumentBBs.
548 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
549
550 // Give an edge, find the BB that will be instrumented.
551 // Return nullptr if there is no BB to be instrumented.
552 BasicBlock *getInstrBB(Edge *E);
553
554 // Return the auxiliary BB information.
555 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
556
557 // Return the auxiliary BB information if available.
558 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
559
560 // Dump edges and BB information.
561 void dumpInfo(StringRef Str = "") const {
562 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
563 " Hash: " + Twine(FunctionHash) + "\t" + Str);
564 }
565
566 FuncPGOInstrumentation(
567 Function &Func, TargetLibraryInfo &TLI,
568 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
569 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
570 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
571 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
572 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
573 TLI(TLI), ValueSites(IPVK_Last + 1),
574 SIVisitor(Func, HasSingleByteCoverage),
575 MST(F, InstrumentFuncEntry, BPI, BFI),
576 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
577 if (BCI && PGOViewBlockCoverageGraph)
578 BCI->viewBlockCoverageGraph();
579 // This should be done before CFG hash computation.
580 SIVisitor.countSelects();
581 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
582 if (!IsCS) {
583 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
584 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
585 NumOfPGOBB += MST.bbInfoSize();
586 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
588 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
589 } else {
590 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
591 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
592 NumOfCSPGOBB += MST.bbInfoSize();
593 }
594
595 FuncName = getIRPGOFuncName(F);
596 DeprecatedFuncName = getPGOFuncName(F);
597 computeCFGHash();
598 if (!ComdatMembers.empty())
599 renameComdatFunction();
600 LLVM_DEBUG(dumpInfo("after CFGMST"));
601
602 for (const auto &E : MST.allEdges()) {
603 if (E->Removed)
604 continue;
605 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
606 if (!E->InMST)
607 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
608 }
609
610 if (CreateGlobalVar)
611 FuncNameVar = createPGOFuncNameVar(F, FuncName);
612 }
613};
614
615} // end anonymous namespace
616
617// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
618// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
619// of selects, indirect calls, mem ops and edges.
620template <class Edge, class BBInfo>
621void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
622 std::vector<uint8_t> Indexes;
623 JamCRC JC;
624 for (auto &BB : F) {
625 for (BasicBlock *Succ : successors(&BB)) {
626 auto BI = findBBInfo(Succ);
627 if (BI == nullptr)
628 continue;
629 uint32_t Index = BI->Index;
630 for (int J = 0; J < 4; J++)
631 Indexes.push_back((uint8_t)(Index >> (J * 8)));
632 }
633 }
634 JC.update(Indexes);
635
636 JamCRC JCH;
637 // The higher 32 bits.
638 auto updateJCH = [&JCH](uint64_t Num) {
639 uint8_t Data[8];
641 JCH.update(Data);
642 };
643 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
644 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
645 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
646 if (BCI) {
647 updateJCH(BCI->getInstrumentedBlocksHash());
648 } else {
649 updateJCH((uint64_t)MST.numEdges());
650 }
651
652 // Hash format for context sensitive profile. Reserve 4 bits for other
653 // information.
654 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
655
656 // Reserve bit 60-63 for other information purpose.
657 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
658 if (IsCS)
660 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
661 << " CRC = " << JC.getCRC()
662 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
663 << ", Edges = " << MST.numEdges() << ", ICSites = "
664 << ValueSites[IPVK_IndirectCallTarget].size()
665 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
666 << ", High32 CRC = " << JCH.getCRC()
667 << ", Hash = " << FunctionHash << "\n";);
668
669 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
670 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
671 << " in building " << F.getParent()->getSourceFileName() << "\n";
672}
673
674// Check if we can safely rename this Comdat function.
675static bool canRenameComdat(
676 Function &F,
677 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
678 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
679 return false;
680
681 // FIXME: Current only handle those Comdat groups that only containing one
682 // function.
683 // (1) For a Comdat group containing multiple functions, we need to have a
684 // unique postfix based on the hashes for each function. There is a
685 // non-trivial code refactoring to do this efficiently.
686 // (2) Variables can not be renamed, so we can not rename Comdat function in a
687 // group including global vars.
688 Comdat *C = F.getComdat();
689 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
690 assert(!isa<GlobalAlias>(CM.second));
691 Function *FM = dyn_cast<Function>(CM.second);
692 if (FM != &F)
693 return false;
694 }
695 return true;
696}
697
698// Append the CFGHash to the Comdat function name.
699template <class Edge, class BBInfo>
700void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
701 if (!canRenameComdat(F, ComdatMembers))
702 return;
703 std::string OrigName = F.getName().str();
704 std::string NewFuncName =
705 Twine(F.getName() + "." + Twine(FunctionHash)).str();
706 F.setName(Twine(NewFuncName));
708 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
709 Comdat *NewComdat;
710 Module *M = F.getParent();
711 // For AvailableExternallyLinkage functions, change the linkage to
712 // LinkOnceODR and put them into comdat. This is because after renaming, there
713 // is no backup external copy available for the function.
714 if (!F.hasComdat()) {
716 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
718 F.setComdat(NewComdat);
719 return;
720 }
721
722 // This function belongs to a single function Comdat group.
723 Comdat *OrigComdat = F.getComdat();
724 std::string NewComdatName =
725 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
726 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
727 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
728
729 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
730 // Must be a function.
731 cast<Function>(CM.second)->setComdat(NewComdat);
732 }
733}
734
735/// Collect all the BBs that will be instruments and add them to
736/// `InstrumentBBs`.
737template <class Edge, class BBInfo>
738void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
739 std::vector<BasicBlock *> &InstrumentBBs) {
740 if (BCI) {
741 for (auto &BB : F)
742 if (BCI->shouldInstrumentBlock(BB))
743 InstrumentBBs.push_back(&BB);
744 return;
745 }
746
747 // Use a worklist as we will update the vector during the iteration.
748 std::vector<Edge *> EdgeList;
749 EdgeList.reserve(MST.numEdges());
750 for (const auto &E : MST.allEdges())
751 EdgeList.push_back(E.get());
752
753 for (auto &E : EdgeList) {
754 BasicBlock *InstrBB = getInstrBB(E);
755 if (InstrBB)
756 InstrumentBBs.push_back(InstrBB);
757 }
758}
759
760// Given a CFG E to be instrumented, find which BB to place the instrumented
761// code. The function will split the critical edge if necessary.
762template <class Edge, class BBInfo>
763BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
764 if (E->InMST || E->Removed)
765 return nullptr;
766
767 BasicBlock *SrcBB = E->SrcBB;
768 BasicBlock *DestBB = E->DestBB;
769 // For a fake edge, instrument the real BB.
770 if (SrcBB == nullptr)
771 return DestBB;
772 if (DestBB == nullptr)
773 return SrcBB;
774
775 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
776 // There are basic blocks (such as catchswitch) cannot be instrumented.
777 // If the returned first insertion point is the end of BB, skip this BB.
778 if (BB->getFirstInsertionPt() == BB->end())
779 return nullptr;
780 return BB;
781 };
782
783 // Instrument the SrcBB if it has a single successor,
784 // otherwise, the DestBB if this is not a critical edge.
785 Instruction *TI = SrcBB->getTerminator();
786 if (TI->getNumSuccessors() <= 1)
787 return canInstrument(SrcBB);
788 if (!E->IsCritical)
789 return canInstrument(DestBB);
790
791 // Some IndirectBr critical edges cannot be split by the previous
792 // SplitIndirectBrCriticalEdges call. Bail out.
793 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
794 BasicBlock *InstrBB =
795 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
796 if (!InstrBB) {
798 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
799 return nullptr;
800 }
801 // For a critical edge, we have to split. Instrument the newly
802 // created BB.
803 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
804 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
805 << " --> " << getBBInfo(DestBB).Index << "\n");
806 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
807 MST.addEdge(SrcBB, InstrBB, 0);
808 // Second one: Add new edge of InstrBB->DestBB.
809 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
810 NewEdge1.InMST = true;
811 E->Removed = true;
812
813 return canInstrument(InstrBB);
814}
815
816// When generating value profiling calls on Windows routines that make use of
817// handler funclets for exception processing an operand bundle needs to attached
818// to the called function. This routine will set \p OpBundles to contain the
819// funclet information, if any is needed, that should be placed on the generated
820// value profiling call for the value profile candidate call.
821static void
825 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
826 if (!OrigCall)
827 return;
828
829 if (!isa<IntrinsicInst>(OrigCall)) {
830 // The instrumentation call should belong to the same funclet as a
831 // non-intrinsic call, so just copy the operand bundle, if any exists.
832 std::optional<OperandBundleUse> ParentFunclet =
833 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
834 if (ParentFunclet)
835 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
836 } else {
837 // Intrinsics or other instructions do not get funclet information from the
838 // front-end. Need to use the BlockColors that was computed by the routine
839 // colorEHFunclets to determine whether a funclet is needed.
840 if (!BlockColors.empty()) {
841 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
842 assert(CV.size() == 1 && "non-unique color for block!");
843 Instruction *EHPad = CV.front()->getFirstNonPHI();
844 if (EHPad->isEHPad())
845 OpBundles.emplace_back("funclet", EHPad);
846 }
847 }
848}
849
850// Visit all edge and instrument the edges not in MST, and do value profiling.
851// Critical edges will be split.
855 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
856 bool IsCS) {
857 if (!PGOBlockCoverage) {
858 // Split indirectbr critical edges here before computing the MST rather than
859 // later in getInstrBB() to avoid invalidating it.
860 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
861 }
862
863 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
864 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
866
867 auto Name = FuncInfo.FuncNameVar;
868 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
869 FuncInfo.FunctionHash);
871 auto &EntryBB = F.getEntryBlock();
872 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
873 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
874 // i32 <index>)
875 Builder.CreateCall(
876 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
877 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
878 return;
879 }
880
881 std::vector<BasicBlock *> InstrumentBBs;
882 FuncInfo.getInstrumentBBs(InstrumentBBs);
883 unsigned NumCounters =
884 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
885
886 uint32_t I = 0;
888 NumCounters += PGOBlockCoverage ? 8 : 1;
889 auto &EntryBB = F.getEntryBlock();
890 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
891 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
892 // i32 <index>)
893 Builder.CreateCall(
894 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
895 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
896 I += PGOBlockCoverage ? 8 : 1;
897 }
898
899 for (auto *InstrBB : InstrumentBBs) {
900 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
901 assert(Builder.GetInsertPoint() != InstrBB->end() &&
902 "Cannot get the Instrumentation point");
903 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
904 // i32 <index>)
905 Builder.CreateCall(
907 ? Intrinsic::instrprof_cover
908 : Intrinsic::instrprof_increment),
909 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
910 }
911
912 // Now instrument select instructions:
913 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
914 FuncInfo.FunctionHash);
915 assert(I == NumCounters);
916
918 return;
919
920 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
921
922 // Intrinsic function calls do not have funclet operand bundles needed for
923 // Windows exception handling attached to them. However, if value profiling is
924 // inserted for one of these calls, then a funclet value will need to be set
925 // on the instrumentation call based on the funclet coloring.
927 if (F.hasPersonalityFn() &&
928 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
929 BlockColors = colorEHFunclets(F);
930
931 // For each VP Kind, walk the VP candidates and instrument each one.
932 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
933 unsigned SiteIndex = 0;
934 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
935 continue;
936
937 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
938 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
939 << " site: CallSite Index = " << SiteIndex << "\n");
940
941 IRBuilder<> Builder(Cand.InsertPt);
942 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
943 "Cannot get the Instrumentation point");
944
945 Value *ToProfile = nullptr;
946 if (Cand.V->getType()->isIntegerTy())
947 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
948 else if (Cand.V->getType()->isPointerTy())
949 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
950 assert(ToProfile && "value profiling Value is of unexpected type");
951
953 populateEHOperandBundle(Cand, BlockColors, OpBundles);
954 Builder.CreateCall(
955 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
956 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
957 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
958 OpBundles);
959 }
960 } // IPVK_First <= Kind <= IPVK_Last
961}
962
963namespace {
964
965// This class represents a CFG edge in profile use compilation.
966struct PGOUseEdge : public PGOEdge {
967 using PGOEdge::PGOEdge;
968
969 std::optional<uint64_t> Count;
970
971 // Set edge count value
972 void setEdgeCount(uint64_t Value) { Count = Value; }
973
974 // Return the information string for this object.
975 std::string infoString() const {
976 if (!Count)
977 return PGOEdge::infoString();
978 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
979 }
980};
981
982using DirectEdges = SmallVector<PGOUseEdge *, 2>;
983
984// This class stores the auxiliary information for each BB.
985struct PGOUseBBInfo : public PGOBBInfo {
986 std::optional<uint64_t> Count;
987 int32_t UnknownCountInEdge = 0;
988 int32_t UnknownCountOutEdge = 0;
989 DirectEdges InEdges;
990 DirectEdges OutEdges;
991
992 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
993
994 // Set the profile count value for this BB.
995 void setBBInfoCount(uint64_t Value) { Count = Value; }
996
997 // Return the information string of this object.
998 std::string infoString() const {
999 if (!Count)
1000 return PGOBBInfo::infoString();
1001 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1002 }
1003
1004 // Add an OutEdge and update the edge count.
1005 void addOutEdge(PGOUseEdge *E) {
1006 OutEdges.push_back(E);
1007 UnknownCountOutEdge++;
1008 }
1009
1010 // Add an InEdge and update the edge count.
1011 void addInEdge(PGOUseEdge *E) {
1012 InEdges.push_back(E);
1013 UnknownCountInEdge++;
1014 }
1015};
1016
1017} // end anonymous namespace
1018
1019// Sum up the count values for all the edges.
1021 uint64_t Total = 0;
1022 for (const auto &E : Edges) {
1023 if (E->Removed)
1024 continue;
1025 if (E->Count)
1026 Total += *E->Count;
1027 }
1028 return Total;
1029}
1030
1031namespace {
1032
1033class PGOUseFunc {
1034public:
1035 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1036 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1038 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1039 bool HasSingleByteCoverage)
1040 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1041 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1042 InstrumentFuncEntry, HasSingleByteCoverage),
1043 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1044
1045 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1046
1047 // Read counts for the instrumented BB from profile.
1048 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1050
1051 // Populate the counts for all BBs.
1052 void populateCounters();
1053
1054 // Set block coverage based on profile coverage values.
1055 void populateCoverage(IndexedInstrProfReader *PGOReader);
1056
1057 // Set the branch weights based on the count values.
1058 void setBranchWeights();
1059
1060 // Annotate the value profile call sites for all value kind.
1061 void annotateValueSites();
1062
1063 // Annotate the value profile call sites for one value kind.
1064 void annotateValueSites(uint32_t Kind);
1065
1066 // Annotate the irreducible loop header weights.
1067 void annotateIrrLoopHeaderWeights();
1068
1069 // The hotness of the function from the profile count.
1070 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1071
1072 // Return the function hotness from the profile.
1073 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1074
1075 // Return the function hash.
1076 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1077
1078 // Return the profile record for this function;
1079 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1080
1081 // Return the auxiliary BB information.
1082 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1083 return FuncInfo.getBBInfo(BB);
1084 }
1085
1086 // Return the auxiliary BB information if available.
1087 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1088 return FuncInfo.findBBInfo(BB);
1089 }
1090
1091 Function &getFunc() const { return F; }
1092
1093 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1094
1095 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1096
1097private:
1098 Function &F;
1099 Module *M;
1101 ProfileSummaryInfo *PSI;
1102
1103 // This member stores the shared information with class PGOGenFunc.
1104 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1105
1106 // The maximum count value in the profile. This is only used in PGO use
1107 // compilation.
1108 uint64_t ProgramMaxCount;
1109
1110 // Position of counter that remains to be read.
1111 uint32_t CountPosition = 0;
1112
1113 // Total size of the profile count for this function.
1114 uint32_t ProfileCountSize = 0;
1115
1116 // ProfileRecord for this function.
1117 InstrProfRecord ProfileRecord;
1118
1119 // Function hotness info derived from profile.
1120 FuncFreqAttr FreqAttr;
1121
1122 // Is to use the context sensitive profile.
1123 bool IsCS;
1124
1125 // Find the Instrumented BB and set the value. Return false on error.
1126 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1127
1128 // Set the edge counter value for the unknown edge -- there should be only
1129 // one unknown edge.
1130 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1131
1132 // Set the hot/cold inline hints based on the count values.
1133 // FIXME: This function should be removed once the functionality in
1134 // the inliner is implemented.
1135 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1136 if (PSI->isHotCount(EntryCount))
1137 FreqAttr = FFA_Hot;
1138 else if (PSI->isColdCount(MaxCount))
1139 FreqAttr = FFA_Cold;
1140 }
1141};
1142
1143} // end anonymous namespace
1144
1145/// Set up InEdges/OutEdges for all BBs in the MST.
1147 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1148 // This is not required when there is block coverage inference.
1149 if (FuncInfo.BCI)
1150 return;
1151 for (const auto &E : FuncInfo.MST.allEdges()) {
1152 if (E->Removed)
1153 continue;
1154 const BasicBlock *SrcBB = E->SrcBB;
1155 const BasicBlock *DestBB = E->DestBB;
1156 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1157 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1158 SrcInfo.addOutEdge(E.get());
1159 DestInfo.addInEdge(E.get());
1160 }
1161}
1162
1163// Visit all the edges and assign the count value for the instrumented
1164// edges and the BB. Return false on error.
1165bool PGOUseFunc::setInstrumentedCounts(
1166 const std::vector<uint64_t> &CountFromProfile) {
1167
1168 std::vector<BasicBlock *> InstrumentBBs;
1169 FuncInfo.getInstrumentBBs(InstrumentBBs);
1170
1171 setupBBInfoEdges(FuncInfo);
1172
1173 unsigned NumCounters =
1174 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1175 // The number of counters here should match the number of counters
1176 // in profile. Return if they mismatch.
1177 if (NumCounters != CountFromProfile.size()) {
1178 return false;
1179 }
1180 auto *FuncEntry = &*F.begin();
1181
1182 // Set the profile count to the Instrumented BBs.
1183 uint32_t I = 0;
1184 for (BasicBlock *InstrBB : InstrumentBBs) {
1185 uint64_t CountValue = CountFromProfile[I++];
1186 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1187 // If we reach here, we know that we have some nonzero count
1188 // values in this function. The entry count should not be 0.
1189 // Fix it if necessary.
1190 if (InstrBB == FuncEntry && CountValue == 0)
1191 CountValue = 1;
1192 Info.setBBInfoCount(CountValue);
1193 }
1194 ProfileCountSize = CountFromProfile.size();
1195 CountPosition = I;
1196
1197 // Set the edge count and update the count of unknown edges for BBs.
1198 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1199 E->setEdgeCount(Value);
1200 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1201 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1202 };
1203
1204 // Set the profile count the Instrumented edges. There are BBs that not in
1205 // MST but not instrumented. Need to set the edge count value so that we can
1206 // populate the profile counts later.
1207 for (const auto &E : FuncInfo.MST.allEdges()) {
1208 if (E->Removed || E->InMST)
1209 continue;
1210 const BasicBlock *SrcBB = E->SrcBB;
1211 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1212
1213 // If only one out-edge, the edge profile count should be the same as BB
1214 // profile count.
1215 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1216 setEdgeCount(E.get(), *SrcInfo.Count);
1217 else {
1218 const BasicBlock *DestBB = E->DestBB;
1219 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1220 // If only one in-edge, the edge profile count should be the same as BB
1221 // profile count.
1222 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1223 setEdgeCount(E.get(), *DestInfo.Count);
1224 }
1225 if (E->Count)
1226 continue;
1227 // E's count should have been set from profile. If not, this meenas E skips
1228 // the instrumentation. We set the count to 0.
1229 setEdgeCount(E.get(), 0);
1230 }
1231 return true;
1232}
1233
1234// Set the count value for the unknown edge. There should be one and only one
1235// unknown edge in Edges vector.
1236void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1237 for (auto &E : Edges) {
1238 if (E->Count)
1239 continue;
1240 E->setEdgeCount(Value);
1241
1242 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1243 getBBInfo(E->DestBB).UnknownCountInEdge--;
1244 return;
1245 }
1246 llvm_unreachable("Cannot find the unknown count edge");
1247}
1248
1249// Emit function metadata indicating PGO profile mismatch.
1251 const char MetadataName[] = "instr_prof_hash_mismatch";
1253 // If this metadata already exists, ignore.
1254 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1255 if (Existing) {
1256 MDTuple *Tuple = cast<MDTuple>(Existing);
1257 for (const auto &N : Tuple->operands()) {
1258 if (N.equalsStr(MetadataName))
1259 return;
1260 Names.push_back(N.get());
1261 }
1262 }
1263
1264 MDBuilder MDB(ctx);
1265 Names.push_back(MDB.createString(MetadataName));
1266 MDNode *MD = MDTuple::get(ctx, Names);
1267 F.setMetadata(LLVMContext::MD_annotation, MD);
1268}
1269
1270void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1271 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1272 auto &Ctx = M->getContext();
1273 auto Err = IPE.get();
1274 bool SkipWarning = false;
1275 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1276 << FuncInfo.FuncName << ": ");
1277 if (Err == instrprof_error::unknown_function) {
1278 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1279 SkipWarning = !PGOWarnMissing;
1280 LLVM_DEBUG(dbgs() << "unknown function");
1281 } else if (Err == instrprof_error::hash_mismatch ||
1282 Err == instrprof_error::malformed) {
1283 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1284 SkipWarning =
1287 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1289 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1290 << " skip=" << SkipWarning << ")");
1291 // Emit function metadata indicating PGO profile mismatch.
1292 annotateFunctionWithHashMismatch(F, M->getContext());
1293 }
1294
1295 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1296 if (SkipWarning)
1297 return;
1298
1299 std::string Msg =
1300 IPE.message() + std::string(" ") + F.getName().str() +
1301 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1302 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1303 std::string(" count discarded");
1304
1305 Ctx.diagnose(
1306 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1307 });
1308}
1309
1310// Read the profile from ProfileFileName and assign the value to the
1311// instrumented BB and the edges. This function also updates ProgramMaxCount.
1312// Return true if the profile are successfully read, and false on errors.
1313bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1315 auto &Ctx = M->getContext();
1316 uint64_t MismatchedFuncSum = 0;
1318 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1319 &MismatchedFuncSum);
1320 if (Error E = Result.takeError()) {
1321 handleInstrProfError(std::move(E), MismatchedFuncSum);
1322 return false;
1323 }
1324 ProfileRecord = std::move(Result.get());
1325 PseudoKind = ProfileRecord.getCountPseudoKind();
1326 if (PseudoKind != InstrProfRecord::NotPseudo) {
1327 return true;
1328 }
1329 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1330
1331 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1332 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1333
1334 uint64_t ValueSum = 0;
1335 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1336 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1337 ValueSum += CountFromProfile[I];
1338 }
1339 AllZeros = (ValueSum == 0);
1340
1341 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1342
1343 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1344 getBBInfo(nullptr).UnknownCountInEdge = 2;
1345
1346 if (!setInstrumentedCounts(CountFromProfile)) {
1347 LLVM_DEBUG(
1348 dbgs() << "Inconsistent number of counts, skipping this function");
1349 Ctx.diagnose(DiagnosticInfoPGOProfile(
1350 M->getName().data(),
1351 Twine("Inconsistent number of counts in ") + F.getName().str() +
1352 Twine(": the profile may be stale or there is a function name "
1353 "collision."),
1354 DS_Warning));
1355 return false;
1356 }
1357 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1358 return true;
1359}
1360
1361void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1362 uint64_t MismatchedFuncSum = 0;
1364 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1365 &MismatchedFuncSum);
1366 if (auto Err = Result.takeError()) {
1367 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1368 return;
1369 }
1370 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1371
1372 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1374 unsigned Index = 0;
1375 for (auto &BB : F)
1376 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1377 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1378 assert(Index == CountsFromProfile.size());
1379
1380 // For each B in InverseDependencies[A], if A is covered then B is covered.
1382 InverseDependencies;
1383 for (auto &BB : F) {
1384 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1385 // If Dep is covered then BB is covered.
1386 InverseDependencies[Dep].insert(&BB);
1387 }
1388 }
1389
1390 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1391 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1392 for (auto &[BB, IsCovered] : Coverage)
1393 if (IsCovered)
1394 CoveredBlocksToProcess.push(BB);
1395
1396 while (!CoveredBlocksToProcess.empty()) {
1397 auto *CoveredBlock = CoveredBlocksToProcess.top();
1398 assert(Coverage[CoveredBlock]);
1399 CoveredBlocksToProcess.pop();
1400 for (auto *BB : InverseDependencies[CoveredBlock]) {
1401 // If CoveredBlock is covered then BB is covered.
1402 if (Coverage[BB])
1403 continue;
1404 Coverage[BB] = true;
1405 CoveredBlocksToProcess.push(BB);
1406 }
1407 }
1408
1409 // Annotate block coverage.
1410 MDBuilder MDB(F.getContext());
1411 // We set the entry count to 10000 if the entry block is covered so that BFI
1412 // can propagate a fraction of this count to the other covered blocks.
1413 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1414 for (auto &BB : F) {
1415 // For a block A and its successor B, we set the edge weight as follows:
1416 // If A is covered and B is covered, set weight=1.
1417 // If A is covered and B is uncovered, set weight=0.
1418 // If A is uncovered, set weight=1.
1419 // This setup will allow BFI to give nonzero profile counts to only covered
1420 // blocks.
1422 for (auto *Succ : successors(&BB))
1423 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1424 if (Weights.size() >= 2)
1425 llvm::setBranchWeights(*BB.getTerminator(), Weights);
1426 }
1427
1428 unsigned NumCorruptCoverage = 0;
1429 DominatorTree DT(F);
1430 LoopInfo LI(DT);
1431 BranchProbabilityInfo BPI(F, LI);
1432 BlockFrequencyInfo BFI(F, BPI, LI);
1433 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1434 if (auto C = BFI.getBlockProfileCount(&BB))
1435 return C == 0;
1436 return {};
1437 };
1438 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1439 for (auto &BB : F) {
1440 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1441 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1442 << "\n");
1443 // In some cases it is possible to find a covered block that has no covered
1444 // successors, e.g., when a block calls a function that may call exit(). In
1445 // those cases, BFI could find its successor to be covered while BCI could
1446 // find its successor to be dead.
1447 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1448 LLVM_DEBUG(
1449 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1450 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1451 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1452 ++NumCorruptCoverage;
1453 }
1454 if (Coverage[&BB])
1455 ++NumCoveredBlocks;
1456 }
1457 if (PGOVerifyBFI && NumCorruptCoverage) {
1458 auto &Ctx = M->getContext();
1459 Ctx.diagnose(DiagnosticInfoPGOProfile(
1460 M->getName().data(),
1461 Twine("Found inconsistent block coverage for function ") + F.getName() +
1462 " in " + Twine(NumCorruptCoverage) + " blocks.",
1463 DS_Warning));
1464 }
1466 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1467}
1468
1469// Populate the counters from instrumented BBs to all BBs.
1470// In the end of this operation, all BBs should have a valid count value.
1471void PGOUseFunc::populateCounters() {
1472 bool Changes = true;
1473 unsigned NumPasses = 0;
1474 while (Changes) {
1475 NumPasses++;
1476 Changes = false;
1477
1478 // For efficient traversal, it's better to start from the end as most
1479 // of the instrumented edges are at the end.
1480 for (auto &BB : reverse(F)) {
1481 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1482 if (UseBBInfo == nullptr)
1483 continue;
1484 if (!UseBBInfo->Count) {
1485 if (UseBBInfo->UnknownCountOutEdge == 0) {
1486 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1487 Changes = true;
1488 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1489 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1490 Changes = true;
1491 }
1492 }
1493 if (UseBBInfo->Count) {
1494 if (UseBBInfo->UnknownCountOutEdge == 1) {
1495 uint64_t Total = 0;
1496 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1497 // If the one of the successor block can early terminate (no-return),
1498 // we can end up with situation where out edge sum count is larger as
1499 // the source BB's count is collected by a post-dominated block.
1500 if (*UseBBInfo->Count > OutSum)
1501 Total = *UseBBInfo->Count - OutSum;
1502 setEdgeCount(UseBBInfo->OutEdges, Total);
1503 Changes = true;
1504 }
1505 if (UseBBInfo->UnknownCountInEdge == 1) {
1506 uint64_t Total = 0;
1507 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1508 if (*UseBBInfo->Count > InSum)
1509 Total = *UseBBInfo->Count - InSum;
1510 setEdgeCount(UseBBInfo->InEdges, Total);
1511 Changes = true;
1512 }
1513 }
1514 }
1515 }
1516
1517 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1518 (void)NumPasses;
1519#ifndef NDEBUG
1520 // Assert every BB has a valid counter.
1521 for (auto &BB : F) {
1522 auto BI = findBBInfo(&BB);
1523 if (BI == nullptr)
1524 continue;
1525 assert(BI->Count && "BB count is not valid");
1526 }
1527#endif
1528 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1529 uint64_t FuncMaxCount = FuncEntryCount;
1530 for (auto &BB : F) {
1531 auto BI = findBBInfo(&BB);
1532 if (BI == nullptr)
1533 continue;
1534 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1535 }
1536
1537 // Fix the obviously inconsistent entry count.
1538 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1539 FuncEntryCount = 1;
1541 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1542
1543 // Now annotate select instructions
1544 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1545 assert(CountPosition == ProfileCountSize);
1546
1547 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1548}
1549
1550// Assign the scaled count values to the BB with multiple out edges.
1551void PGOUseFunc::setBranchWeights() {
1552 // Generate MD_prof metadata for every branch instruction.
1553 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1554 << " IsCS=" << IsCS << "\n");
1555 for (auto &BB : F) {
1556 Instruction *TI = BB.getTerminator();
1557 if (TI->getNumSuccessors() < 2)
1558 continue;
1559 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1560 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1561 isa<CallBrInst>(TI)))
1562 continue;
1563
1564 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1565 if (!*BBCountInfo.Count)
1566 continue;
1567
1568 // We have a non-zero Branch BB.
1569 unsigned Size = BBCountInfo.OutEdges.size();
1570 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1571 uint64_t MaxCount = 0;
1572 for (unsigned s = 0; s < Size; s++) {
1573 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1574 const BasicBlock *SrcBB = E->SrcBB;
1575 const BasicBlock *DestBB = E->DestBB;
1576 if (DestBB == nullptr)
1577 continue;
1578 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1579 uint64_t EdgeCount = *E->Count;
1580 if (EdgeCount > MaxCount)
1581 MaxCount = EdgeCount;
1582 EdgeCounts[SuccNum] = EdgeCount;
1583 }
1584
1585 if (MaxCount)
1586 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1587 else {
1588 // A zero MaxCount can come about when we have a BB with a positive
1589 // count, and whose successor blocks all have 0 count. This can happen
1590 // when there is no exit block and the code exits via a noreturn function.
1591 auto &Ctx = M->getContext();
1592 Ctx.diagnose(DiagnosticInfoPGOProfile(
1593 M->getName().data(),
1594 Twine("Profile in ") + F.getName().str() +
1595 Twine(" partially ignored") +
1596 Twine(", possibly due to the lack of a return path."),
1597 DS_Warning));
1598 }
1599 }
1600}
1601
1603 for (BasicBlock *Pred : predecessors(BB)) {
1604 if (isa<IndirectBrInst>(Pred->getTerminator()))
1605 return true;
1606 }
1607 return false;
1608}
1609
1610void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1611 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1612 // Find irr loop headers
1613 for (auto &BB : F) {
1614 // As a heuristic also annotate indrectbr targets as they have a high chance
1615 // to become an irreducible loop header after the indirectbr tail
1616 // duplication.
1617 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1618 Instruction *TI = BB.getTerminator();
1619 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1620 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1621 }
1622 }
1623}
1624
1625void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1626 Module *M = F.getParent();
1627 IRBuilder<> Builder(&SI);
1628 Type *Int64Ty = Builder.getInt64Ty();
1629 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1630 Builder.CreateCall(
1631 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1632 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1633 Builder.getInt32(*CurCtrIdx), Step});
1634 ++(*CurCtrIdx);
1635}
1636
1637void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1638 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1639 assert(*CurCtrIdx < CountFromProfile.size() &&
1640 "Out of bound access of counters");
1641 uint64_t SCounts[2];
1642 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1643 ++(*CurCtrIdx);
1644 uint64_t TotalCount = 0;
1645 auto BI = UseFunc->findBBInfo(SI.getParent());
1646 if (BI != nullptr)
1647 TotalCount = *BI->Count;
1648 // False Count
1649 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1650 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1651 if (MaxCount)
1652 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1653}
1654
1655void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1656 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1657 return;
1658 // FIXME: do not handle this yet.
1659 if (SI.getCondition()->getType()->isVectorTy())
1660 return;
1661
1662 switch (Mode) {
1663 case VM_counting:
1664 NSIs++;
1665 return;
1666 case VM_instrument:
1667 instrumentOneSelectInst(SI);
1668 return;
1669 case VM_annotate:
1670 annotateOneSelectInst(SI);
1671 return;
1672 }
1673
1674 llvm_unreachable("Unknown visiting mode");
1675}
1676
1677// Traverse all valuesites and annotate the instructions for all value kind.
1678void PGOUseFunc::annotateValueSites() {
1680 return;
1681
1682 // Create the PGOFuncName meta data.
1683 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1684
1685 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1686 annotateValueSites(Kind);
1687}
1688
1689// Annotate the instructions for a specific value kind.
1690void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1691 assert(Kind <= IPVK_Last);
1692 unsigned ValueSiteIndex = 0;
1693 auto &ValueSites = FuncInfo.ValueSites[Kind];
1694 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1695 if (NumValueSites != ValueSites.size()) {
1696 auto &Ctx = M->getContext();
1697 Ctx.diagnose(DiagnosticInfoPGOProfile(
1698 M->getName().data(),
1699 Twine("Inconsistent number of value sites for ") +
1700 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1701 F.getName().str() +
1702 Twine("\", possibly due to the use of a stale profile."),
1703 DS_Warning));
1704 return;
1705 }
1706
1707 for (VPCandidateInfo &I : ValueSites) {
1708 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1709 << "): Index = " << ValueSiteIndex << " out of "
1710 << NumValueSites << "\n");
1711 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1712 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1713 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1715 ValueSiteIndex++;
1716 }
1717}
1718
1719// Collect the set of members for each Comdat in module M and store
1720// in ComdatMembers.
1722 Module &M,
1723 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1724 if (!DoComdatRenaming)
1725 return;
1726 for (Function &F : M)
1727 if (Comdat *C = F.getComdat())
1728 ComdatMembers.insert(std::make_pair(C, &F));
1729 for (GlobalVariable &GV : M.globals())
1730 if (Comdat *C = GV.getComdat())
1731 ComdatMembers.insert(std::make_pair(C, &GV));
1732 for (GlobalAlias &GA : M.aliases())
1733 if (Comdat *C = GA.getComdat())
1734 ComdatMembers.insert(std::make_pair(C, &GA));
1735}
1736
1737// Return true if we should not find instrumentation data for this function
1738static bool skipPGOUse(const Function &F) {
1739 if (F.isDeclaration())
1740 return true;
1741 // If there are too many critical edges, PGO might cause
1742 // compiler time problem. Skip PGO if the number of
1743 // critical edges execeed the threshold.
1744 unsigned NumCriticalEdges = 0;
1745 for (auto &BB : F) {
1746 const Instruction *TI = BB.getTerminator();
1747 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1748 if (isCriticalEdge(TI, I))
1749 NumCriticalEdges++;
1750 }
1751 }
1752 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1753 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1754 << ", NumCriticalEdges=" << NumCriticalEdges
1755 << " exceed the threshold. Skip PGO.\n");
1756 return true;
1757 }
1758 return false;
1759}
1760
1761// Return true if we should not instrument this function
1762static bool skipPGOGen(const Function &F) {
1763 if (skipPGOUse(F))
1764 return true;
1765 if (F.hasFnAttribute(llvm::Attribute::Naked))
1766 return true;
1767 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1768 return true;
1769 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1770 return true;
1771 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1772 return true;
1773 return false;
1774}
1775
1777 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1779 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1780 // For the context-sensitve instrumentation, we should have a separated pass
1781 // (before LTO/ThinLTO linking) to create these variables.
1782 if (!IsCS)
1783 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1784
1785 Triple TT(M.getTargetTriple());
1786 LLVMContext &Ctx = M.getContext();
1787 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1789 M.getName().data(),
1790 Twine("VTable value profiling is presently not "
1791 "supported for non-ELF object formats"),
1792 DS_Warning));
1793 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1794 collectComdatMembers(M, ComdatMembers);
1795
1796 for (auto &F : M) {
1797 if (skipPGOGen(F))
1798 continue;
1799 auto &TLI = LookupTLI(F);
1800 auto *BPI = LookupBPI(F);
1801 auto *BFI = LookupBFI(F);
1802 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1803 }
1804 return true;
1805}
1806
1809 createProfileFileNameVar(M, CSInstrName);
1810 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1811 // will be retained.
1816 return PA;
1817}
1818
1821 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1822 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1824 };
1825 auto LookupBPI = [&FAM](Function &F) {
1827 };
1828 auto LookupBFI = [&FAM](Function &F) {
1830 };
1831
1832 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1833 return PreservedAnalyses::all();
1834
1835 return PreservedAnalyses::none();
1836}
1837
1838// Using the ratio b/w sums of profile count values and BFI count values to
1839// adjust the func entry count.
1840static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1841 BranchProbabilityInfo &NBPI) {
1842 Function &F = Func.getFunc();
1843 BlockFrequencyInfo NBFI(F, NBPI, LI);
1844#ifndef NDEBUG
1845 auto BFIEntryCount = F.getEntryCount();
1846 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1847 "Invalid BFI Entrycount");
1848#endif
1849 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1850 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1851 for (auto &BBI : F) {
1852 uint64_t CountValue = 0;
1853 uint64_t BFICountValue = 0;
1854 if (!Func.findBBInfo(&BBI))
1855 continue;
1856 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1857 CountValue = *Func.getBBInfo(&BBI).Count;
1858 BFICountValue = *BFICount;
1859 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1860 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1861 }
1862 if (SumCount.isZero())
1863 return;
1864
1865 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1866 "Incorrect sum of BFI counts");
1867 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1868 return;
1869 double Scale = (SumCount / SumBFICount).convertToDouble();
1870 if (Scale < 1.001 && Scale > 0.999)
1871 return;
1872
1873 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1874 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1875 if (NewEntryCount == 0)
1876 NewEntryCount = 1;
1877 if (NewEntryCount != FuncEntryCount) {
1878 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1879 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1880 << ", entry_count " << FuncEntryCount << " --> "
1881 << NewEntryCount << "\n");
1882 }
1883}
1884
1885// Compare the profile count values with BFI count values, and print out
1886// the non-matching ones.
1887static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1889 uint64_t HotCountThreshold,
1891 Function &F = Func.getFunc();
1892 BlockFrequencyInfo NBFI(F, NBPI, LI);
1893 // bool PrintFunc = false;
1894 bool HotBBOnly = PGOVerifyHotBFI;
1895 StringRef Msg;
1897
1898 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1899 for (auto &BBI : F) {
1900 uint64_t CountValue = 0;
1901 uint64_t BFICountValue = 0;
1902
1903 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1904
1905 BBNum++;
1906 if (CountValue)
1907 NonZeroBBNum++;
1908 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1909 if (BFICount)
1910 BFICountValue = *BFICount;
1911
1912 if (HotBBOnly) {
1913 bool rawIsHot = CountValue >= HotCountThreshold;
1914 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1915 bool rawIsCold = CountValue <= ColdCountThreshold;
1916 bool ShowCount = false;
1917 if (rawIsHot && !BFIIsHot) {
1918 Msg = "raw-Hot to BFI-nonHot";
1919 ShowCount = true;
1920 } else if (rawIsCold && BFIIsHot) {
1921 Msg = "raw-Cold to BFI-Hot";
1922 ShowCount = true;
1923 }
1924 if (!ShowCount)
1925 continue;
1926 } else {
1927 if ((CountValue < PGOVerifyBFICutoff) &&
1928 (BFICountValue < PGOVerifyBFICutoff))
1929 continue;
1930 uint64_t Diff = (BFICountValue >= CountValue)
1931 ? BFICountValue - CountValue
1932 : CountValue - BFICountValue;
1933 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1934 continue;
1935 }
1936 BBMisMatchNum++;
1937
1938 ORE.emit([&]() {
1940 F.getSubprogram(), &BBI);
1941 Remark << "BB " << ore::NV("Block", BBI.getName())
1942 << " Count=" << ore::NV("Count", CountValue)
1943 << " BFI_Count=" << ore::NV("Count", BFICountValue);
1944 if (!Msg.empty())
1945 Remark << " (" << Msg << ")";
1946 return Remark;
1947 });
1948 }
1949 if (BBMisMatchNum)
1950 ORE.emit([&]() {
1951 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1952 F.getSubprogram(), &F.getEntryBlock())
1953 << "In Func " << ore::NV("Function", F.getName())
1954 << ": Num_of_BB=" << ore::NV("Count", BBNum)
1955 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1956 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1957 });
1958}
1959
1961 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1962 vfs::FileSystem &FS,
1963 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1966 ProfileSummaryInfo *PSI, bool IsCS) {
1967 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1968 auto &Ctx = M.getContext();
1969 // Read the counter array from file.
1970 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
1971 ProfileRemappingFileName);
1972 if (Error E = ReaderOrErr.takeError()) {
1973 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1974 Ctx.diagnose(
1975 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1976 });
1977 return false;
1978 }
1979
1980 std::unique_ptr<IndexedInstrProfReader> PGOReader =
1981 std::move(ReaderOrErr.get());
1982 if (!PGOReader) {
1983 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1984 StringRef("Cannot get PGOReader")));
1985 return false;
1986 }
1987 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1988 return false;
1989
1990 // TODO: might need to change the warning once the clang option is finalized.
1991 if (!PGOReader->isIRLevelProfile()) {
1992 Ctx.diagnose(DiagnosticInfoPGOProfile(
1993 ProfileFileName.data(), "Not an IR level instrumentation profile"));
1994 return false;
1995 }
1996 if (PGOReader->functionEntryOnly()) {
1997 Ctx.diagnose(DiagnosticInfoPGOProfile(
1998 ProfileFileName.data(),
1999 "Function entry profiles are not yet supported for optimization"));
2000 return false;
2001 }
2002
2003 // Add the profile summary (read from the header of the indexed summary) here
2004 // so that we can use it below when reading counters (which checks if the
2005 // function should be marked with a cold or inlinehint attribute).
2006 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2009 PSI->refresh();
2010
2011 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2012 collectComdatMembers(M, ComdatMembers);
2013 std::vector<Function *> HotFunctions;
2014 std::vector<Function *> ColdFunctions;
2015
2016 // If the profile marked as always instrument the entry BB, do the
2017 // same. Note this can be overwritten by the internal option in CFGMST.h
2018 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2019 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2020 InstrumentFuncEntry = PGOInstrumentEntry;
2021 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2022 for (auto &F : M) {
2023 if (skipPGOUse(F))
2024 continue;
2025 auto &TLI = LookupTLI(F);
2026 auto *BPI = LookupBPI(F);
2027 auto *BFI = LookupBFI(F);
2028 if (!HasSingleByteCoverage) {
2029 // Split indirectbr critical edges here before computing the MST rather
2030 // than later in getInstrBB() to avoid invalidating it.
2031 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2032 BFI);
2033 }
2034 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2035 InstrumentFuncEntry, HasSingleByteCoverage);
2036 if (HasSingleByteCoverage) {
2037 Func.populateCoverage(PGOReader.get());
2038 continue;
2039 }
2040 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2041 // it means the profile for the function is unrepresentative and this
2042 // function is actually hot / warm. We will reset the function hot / cold
2043 // attribute and drop all the profile counters.
2045 bool AllZeros = false;
2046 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2047 continue;
2048 if (AllZeros) {
2049 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2050 if (Func.getProgramMaxCount() != 0)
2051 ColdFunctions.push_back(&F);
2052 continue;
2053 }
2054 if (PseudoKind != InstrProfRecord::NotPseudo) {
2055 // Clear function attribute cold.
2056 if (F.hasFnAttribute(Attribute::Cold))
2057 F.removeFnAttr(Attribute::Cold);
2058 // Set function attribute as hot.
2059 if (PseudoKind == InstrProfRecord::PseudoHot)
2060 F.addFnAttr(Attribute::Hot);
2061 continue;
2062 }
2063 Func.populateCounters();
2064 Func.setBranchWeights();
2065 Func.annotateValueSites();
2066 Func.annotateIrrLoopHeaderWeights();
2067 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2068 if (FreqAttr == PGOUseFunc::FFA_Cold)
2069 ColdFunctions.push_back(&F);
2070 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2071 HotFunctions.push_back(&F);
2072 if (PGOViewCounts != PGOVCT_None &&
2073 (ViewBlockFreqFuncName.empty() ||
2074 F.getName().equals(ViewBlockFreqFuncName))) {
2076 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2077 std::make_unique<BranchProbabilityInfo>(F, LI);
2078 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2079 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2081 NewBFI->view();
2082 else if (PGOViewCounts == PGOVCT_Text) {
2083 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2084 NewBFI->print(dbgs());
2085 }
2086 }
2088 (ViewBlockFreqFuncName.empty() ||
2089 F.getName().equals(ViewBlockFreqFuncName))) {
2091 if (ViewBlockFreqFuncName.empty())
2092 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2093 else
2094 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2095 else if (PGOViewRawCounts == PGOVCT_Text) {
2096 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2097 Func.dumpInfo();
2098 }
2099 }
2100
2103 BranchProbabilityInfo NBPI(F, LI);
2104
2105 // Fix func entry count.
2106 if (PGOFixEntryCount)
2107 fixFuncEntryCount(Func, LI, NBPI);
2108
2109 // Verify BlockFrequency information.
2110 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2111 if (PGOVerifyHotBFI) {
2112 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2114 }
2115 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2116 }
2117 }
2118
2119 // Set function hotness attribute from the profile.
2120 // We have to apply these attributes at the end because their presence
2121 // can affect the BranchProbabilityInfo of any callers, resulting in an
2122 // inconsistent MST between prof-gen and prof-use.
2123 for (auto &F : HotFunctions) {
2124 F->addFnAttr(Attribute::InlineHint);
2125 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2126 << "\n");
2127 }
2128 for (auto &F : ColdFunctions) {
2129 // Only set when there is no Attribute::Hot set by the user. For Hot
2130 // attribute, user's annotation has the precedence over the profile.
2131 if (F->hasFnAttribute(Attribute::Hot)) {
2132 auto &Ctx = M.getContext();
2133 std::string Msg = std::string("Function ") + F->getName().str() +
2134 std::string(" is annotated as a hot function but"
2135 " the profile is cold");
2136 Ctx.diagnose(
2137 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2138 continue;
2139 }
2140 F->addFnAttr(Attribute::Cold);
2141 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2142 << "\n");
2143 }
2144 return true;
2145}
2146
2148 std::string Filename, std::string RemappingFilename, bool IsCS,
2150 : ProfileFileName(std::move(Filename)),
2151 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2152 FS(std::move(VFS)) {
2153 if (!PGOTestProfileFile.empty())
2154 ProfileFileName = PGOTestProfileFile;
2155 if (!PGOTestProfileRemappingFile.empty())
2156 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2157 if (!FS)
2159}
2160
2163
2164 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2165 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2167 };
2168 auto LookupBPI = [&FAM](Function &F) {
2170 };
2171 auto LookupBFI = [&FAM](Function &F) {
2173 };
2174
2175 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2176
2177 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2178 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2179 return PreservedAnalyses::all();
2180
2181 return PreservedAnalyses::none();
2182}
2183
2184static std::string getSimpleNodeName(const BasicBlock *Node) {
2185 if (!Node->getName().empty())
2186 return Node->getName().str();
2187
2188 std::string SimpleNodeName;
2189 raw_string_ostream OS(SimpleNodeName);
2190 Node->printAsOperand(OS, false);
2191 return OS.str();
2192}
2193
2195 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2196 assert(MaxCount > 0 && "Bad max count");
2197 uint64_t Scale = calculateCountScale(MaxCount);
2199 for (const auto &ECI : EdgeCounts)
2200 Weights.push_back(scaleBranchCount(ECI, Scale));
2201
2202 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2203 : Weights) {
2204 dbgs() << W << " ";
2205 } dbgs() << "\n";);
2206
2207 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2208
2209 setBranchWeights(*TI, Weights);
2211 std::string BrCondStr = getBranchCondString(TI);
2212 if (BrCondStr.empty())
2213 return;
2214
2215 uint64_t WSum =
2216 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2217 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2218 uint64_t TotalCount =
2219 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2220 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2221 Scale = calculateCountScale(WSum);
2222 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2223 scaleBranchCount(WSum, Scale));
2224 std::string BranchProbStr;
2225 raw_string_ostream OS(BranchProbStr);
2226 OS << BP;
2227 OS << " (total count : " << TotalCount << ")";
2228 OS.flush();
2229 Function *F = TI->getParent()->getParent();
2231 ORE.emit([&]() {
2232 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2233 << BrCondStr << " is true with probability : " << BranchProbStr;
2234 });
2235 }
2236}
2237
2238namespace llvm {
2239
2241 MDBuilder MDB(M->getContext());
2242 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2243 MDB.createIrrLoopHeaderWeight(Count));
2244}
2245
2246template <> struct GraphTraits<PGOUseFunc *> {
2247 using NodeRef = const BasicBlock *;
2250
2251 static NodeRef getEntryNode(const PGOUseFunc *G) {
2252 return &G->getFunc().front();
2253 }
2254
2256 return succ_begin(N);
2257 }
2258
2259 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2260
2261 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2262 return nodes_iterator(G->getFunc().begin());
2263 }
2264
2265 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2266 return nodes_iterator(G->getFunc().end());
2267 }
2268};
2269
2270template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2271 explicit DOTGraphTraits(bool isSimple = false)
2273
2274 static std::string getGraphName(const PGOUseFunc *G) {
2275 return std::string(G->getFunc().getName());
2276 }
2277
2278 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2279 std::string Result;
2280 raw_string_ostream OS(Result);
2281
2282 OS << getSimpleNodeName(Node) << ":\\l";
2283 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2284 OS << "Count : ";
2285 if (BI && BI->Count)
2286 OS << *BI->Count << "\\l";
2287 else
2288 OS << "Unknown\\l";
2289
2290 if (!PGOInstrSelect)
2291 return Result;
2292
2293 for (const Instruction &I : *Node) {
2294 if (!isa<SelectInst>(&I))
2295 continue;
2296 // Display scaled counts for SELECT instruction:
2297 OS << "SELECT : { T = ";
2298 uint64_t TC, FC;
2299 bool HasProf = extractBranchWeights(I, TC, FC);
2300 if (!HasProf)
2301 OS << "Unknown, F = Unknown }\\l";
2302 else
2303 OS << TC << ", F = " << FC << " }\\l";
2304 }
2305 return Result;
2306 }
2307};
2308
2309} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:957
Class for arbitrary precision integers.
Definition: APInt.h:76
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:47
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:211
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:474
Class to represent profile counts.
Definition: Function.h:278
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:525
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:491
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:631
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:410
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:250
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:812
const BasicBlock * getParent() const
Definition: Instruction.h:152
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:340
Metadata node.
Definition: Metadata.h:1067
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
Tuple of metadata.
Definition: Metadata.h:1470
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:470
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:374
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1364
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:363
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:970
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:462
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1229
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:266
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1424
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1447
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:808
std::vector< uint64_t > Counts
Definition: InstrProf.h:809
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:919
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1038
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1019