LLVM 18.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except
15// for the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of an edge on the spanning tree can be
17// derived from those edges not on the spanning tree. Knuth proves this method
18// instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
116#include <algorithm>
117#include <cassert>
118#include <cstdint>
119#include <memory>
120#include <numeric>
121#include <optional>
122#include <string>
123#include <unordered_map>
124#include <utility>
125#include <vector>
126
127using namespace llvm;
130
131#define DEBUG_TYPE "pgo-instrumentation"
132
133STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
134STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
135STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
136STATISTIC(NumOfPGOEdge, "Number of edges.");
137STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
138STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
139STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
140STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
141STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
142STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
143STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
144STATISTIC(NumOfCSPGOSelectInsts,
145 "Number of select instruction instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOMemIntrinsics,
147 "Number of mem intrinsics instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
149STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
150STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
151STATISTIC(NumOfCSPGOFunc,
152 "Number of functions having valid profile counts in CSPGO.");
153STATISTIC(NumOfCSPGOMismatch,
154 "Number of functions having mismatch profile in CSPGO.");
155STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
156STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
157
158// Command line option to specify the file to read profile from. This is
159// mainly used for testing.
161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
162 cl::value_desc("filename"),
163 cl::desc("Specify the path of profile data file. This is"
164 "mainly for test purpose."));
166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
167 cl::value_desc("filename"),
168 cl::desc("Specify the path of profile remapping file. This is mainly for "
169 "test purpose."));
170
171// Command line option to disable value profiling. The default is false:
172// i.e. value profiling is enabled by default. This is for debug purpose.
173static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
175 cl::desc("Disable Value Profiling"));
176
177// Command line option to set the maximum number of VP annotations to write to
178// the metadata for a single indirect call callsite.
180 "icp-max-annotations", cl::init(3), cl::Hidden,
181 cl::desc("Max number of annotations for a single indirect "
182 "call callsite"));
183
184// Command line option to set the maximum number of value annotations
185// to write to the metadata for a single memop intrinsic.
187 "memop-max-annotations", cl::init(4), cl::Hidden,
188 cl::desc("Max number of preicise value annotations for a single memop"
189 "intrinsic"));
190
191// Command line option to control appending FunctionHash to the name of a COMDAT
192// function. This is to avoid the hash mismatch caused by the preinliner.
194 "do-comdat-renaming", cl::init(false), cl::Hidden,
195 cl::desc("Append function hash to the name of COMDAT function to avoid "
196 "function hash mismatch due to the preinliner"));
197
198namespace llvm {
199// Command line option to enable/disable the warning about missing profile
200// information.
201cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
203 cl::desc("Use this option to turn on/off "
204 "warnings about missing profile data for "
205 "functions."));
206
207// Command line option to enable/disable the warning about a hash mismatch in
208// the profile data.
210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
211 cl::desc("Use this option to turn off/on "
212 "warnings about profile cfg mismatch."));
213
214// Command line option to enable/disable the warning about a hash mismatch in
215// the profile data for Comdat functions, which often turns out to be false
216// positive due to the pre-instrumentation inline.
218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
219 cl::desc("The option is used to turn on/off "
220 "warnings about hash mismatch for comdat "
221 "or weak functions."));
222} // namespace llvm
223
224// Command line option to enable/disable select instruction instrumentation.
225static cl::opt<bool>
226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
227 cl::desc("Use this option to turn on/off SELECT "
228 "instruction instrumentation. "));
229
230// Command line option to turn on CFG dot or text dump of raw profile counts
232 "pgo-view-raw-counts", cl::Hidden,
233 cl::desc("A boolean option to show CFG dag or text "
234 "with raw profile counts from "
235 "profile data. See also option "
236 "-pgo-view-counts. To limit graph "
237 "display to only one function, use "
238 "filtering option -view-bfi-func-name."),
239 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
240 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
241 clEnumValN(PGOVCT_Text, "text", "show in text.")));
242
243// Command line option to enable/disable memop intrinsic call.size profiling.
244static cl::opt<bool>
245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
246 cl::desc("Use this option to turn on/off "
247 "memory intrinsic size profiling."));
248
249// Emit branch probability as optimization remarks.
250static cl::opt<bool>
251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
252 cl::desc("When this option is on, the annotated "
253 "branch probability will be emitted as "
254 "optimization remarks: -{Rpass|"
255 "pass-remarks}=pgo-instrumentation"));
256
258 "pgo-instrument-entry", cl::init(false), cl::Hidden,
259 cl::desc("Force to instrument function entry basicblock."));
260
262 "pgo-function-entry-coverage", cl::Hidden,
263 cl::desc(
264 "Use this option to enable function entry coverage instrumentation."));
265
267 "pgo-block-coverage",
268 cl::desc("Use this option to enable basic block coverage instrumentation"));
269
270static cl::opt<bool>
271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
272 cl::desc("Create a dot file of CFGs with block "
273 "coverage inference information"));
274
276 "pgo-temporal-instrumentation",
277 cl::desc("Use this option to enable temporal instrumentation"));
278
279static cl::opt<bool>
280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
281 cl::desc("Fix function entry count in profile use."));
282
284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
285 cl::desc("Print out the non-match BFI count if a hot raw profile count "
286 "becomes non-hot, or a cold raw profile count becomes hot. "
287 "The print is enabled under -Rpass-analysis=pgo, or "
288 "internal option -pass-remakrs-analysis=pgo."));
289
291 "pgo-verify-bfi", cl::init(false), cl::Hidden,
292 cl::desc("Print out mismatched BFI counts after setting profile metadata "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remakrs-analysis=pgo."));
295
297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
298 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
299 "mismatched BFI if the difference percentage is greater than "
300 "this value (in percentage)."));
301
303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
305 "profile count value is below."));
306
308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
309 cl::value_desc("function name"),
310 cl::desc("Trace the hash of the function with this name."));
311
313 "pgo-function-size-threshold", cl::Hidden,
314 cl::desc("Do not instrument functions smaller than this threshold."));
315
317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
318 cl::desc("Do not instrument functions with the number of critical edges "
319 " greater than this threshold."));
320
321namespace llvm {
322// Command line option to turn on CFG dot dump after profile annotation.
323// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
325
326// Command line option to specify the name of the function for CFG dump
327// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
329
331} // namespace llvm
332
333static cl::opt<bool>
334 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
335 cl::desc("Use the old CFG function hashing"));
336
337// Return a string describing the branch condition that can be
338// used in static branch probability heuristics:
339static std::string getBranchCondString(Instruction *TI) {
340 BranchInst *BI = dyn_cast<BranchInst>(TI);
341 if (!BI || !BI->isConditional())
342 return std::string();
343
344 Value *Cond = BI->getCondition();
345 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
346 if (!CI)
347 return std::string();
348
349 std::string result;
350 raw_string_ostream OS(result);
351 OS << CI->getPredicate() << "_";
352 CI->getOperand(0)->getType()->print(OS, true);
353
354 Value *RHS = CI->getOperand(1);
355 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
356 if (CV) {
357 if (CV->isZero())
358 OS << "_Zero";
359 else if (CV->isOne())
360 OS << "_One";
361 else if (CV->isMinusOne())
362 OS << "_MinusOne";
363 else
364 OS << "_Const";
365 }
366 OS.flush();
367 return result;
368}
369
370static const char *ValueProfKindDescr[] = {
371#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
373};
374
375// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
376// aware this is an ir_level profile so it can set the version flag.
378 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
379 Type *IntTy64 = Type::getInt64Ty(M.getContext());
380 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
381 if (IsCS)
382 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
384 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
386 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
388 ProfileVersion |=
389 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
391 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
393 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
394 auto IRLevelVersionVariable = new GlobalVariable(
395 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
396 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
397 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
398 Triple TT(M.getTargetTriple());
399 if (TT.supportsCOMDAT()) {
400 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
401 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
402 }
403 return IRLevelVersionVariable;
404}
405
406namespace {
407
/// Selects which of its three roles the select instruction visitor performs
/// while walking a function.
enum VisitMode {
  /// Simply count the number of select instructions.
  VM_counting,
  /// Insert code that counts the number of times the TrueValue of a select is
  /// taken.
  VM_instrument,
  /// Read the profile data and annotate the select instruction with metadata.
  VM_annotate
};
414class PGOUseFunc;
415
416/// Instruction Visitor class to visit select instructions.
417struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
418 Function &F;
419 unsigned NSIs = 0; // Number of select instructions instrumented.
420 VisitMode Mode = VM_counting; // Visiting mode.
421 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
422 unsigned TotalNumCtrs = 0; // Total number of counters
423 GlobalVariable *FuncNameVar = nullptr;
424 uint64_t FuncHash = 0;
425 PGOUseFunc *UseFunc = nullptr;
426 bool HasSingleByteCoverage;
427
428 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
429 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
430
431 void countSelects() {
432 NSIs = 0;
433 Mode = VM_counting;
434 visit(F);
435 }
436
437 // Visit the IR stream and instrument all select instructions. \p
438 // Ind is a pointer to the counter index variable; \p TotalNC
439 // is the total number of counters; \p FNV is the pointer to the
440 // PGO function name var; \p FHash is the function hash.
441 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
442 uint64_t FHash) {
443 Mode = VM_instrument;
444 CurCtrIdx = Ind;
445 TotalNumCtrs = TotalNC;
446 FuncHash = FHash;
447 FuncNameVar = FNV;
448 visit(F);
449 }
450
451 // Visit the IR stream and annotate all select instructions.
452 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
453 Mode = VM_annotate;
454 UseFunc = UF;
455 CurCtrIdx = Ind;
456 visit(F);
457 }
458
459 void instrumentOneSelectInst(SelectInst &SI);
460 void annotateOneSelectInst(SelectInst &SI);
461
462 // Visit \p SI instruction and perform tasks according to visit mode.
463 void visitSelectInst(SelectInst &SI);
464
465 // Return the number of select instructions. This needs be called after
466 // countSelects().
467 unsigned getNumOfSelectInsts() const { return NSIs; }
468};
469
470/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
471/// based instrumentation.
472/// Note that the CFG can be a multi-graph. So there might be multiple edges
473/// with the same SrcBB and DestBB.
474struct PGOEdge {
475 BasicBlock *SrcBB;
476 BasicBlock *DestBB;
477 uint64_t Weight;
478 bool InMST = false;
479 bool Removed = false;
480 bool IsCritical = false;
481
482 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
483 : SrcBB(Src), DestBB(Dest), Weight(W) {}
484
485 /// Return the information string of an edge.
486 std::string infoString() const {
487 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
488 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
489 .str();
490 }
491};
492
493/// This class stores the auxiliary information for each BB in the MST.
494struct PGOBBInfo {
495 PGOBBInfo *Group;
497 uint32_t Rank = 0;
498
499 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
500
501 /// Return the information string of this object.
502 std::string infoString() const {
503 return (Twine("Index=") + Twine(Index)).str();
504 }
505};
506
507// This class implements the CFG edges. Note the CFG can be a multi-graph.
508template <class Edge, class BBInfo> class FuncPGOInstrumentation {
509private:
510 Function &F;
511
512 // Is this context-sensitive instrumentation?
513 bool IsCS;
514
515 // A map that stores the Comdat group in function F.
516 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
517
519
520 void computeCFGHash();
521 void renameComdatFunction();
522
523public:
524 const TargetLibraryInfo &TLI;
525 std::vector<std::vector<VPCandidateInfo>> ValueSites;
526 SelectInstVisitor SIVisitor;
527 std::string FuncName;
528 std::string DeprecatedFuncName;
529 GlobalVariable *FuncNameVar;
530
531 // CFG hash value for this function.
532 uint64_t FunctionHash = 0;
533
534 // The Minimum Spanning Tree of function CFG.
536
537 const std::optional<BlockCoverageInference> BCI;
538
539 static std::optional<BlockCoverageInference>
540 constructBCI(Function &Func, bool HasSingleByteCoverage,
541 bool InstrumentFuncEntry) {
542 if (HasSingleByteCoverage)
543 return BlockCoverageInference(Func, InstrumentFuncEntry);
544 return {};
545 }
546
547 // Collect all the BBs that will be instrumented, and store them in
548 // InstrumentBBs.
549 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
550
551 // Give an edge, find the BB that will be instrumented.
552 // Return nullptr if there is no BB to be instrumented.
553 BasicBlock *getInstrBB(Edge *E);
554
555 // Return the auxiliary BB information.
556 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
557
558 // Return the auxiliary BB information if available.
559 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
560
561 // Dump edges and BB information.
562 void dumpInfo(StringRef Str = "") const {
563 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
564 " Hash: " + Twine(FunctionHash) + "\t" + Str);
565 }
566
567 FuncPGOInstrumentation(
568 Function &Func, TargetLibraryInfo &TLI,
569 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
570 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
571 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
572 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
573 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
574 TLI(TLI), ValueSites(IPVK_Last + 1),
575 SIVisitor(Func, HasSingleByteCoverage),
576 MST(F, InstrumentFuncEntry, BPI, BFI),
577 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
578 if (BCI && PGOViewBlockCoverageGraph)
579 BCI->viewBlockCoverageGraph();
580 // This should be done before CFG hash computation.
581 SIVisitor.countSelects();
582 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
583 if (!IsCS) {
584 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
585 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
586 NumOfPGOBB += MST.BBInfos.size();
587 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
588 } else {
589 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
590 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
591 NumOfCSPGOBB += MST.BBInfos.size();
592 }
593
594 FuncName = getIRPGOFuncName(F);
595 DeprecatedFuncName = getPGOFuncName(F);
596 computeCFGHash();
597 if (!ComdatMembers.empty())
598 renameComdatFunction();
599 LLVM_DEBUG(dumpInfo("after CFGMST"));
600
601 for (auto &E : MST.AllEdges) {
602 if (E->Removed)
603 continue;
604 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
605 if (!E->InMST)
606 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
607 }
608
609 if (CreateGlobalVar)
610 FuncNameVar = createPGOFuncNameVar(F, FuncName);
611 }
612};
613
614} // end anonymous namespace
615
616// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
617// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
618// of selects, indirect calls, mem ops and edges.
619template <class Edge, class BBInfo>
620void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
621 std::vector<uint8_t> Indexes;
622 JamCRC JC;
623 for (auto &BB : F) {
624 const Instruction *TI = BB.getTerminator();
625 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
626 BasicBlock *Succ = TI->getSuccessor(I);
627 auto BI = findBBInfo(Succ);
628 if (BI == nullptr)
629 continue;
630 uint32_t Index = BI->Index;
631 for (int J = 0; J < 4; J++)
632 Indexes.push_back((uint8_t)(Index >> (J * 8)));
633 }
634 }
635 JC.update(Indexes);
636
637 JamCRC JCH;
638 if (PGOOldCFGHashing) {
639 // Hash format for context sensitive profile. Reserve 4 bits for other
640 // information.
641 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
642 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
643 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
644 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
645 } else {
646 // The higher 32 bits.
647 auto updateJCH = [&JCH](uint64_t Num) {
648 uint8_t Data[8];
650 JCH.update(Data);
651 };
652 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
653 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
654 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
655 if (BCI) {
656 updateJCH(BCI->getInstrumentedBlocksHash());
657 } else {
658 updateJCH((uint64_t)MST.AllEdges.size());
659 }
660
661 // Hash format for context sensitive profile. Reserve 4 bits for other
662 // information.
663 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
664 }
665
666 // Reserve bit 60-63 for other information purpose.
667 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
668 if (IsCS)
670 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
671 << " CRC = " << JC.getCRC()
672 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
673 << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
674 << ValueSites[IPVK_IndirectCallTarget].size());
675 if (!PGOOldCFGHashing) {
676 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
677 << ", High32 CRC = " << JCH.getCRC());
678 }
679 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
680
681 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
682 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
683 << " in building " << F.getParent()->getSourceFileName() << "\n";
684}
685
686// Check if we can safely rename this Comdat function.
687static bool canRenameComdat(
688 Function &F,
689 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
690 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
691 return false;
692
693 // FIXME: Current only handle those Comdat groups that only containing one
694 // function.
695 // (1) For a Comdat group containing multiple functions, we need to have a
696 // unique postfix based on the hashes for each function. There is a
697 // non-trivial code refactoring to do this efficiently.
698 // (2) Variables can not be renamed, so we can not rename Comdat function in a
699 // group including global vars.
700 Comdat *C = F.getComdat();
701 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
702 assert(!isa<GlobalAlias>(CM.second));
703 Function *FM = dyn_cast<Function>(CM.second);
704 if (FM != &F)
705 return false;
706 }
707 return true;
708}
709
710// Append the CFGHash to the Comdat function name.
711template <class Edge, class BBInfo>
712void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
713 if (!canRenameComdat(F, ComdatMembers))
714 return;
715 std::string OrigName = F.getName().str();
716 std::string NewFuncName =
717 Twine(F.getName() + "." + Twine(FunctionHash)).str();
718 F.setName(Twine(NewFuncName));
720 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
721 Comdat *NewComdat;
722 Module *M = F.getParent();
723 // For AvailableExternallyLinkage functions, change the linkage to
724 // LinkOnceODR and put them into comdat. This is because after renaming, there
725 // is no backup external copy available for the function.
726 if (!F.hasComdat()) {
728 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
730 F.setComdat(NewComdat);
731 return;
732 }
733
734 // This function belongs to a single function Comdat group.
735 Comdat *OrigComdat = F.getComdat();
736 std::string NewComdatName =
737 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
738 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
739 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
740
741 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
742 // Must be a function.
743 cast<Function>(CM.second)->setComdat(NewComdat);
744 }
745}
746
747/// Collect all the BBs that will be instruments and add them to
748/// `InstrumentBBs`.
749template <class Edge, class BBInfo>
750void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
751 std::vector<BasicBlock *> &InstrumentBBs) {
752 if (BCI) {
753 for (auto &BB : F)
754 if (BCI->shouldInstrumentBlock(BB))
755 InstrumentBBs.push_back(&BB);
756 return;
757 }
758
759 // Use a worklist as we will update the vector during the iteration.
760 std::vector<Edge *> EdgeList;
761 EdgeList.reserve(MST.AllEdges.size());
762 for (auto &E : MST.AllEdges)
763 EdgeList.push_back(E.get());
764
765 for (auto &E : EdgeList) {
766 BasicBlock *InstrBB = getInstrBB(E);
767 if (InstrBB)
768 InstrumentBBs.push_back(InstrBB);
769 }
770}
771
772// Given a CFG edge E to be instrumented, find the BB in which to place the
773// instrumentation code. The function will split the critical edge if necessary.
774template <class Edge, class BBInfo>
775BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
776 if (E->InMST || E->Removed)
777 return nullptr;
778
779 BasicBlock *SrcBB = E->SrcBB;
780 BasicBlock *DestBB = E->DestBB;
781 // For a fake edge, instrument the real BB.
782 if (SrcBB == nullptr)
783 return DestBB;
784 if (DestBB == nullptr)
785 return SrcBB;
786
787 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
788 // There are basic blocks (such as catchswitch) cannot be instrumented.
789 // If the returned first insertion point is the end of BB, skip this BB.
790 if (BB->getFirstInsertionPt() == BB->end())
791 return nullptr;
792 return BB;
793 };
794
795 // Instrument the SrcBB if it has a single successor,
796 // otherwise, the DestBB if this is not a critical edge.
797 Instruction *TI = SrcBB->getTerminator();
798 if (TI->getNumSuccessors() <= 1)
799 return canInstrument(SrcBB);
800 if (!E->IsCritical)
801 return canInstrument(DestBB);
802
803 // Some IndirectBr critical edges cannot be split by the previous
804 // SplitIndirectBrCriticalEdges call. Bail out.
805 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
806 BasicBlock *InstrBB =
807 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
808 if (!InstrBB) {
810 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
811 return nullptr;
812 }
813 // For a critical edge, we have to split. Instrument the newly
814 // created BB.
815 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
816 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
817 << " --> " << getBBInfo(DestBB).Index << "\n");
818 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
819 MST.addEdge(SrcBB, InstrBB, 0);
820 // Second one: Add new edge of InstrBB->DestBB.
821 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
822 NewEdge1.InMST = true;
823 E->Removed = true;
824
825 return canInstrument(InstrBB);
826}
827
828// When generating value profiling calls on Windows routines that make use of
829// handler funclets for exception processing, an operand bundle needs to be
830// attached to the called function. This routine will set \p OpBundles to
831// contain the funclet information, if any is needed, that should be placed on
832// the generated value profiling call for the value profile candidate call.
833static void
837 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
838 if (!OrigCall)
839 return;
840
841 if (!isa<IntrinsicInst>(OrigCall)) {
842 // The instrumentation call should belong to the same funclet as a
843 // non-intrinsic call, so just copy the operand bundle, if any exists.
844 std::optional<OperandBundleUse> ParentFunclet =
845 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
846 if (ParentFunclet)
847 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
848 } else {
849 // Intrinsics or other instructions do not get funclet information from the
850 // front-end. Need to use the BlockColors that was computed by the routine
851 // colorEHFunclets to determine whether a funclet is needed.
852 if (!BlockColors.empty()) {
853 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
854 assert(CV.size() == 1 && "non-unique color for block!");
855 Instruction *EHPad = CV.front()->getFirstNonPHI();
856 if (EHPad->isEHPad())
857 OpBundles.emplace_back("funclet", EHPad);
858 }
859 }
860}
861
862// Visit all edge and instrument the edges not in MST, and do value profiling.
863// Critical edges will be split.
867 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
868 bool IsCS) {
869 if (!PGOBlockCoverage) {
870 // Split indirectbr critical edges here before computing the MST rather than
871 // later in getInstrBB() to avoid invalidating it.
872 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
873 }
874
875 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
876 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
878
879 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
880 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
881 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
882 FuncInfo.FunctionHash);
884 auto &EntryBB = F.getEntryBlock();
885 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
886 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
887 // i32 <index>)
888 Builder.CreateCall(
889 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
890 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
891 return;
892 }
893
894 std::vector<BasicBlock *> InstrumentBBs;
895 FuncInfo.getInstrumentBBs(InstrumentBBs);
896 unsigned NumCounters =
897 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
898
899 uint32_t I = 0;
901 NumCounters += PGOBlockCoverage ? 8 : 1;
902 auto &EntryBB = F.getEntryBlock();
903 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
904 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
905 // i32 <index>)
906 Builder.CreateCall(
907 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
908 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
909 I += PGOBlockCoverage ? 8 : 1;
910 }
911
912 for (auto *InstrBB : InstrumentBBs) {
913 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
914 assert(Builder.GetInsertPoint() != InstrBB->end() &&
915 "Cannot get the Instrumentation point");
916 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
917 // i32 <index>)
918 Builder.CreateCall(
920 ? Intrinsic::instrprof_cover
921 : Intrinsic::instrprof_increment),
922 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
923 }
924
925 // Now instrument select instructions:
926 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
927 FuncInfo.FunctionHash);
928 assert(I == NumCounters);
929
931 return;
932
933 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
934
935 // Intrinsic function calls do not have funclet operand bundles needed for
936 // Windows exception handling attached to them. However, if value profiling is
937 // inserted for one of these calls, then a funclet value will need to be set
938 // on the instrumentation call based on the funclet coloring.
940 if (F.hasPersonalityFn() &&
942 BlockColors = colorEHFunclets(F);
943
944 // For each VP Kind, walk the VP candidates and instrument each one.
945 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
946 unsigned SiteIndex = 0;
947 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
948 continue;
949
950 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
951 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
952 << " site: CallSite Index = " << SiteIndex << "\n");
953
954 IRBuilder<> Builder(Cand.InsertPt);
955 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
956 "Cannot get the Instrumentation point");
957
958 Value *ToProfile = nullptr;
959 if (Cand.V->getType()->isIntegerTy())
960 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
961 else if (Cand.V->getType()->isPointerTy())
962 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
963 assert(ToProfile && "value profiling Value is of unexpected type");
964
966 populateEHOperandBundle(Cand, BlockColors, OpBundles);
967 Builder.CreateCall(
968 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
969 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
970 Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
971 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
972 OpBundles);
973 }
974 } // IPVK_First <= Kind <= IPVK_Last
975}
976
977namespace {
978
979// This class represents a CFG edge in profile use compilation.
980struct PGOUseEdge : public PGOEdge {
981 using PGOEdge::PGOEdge;
982
983 bool CountValid = false;
984 uint64_t CountValue = 0;
985
986 // Set edge count value
987 void setEdgeCount(uint64_t Value) {
988 CountValue = Value;
989 CountValid = true;
990 }
991
992 // Return the information string for this object.
993 std::string infoString() const {
994 if (!CountValid)
995 return PGOEdge::infoString();
996 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
997 .str();
998 }
999};
1000
1001using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1002
1003// This class stores the auxiliary information for each BB.
1004struct PGOUseBBInfo : public PGOBBInfo {
1005 uint64_t CountValue = 0;
1006 bool CountValid;
1007 int32_t UnknownCountInEdge = 0;
1008 int32_t UnknownCountOutEdge = 0;
1009 DirectEdges InEdges;
1010 DirectEdges OutEdges;
1011
1012 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX), CountValid(false) {}
1013
1014 // Set the profile count value for this BB.
1015 void setBBInfoCount(uint64_t Value) {
1016 CountValue = Value;
1017 CountValid = true;
1018 }
1019
1020 // Return the information string of this object.
1021 std::string infoString() const {
1022 if (!CountValid)
1023 return PGOBBInfo::infoString();
1024 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue))
1025 .str();
1026 }
1027
1028 // Add an OutEdge and update the edge count.
1029 void addOutEdge(PGOUseEdge *E) {
1030 OutEdges.push_back(E);
1031 UnknownCountOutEdge++;
1032 }
1033
1034 // Add an InEdge and update the edge count.
1035 void addInEdge(PGOUseEdge *E) {
1036 InEdges.push_back(E);
1037 UnknownCountInEdge++;
1038 }
1039};
1040
1041} // end anonymous namespace
1042
1043// Sum up the count values for all the edges.
1045 uint64_t Total = 0;
1046 for (const auto &E : Edges) {
1047 if (E->Removed)
1048 continue;
1049 Total += E->CountValue;
1050 }
1051 return Total;
1052}
1053
1054namespace {
1055
1056class PGOUseFunc {
1057public:
1058 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1059 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1061 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1062 bool HasSingleByteCoverage)
1063 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1064 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1065 InstrumentFuncEntry, HasSingleByteCoverage),
1066 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1067
1068 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1069
1070 // Read counts for the instrumented BB from profile.
1071 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1073
1074 // Populate the counts for all BBs.
1075 void populateCounters();
1076
1077 // Set block coverage based on profile coverage values.
1078 void populateCoverage(IndexedInstrProfReader *PGOReader);
1079
1080 // Set the branch weights based on the count values.
1081 void setBranchWeights();
1082
1083 // Annotate the value profile call sites for all value kind.
1084 void annotateValueSites();
1085
1086 // Annotate the value profile call sites for one value kind.
1087 void annotateValueSites(uint32_t Kind);
1088
1089 // Annotate the irreducible loop header weights.
1090 void annotateIrrLoopHeaderWeights();
1091
1092 // The hotness of the function from the profile count.
1093 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1094
1095 // Return the function hotness from the profile.
1096 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1097
1098 // Return the function hash.
1099 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1100
1101 // Return the profile record for this function.
1102 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1103
1104 // Return the auxiliary BB information.
1105 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1106 return FuncInfo.getBBInfo(BB);
1107 }
1108
1109 // Return the auxiliary BB information if available.
1110 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1111 return FuncInfo.findBBInfo(BB);
1112 }
1113
1114 Function &getFunc() const { return F; }
1115
1116 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1117
1118 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1119
1120private:
1121 Function &F;
1122 Module *M;
1124 ProfileSummaryInfo *PSI;
1125
1126 // This member stores the shared information with class PGOGenFunc.
1127 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1128
1129 // The maximum count value in the profile. This is only used in PGO use
1130 // compilation.
1131 uint64_t ProgramMaxCount;
1132
1133 // Position of counter that remains to be read.
1134 uint32_t CountPosition = 0;
1135
1136 // Total size of the profile count for this function.
1137 uint32_t ProfileCountSize = 0;
1138
1139 // ProfileRecord for this function.
1140 InstrProfRecord ProfileRecord;
1141
1142 // Function hotness info derived from profile.
1143 FuncFreqAttr FreqAttr;
1144
1145 // Is to use the context sensitive profile.
1146 bool IsCS;
1147
1148 // Find the Instrumented BB and set the value. Return false on error.
1149 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1150
1151 // Set the edge counter value for the unknown edge -- there should be only
1152 // one unknown edge.
1153 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1154
1155 // Set the hot/cold inline hints based on the count values.
1156 // FIXME: This function should be removed once the functionality in
1157 // the inliner is implemented.
1158 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1159 if (PSI->isHotCount(EntryCount))
1160 FreqAttr = FFA_Hot;
1161 else if (PSI->isColdCount(MaxCount))
1162 FreqAttr = FFA_Cold;
1163 }
1164};
1165
1166} // end anonymous namespace
1167
1168/// Set up InEdges/OutEdges for all BBs in the MST.
1169static void
1170setupBBInfoEdges(FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1171 // This is not required when there is block coverage inference.
1172 if (FuncInfo.BCI)
1173 return;
1174 for (auto &E : FuncInfo.MST.AllEdges) {
1175 if (E->Removed)
1176 continue;
1177 const BasicBlock *SrcBB = E->SrcBB;
1178 const BasicBlock *DestBB = E->DestBB;
1179 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1180 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1181 SrcInfo.addOutEdge(E.get());
1182 DestInfo.addInEdge(E.get());
1183 }
1184}
1185
1186// Visit all the edges and assign the count value for the instrumented
1187// edges and the BB. Return false on error.
1188bool PGOUseFunc::setInstrumentedCounts(
1189 const std::vector<uint64_t> &CountFromProfile) {
1190
1191 std::vector<BasicBlock *> InstrumentBBs;
1192 FuncInfo.getInstrumentBBs(InstrumentBBs);
1193
1194 setupBBInfoEdges(FuncInfo);
1195
1196 unsigned NumCounters =
1197 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1198 // The number of counters here should match the number of counters
1199 // in profile. Return if they mismatch.
1200 if (NumCounters != CountFromProfile.size()) {
1201 return false;
1202 }
1203 auto *FuncEntry = &*F.begin();
1204
1205 // Set the profile count to the Instrumented BBs.
1206 uint32_t I = 0;
1207 for (BasicBlock *InstrBB : InstrumentBBs) {
1208 uint64_t CountValue = CountFromProfile[I++];
1209 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1210 // If we reach here, we know that we have some nonzero count
1211 // values in this function. The entry count should not be 0.
1212 // Fix it if necessary.
1213 if (InstrBB == FuncEntry && CountValue == 0)
1214 CountValue = 1;
1215 Info.setBBInfoCount(CountValue);
1216 }
1217 ProfileCountSize = CountFromProfile.size();
1218 CountPosition = I;
1219
1220 // Set the edge count and update the count of unknown edges for BBs.
1221 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1222 E->setEdgeCount(Value);
1223 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1224 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1225 };
1226
1227 // Set the profile count the Instrumented edges. There are BBs that not in
1228 // MST but not instrumented. Need to set the edge count value so that we can
1229 // populate the profile counts later.
1230 for (auto &E : FuncInfo.MST.AllEdges) {
1231 if (E->Removed || E->InMST)
1232 continue;
1233 const BasicBlock *SrcBB = E->SrcBB;
1234 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1235
1236 // If only one out-edge, the edge profile count should be the same as BB
1237 // profile count.
1238 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1239 setEdgeCount(E.get(), SrcInfo.CountValue);
1240 else {
1241 const BasicBlock *DestBB = E->DestBB;
1242 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1243 // If only one in-edge, the edge profile count should be the same as BB
1244 // profile count.
1245 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1246 setEdgeCount(E.get(), DestInfo.CountValue);
1247 }
1248 if (E->CountValid)
1249 continue;
1250 // E's count should have been set from profile. If not, this meenas E skips
1251 // the instrumentation. We set the count to 0.
1252 setEdgeCount(E.get(), 0);
1253 }
1254 return true;
1255}
1256
1257// Set the count value for the unknown edge. There should be one and only one
1258// unknown edge in Edges vector.
1259void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1260 for (auto &E : Edges) {
1261 if (E->CountValid)
1262 continue;
1263 E->setEdgeCount(Value);
1264
1265 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1266 getBBInfo(E->DestBB).UnknownCountInEdge--;
1267 return;
1268 }
1269 llvm_unreachable("Cannot find the unknown count edge");
1270}
1271
1272// Emit function metadata indicating PGO profile mismatch.
1274 const char MetadataName[] = "instr_prof_hash_mismatch";
1276 // If this metadata already exists, ignore.
1277 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1278 if (Existing) {
1279 MDTuple *Tuple = cast<MDTuple>(Existing);
1280 for (const auto &N : Tuple->operands()) {
1281 if (N.equalsStr(MetadataName))
1282 return;
1283 Names.push_back(N.get());
1284 }
1285 }
1286
1287 MDBuilder MDB(ctx);
1288 Names.push_back(MDB.createString(MetadataName));
1289 MDNode *MD = MDTuple::get(ctx, Names);
1290 F.setMetadata(LLVMContext::MD_annotation, MD);
1291}
1292
1293void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1294 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1295 auto &Ctx = M->getContext();
1296 auto Err = IPE.get();
1297 bool SkipWarning = false;
1298 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1299 << FuncInfo.FuncName << ": ");
1300 if (Err == instrprof_error::unknown_function) {
1301 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1302 SkipWarning = !PGOWarnMissing;
1303 LLVM_DEBUG(dbgs() << "unknown function");
1304 } else if (Err == instrprof_error::hash_mismatch ||
1305 Err == instrprof_error::malformed) {
1306 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1307 SkipWarning =
1310 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1312 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1313 << " skip=" << SkipWarning << ")");
1314 // Emit function metadata indicating PGO profile mismatch.
1315 annotateFunctionWithHashMismatch(F, M->getContext());
1316 }
1317
1318 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1319 if (SkipWarning)
1320 return;
1321
1322 std::string Msg =
1323 IPE.message() + std::string(" ") + F.getName().str() +
1324 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1325 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1326 std::string(" count discarded");
1327
1328 Ctx.diagnose(
1329 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1330 });
1331}
1332
1333// Read the profile from ProfileFileName and assign the value to the
1334// instrumented BB and the edges. This function also updates ProgramMaxCount.
1335// Return true if the profile is successfully read, and false on errors.
1336bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1338 auto &Ctx = M->getContext();
1339 uint64_t MismatchedFuncSum = 0;
1341 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1342 &MismatchedFuncSum);
1343 if (Error E = Result.takeError()) {
1344 handleInstrProfError(std::move(E), MismatchedFuncSum);
1345 return false;
1346 }
1347 ProfileRecord = std::move(Result.get());
1348 PseudoKind = ProfileRecord.getCountPseudoKind();
1349 if (PseudoKind != InstrProfRecord::NotPseudo) {
1350 return true;
1351 }
1352 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1353
1354 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1355 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1356
1357 uint64_t ValueSum = 0;
1358 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1359 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1360 ValueSum += CountFromProfile[I];
1361 }
1362 AllZeros = (ValueSum == 0);
1363
1364 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1365
1366 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1367 getBBInfo(nullptr).UnknownCountInEdge = 2;
1368
1369 if (!setInstrumentedCounts(CountFromProfile)) {
1370 LLVM_DEBUG(
1371 dbgs() << "Inconsistent number of counts, skipping this function");
1372 Ctx.diagnose(DiagnosticInfoPGOProfile(
1373 M->getName().data(),
1374 Twine("Inconsistent number of counts in ") + F.getName().str() +
1375 Twine(": the profile may be stale or there is a function name "
1376 "collision."),
1377 DS_Warning));
1378 return false;
1379 }
1380 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1381 return true;
1382}
1383
1384void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1385 uint64_t MismatchedFuncSum = 0;
1387 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1388 &MismatchedFuncSum);
1389 if (auto Err = Result.takeError()) {
1390 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1391 return;
1392 }
1393
1394 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1396 unsigned Index = 0;
1397 for (auto &BB : F)
1398 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1399 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1400 assert(Index == CountsFromProfile.size());
1401
1402 // For each B in InverseDependencies[A], if A is covered then B is covered.
1404 InverseDependencies;
1405 for (auto &BB : F) {
1406 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1407 // If Dep is covered then BB is covered.
1408 InverseDependencies[Dep].insert(&BB);
1409 }
1410 }
1411
1412 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1413 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1414 for (auto &[BB, IsCovered] : Coverage)
1415 if (IsCovered)
1416 CoveredBlocksToProcess.push(BB);
1417
1418 while (!CoveredBlocksToProcess.empty()) {
1419 auto *CoveredBlock = CoveredBlocksToProcess.top();
1420 assert(Coverage[CoveredBlock]);
1421 CoveredBlocksToProcess.pop();
1422 for (auto *BB : InverseDependencies[CoveredBlock]) {
1423 // If CoveredBlock is covered then BB is covered.
1424 if (Coverage[BB])
1425 continue;
1426 Coverage[BB] = true;
1427 CoveredBlocksToProcess.push(BB);
1428 }
1429 }
1430
1431 // Annotate block coverage.
1432 MDBuilder MDB(F.getContext());
1433 // We set the entry count to 10000 if the entry block is covered so that BFI
1434 // can propagate a fraction of this count to the other covered blocks.
1435 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1436 for (auto &BB : F) {
1437 // For a block A and its successor B, we set the edge weight as follows:
1438 // If A is covered and B is covered, set weight=1.
1439 // If A is covered and B is uncovered, set weight=0.
1440 // If A is uncovered, set weight=1.
1441 // This setup will allow BFI to give nonzero profile counts to only covered
1442 // blocks.
1444 for (auto *Succ : successors(&BB))
1445 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1446 if (Weights.size() >= 2)
1447 BB.getTerminator()->setMetadata(LLVMContext::MD_prof,
1448 MDB.createBranchWeights(Weights));
1449 }
1450
1451 unsigned NumCorruptCoverage = 0;
1452 DominatorTree DT(F);
1453 LoopInfo LI(DT);
1454 BranchProbabilityInfo BPI(F, LI);
1455 BlockFrequencyInfo BFI(F, BPI, LI);
1456 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1457 if (auto C = BFI.getBlockProfileCount(&BB))
1458 return C == 0;
1459 return {};
1460 };
1461 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1462 for (auto &BB : F) {
1463 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1464 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1465 << "\n");
1466 // In some cases it is possible to find a covered block that has no covered
1467 // successors, e.g., when a block calls a function that may call exit(). In
1468 // those cases, BFI could find its successor to be covered while BCI could
1469 // find its successor to be dead.
1470 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1471 LLVM_DEBUG(
1472 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1473 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1474 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1475 ++NumCorruptCoverage;
1476 }
1477 if (Coverage[&BB])
1478 ++NumCoveredBlocks;
1479 }
1480 if (PGOVerifyBFI && NumCorruptCoverage) {
1481 auto &Ctx = M->getContext();
1482 Ctx.diagnose(DiagnosticInfoPGOProfile(
1483 M->getName().data(),
1484 Twine("Found inconsistent block coverage for function ") + F.getName() +
1485 " in " + Twine(NumCorruptCoverage) + " blocks.",
1486 DS_Warning));
1487 }
1489 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1490}
1491
1492// Populate the counters from instrumented BBs to all BBs.
1493// In the end of this operation, all BBs should have a valid count value.
1494void PGOUseFunc::populateCounters() {
1495 bool Changes = true;
1496 unsigned NumPasses = 0;
1497 while (Changes) {
1498 NumPasses++;
1499 Changes = false;
1500
1501 // For efficient traversal, it's better to start from the end as most
1502 // of the instrumented edges are at the end.
1503 for (auto &BB : reverse(F)) {
1504 PGOUseBBInfo *Count = findBBInfo(&BB);
1505 if (Count == nullptr)
1506 continue;
1507 if (!Count->CountValid) {
1508 if (Count->UnknownCountOutEdge == 0) {
1509 Count->CountValue = sumEdgeCount(Count->OutEdges);
1510 Count->CountValid = true;
1511 Changes = true;
1512 } else if (Count->UnknownCountInEdge == 0) {
1513 Count->CountValue = sumEdgeCount(Count->InEdges);
1514 Count->CountValid = true;
1515 Changes = true;
1516 }
1517 }
1518 if (Count->CountValid) {
1519 if (Count->UnknownCountOutEdge == 1) {
1520 uint64_t Total = 0;
1521 uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1522 // If the one of the successor block can early terminate (no-return),
1523 // we can end up with situation where out edge sum count is larger as
1524 // the source BB's count is collected by a post-dominated block.
1525 if (Count->CountValue > OutSum)
1526 Total = Count->CountValue - OutSum;
1527 setEdgeCount(Count->OutEdges, Total);
1528 Changes = true;
1529 }
1530 if (Count->UnknownCountInEdge == 1) {
1531 uint64_t Total = 0;
1532 uint64_t InSum = sumEdgeCount(Count->InEdges);
1533 if (Count->CountValue > InSum)
1534 Total = Count->CountValue - InSum;
1535 setEdgeCount(Count->InEdges, Total);
1536 Changes = true;
1537 }
1538 }
1539 }
1540 }
1541
1542 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1543 (void)NumPasses;
1544#ifndef NDEBUG
1545 // Assert every BB has a valid counter.
1546 for (auto &BB : F) {
1547 auto BI = findBBInfo(&BB);
1548 if (BI == nullptr)
1549 continue;
1550 assert(BI->CountValid && "BB count is not valid");
1551 }
1552#endif
1553 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1554 uint64_t FuncMaxCount = FuncEntryCount;
1555 for (auto &BB : F) {
1556 auto BI = findBBInfo(&BB);
1557 if (BI == nullptr)
1558 continue;
1559 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1560 }
1561
1562 // Fix the obviously inconsistent entry count.
1563 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1564 FuncEntryCount = 1;
1565 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1566 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1567
1568 // Now annotate select instructions
1569 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1570 assert(CountPosition == ProfileCountSize);
1571
1572 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1573}
1574
1575// Assign the scaled count values to the BB with multiple out edges.
1576void PGOUseFunc::setBranchWeights() {
1577 // Generate MD_prof metadata for every branch instruction.
1578 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1579 << " IsCS=" << IsCS << "\n");
1580 for (auto &BB : F) {
1581 Instruction *TI = BB.getTerminator();
1582 if (TI->getNumSuccessors() < 2)
1583 continue;
1584 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1585 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1586 isa<CallBrInst>(TI)))
1587 continue;
1588
1589 if (getBBInfo(&BB).CountValue == 0)
1590 continue;
1591
1592 // We have a non-zero Branch BB.
1593 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1594 unsigned Size = BBCountInfo.OutEdges.size();
1595 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1596 uint64_t MaxCount = 0;
1597 for (unsigned s = 0; s < Size; s++) {
1598 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1599 const BasicBlock *SrcBB = E->SrcBB;
1600 const BasicBlock *DestBB = E->DestBB;
1601 if (DestBB == nullptr)
1602 continue;
1603 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1604 uint64_t EdgeCount = E->CountValue;
1605 if (EdgeCount > MaxCount)
1606 MaxCount = EdgeCount;
1607 EdgeCounts[SuccNum] = EdgeCount;
1608 }
1609
1610 if (MaxCount)
1611 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1612 else {
1613 // A zero MaxCount can come about when we have a BB with a positive
1614 // count, and whose successor blocks all have 0 count. This can happen
1615 // when there is no exit block and the code exits via a noreturn function.
1616 auto &Ctx = M->getContext();
1617 Ctx.diagnose(DiagnosticInfoPGOProfile(
1618 M->getName().data(),
1619 Twine("Profile in ") + F.getName().str() +
1620 Twine(" partially ignored") +
1621 Twine(", possibly due to the lack of a return path."),
1622 DS_Warning));
1623 }
1624 }
1625}
1626
1628 for (BasicBlock *Pred : predecessors(BB)) {
1629 if (isa<IndirectBrInst>(Pred->getTerminator()))
1630 return true;
1631 }
1632 return false;
1633}
1634
1635void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1636 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1637 // Find irr loop headers
1638 for (auto &BB : F) {
1639 // As a heuristic also annotate indrectbr targets as they have a high chance
1640 // to become an irreducible loop header after the indirectbr tail
1641 // duplication.
1642 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1643 Instruction *TI = BB.getTerminator();
1644 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1645 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1646 }
1647 }
1648}
1649
1650void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1651 Module *M = F.getParent();
1652 IRBuilder<> Builder(&SI);
1653 Type *Int64Ty = Builder.getInt64Ty();
1654 Type *I8PtrTy = Builder.getInt8PtrTy();
1655 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1656 Builder.CreateCall(
1657 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1658 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1659 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1660 Builder.getInt32(*CurCtrIdx), Step});
1661 ++(*CurCtrIdx);
1662}
1663
1664void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1665 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1666 assert(*CurCtrIdx < CountFromProfile.size() &&
1667 "Out of bound access of counters");
1668 uint64_t SCounts[2];
1669 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1670 ++(*CurCtrIdx);
1671 uint64_t TotalCount = 0;
1672 auto BI = UseFunc->findBBInfo(SI.getParent());
1673 if (BI != nullptr)
1674 TotalCount = BI->CountValue;
1675 // False Count
1676 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1677 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1678 if (MaxCount)
1679 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1680}
1681
1682void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1683 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1684 return;
1685 // FIXME: do not handle this yet.
1686 if (SI.getCondition()->getType()->isVectorTy())
1687 return;
1688
1689 switch (Mode) {
1690 case VM_counting:
1691 NSIs++;
1692 return;
1693 case VM_instrument:
1694 instrumentOneSelectInst(SI);
1695 return;
1696 case VM_annotate:
1697 annotateOneSelectInst(SI);
1698 return;
1699 }
1700
1701 llvm_unreachable("Unknown visiting mode");
1702}
1703
1704// Traverse all valuesites and annotate the instructions for all value kind.
1705void PGOUseFunc::annotateValueSites() {
1707 return;
1708
1709 // Create the PGOFuncName meta data.
1710 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1711
1712 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1713 annotateValueSites(Kind);
1714}
1715
1716// Annotate the instructions for a specific value kind.
1717void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1718 assert(Kind <= IPVK_Last);
1719 unsigned ValueSiteIndex = 0;
1720 auto &ValueSites = FuncInfo.ValueSites[Kind];
1721 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1722 if (NumValueSites != ValueSites.size()) {
1723 auto &Ctx = M->getContext();
1724 Ctx.diagnose(DiagnosticInfoPGOProfile(
1725 M->getName().data(),
1726 Twine("Inconsistent number of value sites for ") +
1727 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1728 F.getName().str() +
1729 Twine("\", possibly due to the use of a stale profile."),
1730 DS_Warning));
1731 return;
1732 }
1733
1734 for (VPCandidateInfo &I : ValueSites) {
1735 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1736 << "): Index = " << ValueSiteIndex << " out of "
1737 << NumValueSites << "\n");
1738 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1739 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1740 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1742 ValueSiteIndex++;
1743 }
1744}
1745
1746// Collect the set of members for each Comdat in module M and store
1747// in ComdatMembers.
1749 Module &M,
1750 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1751 if (!DoComdatRenaming)
1752 return;
1753 for (Function &F : M)
1754 if (Comdat *C = F.getComdat())
1755 ComdatMembers.insert(std::make_pair(C, &F));
1756 for (GlobalVariable &GV : M.globals())
1757 if (Comdat *C = GV.getComdat())
1758 ComdatMembers.insert(std::make_pair(C, &GV));
1759 for (GlobalAlias &GA : M.aliases())
1760 if (Comdat *C = GA.getComdat())
1761 ComdatMembers.insert(std::make_pair(C, &GA));
1762}
1763
1764// Don't perform PGO instrumeatnion / profile-use.
1765static bool skipPGO(const Function &F) {
1766 if (F.isDeclaration())
1767 return true;
1768 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1769 return true;
1770 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1771 return true;
1772 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1773 return true;
1774
1775 // If there are too many critical edges, PGO might cause
1776 // compiler time problem. Skip PGO if the number of
1777 // critical edges execeed the threshold.
1778 unsigned NumCriticalEdges = 0;
1779 for (auto &BB : F) {
1780 const Instruction *TI = BB.getTerminator();
1781 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1782 if (isCriticalEdge(TI, I))
1783 NumCriticalEdges++;
1784 }
1785 }
1786 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1787 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1788 << ", NumCriticalEdges=" << NumCriticalEdges
1789 << " exceed the threshold. Skip PGO.\n");
1790 return true;
1791 }
1792
1793 return false;
1794}
1795
1797 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1799 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1800 // For the context-sensitve instrumentation, we should have a separated pass
1801 // (before LTO/ThinLTO linking) to create these variables.
1802 if (!IsCS)
1803 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1804 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1805 collectComdatMembers(M, ComdatMembers);
1806
1807 for (auto &F : M) {
1808 if (skipPGO(F))
1809 continue;
1810 auto &TLI = LookupTLI(F);
1811 auto *BPI = LookupBPI(F);
1812 auto *BFI = LookupBFI(F);
1813 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1814 }
1815 return true;
1816}
1817
1820 createProfileFileNameVar(M, CSInstrName);
1821 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1822 // will be retained.
1827 return PA;
1828}
1829
1832 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1833 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1835 };
1836 auto LookupBPI = [&FAM](Function &F) {
1838 };
1839 auto LookupBFI = [&FAM](Function &F) {
1841 };
1842
1843 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1844 return PreservedAnalyses::all();
1845
1846 return PreservedAnalyses::none();
1847}
1848
1849// Using the ratio b/w sums of profile count values and BFI count values to
1850// adjust the func entry count.
1851static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1852 BranchProbabilityInfo &NBPI) {
1853 Function &F = Func.getFunc();
1854 BlockFrequencyInfo NBFI(F, NBPI, LI);
1855#ifndef NDEBUG
1856 auto BFIEntryCount = F.getEntryCount();
1857 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1858 "Invalid BFI Entrycount");
1859#endif
1860 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1861 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1862 for (auto &BBI : F) {
1863 uint64_t CountValue = 0;
1864 uint64_t BFICountValue = 0;
1865 if (!Func.findBBInfo(&BBI))
1866 continue;
1867 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1868 CountValue = Func.getBBInfo(&BBI).CountValue;
1869 BFICountValue = *BFICount;
1870 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1871 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1872 }
1873 if (SumCount.isZero())
1874 return;
1875
1876 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1877 "Incorrect sum of BFI counts");
1878 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1879 return;
1880 double Scale = (SumCount / SumBFICount).convertToDouble();
1881 if (Scale < 1.001 && Scale > 0.999)
1882 return;
1883
1884 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1885 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1886 if (NewEntryCount == 0)
1887 NewEntryCount = 1;
1888 if (NewEntryCount != FuncEntryCount) {
1889 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1890 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1891 << ", entry_count " << FuncEntryCount << " --> "
1892 << NewEntryCount << "\n");
1893 }
1894}
1895
1896// Compare the profile count values with BFI count values, and print out
1897// the non-matching ones.
1898static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1900 uint64_t HotCountThreshold,
1902 Function &F = Func.getFunc();
1903 BlockFrequencyInfo NBFI(F, NBPI, LI);
1904 // bool PrintFunc = false;
1905 bool HotBBOnly = PGOVerifyHotBFI;
1906 StringRef Msg;
1908
1909 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1910 for (auto &BBI : F) {
1911 uint64_t CountValue = 0;
1912 uint64_t BFICountValue = 0;
1913
1914 if (Func.getBBInfo(&BBI).CountValid)
1915 CountValue = Func.getBBInfo(&BBI).CountValue;
1916
1917 BBNum++;
1918 if (CountValue)
1919 NonZeroBBNum++;
1920 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1921 if (BFICount)
1922 BFICountValue = *BFICount;
1923
1924 if (HotBBOnly) {
1925 bool rawIsHot = CountValue >= HotCountThreshold;
1926 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1927 bool rawIsCold = CountValue <= ColdCountThreshold;
1928 bool ShowCount = false;
1929 if (rawIsHot && !BFIIsHot) {
1930 Msg = "raw-Hot to BFI-nonHot";
1931 ShowCount = true;
1932 } else if (rawIsCold && BFIIsHot) {
1933 Msg = "raw-Cold to BFI-Hot";
1934 ShowCount = true;
1935 }
1936 if (!ShowCount)
1937 continue;
1938 } else {
1939 if ((CountValue < PGOVerifyBFICutoff) &&
1940 (BFICountValue < PGOVerifyBFICutoff))
1941 continue;
1942 uint64_t Diff = (BFICountValue >= CountValue)
1943 ? BFICountValue - CountValue
1944 : CountValue - BFICountValue;
1945 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1946 continue;
1947 }
1948 BBMisMatchNum++;
1949
1950 ORE.emit([&]() {
1952 F.getSubprogram(), &BBI);
1953 Remark << "BB " << ore::NV("Block", BBI.getName())
1954 << " Count=" << ore::NV("Count", CountValue)
1955 << " BFI_Count=" << ore::NV("Count", BFICountValue);
1956 if (!Msg.empty())
1957 Remark << " (" << Msg << ")";
1958 return Remark;
1959 });
1960 }
1961 if (BBMisMatchNum)
1962 ORE.emit([&]() {
1963 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1964 F.getSubprogram(), &F.getEntryBlock())
1965 << "In Func " << ore::NV("Function", F.getName())
1966 << ": Num_of_BB=" << ore::NV("Count", BBNum)
1967 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1968 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1969 });
1970}
1971
1973 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1974 vfs::FileSystem &FS,
1975 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1978 ProfileSummaryInfo *PSI, bool IsCS) {
1979 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1980 auto &Ctx = M.getContext();
1981 // Read the counter array from file.
1982 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
1983 ProfileRemappingFileName);
1984 if (Error E = ReaderOrErr.takeError()) {
1985 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1986 Ctx.diagnose(
1987 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1988 });
1989 return false;
1990 }
1991
1992 std::unique_ptr<IndexedInstrProfReader> PGOReader =
1993 std::move(ReaderOrErr.get());
1994 if (!PGOReader) {
1995 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1996 StringRef("Cannot get PGOReader")));
1997 return false;
1998 }
1999 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2000 return false;
2001
2002 // TODO: might need to change the warning once the clang option is finalized.
2003 if (!PGOReader->isIRLevelProfile()) {
2004 Ctx.diagnose(DiagnosticInfoPGOProfile(
2005 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2006 return false;
2007 }
2008 if (PGOReader->functionEntryOnly()) {
2009 Ctx.diagnose(DiagnosticInfoPGOProfile(
2010 ProfileFileName.data(),
2011 "Function entry profiles are not yet supported for optimization"));
2012 return false;
2013 }
2014
2015 // Add the profile summary (read from the header of the indexed summary) here
2016 // so that we can use it below when reading counters (which checks if the
2017 // function should be marked with a cold or inlinehint attribute).
2018 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2021 PSI->refresh();
2022
2023 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2024 collectComdatMembers(M, ComdatMembers);
2025 std::vector<Function *> HotFunctions;
2026 std::vector<Function *> ColdFunctions;
2027
2028 // If the profile is marked to always instrument the entry BB, do the
2029 // same. Note this can be overridden by the internal option in CFGMST.h
2030 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2031 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2032 InstrumentFuncEntry = PGOInstrumentEntry;
2033 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2034 for (auto &F : M) {
2035 if (skipPGO(F))
2036 continue;
2037 auto &TLI = LookupTLI(F);
2038 auto *BPI = LookupBPI(F);
2039 auto *BFI = LookupBFI(F);
2040 if (!HasSingleByteCoverage) {
2041 // Split indirectbr critical edges here before computing the MST rather
2042 // than later in getInstrBB() to avoid invalidating it.
2043 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2044 BFI);
2045 }
2046 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2047 InstrumentFuncEntry, HasSingleByteCoverage);
2048 if (HasSingleByteCoverage) {
2049 Func.populateCoverage(PGOReader.get());
2050 continue;
2051 }
2052 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2053 // it means the profile for the function is unrepresentative and this
2054 // function is actually hot / warm. We will reset the function hot / cold
2055 // attribute and drop all the profile counters.
2057 bool AllZeros = false;
2058 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2059 continue;
2060 if (AllZeros) {
2061 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2062 if (Func.getProgramMaxCount() != 0)
2063 ColdFunctions.push_back(&F);
2064 continue;
2065 }
2066 if (PseudoKind != InstrProfRecord::NotPseudo) {
2067 // Clear function attribute cold.
2068 if (F.hasFnAttribute(Attribute::Cold))
2069 F.removeFnAttr(Attribute::Cold);
2070 // Set function attribute as hot.
2071 if (PseudoKind == InstrProfRecord::PseudoHot)
2072 F.addFnAttr(Attribute::Hot);
2073 continue;
2074 }
2075 Func.populateCounters();
2076 Func.setBranchWeights();
2077 Func.annotateValueSites();
2078 Func.annotateIrrLoopHeaderWeights();
2079 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2080 if (FreqAttr == PGOUseFunc::FFA_Cold)
2081 ColdFunctions.push_back(&F);
2082 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2083 HotFunctions.push_back(&F);
2084 if (PGOViewCounts != PGOVCT_None &&
2085 (ViewBlockFreqFuncName.empty() ||
2086 F.getName().equals(ViewBlockFreqFuncName))) {
2088 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2089 std::make_unique<BranchProbabilityInfo>(F, LI);
2090 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2091 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2093 NewBFI->view();
2094 else if (PGOViewCounts == PGOVCT_Text) {
2095 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2096 NewBFI->print(dbgs());
2097 }
2098 }
2100 (ViewBlockFreqFuncName.empty() ||
2101 F.getName().equals(ViewBlockFreqFuncName))) {
2103 if (ViewBlockFreqFuncName.empty())
2104 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2105 else
2106 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2107 else if (PGOViewRawCounts == PGOVCT_Text) {
2108 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2109 Func.dumpInfo();
2110 }
2111 }
2112
2115 BranchProbabilityInfo NBPI(F, LI);
2116
2117 // Fix func entry count.
2118 if (PGOFixEntryCount)
2119 fixFuncEntryCount(Func, LI, NBPI);
2120
2121 // Verify BlockFrequency information.
2122 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2123 if (PGOVerifyHotBFI) {
2124 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2126 }
2127 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2128 }
2129 }
2130
2131 // Set function hotness attribute from the profile.
2132 // We have to apply these attributes at the end because their presence
2133 // can affect the BranchProbabilityInfo of any callers, resulting in an
2134 // inconsistent MST between prof-gen and prof-use.
2135 for (auto &F : HotFunctions) {
2136 F->addFnAttr(Attribute::InlineHint);
2137 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2138 << "\n");
2139 }
2140 for (auto &F : ColdFunctions) {
2141 // Only set when there is no Attribute::Hot set by the user. For Hot
2142 // attribute, user's annotation has the precedence over the profile.
2143 if (F->hasFnAttribute(Attribute::Hot)) {
2144 auto &Ctx = M.getContext();
2145 std::string Msg = std::string("Function ") + F->getName().str() +
2146 std::string(" is annotated as a hot function but"
2147 " the profile is cold");
2148 Ctx.diagnose(
2149 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2150 continue;
2151 }
2152 F->addFnAttr(Attribute::Cold);
2153 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2154 << "\n");
2155 }
2156 return true;
2157}
2158
2160 std::string Filename, std::string RemappingFilename, bool IsCS,
2162 : ProfileFileName(std::move(Filename)),
2163 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2164 FS(std::move(VFS)) {
2165 if (!PGOTestProfileFile.empty())
2166 ProfileFileName = PGOTestProfileFile;
2167 if (!PGOTestProfileRemappingFile.empty())
2168 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2169 if (!FS)
2171}
2172
2175
2176 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2177 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2179 };
2180 auto LookupBPI = [&FAM](Function &F) {
2182 };
2183 auto LookupBFI = [&FAM](Function &F) {
2185 };
2186
2187 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2188
2189 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2190 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2191 return PreservedAnalyses::all();
2192
2193 return PreservedAnalyses::none();
2194}
2195
2196static std::string getSimpleNodeName(const BasicBlock *Node) {
2197 if (!Node->getName().empty())
2198 return Node->getName().str();
2199
2200 std::string SimpleNodeName;
2201 raw_string_ostream OS(SimpleNodeName);
2202 Node->printAsOperand(OS, false);
2203 return OS.str();
2204}
2205
2207 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2208 MDBuilder MDB(M->getContext());
2209 assert(MaxCount > 0 && "Bad max count");
2210 uint64_t Scale = calculateCountScale(MaxCount);
2212 for (const auto &ECI : EdgeCounts)
2213 Weights.push_back(scaleBranchCount(ECI, Scale));
2214
2215 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2216 : Weights) {
2217 dbgs() << W << " ";
2218 } dbgs() << "\n";);
2219
2220 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2221
2222 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
2224 std::string BrCondStr = getBranchCondString(TI);
2225 if (BrCondStr.empty())
2226 return;
2227
2228 uint64_t WSum =
2229 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2230 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2231 uint64_t TotalCount =
2232 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2233 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2234 Scale = calculateCountScale(WSum);
2235 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2236 scaleBranchCount(WSum, Scale));
2237 std::string BranchProbStr;
2238 raw_string_ostream OS(BranchProbStr);
2239 OS << BP;
2240 OS << " (total count : " << TotalCount << ")";
2241 OS.flush();
2242 Function *F = TI->getParent()->getParent();
2244 ORE.emit([&]() {
2245 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2246 << BrCondStr << " is true with probability : " << BranchProbStr;
2247 });
2248 }
2249}
2250
2251namespace llvm {
2252
2254 MDBuilder MDB(M->getContext());
2255 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2256 MDB.createIrrLoopHeaderWeight(Count));
2257}
2258
2259template <> struct GraphTraits<PGOUseFunc *> {
2260 using NodeRef = const BasicBlock *;
2263
2264 static NodeRef getEntryNode(const PGOUseFunc *G) {
2265 return &G->getFunc().front();
2266 }
2267
2269 return succ_begin(N);
2270 }
2271
2272 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2273
2274 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2275 return nodes_iterator(G->getFunc().begin());
2276 }
2277
2278 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2279 return nodes_iterator(G->getFunc().end());
2280 }
2281};
2282
2283template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2284 explicit DOTGraphTraits(bool isSimple = false)
2286
2287 static std::string getGraphName(const PGOUseFunc *G) {
2288 return std::string(G->getFunc().getName());
2289 }
2290
2291 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2292 std::string Result;
2293 raw_string_ostream OS(Result);
2294
2295 OS << getSimpleNodeName(Node) << ":\\l";
2296 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2297 OS << "Count : ";
2298 if (BI && BI->CountValid)
2299 OS << BI->CountValue << "\\l";
2300 else
2301 OS << "Unknown\\l";
2302
2303 if (!PGOInstrSelect)
2304 return Result;
2305
2306 for (const Instruction &I : *Node) {
2307 if (!isa<SelectInst>(&I))
2308 continue;
2309 // Display scaled counts for SELECT instruction:
2310 OS << "SELECT : { T = ";
2311 uint64_t TC, FC;
2312 bool HasProf = extractBranchWeights(I, TC, FC);
2313 if (!HasProf)
2314 OS << "Unknown, F = Unknown }\\l";
2315 else
2316 OS << TC << ", F = " << FC << " }\\l";
2317 }
2318 return Result;
2319 }
2320};
2321
2322} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
assume Assume Builder
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static bool skipPGO(const Function &F)
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static cl::opt< bool > PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, cl::desc("Use the old CFG function hashing"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static void setupBBInfoEdges(FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:955
Class for arbitrary precision integers.
Definition: APInt.h:76
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: PassManager.h:90
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:337
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:260
std::vector< std::unique_ptr< Edge > > AllEdges
Definition: CFGMST.h:45
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:90
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:83
DenseMap< const BasicBlock *, std::unique_ptr< BBInfo > > BBInfos
Definition: CFGMST.h:48
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:241
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:801
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2213
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:209
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:203
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:197
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:386
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:474
Class to represent profile counts.
Definition: Function.h:254
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:506
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2628
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:382
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:237
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:734
const BasicBlock * getParent() const
Definition: Instruction.h:90
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:330
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:950
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1301
Tuple of metadata.
Definition: Metadata.h:1345
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1373
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:415
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1685
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Return the modified name for function F suitable to be used as the key for profile lookup.
Definition: InstrProf.cpp:342
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1223
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:313
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:970
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:423
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1118
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
InstrProfValueKind
Definition: InstrProf.h:254
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1283
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1306
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1854
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > DebugInfoCorrelate("debug-info-correlate", cl::desc("Use debug info to correlate profiles."), cl::init(false))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:691
std::vector< uint64_t > Counts
Definition: InstrProf.h:692
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:797
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:906
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:887