LLVM 20.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
// The idea of the algorithm is based on the fact that for each node (except for
// the entry and exit), the sum of incoming edge counts equals the sum of
// outgoing edge counts. The count of an edge on the spanning tree can be
// derived from those edges not on the spanning tree. Knuth proves this method
// instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
// (2) Pass PGOInstrumentationUse which reads the edge count profile and
// annotates the branch weights. It also reads the indirect call value
// profiling records and annotates the indirect call instructions.
32//
// To get precise counter information, these two passes need to be invoked at
// the same compilation point (so they see the same IR). For pass
// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
// the profile is opened at module level and passed to each PGOUseFunc instance.
// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
// in class FuncPGOInstrumentation.
40//
// Class PGOEdge represents a CFG edge and some auxiliary information. Class
// BBInfo contains auxiliary information for each BB. These two classes are used
// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
// from PGOEdge and BBInfo, respectively. They contain extra data structures
// used in populating profile counters.
// The MST implementation is in class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
117#include <algorithm>
118#include <cassert>
119#include <cstdint>
120#include <memory>
121#include <numeric>
122#include <optional>
123#include <stack>
124#include <string>
125#include <unordered_map>
126#include <utility>
127#include <vector>
128
129using namespace llvm;
132
133#define DEBUG_TYPE "pgo-instrumentation"
134
// Pass statistics. Every counter whose description ends in "in CSPGO" is the
// context-sensitive counterpart of the counter with the otherwise identical
// description; code in this file bumps one or the other depending on whether
// the instrumentation is context-sensitive.
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
STATISTIC(NumOfPGOEdge, "Number of edges.");
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
// Context-sensitive (CSPGO) counterparts.
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
STATISTIC(NumOfCSPGOSelectInsts,
          "Number of select instruction instrumented in CSPGO.");
STATISTIC(NumOfCSPGOMemIntrinsics,
          "Number of mem intrinsics instrumented in CSPGO.");
STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
STATISTIC(NumOfCSPGOFunc,
          "Number of functions having valid profile counts in CSPGO.");
STATISTIC(NumOfCSPGOMismatch,
          "Number of functions having mismatch profile in CSPGO.");
STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
// Block-coverage statistic (no CSPGO counterpart is declared here).
STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160// Command line option to specify the file to read profile from. This is
161// mainly used for testing.
163 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
164 cl::value_desc("filename"),
165 cl::desc("Specify the path of profile data file. This is"
166 "mainly for test purpose."));
168 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169 cl::value_desc("filename"),
170 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 "test purpose."));
172
173// Command line option to disable value profiling. The default is false:
174// i.e. value profiling is enabled by default. This is for debug purpose.
175static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
177 cl::desc("Disable Value Profiling"));
178
179// Command line option to set the maximum number of VP annotations to write to
180// the metadata for a single indirect call callsite.
182 "icp-max-annotations", cl::init(3), cl::Hidden,
183 cl::desc("Max number of annotations for a single indirect "
184 "call callsite"));
185
186// Command line option to set the maximum number of value annotations
187// to write to the metadata for a single memop intrinsic.
189 "memop-max-annotations", cl::init(4), cl::Hidden,
190 cl::desc("Max number of preicise value annotations for a single memop"
191 "intrinsic"));
192
193// Command line option to control appending FunctionHash to the name of a COMDAT
194// function. This is to avoid the hash mismatch caused by the preinliner.
196 "do-comdat-renaming", cl::init(false), cl::Hidden,
197 cl::desc("Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
199
200namespace llvm {
201// Command line option to enable/disable the warning about missing profile
202// information.
203cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
205 cl::desc("Use this option to turn on/off "
206 "warnings about missing profile data for "
207 "functions."));
208
209// Command line option to enable/disable the warning about a hash mismatch in
210// the profile data.
212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213 cl::desc("Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
215
216// Command line option to enable/disable the warning about a hash mismatch in
217// the profile data for Comdat functions, which often turns out to be false
218// positive due to the pre-instrumentation inline.
220 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221 cl::desc("The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
224} // namespace llvm
225
// Command line option to enable/disable select instruction instrumentation.
// Select instrumentation is on unless the option is given (cl::init(true)).
static cl::opt<bool>
    PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
                   cl::desc("Use this option to turn on/off SELECT "
                            "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
// Command line option to enable/disable memop intrinsic call.size profiling.
// Size profiling is on unless the option is given (cl::init(true)).
static cl::opt<bool>
    PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
                  cl::desc("Use this option to turn on/off "
                           "memory intrinsic size profiling."));
250
// Command line option to emit the annotated branch probability as
// optimization remarks. Off unless the option is given (cl::init(false)).
static cl::opt<bool>
    EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
                          cl::desc("When this option is on, the annotated "
                                   "branch probability will be emitted as "
                                   "optimization remarks: -{Rpass|"
                                   "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
264 "pgo-function-entry-coverage", cl::Hidden,
265 cl::desc(
266 "Use this option to enable function entry coverage instrumentation."));
267
269 "pgo-block-coverage",
270 cl::desc("Use this option to enable basic block coverage instrumentation"));
271
// Command line option to dump block coverage inference information as a dot
// file. It is consulted in the FuncPGOInstrumentation constructor, which calls
// BCI->viewBlockCoverageGraph() when block coverage inference is available
// and this option is set.
static cl::opt<bool>
    PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
                              cl::desc("Create a dot file of CFGs with block "
                                       "coverage inference information"));
276
278 "pgo-temporal-instrumentation",
279 cl::desc("Use this option to enable temporal instrumentation"));
280
// Command line option controlling whether the function entry count is fixed
// during profile use (per its description). On unless the option is given
// (cl::init(true)).
static cl::opt<bool>
    PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
                     cl::desc("Fix function entry count in profile use."));
284
286 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
287 cl::desc("Print out the non-match BFI count if a hot raw profile count "
288 "becomes non-hot, or a cold raw profile count becomes hot. "
289 "The print is enabled under -Rpass-analysis=pgo, or "
290 "internal option -pass-remakrs-analysis=pgo."));
291
293 "pgo-verify-bfi", cl::init(false), cl::Hidden,
294 cl::desc("Print out mismatched BFI counts after setting profile metadata "
295 "The print is enabled under -Rpass-analysis=pgo, or "
296 "internal option -pass-remakrs-analysis=pgo."));
297
299 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
300 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
301 "mismatched BFI if the difference percentage is greater than "
302 "this value (in percentage)."));
303
305 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
306 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
307 "profile count value is below."));
308
310 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
311 cl::value_desc("function name"),
312 cl::desc("Trace the hash of the function with this name."));
313
315 "pgo-function-size-threshold", cl::Hidden,
316 cl::desc("Do not instrument functions smaller than this threshold."));
317
319 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
320 cl::desc("Do not instrument functions with the number of critical edges "
321 " greater than this threshold."));
322
325
326namespace llvm {
327// Command line option to turn on CFG dot dump after profile annotation.
328// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
330
331// Command line option to specify the name of the function for CFG dump
332// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
334
335// Command line option to enable vtable value profiling. Defined in
336// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
340} // namespace llvm
341
344 !UseCtxProfile.empty();
345}
348}
349
350// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
351// profiling implicitly captures indirect call cases, but not other values.
352// Supporting other values is relatively straight-forward - just another counter
353// range within the context.
356}
357
358// Return a string describing the branch condition that can be
359// used in static branch probability heuristics:
360static std::string getBranchCondString(Instruction *TI) {
361 BranchInst *BI = dyn_cast<BranchInst>(TI);
362 if (!BI || !BI->isConditional())
363 return std::string();
364
365 Value *Cond = BI->getCondition();
366 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
367 if (!CI)
368 return std::string();
369
370 std::string result;
371 raw_string_ostream OS(result);
372 OS << CI->getPredicate() << "_";
373 CI->getOperand(0)->getType()->print(OS, true);
374
375 Value *RHS = CI->getOperand(1);
376 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
377 if (CV) {
378 if (CV->isZero())
379 OS << "_Zero";
380 else if (CV->isOne())
381 OS << "_One";
382 else if (CV->isMinusOne())
383 OS << "_MinusOne";
384 else
385 OS << "_Const";
386 }
387 OS.flush();
388 return result;
389}
390
391static const char *ValueProfKindDescr[] = {
392#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
394};
395
396// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
397// aware this is an ir_level profile so it can set the version flag.
399 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
400 Type *IntTy64 = Type::getInt64Ty(M.getContext());
401 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
402 if (IsCS)
403 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
405 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
407 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
409 ProfileVersion |=
410 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
412 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
414 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
415 auto IRLevelVersionVariable = new GlobalVariable(
416 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
417 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
418 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
419 Triple TT(M.getTargetTriple());
420 if (TT.supportsCOMDAT()) {
421 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
422 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
423 }
424 return IRLevelVersionVariable;
425}
426
427namespace {
428
/// The select instruction visitor plays three roles specified
/// by the mode. In \c VM_counting mode, it simply counts the number of
/// select instructions. In \c VM_instrument mode, it inserts code to count
/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
/// it reads the profile data and annotates the select instruction with
/// metadata.
enum VisitMode { VM_counting, VM_instrument, VM_annotate };
// Forward declaration: SelectInstVisitor below holds a PGOUseFunc pointer.
class PGOUseFunc;
436
437/// Instruction Visitor class to visit select instructions.
438struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
439 Function &F;
440 unsigned NSIs = 0; // Number of select instructions instrumented.
441 VisitMode Mode = VM_counting; // Visiting mode.
442 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
443 unsigned TotalNumCtrs = 0; // Total number of counters
444 GlobalVariable *FuncNameVar = nullptr;
445 uint64_t FuncHash = 0;
446 PGOUseFunc *UseFunc = nullptr;
447 bool HasSingleByteCoverage;
448
449 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
450 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
451
452 void countSelects() {
453 NSIs = 0;
454 Mode = VM_counting;
455 visit(F);
456 }
457
458 // Visit the IR stream and instrument all select instructions. \p
459 // Ind is a pointer to the counter index variable; \p TotalNC
460 // is the total number of counters; \p FNV is the pointer to the
461 // PGO function name var; \p FHash is the function hash.
462 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
463 uint64_t FHash) {
464 Mode = VM_instrument;
465 CurCtrIdx = Ind;
466 TotalNumCtrs = TotalNC;
467 FuncHash = FHash;
468 FuncNameVar = FNV;
469 visit(F);
470 }
471
472 // Visit the IR stream and annotate all select instructions.
473 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
474 Mode = VM_annotate;
475 UseFunc = UF;
476 CurCtrIdx = Ind;
477 visit(F);
478 }
479
480 void instrumentOneSelectInst(SelectInst &SI);
481 void annotateOneSelectInst(SelectInst &SI);
482
483 // Visit \p SI instruction and perform tasks according to visit mode.
484 void visitSelectInst(SelectInst &SI);
485
486 // Return the number of select instructions. This needs be called after
487 // countSelects().
488 unsigned getNumOfSelectInsts() const { return NSIs; }
489};
490
491/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
492/// based instrumentation.
493/// Note that the CFG can be a multi-graph. So there might be multiple edges
494/// with the same SrcBB and DestBB.
495struct PGOEdge {
496 BasicBlock *SrcBB;
497 BasicBlock *DestBB;
498 uint64_t Weight;
499 bool InMST = false;
500 bool Removed = false;
501 bool IsCritical = false;
502
503 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
504 : SrcBB(Src), DestBB(Dest), Weight(W) {}
505
506 /// Return the information string of an edge.
507 std::string infoString() const {
508 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
509 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
510 .str();
511 }
512};
513
/// Auxiliary per-basic-block information used by the MST.
struct PGOBBInfo {
  /// Initially points at this object itself.
  /// NOTE(review): presumably the group representative maintained by the MST
  /// construction in CFGMST.h; confirm there.
  PGOBBInfo *Group;
  /// Index of the basic block, as passed to the constructor. It is read
  /// through the BBInfo pointer when the CFG hash is computed, so the member
  /// must be declared for the constructor and infoString() to compile.
  uint32_t Index;
  uint32_t Rank = 0;

  PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}

  /// Return the information string of this object, in the form "Index=<N>".
  std::string infoString() const {
    return "Index=" + std::to_string(Index);
  }
};
527
528// This class implements the CFG edges. Note the CFG can be a multi-graph.
529template <class Edge, class BBInfo> class FuncPGOInstrumentation {
530private:
531 Function &F;
532
  // Whether this is context-sensitive instrumentation.
534 bool IsCS;
535
536 // A map that stores the Comdat group in function F.
537 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
538
540
541 void computeCFGHash();
542 void renameComdatFunction();
543
544public:
545 const TargetLibraryInfo &TLI;
546 std::vector<std::vector<VPCandidateInfo>> ValueSites;
547 SelectInstVisitor SIVisitor;
548 std::string FuncName;
549 std::string DeprecatedFuncName;
550 GlobalVariable *FuncNameVar;
551
552 // CFG hash value for this function.
553 uint64_t FunctionHash = 0;
554
555 // The Minimum Spanning Tree of function CFG.
557
558 const std::optional<BlockCoverageInference> BCI;
559
560 static std::optional<BlockCoverageInference>
561 constructBCI(Function &Func, bool HasSingleByteCoverage,
562 bool InstrumentFuncEntry) {
563 if (HasSingleByteCoverage)
564 return BlockCoverageInference(Func, InstrumentFuncEntry);
565 return {};
566 }
567
568 // Collect all the BBs that will be instrumented, and store them in
569 // InstrumentBBs.
570 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
571
572 // Give an edge, find the BB that will be instrumented.
573 // Return nullptr if there is no BB to be instrumented.
574 BasicBlock *getInstrBB(Edge *E);
575
576 // Return the auxiliary BB information.
577 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
578
579 // Return the auxiliary BB information if available.
580 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
581
582 // Dump edges and BB information.
583 void dumpInfo(StringRef Str = "") const {
584 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
585 " Hash: " + Twine(FunctionHash) + "\t" + Str);
586 }
587
588 FuncPGOInstrumentation(
589 Function &Func, TargetLibraryInfo &TLI,
590 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
591 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
592 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
593 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
594 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
595 TLI(TLI), ValueSites(IPVK_Last + 1),
596 SIVisitor(Func, HasSingleByteCoverage),
597 MST(F, InstrumentFuncEntry, BPI, BFI),
598 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
599 if (BCI && PGOViewBlockCoverageGraph)
600 BCI->viewBlockCoverageGraph();
601 // This should be done before CFG hash computation.
602 SIVisitor.countSelects();
603 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
604 if (!IsCS) {
605 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
606 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
607 NumOfPGOBB += MST.bbInfoSize();
608 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
610 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
611 } else {
612 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
613 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
614 NumOfCSPGOBB += MST.bbInfoSize();
615 }
616
617 FuncName = getIRPGOFuncName(F);
618 DeprecatedFuncName = getPGOFuncName(F);
619 computeCFGHash();
620 if (!ComdatMembers.empty())
621 renameComdatFunction();
622 LLVM_DEBUG(dumpInfo("after CFGMST"));
623
624 for (const auto &E : MST.allEdges()) {
625 if (E->Removed)
626 continue;
627 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
628 if (!E->InMST)
629 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
630 }
631
632 if (CreateGlobalVar)
633 FuncNameVar = createPGOFuncNameVar(F, FuncName);
634 }
635};
636
637} // end anonymous namespace
638
639// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
640// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
641// of selects, indirect calls, mem ops and edges.
642template <class Edge, class BBInfo>
643void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
644 std::vector<uint8_t> Indexes;
645 JamCRC JC;
646 for (auto &BB : F) {
647 for (BasicBlock *Succ : successors(&BB)) {
648 auto BI = findBBInfo(Succ);
649 if (BI == nullptr)
650 continue;
651 uint32_t Index = BI->Index;
652 for (int J = 0; J < 4; J++)
653 Indexes.push_back((uint8_t)(Index >> (J * 8)));
654 }
655 }
656 JC.update(Indexes);
657
658 JamCRC JCH;
659 // The higher 32 bits.
660 auto updateJCH = [&JCH](uint64_t Num) {
661 uint8_t Data[8];
663 JCH.update(Data);
664 };
665 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
666 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
667 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
668 if (BCI) {
669 updateJCH(BCI->getInstrumentedBlocksHash());
670 } else {
671 updateJCH((uint64_t)MST.numEdges());
672 }
673
674 // Hash format for context sensitive profile. Reserve 4 bits for other
675 // information.
676 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
677
678 // Reserve bit 60-63 for other information purpose.
679 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
680 if (IsCS)
682 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
683 << " CRC = " << JC.getCRC()
684 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
685 << ", Edges = " << MST.numEdges() << ", ICSites = "
686 << ValueSites[IPVK_IndirectCallTarget].size()
687 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
688 << ", High32 CRC = " << JCH.getCRC()
689 << ", Hash = " << FunctionHash << "\n";);
690
691 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
692 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
693 << " in building " << F.getParent()->getSourceFileName() << "\n";
694}
695
696// Check if we can safely rename this Comdat function.
697static bool canRenameComdat(
698 Function &F,
699 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
700 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
701 return false;
702
703 // FIXME: Current only handle those Comdat groups that only containing one
704 // function.
705 // (1) For a Comdat group containing multiple functions, we need to have a
706 // unique postfix based on the hashes for each function. There is a
707 // non-trivial code refactoring to do this efficiently.
708 // (2) Variables can not be renamed, so we can not rename Comdat function in a
709 // group including global vars.
710 Comdat *C = F.getComdat();
711 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
712 assert(!isa<GlobalAlias>(CM.second));
713 Function *FM = dyn_cast<Function>(CM.second);
714 if (FM != &F)
715 return false;
716 }
717 return true;
718}
719
720// Append the CFGHash to the Comdat function name.
721template <class Edge, class BBInfo>
722void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
723 if (!canRenameComdat(F, ComdatMembers))
724 return;
725 std::string OrigName = F.getName().str();
726 std::string NewFuncName =
727 Twine(F.getName() + "." + Twine(FunctionHash)).str();
728 F.setName(Twine(NewFuncName));
730 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
731 Comdat *NewComdat;
732 Module *M = F.getParent();
733 // For AvailableExternallyLinkage functions, change the linkage to
734 // LinkOnceODR and put them into comdat. This is because after renaming, there
735 // is no backup external copy available for the function.
736 if (!F.hasComdat()) {
738 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
740 F.setComdat(NewComdat);
741 return;
742 }
743
744 // This function belongs to a single function Comdat group.
745 Comdat *OrigComdat = F.getComdat();
746 std::string NewComdatName =
747 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
748 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
749 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
750
751 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
752 // Must be a function.
753 cast<Function>(CM.second)->setComdat(NewComdat);
754 }
755}
756
757/// Collect all the BBs that will be instruments and add them to
758/// `InstrumentBBs`.
759template <class Edge, class BBInfo>
760void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
761 std::vector<BasicBlock *> &InstrumentBBs) {
762 if (BCI) {
763 for (auto &BB : F)
764 if (BCI->shouldInstrumentBlock(BB))
765 InstrumentBBs.push_back(&BB);
766 return;
767 }
768
769 // Use a worklist as we will update the vector during the iteration.
770 std::vector<Edge *> EdgeList;
771 EdgeList.reserve(MST.numEdges());
772 for (const auto &E : MST.allEdges())
773 EdgeList.push_back(E.get());
774
775 for (auto &E : EdgeList) {
776 BasicBlock *InstrBB = getInstrBB(E);
777 if (InstrBB)
778 InstrumentBBs.push_back(InstrBB);
779 }
780}
781
// Given a CFG edge E to be instrumented, find which BB to place the
// instrumented code. The function will split the critical edge if necessary.
784template <class Edge, class BBInfo>
785BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
786 if (E->InMST || E->Removed)
787 return nullptr;
788
789 BasicBlock *SrcBB = E->SrcBB;
790 BasicBlock *DestBB = E->DestBB;
791 // For a fake edge, instrument the real BB.
792 if (SrcBB == nullptr)
793 return DestBB;
794 if (DestBB == nullptr)
795 return SrcBB;
796
797 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
798 // There are basic blocks (such as catchswitch) cannot be instrumented.
799 // If the returned first insertion point is the end of BB, skip this BB.
800 if (BB->getFirstInsertionPt() == BB->end())
801 return nullptr;
802 return BB;
803 };
804
805 // Instrument the SrcBB if it has a single successor,
806 // otherwise, the DestBB if this is not a critical edge.
807 Instruction *TI = SrcBB->getTerminator();
808 if (TI->getNumSuccessors() <= 1)
809 return canInstrument(SrcBB);
810 if (!E->IsCritical)
811 return canInstrument(DestBB);
812
813 // Some IndirectBr critical edges cannot be split by the previous
814 // SplitIndirectBrCriticalEdges call. Bail out.
815 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
816 BasicBlock *InstrBB =
817 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
818 if (!InstrBB) {
820 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
821 return nullptr;
822 }
823 // For a critical edge, we have to split. Instrument the newly
824 // created BB.
825 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
826 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
827 << " --> " << getBBInfo(DestBB).Index << "\n");
828 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
829 MST.addEdge(SrcBB, InstrBB, 0);
830 // Second one: Add new edge of InstrBB->DestBB.
831 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
832 NewEdge1.InMST = true;
833 E->Removed = true;
834
835 return canInstrument(InstrBB);
836}
837
// When generating value profiling calls on Windows routines that make use of
// handler funclets for exception processing, an operand bundle needs to be
// attached to the called function. This routine will set \p OpBundles to
// contain the funclet information, if any is needed, that should be placed on
// the generated value profiling call for the value profile candidate call.
843static void
847 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
848 if (!OrigCall)
849 return;
850
851 if (!isa<IntrinsicInst>(OrigCall)) {
852 // The instrumentation call should belong to the same funclet as a
853 // non-intrinsic call, so just copy the operand bundle, if any exists.
854 std::optional<OperandBundleUse> ParentFunclet =
855 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
856 if (ParentFunclet)
857 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
858 } else {
859 // Intrinsics or other instructions do not get funclet information from the
860 // front-end. Need to use the BlockColors that was computed by the routine
861 // colorEHFunclets to determine whether a funclet is needed.
862 if (!BlockColors.empty()) {
863 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
864 assert(CV.size() == 1 && "non-unique color for block!");
865 Instruction *EHPad = CV.front()->getFirstNonPHI();
866 if (EHPad->isEHPad())
867 OpBundles.emplace_back("funclet", EHPad);
868 }
869 }
870}
871
872// Visit all edges and instrument the edges not in MST, and do value profiling.
873// Critical edges will be split.
// NOTE(review): embedded lines 874-876 (the start of the signature, including
// the function name) are missing from this listing. The file header refers to
// this routine as instrumentOneFunc() -- confirm against the real file.
877 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
878 bool IsCS) {
879 if (!PGOBlockCoverage) {
880 // Split indirectbr critical edges here before computing the MST rather than
881 // later in getInstrBB() to avoid invalidating it.
882 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
883 }
884
// NOTE(review): embedded line 887 (the remaining constructor arguments) is
// missing from this listing.
885 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
886 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
888
// Name and CFGHash are the first two operands of every instrprof intrinsic
// call emitted below.
889 auto Name = FuncInfo.FuncNameVar;
890 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
891 FuncInfo.FunctionHash);
// NOTE(review): embedded line 892, which opens the block closed at line 901,
// is missing. The block emits one llvm.instrprof.cover call at the top of the
// entry block and returns early.
893 auto &EntryBB = F.getEntryBlock();
894 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
895 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
896 // i32 <index>)
897 Builder.CreateCall(
898 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
899 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
900 return;
901 }
902
// One counter per instrumented basic block plus one per counted select.
903 std::vector<BasicBlock *> InstrumentBBs;
904 FuncInfo.getInstrumentBBs(InstrumentBBs);
905 unsigned NumCounters =
906 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
907
// NOTE(review): embedded line 908, which opens the block closed at line 943,
// is missing. The block instruments callsites with llvm.instrprof.callsite.
909 auto *CSIntrinsic =
910 Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
911 // We want to count the instrumentable callsites, then instrument them. This
912 // is because the llvm.instrprof.callsite intrinsic has an argument (like
913 // the other instrprof intrinsics) capturing the total number of
914 // instrumented objects (counters, or callsites, in this case). In this
915 // case, we want that value so we can readily pass it to the compiler-rt
916 // APIs that may have to allocate memory based on the nr of callsites.
917 // The traversal logic is the same for both counting and instrumentation,
918 // just needs to be done in succession.
919 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
920 for (auto &BB : F)
921 for (auto &Instr : BB)
922 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
// Calls to intrinsics and inline asm are not visited.
923 if ((CS->getCalledFunction() &&
924 CS->getCalledFunction()->isIntrinsic()) ||
925 dyn_cast<InlineAsm>(CS->getCalledOperand()))
926 continue;
927 Visitor(CS);
928 }
929 };
930 // First, count callsites.
931 uint32_t TotalNrCallsites = 0;
932 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
933
934 // Now instrument.
935 uint32_t CallsiteIndex = 0;
936 Visit([&](auto *CB) {
// The intrinsic call is inserted immediately before the callsite.
937 IRBuilder<> Builder(CB);
938 Builder.CreateCall(CSIntrinsic,
939 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
940 Builder.getInt32(CallsiteIndex++),
941 CB->getCalledOperand()});
942 });
943 }
944
// I is the index of the next counter to hand out.
945 uint32_t I = 0;
// NOTE(review): embedded line 946, which opens the block closed at line 956,
// is missing. The block reserves counter slots (8 with PGOBlockCoverage,
// otherwise 1) and emits llvm.instrprof.timestamp in the entry block.
947 NumCounters += PGOBlockCoverage ? 8 : 1;
948 auto &EntryBB = F.getEntryBlock();
949 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
950 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
951 // i32 <index>)
952 Builder.CreateCall(
953 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
954 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
955 I += PGOBlockCoverage ? 8 : 1;
956 }
957
// Emit one counter update at the first insertion point of each block.
958 for (auto *InstrBB : InstrumentBBs) {
959 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
960 assert(Builder.GetInsertPoint() != InstrBB->end() &&
961 "Cannot get the Instrumentation point");
962 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
963 // i32 <index>)
// NOTE(review): embedded line 965 (the start of the callee expression that
// selects between instrprof_cover and instrprof_increment) is missing.
964 Builder.CreateCall(
966 ? Intrinsic::instrprof_cover
967 : Intrinsic::instrprof_increment),
968 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
969 }
970
971 // Now instrument select instructions:
972 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
973 FuncInfo.FunctionHash);
// Every reserved counter index must have been used exactly once.
974 assert(I == NumCounters);
975
// NOTE(review): embedded line 976 (the condition guarding this early return)
// is missing from this listing.
977 return;
978
979 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
980
981 // Intrinsic function calls do not have funclet operand bundles needed for
982 // Windows exception handling attached to them. However, if value profiling is
983 // inserted for one of these calls, then a funclet value will need to be set
984 // on the instrumentation call based on the funclet coloring.
// NOTE(review): embedded line 985 (the declaration of BlockColors) is missing.
986 if (F.hasPersonalityFn() &&
987 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
988 BlockColors = colorEHFunclets(F);
989
990 // For each VP Kind, walk the VP candidates and instrument each one.
991 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at zero for every value kind.
992 unsigned SiteIndex = 0;
993 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
994 continue;
995
996 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
997 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
998 << " site: CallSite Index = " << SiteIndex << "\n");
999
1000 IRBuilder<> Builder(Cand.InsertPt);
1001 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1002 "Cannot get the Instrumentation point");
1003
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
1004 Value *ToProfile = nullptr;
1005 if (Cand.V->getType()->isIntegerTy())
1006 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1007 else if (Cand.V->getType()->isPointerTy())
1008 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1009 assert(ToProfile && "value profiling Value is of unexpected type");
1010
// NOTE(review): embedded line 1011 (the declaration of OpBundles) is missing.
1012 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1013 Builder.CreateCall(
1014 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1015 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1016 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1017 OpBundles);
1018 }
1019 } // IPVK_First <= Kind <= IPVK_Last
1020}
1021
1022namespace {
1023
1024// This class represents a CFG edge in profile use compilation.
1025struct PGOUseEdge : public PGOEdge {
1026 using PGOEdge::PGOEdge;
1027
1028 std::optional<uint64_t> Count;
1029
1030 // Set edge count value
1031 void setEdgeCount(uint64_t Value) { Count = Value; }
1032
1033 // Return the information string for this object.
1034 std::string infoString() const {
1035 if (!Count)
1036 return PGOEdge::infoString();
1037 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1038 }
1039};
1040
1041using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1042
1043// This class stores the auxiliary information for each BB.
1044struct PGOUseBBInfo : public PGOBBInfo {
1045 std::optional<uint64_t> Count;
1046 int32_t UnknownCountInEdge = 0;
1047 int32_t UnknownCountOutEdge = 0;
1048 DirectEdges InEdges;
1049 DirectEdges OutEdges;
1050
1051 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1052
1053 // Set the profile count value for this BB.
1054 void setBBInfoCount(uint64_t Value) { Count = Value; }
1055
1056 // Return the information string of this object.
1057 std::string infoString() const {
1058 if (!Count)
1059 return PGOBBInfo::infoString();
1060 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1061 }
1062
1063 // Add an OutEdge and update the edge count.
1064 void addOutEdge(PGOUseEdge *E) {
1065 OutEdges.push_back(E);
1066 UnknownCountOutEdge++;
1067 }
1068
1069 // Add an InEdge and update the edge count.
1070 void addInEdge(PGOUseEdge *E) {
1071 InEdges.push_back(E);
1072 UnknownCountInEdge++;
1073 }
1074};
1075
1076} // end anonymous namespace
1077
1078// Sum up the count values for all the edges.
// Removed edges and edges without a known count contribute nothing.
// NOTE(review): embedded line 1079 (the signature) is missing from this
// listing; callers in this file invoke it as sumEdgeCount(<edge list>).
1080 uint64_t Total = 0;
1081 for (const auto &E : Edges) {
1082 if (E->Removed)
1083 continue;
1084 if (E->Count)
1085 Total += *E->Count;
1086 }
1087 return Total;
1088}
1089
1090namespace {
1091
// Per-function state for the profile-use compilation: it reads the counters
// from the profile, propagates them to blocks and edges, and annotates the IR.
1092class PGOUseFunc {
1093public:
// NOTE(review): embedded line 1096 (further constructor parameters; BPI and
// BFIin are used in the initializer list below) is missing from this listing.
1094 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1095 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1097 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1098 bool HasSingleByteCoverage)
1099 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1100 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1101 InstrumentFuncEntry, HasSingleByteCoverage),
1102 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1103
// Update statistics and, unless suppressed, emit a warning for a profile
// reading error.
1104 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1105
1106 // Read counts for the instrumented BB from profile.
// NOTE(review): embedded line 1108 (the rest of this declaration) is missing.
1107 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1109
1110 // Populate the counts for all BBs.
1111 void populateCounters();
1112
1113 // Set block coverage based on profile coverage values.
1114 void populateCoverage(IndexedInstrProfReader *PGOReader);
1115
1116 // Set the branch weights based on the count values.
1117 void setBranchWeights();
1118
1119 // Annotate the value profile call sites for all value kind.
1120 void annotateValueSites();
1121
1122 // Annotate the value profile call sites for one value kind.
1123 void annotateValueSites(uint32_t Kind);
1124
1125 // Annotate the irreducible loop header weights.
1126 void annotateIrrLoopHeaderWeights();
1127
1128 // The hotness of the function from the profile count.
1129 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1130
1131 // Return the function hotness from the profile.
1132 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1133
1134 // Return the function hash.
1135 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1136
1137 // Return the profile record for this function;
1138 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1139
1140 // Return the auxiliary BB information.
1141 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1142 return FuncInfo.getBBInfo(BB);
1143 }
1144
1145 // Return the auxiliary BB information if available.
1146 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1147 return FuncInfo.findBBInfo(BB);
1148 }
1149
// Return the function this object was created for.
1150 Function &getFunc() const { return F; }
1151
// Forward to FuncInfo.dumpInfo with the given label.
1152 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1153
// Return the maximum function count stored by readCounters().
1154 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1155
1156private:
1157 Function &F;
1158 Module *M;
// NOTE(review): embedded line 1159 (presumably the BFI member initialized in
// the constructor) is missing from this listing.
1160 ProfileSummaryInfo *PSI;
1161
1162 // This member stores the shared information with class PGOGenFunc.
1163 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1164
1165 // The maximum count value in the profile. This is only used in PGO use
1166 // compilation.
1167 uint64_t ProgramMaxCount;
1168
1169 // Position of counter that remains to be read.
1170 uint32_t CountPosition = 0;
1171
1172 // Total size of the profile count for this function.
1173 uint32_t ProfileCountSize = 0;
1174
1175 // ProfileRecord for this function.
1176 InstrProfRecord ProfileRecord;
1177
1178 // Function hotness info derived from profile.
1179 FuncFreqAttr FreqAttr;
1180
1181 // Is to use the context sensitive profile.
1182 bool IsCS;
1183
// NOTE(review): embedded line 1184 (presumably the VPC member initialized in
// the constructor) is missing from this listing.
1185
1186 // Find the Instrumented BB and set the value. Return false on error.
1187 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1188
1189 // Set the edge counter value for the unknown edge -- there should be only
1190 // one unknown edge.
1191 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1192
1193 // Set the hot/cold inline hints based on the count values.
1194 // FIXME: This function should be removed once the functionality in
1195 // the inliner is implemented.
1196 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
// Hot is decided from the entry count; cold is only considered otherwise and
// is decided from the maximum count.
1197 if (PSI->isHotCount(EntryCount))
1198 FreqAttr = FFA_Hot;
1199 else if (PSI->isColdCount(MaxCount))
1200 FreqAttr = FFA_Cold;
1201 }
1202};
1203
1204} // end anonymous namespace
1205
1206/// Set up InEdges/OutEdges for all BBs in the MST.
/// Each non-removed edge is added as an out-edge of its source block and an
/// in-edge of its destination block.
// NOTE(review): embedded line 1207 (the start of the signature) is missing
// from this listing; a caller in this file invokes it as
// setupBBInfoEdges(FuncInfo).
1208 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1209 // This is not required when there is block coverage inference.
1210 if (FuncInfo.BCI)
1211 return;
1212 for (const auto &E : FuncInfo.MST.allEdges()) {
1213 if (E->Removed)
1214 continue;
1215 const BasicBlock *SrcBB = E->SrcBB;
1216 const BasicBlock *DestBB = E->DestBB;
1217 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1218 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1219 SrcInfo.addOutEdge(E.get());
1220 DestInfo.addInEdge(E.get());
1221 }
1222}
1223
1224// Visit all the edges and assign the count value for the instrumented
1225// edges and the BB. Return false on error.
1226bool PGOUseFunc::setInstrumentedCounts(
1227 const std::vector<uint64_t> &CountFromProfile) {
1228
1229 std::vector<BasicBlock *> InstrumentBBs;
1230 FuncInfo.getInstrumentBBs(InstrumentBBs);
1231
1232 setupBBInfoEdges(FuncInfo);
1233
1234 unsigned NumCounters =
1235 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1236 // The number of counters here should match the number of counters
1237 // in profile. Return if they mismatch.
1238 if (NumCounters != CountFromProfile.size()) {
1239 return false;
1240 }
1241 auto *FuncEntry = &*F.begin();
1242
1243 // Set the profile count to the Instrumented BBs.
1244 uint32_t I = 0;
1245 for (BasicBlock *InstrBB : InstrumentBBs) {
1246 uint64_t CountValue = CountFromProfile[I++];
1247 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1248 // If we reach here, we know that we have some nonzero count
1249 // values in this function. The entry count should not be 0.
1250 // Fix it if necessary.
1251 if (InstrBB == FuncEntry && CountValue == 0)
1252 CountValue = 1;
1253 Info.setBBInfoCount(CountValue);
1254 }
1255 ProfileCountSize = CountFromProfile.size();
1256 CountPosition = I;
1257
1258 // Set the edge count and update the count of unknown edges for BBs.
1259 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1260 E->setEdgeCount(Value);
1261 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1262 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1263 };
1264
1265 // Set the profile count the Instrumented edges. There are BBs that not in
1266 // MST but not instrumented. Need to set the edge count value so that we can
1267 // populate the profile counts later.
1268 for (const auto &E : FuncInfo.MST.allEdges()) {
1269 if (E->Removed || E->InMST)
1270 continue;
1271 const BasicBlock *SrcBB = E->SrcBB;
1272 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1273
1274 // If only one out-edge, the edge profile count should be the same as BB
1275 // profile count.
1276 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1277 setEdgeCount(E.get(), *SrcInfo.Count);
1278 else {
1279 const BasicBlock *DestBB = E->DestBB;
1280 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1281 // If only one in-edge, the edge profile count should be the same as BB
1282 // profile count.
1283 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1284 setEdgeCount(E.get(), *DestInfo.Count);
1285 }
1286 if (E->Count)
1287 continue;
1288 // E's count should have been set from profile. If not, this meenas E skips
1289 // the instrumentation. We set the count to 0.
1290 setEdgeCount(E.get(), 0);
1291 }
1292 return true;
1293}
1294
1295// Set the count value for the unknown edge. There should be one and only one
1296// unknown edge in Edges vector.
1297void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1298 for (auto &E : Edges) {
1299 if (E->Count)
1300 continue;
1301 E->setEdgeCount(Value);
1302
1303 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1304 getBBInfo(E->DestBB).UnknownCountInEdge--;
1305 return;
1306 }
1307 llvm_unreachable("Cannot find the unknown count edge");
1308}
1309
1310// Emit function metadata indicating PGO profile mismatch.
// The marker string is appended to the function's MD_annotation tuple, keeping
// any existing operands.
// NOTE(review): embedded lines 1311 (the signature) and 1313 (the declaration
// of Names) are missing from this listing; a caller in this file invokes it as
// annotateFunctionWithHashMismatch(F, M->getContext()).
1312 const char MetadataName[] = "instr_prof_hash_mismatch";
1314 // If this metadata already exists, ignore.
1315 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1316 if (Existing) {
1317 MDTuple *Tuple = cast<MDTuple>(Existing);
1318 for (const auto &N : Tuple->operands()) {
1319 if (N.equalsStr(MetadataName))
1320 return;
// Carry existing annotation operands over into the new tuple.
1321 Names.push_back(N.get());
1322 }
1323 }
1324
1325 MDBuilder MDB(ctx);
1326 Names.push_back(MDB.createString(MetadataName));
1327 MDNode *MD = MDTuple::get(ctx, Names);
1328 F.setMetadata(LLVMContext::MD_annotation, MD);
1329}
1330
// Handle an error returned while reading the profile record of this function:
// update the missing/mismatch statistics, annotate the function on a
// hash mismatch or malformed record, and emit a warning unless it is
// suppressed. MismatchedFuncSum is only used in the warning text.
1331void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1332 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1333 auto &Ctx = M->getContext();
1334 auto Err = IPE.get();
1335 bool SkipWarning = false;
1336 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1337 << FuncInfo.FuncName << ": ");
1338 if (Err == instrprof_error::unknown_function) {
// No record for this function: warn only when PGOWarnMissing is set.
1339 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1340 SkipWarning = !PGOWarnMissing;
1341 LLVM_DEBUG(dbgs() << "unknown function");
1342 } else if (Err == instrprof_error::hash_mismatch ||
1343 Err == instrprof_error::malformed) {
1344 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
// NOTE(review): embedded lines 1346-1347 and 1349 (parts of the SkipWarning
// expression) are missing from this listing.
1345 SkipWarning =
1348 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1350 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1351 << " skip=" << SkipWarning << ")");
1352 // Emit function metadata indicating PGO profile mismatch.
1353 annotateFunctionWithHashMismatch(F, M->getContext());
1354 }
1355
1356 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1357 if (SkipWarning)
1358 return;
1359
// Build the warning text from the error message, function name and hash.
1360 std::string Msg =
1361 IPE.message() + std::string(" ") + F.getName().str() +
1362 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1363 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1364 std::string(" count discarded");
1365
1366 Ctx.diagnose(
1367 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1368 });
1369}
1370
1371// Read the profile from ProfileFileName and assign the value to the
1372// instrumented BB and the edges. This function also updates ProgramMaxCount.
1373// Return true if the profile are successfully read, and false on errors.
// AllZeros is set to whether all counters in the record sum to zero.
// NOTE(review): embedded lines 1375 (the rest of the parameter list, which
// presumably declares PseudoKind) and 1378 (the start of the statement that
// declares Result) are missing from this listing.
1374bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1376 auto &Ctx = M->getContext();
1377 uint64_t MismatchedFuncSum = 0;
1379 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1380 &MismatchedFuncSum);
1381 if (Error E = Result.takeError()) {
1382 handleInstrProfError(std::move(E), MismatchedFuncSum);
1383 return false;
1384 }
1385 ProfileRecord = std::move(Result.get());
// Pseudo-count records are reported as success without touching the counters.
1386 PseudoKind = ProfileRecord.getCountPseudoKind();
1387 if (PseudoKind != InstrProfRecord::NotPseudo) {
1388 return true;
1389 }
1390 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1391
1392 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1393 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1394
1395 uint64_t ValueSum = 0;
1396 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1397 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1398 ValueSum += CountFromProfile[I];
1399 }
1400 AllZeros = (ValueSum == 0);
1401
1402 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1403
// The info entry keyed by nullptr starts with two unknown edges each way.
1404 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1405 getBBInfo(nullptr).UnknownCountInEdge = 2;
1406
1407 if (!setInstrumentedCounts(CountFromProfile)) {
1408 LLVM_DEBUG(
1409 dbgs() << "Inconsistent number of counts, skipping this function");
1410 Ctx.diagnose(DiagnosticInfoPGOProfile(
1411 M->getName().data(),
1412 Twine("Inconsistent number of counts in ") + F.getName().str() +
1413 Twine(": the profile may be stale or there is a function name "
1414 "collision."),
1415 DS_Warning));
1416 return false;
1417 }
1418 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1419 return true;
1420}
1421
// Read the coverage counters of this function from the profile, infer the
// coverage of the non-instrumented blocks, and annotate the function with an
// entry count and 0/1 branch weights that reflect the coverage.
// NOTE(review): embedded lines 1424 (start of the statement declaring Result),
// 1434 (declaration of Coverage), 1442 (start of the InverseDependencies
// declaration), 1482 (declaration of Weights) and 1527 (the condition guarding
// the final statement) are missing from this listing.
1422void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1423 uint64_t MismatchedFuncSum = 0;
1425 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1426 &MismatchedFuncSum);
1427 if (auto Err = Result.takeError()) {
1428 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1429 return;
1430 }
1431 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1432
1433 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
// Counters are consumed in function block order, one per instrumented block;
// a nonzero counter marks the block as covered.
1435 unsigned Index = 0;
1436 for (auto &BB : F)
1437 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1438 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1439 assert(Index == CountsFromProfile.size());
1440
1441 // For each B in InverseDependencies[A], if A is covered then B is covered.
1443 InverseDependencies;
1444 for (auto &BB : F) {
1445 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1446 // If Dep is covered then BB is covered.
1447 InverseDependencies[Dep].insert(&BB);
1448 }
1449 }
1450
1451 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1452 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1453 for (auto &[BB, IsCovered] : Coverage)
1454 if (IsCovered)
1455 CoveredBlocksToProcess.push(BB);
1456
1457 while (!CoveredBlocksToProcess.empty()) {
1458 auto *CoveredBlock = CoveredBlocksToProcess.top();
1459 assert(Coverage[CoveredBlock]);
1460 CoveredBlocksToProcess.pop();
1461 for (auto *BB : InverseDependencies[CoveredBlock]) {
1462 // If CoveredBlock is covered then BB is covered.
1463 if (Coverage[BB])
1464 continue;
1465 Coverage[BB] = true;
1466 CoveredBlocksToProcess.push(BB);
1467 }
1468 }
1469
1470 // Annotate block coverage.
1471 MDBuilder MDB(F.getContext());
1472 // We set the entry count to 10000 if the entry block is covered so that BFI
1473 // can propagate a fraction of this count to the other covered blocks.
1474 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1475 for (auto &BB : F) {
1476 // For a block A and its successor B, we set the edge weight as follows:
1477 // If A is covered and B is covered, set weight=1.
1478 // If A is covered and B is uncovered, set weight=0.
1479 // If A is uncovered, set weight=1.
1480 // This setup will allow BFI to give nonzero profile counts to only covered
1481 // blocks.
1483 for (auto *Succ : successors(&BB))
1484 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are only attached to terminators with at least two successors.
1485 if (Weights.size() >= 2)
1486 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1487 /*IsExpected=*/false);
1488 }
1489
// Recompute block frequencies from the weights just written and count the
// blocks where they disagree with the inferred coverage.
1490 unsigned NumCorruptCoverage = 0;
1491 DominatorTree DT(F);
1492 LoopInfo LI(DT);
1493 BranchProbabilityInfo BPI(F, LI);
1494 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether BFI gives BB a zero profile count, or an empty optional when
// BFI has no profile count for it.
1495 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1496 if (auto C = BFI.getBlockProfileCount(&BB))
1497 return C == 0;
1498 return {};
1499 };
1500 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1501 for (auto &BB : F) {
1502 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1503 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1504 << "\n");
1505 // In some cases it is possible to find a covered block that has no covered
1506 // successors, e.g., when a block calls a function that may call exit(). In
1507 // those cases, BFI could find its successor to be covered while BCI could
1508 // find its successor to be dead.
1509 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1510 LLVM_DEBUG(
1511 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1512 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1513 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1514 ++NumCorruptCoverage;
1515 }
1516 if (Coverage[&BB])
1517 ++NumCoveredBlocks;
1518 }
// The inconsistency warning is only emitted when PGOVerifyBFI is set.
1519 if (PGOVerifyBFI && NumCorruptCoverage) {
1520 auto &Ctx = M->getContext();
1521 Ctx.diagnose(DiagnosticInfoPGOProfile(
1522 M->getName().data(),
1523 Twine("Found inconsistent block coverage for function ") + F.getName() +
1524 " in " + Twine(NumCorruptCoverage) + " blocks.",
1525 DS_Warning));
1526 }
1528 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1529}
1530
1531// Populate the counters from instrumented BBs to all BBs.
1532// In the end of this operation, all BBs should have a valid count value.
// The propagation repeats until a full pass over the function changes nothing.
1533void PGOUseFunc::populateCounters() {
1534 bool Changes = true;
1535 unsigned NumPasses = 0;
1536 while (Changes) {
1537 NumPasses++;
1538 Changes = false;
1539
1540 // For efficient traversal, it's better to start from the end as most
1541 // of the instrumented edges are at the end.
1542 for (auto &BB : reverse(F)) {
1543 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1544 if (UseBBInfo == nullptr)
1545 continue;
// A block count is the sum of its out-edges or of its in-edges, whichever
// side has no unknown edge left.
1546 if (!UseBBInfo->Count) {
1547 if (UseBBInfo->UnknownCountOutEdge == 0) {
1548 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1549 Changes = true;
1550 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1551 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1552 Changes = true;
1553 }
1554 }
// With a known block count, a single unknown edge gets the block count minus
// the sum of the known edges, clamped at zero.
1555 if (UseBBInfo->Count) {
1556 if (UseBBInfo->UnknownCountOutEdge == 1) {
1557 uint64_t Total = 0;
1558 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1559 // If the one of the successor block can early terminate (no-return),
1560 // we can end up with situation where out edge sum count is larger as
1561 // the source BB's count is collected by a post-dominated block.
1562 if (*UseBBInfo->Count > OutSum)
1563 Total = *UseBBInfo->Count - OutSum;
1564 setEdgeCount(UseBBInfo->OutEdges, Total);
1565 Changes = true;
1566 }
1567 if (UseBBInfo->UnknownCountInEdge == 1) {
1568 uint64_t Total = 0;
1569 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1570 if (*UseBBInfo->Count > InSum)
1571 Total = *UseBBInfo->Count - InSum;
1572 setEdgeCount(UseBBInfo->InEdges, Total);
1573 Changes = true;
1574 }
1575 }
1576 }
1577 }
1578
1579 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1580 (void)NumPasses;
1581#ifndef NDEBUG
1582 // Assert every BB has a valid counter.
1583 for (auto &BB : F) {
1584 auto BI = findBBInfo(&BB);
1585 if (BI == nullptr)
1586 continue;
1587 assert(BI->Count && "BB count is not valid");
1588 }
1589#endif
// Compute the entry count and the maximum block count of the function.
1590 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1591 uint64_t FuncMaxCount = FuncEntryCount;
1592 for (auto &BB : F) {
1593 auto BI = findBBInfo(&BB);
1594 if (BI == nullptr)
1595 continue;
1596 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1597 }
1598
1599 // Fix the obviously inconsistent entry count.
1600 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1601 FuncEntryCount = 1;
// NOTE(review): embedded line 1602 is missing from this listing.
1603 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1604
1605 // Now annotate select instructions
1606 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
// All counters of the profile record must have been consumed by now.
1607 assert(CountPosition == ProfileCountSize);
1608
1609 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1610}
1611
1612// Assign the scaled count values to the BB with multiple out edges.
1613void PGOUseFunc::setBranchWeights() {
1614 // Generate MD_prof metadata for every branch instruction.
1615 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1616 << " IsCS=" << IsCS << "\n");
1617 for (auto &BB : F) {
1618 Instruction *TI = BB.getTerminator();
1619 if (TI->getNumSuccessors() < 2)
1620 continue;
1621 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1622 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1623 isa<CallBrInst>(TI)))
1624 continue;
1625
1626 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1627 if (!*BBCountInfo.Count)
1628 continue;
1629
1630 // We have a non-zero Branch BB.
1631 unsigned Size = BBCountInfo.OutEdges.size();
1632 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1633 uint64_t MaxCount = 0;
1634 for (unsigned s = 0; s < Size; s++) {
1635 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1636 const BasicBlock *SrcBB = E->SrcBB;
1637 const BasicBlock *DestBB = E->DestBB;
1638 if (DestBB == nullptr)
1639 continue;
1640 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1641 uint64_t EdgeCount = *E->Count;
1642 if (EdgeCount > MaxCount)
1643 MaxCount = EdgeCount;
1644 EdgeCounts[SuccNum] = EdgeCount;
1645 }
1646
1647 if (MaxCount)
1648 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1649 else {
1650 // A zero MaxCount can come about when we have a BB with a positive
1651 // count, and whose successor blocks all have 0 count. This can happen
1652 // when there is no exit block and the code exits via a noreturn function.
1653 auto &Ctx = M->getContext();
1654 Ctx.diagnose(DiagnosticInfoPGOProfile(
1655 M->getName().data(),
1656 Twine("Profile in ") + F.getName().str() +
1657 Twine(" partially ignored") +
1658 Twine(", possibly due to the lack of a return path."),
1659 DS_Warning));
1660 }
1661 }
1662}
1663
// Returns true when at least one predecessor of BB ends in an indirectbr.
// NOTE(review): embedded line 1664 (the signature) is missing from this
// listing; a caller in this file invokes it as isIndirectBrTarget(&BB).
1665 for (BasicBlock *Pred : predecessors(BB)) {
1666 if (isa<IndirectBrInst>(Pred->getTerminator()))
1667 return true;
1668 }
1669 return false;
1670}
1671
1672void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1673 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1674 // Find irr loop headers
1675 for (auto &BB : F) {
1676 // As a heuristic also annotate indrectbr targets as they have a high chance
1677 // to become an irreducible loop header after the indirectbr tail
1678 // duplication.
1679 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1680 Instruction *TI = BB.getTerminator();
1681 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1682 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1683 }
1684 }
1685}
1686
1687void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1688 Module *M = F.getParent();
1689 IRBuilder<> Builder(&SI);
1690 Type *Int64Ty = Builder.getInt64Ty();
1691 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1692 Builder.CreateCall(
1693 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1694 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1695 Builder.getInt32(*CurCtrIdx), Step});
1696 ++(*CurCtrIdx);
1697}
1698
1699void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1700 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1701 assert(*CurCtrIdx < CountFromProfile.size() &&
1702 "Out of bound access of counters");
1703 uint64_t SCounts[2];
1704 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1705 ++(*CurCtrIdx);
1706 uint64_t TotalCount = 0;
1707 auto BI = UseFunc->findBBInfo(SI.getParent());
1708 if (BI != nullptr)
1709 TotalCount = *BI->Count;
1710 // False Count
1711 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1712 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1713 if (MaxCount)
1714 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1715}
1716
1717void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1718 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1719 return;
1720 // FIXME: do not handle this yet.
1721 if (SI.getCondition()->getType()->isVectorTy())
1722 return;
1723
1724 switch (Mode) {
1725 case VM_counting:
1726 NSIs++;
1727 return;
1728 case VM_instrument:
1729 instrumentOneSelectInst(SI);
1730 return;
1731 case VM_annotate:
1732 annotateOneSelectInst(SI);
1733 return;
1734 }
1735
1736 llvm_unreachable("Unknown visiting mode");
1737}
1738
// Returns a per-value-kind limit; the fall-through result is MaxNumAnnotations.
// NOTE(review): embedded lines 1739 (the signature), 1741 and 1743 (the
// statements executed for the two conditions below) are missing from this
// listing; a caller in this file invokes it as
// getMaxNumAnnotations(<InstrProfValueKind>).
1740 if (ValueProfKind == IPVK_MemOPSize)
1742 if (ValueProfKind == llvm::IPVK_VTableTarget)
1744 return MaxNumAnnotations;
1745}
1746
1747// Traverse all valuesites and annotate the instructions for all value kind.
1748void PGOUseFunc::annotateValueSites() {
// NOTE(review): embedded line 1749 (the condition guarding this early return)
// is missing from this listing.
1750 return;
1751
1752 // Create the PGOFuncName meta data.
1753 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1754
// Annotate every value kind from IPVK_First through IPVK_Last.
1755 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1756 annotateValueSites(Kind);
1757}
1758
1759// Annotate the instructions for a specific value kind.
// A warning is emitted and nothing is annotated when the number of value sites
// in the profile record differs from the number found in the function.
1760void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1761 assert(Kind <= IPVK_Last);
1762 unsigned ValueSiteIndex = 0;
1763
1764 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1765
1766 // Since there isn't a reliable or fast way for profile reader to tell if a
1767 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1768 // value profile collector over the function IR to find the instrumented sites
1769 // iff function profile records shows the number of instrumented vtable sites
1770 // is not zero. Function cfg already takes the number of instrumented
1771 // indirect call sites into account so it doesn't hash the number of
1772 // instrumented vtables; as a side effect it makes it easier to enable
1773 // profiling and profile use in two steps if needed.
1774 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
// NOTE(review): embedded line 1777 (the end of this condition) is missing from
// this listing.
1775 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1776 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1778 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1779 auto &ValueSites = FuncInfo.ValueSites[Kind];
1780 if (NumValueSites != ValueSites.size()) {
1781 auto &Ctx = M->getContext();
1782 Ctx.diagnose(DiagnosticInfoPGOProfile(
1783 M->getName().data(),
1784 Twine("Inconsistent number of value sites for ") +
1785 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1786 F.getName().str() +
1787 Twine("\", possibly due to the use of a stale profile."),
1788 DS_Warning));
1789 return;
1790 }
1791
1792 for (VPCandidateInfo &I : ValueSites) {
1793 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1794 << "): Index = " << ValueSiteIndex << " out of "
1795 << NumValueSites << "\n");
// NOTE(review): embedded line 1796 (the callee of the call whose arguments
// follow) is missing from this listing.
1797 *M, *I.AnnotatedInst, ProfileRecord,
1798 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1799 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1800 ValueSiteIndex++;
1801 }
1802}
1803
1804// Collect the set of members for each Comdat in module M and store
1805// in ComdatMembers.
1807 Module &M,
1808 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1809 if (!DoComdatRenaming)
1810 return;
1811 for (Function &F : M)
1812 if (Comdat *C = F.getComdat())
1813 ComdatMembers.insert(std::make_pair(C, &F));
1814 for (GlobalVariable &GV : M.globals())
1815 if (Comdat *C = GV.getComdat())
1816 ComdatMembers.insert(std::make_pair(C, &GV));
1817 for (GlobalAlias &GA : M.aliases())
1818 if (Comdat *C = GA.getComdat())
1819 ComdatMembers.insert(std::make_pair(C, &GA));
1820}
1821
1822// Return true if we should not find instrumentation data for this function
1823static bool skipPGOUse(const Function &F) {
1824 if (F.isDeclaration())
1825 return true;
1826 // If there are too many critical edges, PGO might cause
1827 // compiler time problem. Skip PGO if the number of
1828 // critical edges execeed the threshold.
1829 unsigned NumCriticalEdges = 0;
1830 for (auto &BB : F) {
1831 const Instruction *TI = BB.getTerminator();
1832 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1833 if (isCriticalEdge(TI, I))
1834 NumCriticalEdges++;
1835 }
1836 }
1837 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1838 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1839 << ", NumCriticalEdges=" << NumCriticalEdges
1840 << " exceed the threshold. Skip PGO.\n");
1841 return true;
1842 }
1843 return false;
1844}
1845
1846// Return true if we should not instrument this function
1847static bool skipPGOGen(const Function &F) {
1848 if (skipPGOUse(F))
1849 return true;
1850 if (F.hasFnAttribute(llvm::Attribute::Naked))
1851 return true;
1852 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1853 return true;
1854 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1855 return true;
1856 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1857 return true;
1858 return false;
1859}
1860
1862 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1864 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1865 // For the context-sensitve instrumentation, we should have a separated pass
1866 // (before LTO/ThinLTO linking) to create these variables.
1867 if (!IsCS && !shouldInstrumentForCtxProf())
1868 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1869
1870 Triple TT(M.getTargetTriple());
1871 LLVMContext &Ctx = M.getContext();
1872 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1874 M.getName().data(),
1875 Twine("VTable value profiling is presently not "
1876 "supported for non-ELF object formats"),
1877 DS_Warning));
1878 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1879 collectComdatMembers(M, ComdatMembers);
1880
1881 for (auto &F : M) {
1882 if (skipPGOGen(F))
1883 continue;
1884 auto &TLI = LookupTLI(F);
1885 auto *BPI = LookupBPI(F);
1886 auto *BFI = LookupBFI(F);
1887 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1888 }
1889 return true;
1890}
1891
1894 createProfileFileNameVar(M, CSInstrName);
1895 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1896 // will be retained.
1898 if (ProfileSampling)
1903 return PA;
1904}
1905
1908 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1909 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1911 };
1912 auto LookupBPI = [&FAM](Function &F) {
1914 };
1915 auto LookupBFI = [&FAM](Function &F) {
1917 };
1918
1919 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1920 return PreservedAnalyses::all();
1921
1922 return PreservedAnalyses::none();
1923}
1924
1925// Using the ratio b/w sums of profile count values and BFI count values to
1926// adjust the func entry count.
1927static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1928 BranchProbabilityInfo &NBPI) {
1929 Function &F = Func.getFunc();
1930 BlockFrequencyInfo NBFI(F, NBPI, LI);
1931#ifndef NDEBUG
1932 auto BFIEntryCount = F.getEntryCount();
1933 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1934 "Invalid BFI Entrycount");
1935#endif
1936 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1937 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1938 for (auto &BBI : F) {
1939 uint64_t CountValue = 0;
1940 uint64_t BFICountValue = 0;
1941 if (!Func.findBBInfo(&BBI))
1942 continue;
1943 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1944 CountValue = *Func.getBBInfo(&BBI).Count;
1945 BFICountValue = *BFICount;
1946 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1947 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1948 }
1949 if (SumCount.isZero())
1950 return;
1951
1952 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1953 "Incorrect sum of BFI counts");
1954 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1955 return;
1956 double Scale = (SumCount / SumBFICount).convertToDouble();
1957 if (Scale < 1.001 && Scale > 0.999)
1958 return;
1959
1960 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1961 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1962 if (NewEntryCount == 0)
1963 NewEntryCount = 1;
1964 if (NewEntryCount != FuncEntryCount) {
1965 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1966 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1967 << ", entry_count " << FuncEntryCount << " --> "
1968 << NewEntryCount << "\n");
1969 }
1970}
1971
1972// Compare the profile count values with BFI count values, and print out
1973// the non-matching ones.
1974static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1976 uint64_t HotCountThreshold,
1978 Function &F = Func.getFunc();
1979 BlockFrequencyInfo NBFI(F, NBPI, LI);
1980 // bool PrintFunc = false;
1981 bool HotBBOnly = PGOVerifyHotBFI;
1982 StringRef Msg;
1984
1985 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1986 for (auto &BBI : F) {
1987 uint64_t CountValue = 0;
1988 uint64_t BFICountValue = 0;
1989
1990 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1991
1992 BBNum++;
1993 if (CountValue)
1994 NonZeroBBNum++;
1995 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1996 if (BFICount)
1997 BFICountValue = *BFICount;
1998
1999 if (HotBBOnly) {
2000 bool rawIsHot = CountValue >= HotCountThreshold;
2001 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2002 bool rawIsCold = CountValue <= ColdCountThreshold;
2003 bool ShowCount = false;
2004 if (rawIsHot && !BFIIsHot) {
2005 Msg = "raw-Hot to BFI-nonHot";
2006 ShowCount = true;
2007 } else if (rawIsCold && BFIIsHot) {
2008 Msg = "raw-Cold to BFI-Hot";
2009 ShowCount = true;
2010 }
2011 if (!ShowCount)
2012 continue;
2013 } else {
2014 if ((CountValue < PGOVerifyBFICutoff) &&
2015 (BFICountValue < PGOVerifyBFICutoff))
2016 continue;
2017 uint64_t Diff = (BFICountValue >= CountValue)
2018 ? BFICountValue - CountValue
2019 : CountValue - BFICountValue;
2020 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2021 continue;
2022 }
2023 BBMisMatchNum++;
2024
2025 ORE.emit([&]() {
2027 F.getSubprogram(), &BBI);
2028 Remark << "BB " << ore::NV("Block", BBI.getName())
2029 << " Count=" << ore::NV("Count", CountValue)
2030 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2031 if (!Msg.empty())
2032 Remark << " (" << Msg << ")";
2033 return Remark;
2034 });
2035 }
2036 if (BBMisMatchNum)
2037 ORE.emit([&]() {
2038 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2039 F.getSubprogram(), &F.getEntryBlock())
2040 << "In Func " << ore::NV("Function", F.getName())
2041 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2042 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2043 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2044 });
2045}
2046
// Parameter list and body of the profile-use driver. It creates an indexed
// profile reader for ProfileFileName, validates the profile kind, installs the
// profile summary on M, then walks the functions of M that skipPGOUse() does
// not reject and annotates them from the profile. Returns false when the
// reader cannot be created or the profile is rejected, true otherwise.
// NOTE(review): this listing carries embedded viewer line numbers, and gaps in
// that numbering (for example 2050 -> 2053) mark lines that are not shown
// here, including the line that names the function. Confirm against the
// original file before relying on any statement adjacent to such a gap.
2048 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2049 vfs::FileSystem &FS,
2050 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2053 ProfileSummaryInfo *PSI, bool IsCS) {
2054 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2055 auto &Ctx = M.getContext();
2056 // Read the counter array from file.
2057 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2058 ProfileRemappingFileName);
// Every error carried by the Expected is reported as a PGO profile diagnostic.
2059 if (Error E = ReaderOrErr.takeError()) {
2060 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2061 Ctx.diagnose(
2062 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2063 });
2064 return false;
2065 }
2066
2067 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2068 std::move(ReaderOrErr.get());
2069 if (!PGOReader) {
2070 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2071 StringRef("Cannot get PGOReader")));
2072 return false;
2073 }
// A context-sensitive run over a profile without CS data bails out without a
// diagnostic.
2074 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2075 return false;
2076
2077 // TODO: might need to change the warning once the clang option is finalized.
2078 if (!PGOReader->isIRLevelProfile()) {
2079 Ctx.diagnose(DiagnosticInfoPGOProfile(
2080 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2081 return false;
2082 }
2083 if (PGOReader->functionEntryOnly()) {
2084 Ctx.diagnose(DiagnosticInfoPGOProfile(
2085 ProfileFileName.data(),
2086 "Function entry profiles are not yet supported for optimization"));
2087 return false;
2088 }
2089
// NOTE(review): the statement opening the scope around this loop (viewer line
// 2090) is not shown; only its closing brace at 2098 is visible.
2091 for (GlobalVariable &G : M.globals()) {
2092 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2093 continue;
2094
2095 // Create the PGOFuncName meta data.
2096 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2097 }
2098 }
2099
2100 // Add the profile summary (read from the header of the indexed summary) here
2101 // so that we can use it below when reading counters (which checks if the
2102 // function should be marked with a cold or inlinehint attribute).
// NOTE(review): the remaining argument(s) of this call (viewer lines
// 2104-2105) are not shown.
2103 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2106 PSI->refresh();
2107
2108 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2109 collectComdatMembers(M, ComdatMembers);
// Hot/cold attribute changes are deferred to the end of the function; the
// comment above the final loops explains why.
2110 std::vector<Function *> HotFunctions;
2111 std::vector<Function *> ColdFunctions;
2112
2113 // If the profile marked as always instrument the entry BB, do the
2114 // same. Note this can be overwritten by the internal option in CFGMST.h
2115 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2116 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2117 InstrumentFuncEntry = PGOInstrumentEntry;
2118 InstrumentFuncEntry |= shouldInstrumentForCtxProf();
2119
2120 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2121 for (auto &F : M) {
2122 if (skipPGOUse(F))
2123 continue;
2124 auto &TLI = LookupTLI(F);
2125 auto *BPI = LookupBPI(F);
2126 auto *BFI = LookupBFI(F);
2127 if (!HasSingleByteCoverage) {
2128 // Split indirectbr critical edges here before computing the MST rather
2129 // than later in getInstrBB() to avoid invalidating it.
2130 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2131 BFI);
2132 }
2133 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2134 InstrumentFuncEntry, HasSingleByteCoverage);
// Coverage-only profiles take a separate path and skip all counter handling.
2135 if (HasSingleByteCoverage) {
2136 Func.populateCoverage(PGOReader.get());
2137 continue;
2138 }
2139 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2140 // it means the profile for the function is unrepresentative and this
2141 // function is actually hot / warm. We will reset the function hot / cold
2142 // attribute and drop all the profile counters.
// NOTE(review): the declaration of PseudoKind (viewer line 2143) is not shown.
2144 bool AllZeros = false;
2145 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2146 continue;
// All-zero counters: entry count becomes 0 and the function is queued as cold
// unless the program-wide max count is itself 0.
2147 if (AllZeros) {
2148 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2149 if (Func.getProgramMaxCount() != 0)
2150 ColdFunctions.push_back(&F);
2151 continue;
2152 }
2153 if (PseudoKind != InstrProfRecord::NotPseudo) {
2154 // Clear function attribute cold.
2155 if (F.hasFnAttribute(Attribute::Cold))
2156 F.removeFnAttr(Attribute::Cold);
2157 // Set function attribute as hot.
2158 if (PseudoKind == InstrProfRecord::PseudoHot)
2159 F.addFnAttr(Attribute::Hot);
2160 continue;
2161 }
// Regular path: derive all counts, then attach the profile metadata.
2162 Func.populateCounters();
2163 Func.setBranchWeights();
2164 Func.annotateValueSites();
2165 Func.annotateIrrLoopHeaderWeights();
2166 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2167 if (FreqAttr == PGOUseFunc::FFA_Cold)
2168 ColdFunctions.push_back(&F);
2169 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2170 HotFunctions.push_back(&F);
// Optional view of the annotated block frequencies, limited to
// ViewBlockFreqFuncName when that is non-empty.
// NOTE(review): the declaration of LI (2174) and the condition guarding
// NewBFI->view() (2179) are not shown.
2171 if (PGOViewCounts != PGOVCT_None &&
2172 (ViewBlockFreqFuncName.empty() ||
2173 F.getName() == ViewBlockFreqFuncName)) {
2175 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2176 std::make_unique<BranchProbabilityInfo>(F, LI);
2177 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2178 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2180 NewBFI->view();
2181 else if (PGOViewCounts == PGOVCT_Text) {
2182 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2183 NewBFI->print(dbgs());
2184 }
2185 }
// Optional view of the raw profile counts.
// NOTE(review): the head of this condition (2186) and the condition selecting
// the graph branch (2189) are not shown.
2187 (ViewBlockFreqFuncName.empty() ||
2188 F.getName() == ViewBlockFreqFuncName)) {
2190 if (ViewBlockFreqFuncName.empty())
2191 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2192 else
2193 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2194 else if (PGOViewRawCounts == PGOVCT_Text) {
2195 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2196 Func.dumpInfo();
2197 }
2198 }
2199
// NOTE(review): the guard opening this scope and the declaration of LI
// (2200-2201) are not shown.
2202 BranchProbabilityInfo NBPI(F, LI);
2203
2204 // Fix func entry count.
2205 if (PGOFixEntryCount)
2206 fixFuncEntryCount(Func, LI, NBPI);
2207
2208 // Verify BlockFrequency information.
// NOTE(review): within this listing ColdCountThreshold is never assigned in
// the block below (viewer line 2212 is not shown) — confirm.
2209 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2210 if (PGOVerifyHotBFI) {
2211 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2213 }
2214 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2215 }
2216 }
2217
2218 // Set function hotness attribute from the profile.
2219 // We have to apply these attributes at the end because their presence
2220 // can affect the BranchProbabilityInfo of any callers, resulting in an
2221 // inconsistent MST between prof-gen and prof-use.
2222 for (auto &F : HotFunctions) {
2223 F->addFnAttr(Attribute::InlineHint);
2224 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2225 << "\n");
2226 }
2227 for (auto &F : ColdFunctions) {
2228 // Only set when there is no Attribute::Hot set by the user. For Hot
2229 // attribute, user's annotation has the precedence over the profile.
2230 if (F->hasFnAttribute(Attribute::Hot)) {
2231 auto &Ctx = M.getContext();
2232 std::string Msg = std::string("Function ") + F->getName().str() +
2233 std::string(" is annotated as a hot function but"
2234 " the profile is cold");
2235 Ctx.diagnose(
2236 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2237 continue;
2238 }
2239 F->addFnAttr(Attribute::Cold);
2240 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2241 << "\n");
2242 }
2243 return true;
2244}
2245
2247 std::string Filename, std::string RemappingFilename, bool IsCS,
2249 : ProfileFileName(std::move(Filename)),
2250 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2251 FS(std::move(VFS)) {
2252 if (!PGOTestProfileFile.empty())
2253 ProfileFileName = PGOTestProfileFile;
2254 if (!PGOTestProfileRemappingFile.empty())
2255 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2256 if (!FS)
2258}
2259
2262
2263 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2264 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2266 };
2267 auto LookupBPI = [&FAM](Function &F) {
2269 };
2270 auto LookupBFI = [&FAM](Function &F) {
2272 };
2273
2274 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2275 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2276 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2277 return PreservedAnalyses::all();
2278
2279 return PreservedAnalyses::none();
2280}
2281
2282static std::string getSimpleNodeName(const BasicBlock *Node) {
2283 if (!Node->getName().empty())
2284 return Node->getName().str();
2285
2286 std::string SimpleNodeName;
2287 raw_string_ostream OS(SimpleNodeName);
2288 Node->printAsOperand(OS, false);
2289 return SimpleNodeName;
2290}
2291
2293 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2294 assert(MaxCount > 0 && "Bad max count");
2295 uint64_t Scale = calculateCountScale(MaxCount);
2297 for (const auto &ECI : EdgeCounts)
2298 Weights.push_back(scaleBranchCount(ECI, Scale));
2299
2300 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2301 : Weights) {
2302 dbgs() << W << " ";
2303 } dbgs() << "\n";);
2304
2305 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2306
2307 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2309 std::string BrCondStr = getBranchCondString(TI);
2310 if (BrCondStr.empty())
2311 return;
2312
2313 uint64_t WSum =
2314 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2315 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2316 uint64_t TotalCount =
2317 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2318 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2319 Scale = calculateCountScale(WSum);
2320 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2321 scaleBranchCount(WSum, Scale));
2322 std::string BranchProbStr;
2323 raw_string_ostream OS(BranchProbStr);
2324 OS << BP;
2325 OS << " (total count : " << TotalCount << ")";
2326 OS.flush();
2327 Function *F = TI->getParent()->getParent();
2329 ORE.emit([&]() {
2330 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2331 << BrCondStr << " is true with probability : " << BranchProbStr;
2332 });
2333 }
2334}
2335
2336namespace llvm {
2337
2339 MDBuilder MDB(M->getContext());
2340 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2341 MDB.createIrrLoopHeaderWeight(Count));
2342}
2343
// GraphTraits specialization that exposes the CFG of the function wrapped by
// a PGOUseFunc: nodes are its basic blocks, children are block successors.
// NOTE(review): this listing carries embedded viewer line numbers; the
// declarations at 2346-2347 (presumably the ChildIteratorType and
// nodes_iterator aliases used below) and the head of child_begin (2353) are
// not shown — confirm against the original file.
2344template <> struct GraphTraits<PGOUseFunc *> {
2345 using NodeRef = const BasicBlock *;
2348
// The entry node is the first block of the wrapped function.
2349 static NodeRef getEntryNode(const PGOUseFunc *G) {
2350 return &G->getFunc().front();
2351 }
2352
2354 return succ_begin(N);
2355 }
2356
2357 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2358
// nodes_begin/nodes_end span every basic block of the wrapped function.
2359 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2360 return nodes_iterator(G->getFunc().begin());
2361 }
2362
2363 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2364 return nodes_iterator(G->getFunc().end());
2365 }
2366};
2367
2368template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2369 explicit DOTGraphTraits(bool isSimple = false)
2371
2372 static std::string getGraphName(const PGOUseFunc *G) {
2373 return std::string(G->getFunc().getName());
2374 }
2375
2376 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2377 std::string Result;
2378 raw_string_ostream OS(Result);
2379
2380 OS << getSimpleNodeName(Node) << ":\\l";
2381 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2382 OS << "Count : ";
2383 if (BI && BI->Count)
2384 OS << *BI->Count << "\\l";
2385 else
2386 OS << "Unknown\\l";
2387
2388 if (!PGOInstrSelect)
2389 return Result;
2390
2391 for (const Instruction &I : *Node) {
2392 if (!isa<SelectInst>(&I))
2393 continue;
2394 // Display scaled counts for SELECT instruction:
2395 OS << "SELECT : { T = ";
2396 uint64_t TC, FC;
2397 bool HasProf = extractBranchWeights(I, TC, FC);
2398 if (!HasProf)
2399 OS << "Unknown, F = Unknown }\\l";
2400 else
2401 OS << TC << ", F = " << FC << " }\\l";
2402 }
2403 return Result;
2404 }
2405};
2406
2407} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
bool shouldInstrumentForCtxProf()
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
cl::opt< std::string > UseCtxProfile
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:994
Class for arbitrary precision integers.
Definition: APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:296
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:550
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2047
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:488
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2125
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2420
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:413
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1069
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
Tuple of metadata.
Definition: Metadata.h:1472
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1499
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:204
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:379
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1408
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:368
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1412
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:467
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1282
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:271
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1464
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1487
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:827
std::vector< uint64_t > Counts
Definition: InstrProf.h:828
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:925
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1027
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1008