LLVM 20.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// classes of PGOEdge and BBInfo, respectively. They contain extra data
45// structures used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
116#include <algorithm>
117#include <cassert>
118#include <cstdint>
119#include <memory>
120#include <numeric>
121#include <optional>
122#include <stack>
123#include <string>
124#include <unordered_map>
125#include <utility>
126#include <vector>
127
128using namespace llvm;
131
132#define DEBUG_TYPE "pgo-instrumentation"
133
134STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
135STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
136STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
137STATISTIC(NumOfPGOEdge, "Number of edges.");
138STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
139STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
140STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
141STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
142STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
143STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
144STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
145STATISTIC(NumOfCSPGOSelectInsts,
146 "Number of select instruction instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOMemIntrinsics,
148 "Number of mem intrinsics instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
150STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
151STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
152STATISTIC(NumOfCSPGOFunc,
153 "Number of functions having valid profile counts in CSPGO.");
154STATISTIC(NumOfCSPGOMismatch,
155 "Number of functions having mismatch profile in CSPGO.");
156STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
157STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
158
159// Command line option to specify the file to read profile from. This is
160// mainly used for testing.
162 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
163 cl::value_desc("filename"),
164 cl::desc("Specify the path of profile data file. This is"
165 "mainly for test purpose."));
167 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
168 cl::value_desc("filename"),
169 cl::desc("Specify the path of profile remapping file. This is mainly for "
170 "test purpose."));
171
172// Command line option to disable value profiling. The default is false:
173// i.e. value profiling is enabled by default. This is for debug purpose.
174static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
176 cl::desc("Disable Value Profiling"));
177
178// Command line option to set the maximum number of VP annotations to write to
179// the metadata for a single indirect call callsite.
181 "icp-max-annotations", cl::init(3), cl::Hidden,
182 cl::desc("Max number of annotations for a single indirect "
183 "call callsite"));
184
185// Command line option to set the maximum number of value annotations
186// to write to the metadata for a single memop intrinsic.
188 "memop-max-annotations", cl::init(4), cl::Hidden,
189 cl::desc("Max number of preicise value annotations for a single memop"
190 "intrinsic"));
191
192// Command line option to control appending FunctionHash to the name of a COMDAT
193// function. This is to avoid the hash mismatch caused by the preinliner.
195 "do-comdat-renaming", cl::init(false), cl::Hidden,
196 cl::desc("Append function hash to the name of COMDAT function to avoid "
197 "function hash mismatch due to the preinliner"));
198
199namespace llvm {
200// Command line option to enable/disable the warning about missing profile
201// information.
202cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
204 cl::desc("Use this option to turn on/off "
205 "warnings about missing profile data for "
206 "functions."));
207
208// Command line option to enable/disable the warning about a hash mismatch in
209// the profile data.
211 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
212 cl::desc("Use this option to turn off/on "
213 "warnings about profile cfg mismatch."));
214
215// Command line option to enable/disable the warning about a hash mismatch in
216// the profile data for Comdat functions, which often turns out to be false
217// positive due to the pre-instrumentation inline.
219 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
220 cl::desc("The option is used to turn on/off "
221 "warnings about hash mismatch for comdat "
222 "or weak functions."));
223} // namespace llvm
224
225// Command line option to enable/disable select instruction instrumentation.
226static cl::opt<bool>
227 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
228 cl::desc("Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
230
231// Command line option to turn on CFG dot or text dump of raw profile counts
233 "pgo-view-raw-counts", cl::Hidden,
234 cl::desc("A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
242 clEnumValN(PGOVCT_Text, "text", "show in text.")));
243
244// Command line option to enable/disable memop intrinsic call.size profiling.
245static cl::opt<bool>
246 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
247 cl::desc("Use this option to turn on/off "
248 "memory intrinsic size profiling."));
249
250// Emit branch probability as optimization remarks.
251static cl::opt<bool>
252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
253 cl::desc("When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
257
259 "pgo-instrument-entry", cl::init(false), cl::Hidden,
260 cl::desc("Force to instrument function entry basicblock."));
261
263 "pgo-function-entry-coverage", cl::Hidden,
264 cl::desc(
265 "Use this option to enable function entry coverage instrumentation."));
266
268 "pgo-block-coverage",
269 cl::desc("Use this option to enable basic block coverage instrumentation"));
270
271static cl::opt<bool>
272 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
273 cl::desc("Create a dot file of CFGs with block "
274 "coverage inference information"));
275
277 "pgo-temporal-instrumentation",
278 cl::desc("Use this option to enable temporal instrumentation"));
279
280static cl::opt<bool>
281 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
282 cl::desc("Fix function entry count in profile use."));
283
285 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
286 cl::desc("Print out the non-match BFI count if a hot raw profile count "
287 "becomes non-hot, or a cold raw profile count becomes hot. "
288 "The print is enabled under -Rpass-analysis=pgo, or "
289 "internal option -pass-remakrs-analysis=pgo."));
290
292 "pgo-verify-bfi", cl::init(false), cl::Hidden,
293 cl::desc("Print out mismatched BFI counts after setting profile metadata "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remakrs-analysis=pgo."));
296
298 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
299 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
300 "mismatched BFI if the difference percentage is greater than "
301 "this value (in percentage)."));
302
304 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
305 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
306 "profile count value is below."));
307
309 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
310 cl::value_desc("function name"),
311 cl::desc("Trace the hash of the function with this name."));
312
314 "pgo-function-size-threshold", cl::Hidden,
315 cl::desc("Do not instrument functions smaller than this threshold."));
316
318 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
319 cl::desc("Do not instrument functions with the number of critical edges "
320 " greater than this threshold."));
321
323
324namespace llvm {
325// Command line option to turn on CFG dot dump after profile annotation.
326// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
328
329// Command line option to specify the name of the function for CFG dump
330// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
332
333// Command line option to enable vtable value profiling. Defined in
334// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
338} // namespace llvm
339
340namespace {
// Bundles the module, function, comdat membership map and optional BPI/BFI
// analyses needed to instrument a single function. The only public operation
// is instrument(), which is declared here and defined out of line.
341class FunctionInstrumenter final {
342 Module &M;
343 Function &F;
// NOTE(review): the listing jumps from 343 to 345. The constructor below
// initializes a member named TLI, but its declaration is not visible here.
345 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
346 BranchProbabilityInfo *const BPI;
347 BlockFrequencyInfo *const BFI;
348
349 const PGOInstrumentationType InstrumentationType;
350
351 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
352 // Ctx profiling implicitly captures indirect call cases, but not other
353 // values. Supporting other values is relatively straight-forward - just
354 // another counter range within the context.
// True when the DisableValueProfiling option is set or when the
// instrumentation type is CTXPROF.
355 bool isValueProfilingDisabled() const {
356 return DisableValueProfiling ||
357 InstrumentationType == PGOInstrumentationType::CTXPROF;
358 }
359
// True when the PGOInstrumentEntry option is set or when the
// instrumentation type is CTXPROF.
360 bool shouldInstrumentEntryBB() const {
361 return PGOInstrumentEntry ||
362 InstrumentationType == PGOInstrumentationType::CTXPROF;
363 }
364
365public:
// NOTE(review): the listing jumps from 366 to 368, so the leading constructor
// parameters (those feeding M, F and TLI in the initializer list) are not
// visible here. The visible parameters default BPI/BFI to nullptr and the
// instrumentation type to FDO.
366 FunctionInstrumenter(
368 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
369 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
370 PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
371 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
372 InstrumentationType(InstrumentationType) {}
373
374 void instrument();
375};
376} // namespace
377
378// Return a string describing the branch condition that can be
379// used in static branch probability heuristics:
380static std::string getBranchCondString(Instruction *TI) {
381 BranchInst *BI = dyn_cast<BranchInst>(TI);
382 if (!BI || !BI->isConditional())
383 return std::string();
384
385 Value *Cond = BI->getCondition();
386 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
387 if (!CI)
388 return std::string();
389
390 std::string result;
391 raw_string_ostream OS(result);
392 OS << CI->getPredicate() << "_";
393 CI->getOperand(0)->getType()->print(OS, true);
394
395 Value *RHS = CI->getOperand(1);
396 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
397 if (CV) {
398 if (CV->isZero())
399 OS << "_Zero";
400 else if (CV->isOne())
401 OS << "_One";
402 else if (CV->isMinusOne())
403 OS << "_MinusOne";
404 else
405 OS << "_Const";
406 }
407 OS.flush();
408 return result;
409}
410
// Table of description strings, one per value-profile kind. Each
// VALUE_PROF_KIND(Enumerator, Value, Descr) expansion contributes only its
// Descr argument as an array element.
// NOTE(review): the listing jumps from 412 to 414; the line that actually
// expands VALUE_PROF_KIND (presumably an #include of the kind list) is not
// visible here.
411static const char *ValueProfKindDescr[] = {
412#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
414};
415
416// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
417// aware this is an ir_level profile so it can set the version flag.
// Builds the 64-bit profile version word (raw version plus variant mask bits)
// and creates a hidden-visibility constant global holding it. On targets
// that support COMDAT the variable is given external linkage and placed in a
// comdat named after itself; otherwise it keeps weak-any linkage.
// NOTE(review): the listing jumps from 418 to 420, so the function name and
// its first parameter (the body uses a Module named M) are not visible here.
418static GlobalVariable *
420 PGOInstrumentationType InstrumentationType) {
421 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
422 Type *IntTy64 = Type::getInt64Ty(M.getContext());
// Every profile produced here is tagged as IR-level.
423 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
424 if (InstrumentationType == PGOInstrumentationType::CSFDO)
425 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
426 if (PGOInstrumentEntry ||
427 InstrumentationType == PGOInstrumentationType::CTXPROF)
428 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
// NOTE(review): listing lines 429, 431, 434 and 436 are absent. The four
// assignments below appear unconditional here, but each is presumably guarded
// by a condition on one of the missing lines; confirm against the real file.
430 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
432 ProfileVersion |=
433 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
435 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
437 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
// Created as a constant (third argument true) with weak-any linkage.
438 auto IRLevelVersionVariable = new GlobalVariable(
439 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
440 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
441 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
442 Triple TT(M.getTargetTriple());
// With COMDAT support, switch to external linkage inside a same-named comdat.
443 if (TT.supportsCOMDAT()) {
444 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
445 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
446 }
447 return IRLevelVersionVariable;
448}
449
450namespace {
451
452/// The select instruction visitor plays three roles specified
453/// by the mode. In \c VM_counting mode, it simply counts the number of
454/// select instructions. In \c VM_instrument mode, it inserts code to count
455/// the number times TrueValue of select is taken. In \c VM_annotate mode,
456/// it reads the profile data and annotate the select instruction with metadata.
457enum VisitMode { VM_counting, VM_instrument, VM_annotate };
458class PGOUseFunc;
459
460/// Instruction Visitor class to visit select instructions.
461struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
462 Function &F;
463 unsigned NSIs = 0; // Number of select instructions instrumented.
464 VisitMode Mode = VM_counting; // Visiting mode.
465 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
466 unsigned TotalNumCtrs = 0; // Total number of counters
467 GlobalVariable *FuncNameVar = nullptr;
468 uint64_t FuncHash = 0;
469 PGOUseFunc *UseFunc = nullptr;
470 bool HasSingleByteCoverage;
471
472 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
473 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
474
475 void countSelects() {
476 NSIs = 0;
477 Mode = VM_counting;
478 visit(F);
479 }
480
481 // Visit the IR stream and instrument all select instructions. \p
482 // Ind is a pointer to the counter index variable; \p TotalNC
483 // is the total number of counters; \p FNV is the pointer to the
484 // PGO function name var; \p FHash is the function hash.
485 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
486 uint64_t FHash) {
487 Mode = VM_instrument;
488 CurCtrIdx = Ind;
489 TotalNumCtrs = TotalNC;
490 FuncHash = FHash;
491 FuncNameVar = FNV;
492 visit(F);
493 }
494
495 // Visit the IR stream and annotate all select instructions.
496 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
497 Mode = VM_annotate;
498 UseFunc = UF;
499 CurCtrIdx = Ind;
500 visit(F);
501 }
502
503 void instrumentOneSelectInst(SelectInst &SI);
504 void annotateOneSelectInst(SelectInst &SI);
505
506 // Visit \p SI instruction and perform tasks according to visit mode.
507 void visitSelectInst(SelectInst &SI);
508
509 // Return the number of select instructions. This needs be called after
510 // countSelects().
511 unsigned getNumOfSelectInsts() const { return NSIs; }
512};
513
514/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
515/// based instrumentation.
516/// Note that the CFG can be a multi-graph. So there might be multiple edges
517/// with the same SrcBB and DestBB.
518struct PGOEdge {
519 BasicBlock *SrcBB;
520 BasicBlock *DestBB;
521 uint64_t Weight;
522 bool InMST = false;
523 bool Removed = false;
524 bool IsCritical = false;
525
526 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
527 : SrcBB(Src), DestBB(Dest), Weight(W) {}
528
529 /// Return the information string of an edge.
530 std::string infoString() const {
531 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
532 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
533 .str();
534 }
535};
536
537/// This class stores the auxiliary information for each BB in the MST.
// Per-basic-block bookkeeping record. A freshly constructed record is its own
// Group (the constructor sets Group to `this`) and starts with Rank 0.
538struct PGOBBInfo {
539 PGOBBInfo *Group;
// NOTE(review): the listing jumps from 539 to 541. The constructor and
// infoString() use a member named Index whose declaration is not visible here.
541 uint32_t Rank = 0;
542
543 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
544
545 /// Return the information string of this object.
// The string has the form "Index=<Index>".
546 std::string infoString() const {
547 return (Twine("Index=") + Twine(Index)).str();
548 }
549};
550
551// This class implements the CFG edges. Note the CFG can be a multi-graph.
// Per-function state for PGO instrumentation, parameterized on the edge and
// basic-block info types. The constructor counts selects, collects value
// profiling sites, computes the CFG hash, optionally renames comdat functions
// and updates the pass statistics.
552template <class Edge, class BBInfo> class FuncPGOInstrumentation {
553private:
554 Function &F;
555
556 // Is this is context-sensitive instrumentation.
557 bool IsCS;
558
559 // A map that stores the Comdat group in function F.
560 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
561
// NOTE(review): the listing jumps from 561 to 563. The constructor initializes
// a member named VPC that is not declared in the visible lines.
563
564 void computeCFGHash();
565 void renameComdatFunction();
566
567public:
568 const TargetLibraryInfo &TLI;
// One vector of value-profiling candidates per kind; the constructor sizes it
// to IPVK_Last + 1.
569 std::vector<std::vector<VPCandidateInfo>> ValueSites;
570 SelectInstVisitor SIVisitor;
571 std::string FuncName;
572 std::string DeprecatedFuncName;
// Only assigned by the constructor when CreateGlobalVar is true.
573 GlobalVariable *FuncNameVar;
574
575 // CFG hash value for this function.
576 uint64_t FunctionHash = 0;
577
578 // The Minimum Spanning Tree of function CFG.
// NOTE(review): the listing jumps from 578 to 580; the declaration of the MST
// member used throughout this class is not visible here.
580
581 const std::optional<BlockCoverageInference> BCI;
582
// Returns a BlockCoverageInference for Func when single-byte coverage is
// requested, and an empty optional otherwise.
583 static std::optional<BlockCoverageInference>
584 constructBCI(Function &Func, bool HasSingleByteCoverage,
585 bool InstrumentFuncEntry) {
586 if (HasSingleByteCoverage)
587 return BlockCoverageInference(Func, InstrumentFuncEntry);
588 return {};
589 }
590
591 // Collect all the BBs that will be instrumented, and store them in
592 // InstrumentBBs.
593 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
594
595 // Give an edge, find the BB that will be instrumented.
596 // Return nullptr if there is no BB to be instrumented.
597 BasicBlock *getInstrBB(Edge *E);
598
599 // Return the auxiliary BB information.
600 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
601
602 // Return the auxiliary BB information if available.
603 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
604
605 // Dump edges and BB information.
606 void dumpInfo(StringRef Str = "") const {
607 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
608 " Hash: " + Twine(FunctionHash) + "\t" + Str);
609 }
610
611 FuncPGOInstrumentation(
612 Function &Func, TargetLibraryInfo &TLI,
613 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
614 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
615 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
616 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
617 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
618 TLI(TLI), ValueSites(IPVK_Last + 1),
619 SIVisitor(Func, HasSingleByteCoverage),
620 MST(F, InstrumentFuncEntry, BPI, BFI),
621 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
622 if (BCI && PGOViewBlockCoverageGraph)
623 BCI->viewBlockCoverageGraph();
624 // This should be done before CFG hash computation.
625 SIVisitor.countSelects();
626 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
// Non-CS and CS runs feed separate statistic counters. Indirect-call and
// vtable sites are only collected on the non-CS path.
627 if (!IsCS) {
628 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
629 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
630 NumOfPGOBB += MST.bbInfoSize();
631 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
// NOTE(review): the listing jumps from 631 to 633; the vtable collection below
// is presumably guarded by a condition on the missing line.
633 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
634 } else {
635 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
636 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
637 NumOfCSPGOBB += MST.bbInfoSize();
638 }
639
640 FuncName = getIRPGOFuncName(F);
641 DeprecatedFuncName = getPGOFuncName(F);
642 computeCFGHash();
643 if (!ComdatMembers.empty())
644 renameComdatFunction();
645 LLVM_DEBUG(dumpInfo("after CFGMST"));
646
// Statistics: count every non-removed edge, and separately those outside the
// MST (the ones that get instrumented).
647 for (const auto &E : MST.allEdges()) {
648 if (E->Removed)
649 continue;
650 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
651 if (!E->InMST)
652 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
653 }
654
655 if (CreateGlobalVar)
656 FuncNameVar = createPGOFuncNameVar(F, FuncName);
657 }
658};
659
660} // end anonymous namespace
661
662// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
663// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
664// of selects, indirect calls, mem ops and edges.
// Computes FunctionHash from two CRCs: one over the BB indexes of all CFG
// successors, and one over the counts of selects, indirect-call sites, memop
// sites and either the instrumented-blocks hash (when BCI is set) or the
// number of MST edges.
665template <class Edge, class BBInfo>
666void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
667 std::vector<uint8_t> Indexes;
668 JamCRC JC;
// Serialize the Index of every successor that has BB info as four bytes,
// least-significant byte first. Successors without BB info are skipped.
669 for (auto &BB : F) {
670 for (BasicBlock *Succ : successors(&BB)) {
671 auto BI = findBBInfo(Succ);
672 if (BI == nullptr)
673 continue;
674 uint32_t Index = BI->Index;
675 for (int J = 0; J < 4; J++)
676 Indexes.push_back((uint8_t)(Index >> (J * 8)));
677 }
678 }
679 JC.update(Indexes);
680
681 JamCRC JCH;
682 // The higher 32 bits.
683 auto updateJCH = [&JCH](uint64_t Num) {
684 uint8_t Data[8];
// NOTE(review): the listing jumps from 684 to 686; the line that writes Num
// into Data before it is fed to the CRC is not visible here.
686 JCH.update(Data);
687 };
688 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
689 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
690 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
691 if (BCI) {
692 updateJCH(BCI->getInstrumentedBlocksHash());
693 } else {
694 updateJCH((uint64_t)MST.numEdges());
695 }
696
697 // Hash format for context sensitive profile. Reserve 4 bits for other
698 // information.
// The second CRC is shifted left by 28 bits (not 32) before the first CRC is
// added, so the two values overlap in bits 28-31.
699 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
700
701 // Reserve bit 60-63 for other information purpose.
702 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
// NOTE(review): the listing jumps from 703 to 705; the statement controlled by
// this `if` is not visible. As printed, the `if` would govern the LLVM_DEBUG
// statement below, which is presumably not the intent.
703 if (IsCS)
705 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
706 << " CRC = " << JC.getCRC()
707 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
708 << ", Edges = " << MST.numEdges() << ", ICSites = "
709 << ValueSites[IPVK_IndirectCallTarget].size()
710 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
711 << ", High32 CRC = " << JCH.getCRC()
712 << ", Hash = " << FunctionHash << "\n";);
713
// Optional tracing: "-" disables it; otherwise any function whose name
// contains the option value has its hash printed.
714 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
715 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
716 << " in building " << F.getParent()->getSourceFileName() << "\n";
717}
718
719// Check if we can safely rename this Comdat function.
720static bool canRenameComdat(
721 Function &F,
722 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
723 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
724 return false;
725
726 // FIXME: Current only handle those Comdat groups that only containing one
727 // function.
728 // (1) For a Comdat group containing multiple functions, we need to have a
729 // unique postfix based on the hashes for each function. There is a
730 // non-trivial code refactoring to do this efficiently.
731 // (2) Variables can not be renamed, so we can not rename Comdat function in a
732 // group including global vars.
733 Comdat *C = F.getComdat();
734 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
735 assert(!isa<GlobalAlias>(CM.second));
736 Function *FM = dyn_cast<Function>(CM.second);
737 if (FM != &F)
738 return false;
739 }
740 return true;
741}
742
743// Append the CFGHash to the Comdat function name.
// Appends "." plus FunctionHash to the function name and to FuncName, then
// moves the function into a comdat whose name carries the same suffix. Does
// nothing when canRenameComdat() rejects the function.
744template <class Edge, class BBInfo>
745void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
746 if (!canRenameComdat(F, ComdatMembers))
747 return;
748 std::string OrigName = F.getName().str();
749 std::string NewFuncName =
750 Twine(F.getName() + "." + Twine(FunctionHash)).str();
751 F.setName(Twine(NewFuncName));
// NOTE(review): the listing jumps from 751 to 753. OrigName is not used in any
// visible line, so its use is presumably on the missing line.
753 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
754 Comdat *NewComdat;
755 Module *M = F.getParent();
756 // For AvailableExternallyLinkage functions, change the linkage to
757 // LinkOnceODR and put them into comdat. This is because after renaming, there
758 // is no backup external copy available for the function.
759 if (!F.hasComdat()) {
// NOTE(review): listing lines 760 and 762 are absent. The linkage change
// described in the comment above is not visible in this branch.
761 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
763 F.setComdat(NewComdat);
764 return;
765 }
766
767 // This function belongs to a single function Comdat group.
768 Comdat *OrigComdat = F.getComdat();
769 std::string NewComdatName =
770 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
771 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
// The replacement comdat keeps the selection kind of the original.
772 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
773
// Move every recorded member of the original comdat to the new one.
774 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
775 // Must be a function.
776 cast<Function>(CM.second)->setComdat(NewComdat);
777 }
778}
779
780/// Collect all the BBs that will be instruments and add them to
781/// `InstrumentBBs`.
782template <class Edge, class BBInfo>
783void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
784 std::vector<BasicBlock *> &InstrumentBBs) {
785 if (BCI) {
786 for (auto &BB : F)
787 if (BCI->shouldInstrumentBlock(BB))
788 InstrumentBBs.push_back(&BB);
789 return;
790 }
791
792 // Use a worklist as we will update the vector during the iteration.
793 std::vector<Edge *> EdgeList;
794 EdgeList.reserve(MST.numEdges());
795 for (const auto &E : MST.allEdges())
796 EdgeList.push_back(E.get());
797
798 for (auto &E : EdgeList) {
799 BasicBlock *InstrBB = getInstrBB(E);
800 if (InstrBB)
801 InstrumentBBs.push_back(InstrBB);
802 }
803}
804
805// Given a CFG E to be instrumented, find which BB to place the instrumented
806// code. The function will split the critical edge if necessary.
// Maps edge E to the basic block that should hold its counter, or nullptr
// when the edge is in the MST, was removed, or no usable block exists. A
// critical edge is split, and the split is recorded in the MST as two new
// edges replacing E.
807template <class Edge, class BBInfo>
808BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
809 if (E->InMST || E->Removed)
810 return nullptr;
811
812 BasicBlock *SrcBB = E->SrcBB;
813 BasicBlock *DestBB = E->DestBB;
814 // For a fake edge, instrument the real BB.
815 if (SrcBB == nullptr)
816 return DestBB;
817 if (DestBB == nullptr)
818 return SrcBB;
819
// Returns BB itself when it has an insertion point, nullptr otherwise.
820 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
821 // There are basic blocks (such as catchswitch) cannot be instrumented.
822 // If the returned first insertion point is the end of BB, skip this BB.
823 if (BB->getFirstInsertionPt() == BB->end())
824 return nullptr;
825 return BB;
826 };
827
828 // Instrument the SrcBB if it has a single successor,
829 // otherwise, the DestBB if this is not a critical edge.
830 Instruction *TI = SrcBB->getTerminator();
831 if (TI->getNumSuccessors() <= 1)
832 return canInstrument(SrcBB);
833 if (!E->IsCritical)
834 return canInstrument(DestBB);
835
836 // Some IndirectBr critical edges cannot be split by the previous
837 // SplitIndirectBrCriticalEdges call. Bail out.
838 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
839 BasicBlock *InstrBB =
840 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
841 if (!InstrBB) {
// NOTE(review): the listing jumps from 841 to 843; the opening of the
// statement that the next line closes (presumably a debug macro) is missing.
843 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
844 return nullptr;
845 }
846 // For a critical edge, we have to split. Instrument the newly
847 // created BB.
848 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
849 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
850 << " --> " << getBBInfo(DestBB).Index << "\n");
851 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
852 MST.addEdge(SrcBB, InstrBB, 0);
853 // Second one: Add new edge of InstrBB->DestBB.
// Only the second new edge is marked as in the MST. The original edge is
// flagged Removed so later passes over the edge list skip it.
854 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
855 NewEdge1.InMST = true;
856 E->Removed = true;
857
858 return canInstrument(InstrBB);
859}
860
861// When generating value profiling calls on Windows routines that make use of
862// handler funclets for exception processing an operand bundle needs to attached
863// to the called function. This routine will set \p OpBundles to contain the
864// funclet information, if any is needed, that should be placed on the generated
865// value profiling call for the value profile candidate call.
// Adds a funclet operand bundle to OpBundles when the candidate's annotated
// instruction is a call that needs one. Nothing is added when the annotated
// instruction is not a call.
// NOTE(review): the listing jumps from 866 to 870, so the function name and
// parameter list are not visible here. The body uses Cand, BlockColors and
// OpBundles.
866static void
870 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
871 if (!OrigCall)
872 return;
873
874 if (!isa<IntrinsicInst>(OrigCall)) {
875 // The instrumentation call should belong to the same funclet as a
876 // non-intrinsic call, so just copy the operand bundle, if any exists.
877 std::optional<OperandBundleUse> ParentFunclet =
878 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
879 if (ParentFunclet)
880 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
881 } else {
882 // Intrinsics or other instructions do not get funclet information from the
883 // front-end. Need to use the BlockColors that was computed by the routine
884 // colorEHFunclets to determine whether a funclet is needed.
885 if (!BlockColors.empty()) {
// NOTE(review): the find() result is dereferenced without a check against
// end(); this assumes every block holding such a call has an entry.
886 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
887 assert(CV.size() == 1 && "non-unique color for block!");
// A "funclet" bundle is added only when the first non-PHI instruction of the
// block's single color is an EH pad.
888 Instruction *EHPad = CV.front()->getFirstNonPHI();
889 if (EHPad->isEHPad())
890 OpBundles.emplace_back("funclet", EHPad);
891 }
892 }
893}
894
895// Visit all edge and instrument the edges not in MST, and do value profiling.
896// Critical edges will be split.
897void FunctionInstrumenter::instrument() {
898 if (!PGOBlockCoverage) {
899 // Split indirectbr critical edges here before computing the MST rather than
900 // later in getInstrBB() to avoid invalidating it.
901 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
902 }
903
904 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
905 F, TLI, ComdatMembers, true, BPI, BFI,
906 InstrumentationType == PGOInstrumentationType::CSFDO,
907 shouldInstrumentEntryBB(), PGOBlockCoverage);
908
909 auto Name = FuncInfo.FuncNameVar;
910 auto CFGHash =
911 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
912 // Make sure that pointer to global is passed in with zero addrspace
913 // This is relevant during GPU profiling
914 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
915 Name, PointerType::get(M.getContext(), 0));
917 auto &EntryBB = F.getEntryBlock();
918 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
919 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
920 // i32 <index>)
921 Builder.CreateCall(
922 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_cover),
923 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
924 return;
925 }
926
927 std::vector<BasicBlock *> InstrumentBBs;
928 FuncInfo.getInstrumentBBs(InstrumentBBs);
929 unsigned NumCounters =
930 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
931
932 if (InstrumentationType == PGOInstrumentationType::CTXPROF) {
933 auto *CSIntrinsic =
934 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_callsite);
935 // We want to count the instrumentable callsites, then instrument them. This
936 // is because the llvm.instrprof.callsite intrinsic has an argument (like
937 // the other instrprof intrinsics) capturing the total number of
938 // instrumented objects (counters, or callsites, in this case). In this
939 // case, we want that value so we can readily pass it to the compiler-rt
940 // APIs that may have to allocate memory based on the nr of callsites.
941 // The traversal logic is the same for both counting and instrumentation,
942 // just needs to be done in succession.
943 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
944 for (auto &BB : F)
945 for (auto &Instr : BB)
946 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
947 if ((CS->getCalledFunction() &&
948 CS->getCalledFunction()->isIntrinsic()) ||
949 dyn_cast<InlineAsm>(CS->getCalledOperand()))
950 continue;
951 Visitor(CS);
952 }
953 };
954 // First, count callsites.
955 uint32_t TotalNrCallsites = 0;
956 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
957
958 // Now instrument.
959 uint32_t CallsiteIndex = 0;
960 Visit([&](auto *CB) {
961 IRBuilder<> Builder(CB);
962 Builder.CreateCall(CSIntrinsic,
963 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
964 Builder.getInt32(CallsiteIndex++),
965 CB->getCalledOperand()});
966 });
967 }
968
969 uint32_t I = 0;
971 NumCounters += PGOBlockCoverage ? 8 : 1;
972 auto &EntryBB = F.getEntryBlock();
973 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
974 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
975 // i32 <index>)
976 Builder.CreateCall(
977 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_timestamp),
978 {NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
979 Builder.getInt32(I)});
980 I += PGOBlockCoverage ? 8 : 1;
981 }
982
983 for (auto *InstrBB : InstrumentBBs) {
984 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
985 assert(Builder.GetInsertPoint() != InstrBB->end() &&
986 "Cannot get the Instrumentation point");
987 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
988 // i32 <index>)
989 Builder.CreateCall(
991 ? Intrinsic::instrprof_cover
992 : Intrinsic::instrprof_increment),
993 {NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
994 Builder.getInt32(I++)});
995 }
996
997 // Now instrument select instructions:
998 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
999 FuncInfo.FunctionHash);
1000 assert(I == NumCounters);
1001
1002 if (isValueProfilingDisabled())
1003 return;
1004
1005 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1006
1007 // Intrinsic function calls do not have funclet operand bundles needed for
1008 // Windows exception handling attached to them. However, if value profiling is
1009 // inserted for one of these calls, then a funclet value will need to be set
1010 // on the instrumentation call based on the funclet coloring.
1012 if (F.hasPersonalityFn() &&
1013 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1014 BlockColors = colorEHFunclets(F);
1015
1016 // For each VP Kind, walk the VP candidates and instrument each one.
1017 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
1018 unsigned SiteIndex = 0;
1019 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1020 continue;
1021
1022 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1023 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1024 << " site: CallSite Index = " << SiteIndex << "\n");
1025
1026 IRBuilder<> Builder(Cand.InsertPt);
1027 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1028 "Cannot get the Instrumentation point");
1029
1030 Value *ToProfile = nullptr;
1031 if (Cand.V->getType()->isIntegerTy())
1032 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1033 else if (Cand.V->getType()->isPointerTy())
1034 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1035 assert(ToProfile && "value profiling Value is of unexpected type");
1036
1037 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1038 Name, PointerType::get(M.getContext(), 0));
1039
1041 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1042 Builder.CreateCall(
1043 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_value_profile),
1044 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1045 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1046 OpBundles);
1047 }
1048 } // IPVK_First <= Kind <= IPVK_Last
1049}
1050
1051namespace {
1052
1053// This class represents a CFG edge in profile use compilation.
1054struct PGOUseEdge : public PGOEdge {
1055 using PGOEdge::PGOEdge;
1056
1057 std::optional<uint64_t> Count;
1058
1059 // Set edge count value
1060 void setEdgeCount(uint64_t Value) { Count = Value; }
1061
1062 // Return the information string for this object.
1063 std::string infoString() const {
1064 if (!Count)
1065 return PGOEdge::infoString();
1066 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1067 }
1068};
1069
1070using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1071
1072// This class stores the auxiliary information for each BB.
1073struct PGOUseBBInfo : public PGOBBInfo {
1074 std::optional<uint64_t> Count;
1075 int32_t UnknownCountInEdge = 0;
1076 int32_t UnknownCountOutEdge = 0;
1077 DirectEdges InEdges;
1078 DirectEdges OutEdges;
1079
1080 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1081
1082 // Set the profile count value for this BB.
1083 void setBBInfoCount(uint64_t Value) { Count = Value; }
1084
1085 // Return the information string of this object.
1086 std::string infoString() const {
1087 if (!Count)
1088 return PGOBBInfo::infoString();
1089 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1090 }
1091
1092 // Add an OutEdge and update the edge count.
1093 void addOutEdge(PGOUseEdge *E) {
1094 OutEdges.push_back(E);
1095 UnknownCountOutEdge++;
1096 }
1097
1098 // Add an InEdge and update the edge count.
1099 void addInEdge(PGOUseEdge *E) {
1100 InEdges.push_back(E);
1101 UnknownCountInEdge++;
1102 }
1103};
1104
1105} // end anonymous namespace
1106
1107// Sum up the count values for all the edges.
1109 uint64_t Total = 0;
1110 for (const auto &E : Edges) {
1111 if (E->Removed)
1112 continue;
1113 if (E->Count)
1114 Total += *E->Count;
1115 }
1116 return Total;
1117}
1118
1119namespace {
1120
1121class PGOUseFunc {
1122public:
1123 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1124 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1126 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1127 bool HasSingleByteCoverage)
1128 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1129 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1130 InstrumentFuncEntry, HasSingleByteCoverage),
1131 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1132
1133 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1134
1135 // Read counts for the instrumented BB from profile.
1136 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1138
1139 // Populate the counts for all BBs.
1140 void populateCounters();
1141
1142 // Set block coverage based on profile coverage values.
1143 void populateCoverage(IndexedInstrProfReader *PGOReader);
1144
1145 // Set the branch weights based on the count values.
1146 void setBranchWeights();
1147
1148 // Annotate the value profile call sites for all value kind.
1149 void annotateValueSites();
1150
1151 // Annotate the value profile call sites for one value kind.
1152 void annotateValueSites(uint32_t Kind);
1153
1154 // Annotate the irreducible loop header weights.
1155 void annotateIrrLoopHeaderWeights();
1156
1157 // The hotness of the function from the profile count.
1158 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1159
1160 // Return the function hotness from the profile.
1161 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1162
1163 // Return the function hash.
1164 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1165
1166 // Return the profile record for this function;
1167 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1168
1169 // Return the auxiliary BB information.
1170 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1171 return FuncInfo.getBBInfo(BB);
1172 }
1173
1174 // Return the auxiliary BB information if available.
1175 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1176 return FuncInfo.findBBInfo(BB);
1177 }
1178
1179 Function &getFunc() const { return F; }
1180
1181 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1182
1183 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1184
1185private:
1186 Function &F;
1187 Module *M;
1189 ProfileSummaryInfo *PSI;
1190
1191 // This member stores the shared information with class PGOGenFunc.
1192 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1193
1194 // The maximum count value in the profile. This is only used in PGO use
1195 // compilation.
1196 uint64_t ProgramMaxCount;
1197
1198 // Position of counter that remains to be read.
1199 uint32_t CountPosition = 0;
1200
1201 // Total size of the profile count for this function.
1202 uint32_t ProfileCountSize = 0;
1203
1204 // ProfileRecord for this function.
1205 InstrProfRecord ProfileRecord;
1206
1207 // Function hotness info derived from profile.
1208 FuncFreqAttr FreqAttr;
1209
1210 // Is to use the context sensitive profile.
1211 bool IsCS;
1212
1214
1215 // Find the Instrumented BB and set the value. Return false on error.
1216 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1217
1218 // Set the edge counter value for the unknown edge -- there should be only
1219 // one unknown edge.
1220 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1221
1222 // Set the hot/cold inline hints based on the count values.
1223 // FIXME: This function should be removed once the functionality in
1224 // the inliner is implemented.
1225 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1226 if (PSI->isHotCount(EntryCount))
1227 FreqAttr = FFA_Hot;
1228 else if (PSI->isColdCount(MaxCount))
1229 FreqAttr = FFA_Cold;
1230 }
1231};
1232
1233} // end anonymous namespace
1234
1235/// Set up InEdges/OutEdges for all BBs in the MST.
1237 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1238 // This is not required when there is block coverage inference.
1239 if (FuncInfo.BCI)
1240 return;
1241 for (const auto &E : FuncInfo.MST.allEdges()) {
1242 if (E->Removed)
1243 continue;
1244 const BasicBlock *SrcBB = E->SrcBB;
1245 const BasicBlock *DestBB = E->DestBB;
1246 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1247 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1248 SrcInfo.addOutEdge(E.get());
1249 DestInfo.addInEdge(E.get());
1250 }
1251}
1252
1253// Visit all the edges and assign the count value for the instrumented
1254// edges and the BB. Return false on error.
1255bool PGOUseFunc::setInstrumentedCounts(
1256 const std::vector<uint64_t> &CountFromProfile) {
1257
1258 std::vector<BasicBlock *> InstrumentBBs;
1259 FuncInfo.getInstrumentBBs(InstrumentBBs);
1260
1261 setupBBInfoEdges(FuncInfo);
1262
1263 unsigned NumCounters =
1264 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1265 // The number of counters here should match the number of counters
1266 // in profile. Return if they mismatch.
1267 if (NumCounters != CountFromProfile.size()) {
1268 return false;
1269 }
1270 auto *FuncEntry = &*F.begin();
1271
1272 // Set the profile count to the Instrumented BBs.
1273 uint32_t I = 0;
1274 for (BasicBlock *InstrBB : InstrumentBBs) {
1275 uint64_t CountValue = CountFromProfile[I++];
1276 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1277 // If we reach here, we know that we have some nonzero count
1278 // values in this function. The entry count should not be 0.
1279 // Fix it if necessary.
1280 if (InstrBB == FuncEntry && CountValue == 0)
1281 CountValue = 1;
1282 Info.setBBInfoCount(CountValue);
1283 }
1284 ProfileCountSize = CountFromProfile.size();
1285 CountPosition = I;
1286
1287 // Set the edge count and update the count of unknown edges for BBs.
1288 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1289 E->setEdgeCount(Value);
1290 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1291 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1292 };
1293
1294 // Set the profile count the Instrumented edges. There are BBs that not in
1295 // MST but not instrumented. Need to set the edge count value so that we can
1296 // populate the profile counts later.
1297 for (const auto &E : FuncInfo.MST.allEdges()) {
1298 if (E->Removed || E->InMST)
1299 continue;
1300 const BasicBlock *SrcBB = E->SrcBB;
1301 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1302
1303 // If only one out-edge, the edge profile count should be the same as BB
1304 // profile count.
1305 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1306 setEdgeCount(E.get(), *SrcInfo.Count);
1307 else {
1308 const BasicBlock *DestBB = E->DestBB;
1309 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1310 // If only one in-edge, the edge profile count should be the same as BB
1311 // profile count.
1312 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1313 setEdgeCount(E.get(), *DestInfo.Count);
1314 }
1315 if (E->Count)
1316 continue;
1317 // E's count should have been set from profile. If not, this meenas E skips
1318 // the instrumentation. We set the count to 0.
1319 setEdgeCount(E.get(), 0);
1320 }
1321 return true;
1322}
1323
1324// Set the count value for the unknown edge. There should be one and only one
1325// unknown edge in Edges vector.
1326void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1327 for (auto &E : Edges) {
1328 if (E->Count)
1329 continue;
1330 E->setEdgeCount(Value);
1331
1332 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1333 getBBInfo(E->DestBB).UnknownCountInEdge--;
1334 return;
1335 }
1336 llvm_unreachable("Cannot find the unknown count edge");
1337}
1338
1339// Emit function metadata indicating PGO profile mismatch.
1341 const char MetadataName[] = "instr_prof_hash_mismatch";
1343 // If this metadata already exists, ignore.
1344 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1345 if (Existing) {
1346 MDTuple *Tuple = cast<MDTuple>(Existing);
1347 for (const auto &N : Tuple->operands()) {
1348 if (N.equalsStr(MetadataName))
1349 return;
1350 Names.push_back(N.get());
1351 }
1352 }
1353
1354 MDBuilder MDB(ctx);
1355 Names.push_back(MDB.createString(MetadataName));
1356 MDNode *MD = MDTuple::get(ctx, Names);
1357 F.setMetadata(LLVMContext::MD_annotation, MD);
1358}
1359
1360void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1361 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1362 auto &Ctx = M->getContext();
1363 auto Err = IPE.get();
1364 bool SkipWarning = false;
1365 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1366 << FuncInfo.FuncName << ": ");
1367 if (Err == instrprof_error::unknown_function) {
1368 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1369 SkipWarning = !PGOWarnMissing;
1370 LLVM_DEBUG(dbgs() << "unknown function");
1371 } else if (Err == instrprof_error::hash_mismatch ||
1372 Err == instrprof_error::malformed) {
1373 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1374 SkipWarning =
1377 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1379 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1380 << " skip=" << SkipWarning << ")");
1381 // Emit function metadata indicating PGO profile mismatch.
1382 annotateFunctionWithHashMismatch(F, M->getContext());
1383 }
1384
1385 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1386 if (SkipWarning)
1387 return;
1388
1389 std::string Msg =
1390 IPE.message() + std::string(" ") + F.getName().str() +
1391 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1392 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1393 std::string(" count discarded");
1394
1395 Ctx.diagnose(
1396 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1397 });
1398}
1399
1400// Read the profile from ProfileFileName and assign the value to the
1401// instrumented BB and the edges. This function also updates ProgramMaxCount.
1402// Return true if the profile are successfully read, and false on errors.
1403bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1405 auto &Ctx = M->getContext();
1406 uint64_t MismatchedFuncSum = 0;
1408 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1409 &MismatchedFuncSum);
1410 if (Error E = Result.takeError()) {
1411 handleInstrProfError(std::move(E), MismatchedFuncSum);
1412 return false;
1413 }
1414 ProfileRecord = std::move(Result.get());
1415 PseudoKind = ProfileRecord.getCountPseudoKind();
1416 if (PseudoKind != InstrProfRecord::NotPseudo) {
1417 return true;
1418 }
1419 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1420
1421 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1422 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1423
1424 uint64_t ValueSum = 0;
1425 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1426 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1427 ValueSum += CountFromProfile[I];
1428 }
1429 AllZeros = (ValueSum == 0);
1430
1431 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1432
1433 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1434 getBBInfo(nullptr).UnknownCountInEdge = 2;
1435
1436 if (!setInstrumentedCounts(CountFromProfile)) {
1437 LLVM_DEBUG(
1438 dbgs() << "Inconsistent number of counts, skipping this function");
1439 Ctx.diagnose(DiagnosticInfoPGOProfile(
1440 M->getName().data(),
1441 Twine("Inconsistent number of counts in ") + F.getName().str() +
1442 Twine(": the profile may be stale or there is a function name "
1443 "collision."),
1444 DS_Warning));
1445 return false;
1446 }
1447 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1448 return true;
1449}
1450
1451void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1452 uint64_t MismatchedFuncSum = 0;
1454 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1455 &MismatchedFuncSum);
1456 if (auto Err = Result.takeError()) {
1457 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1458 return;
1459 }
1460 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1461
1462 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1464 unsigned Index = 0;
1465 for (auto &BB : F)
1466 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1467 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1468 assert(Index == CountsFromProfile.size());
1469
1470 // For each B in InverseDependencies[A], if A is covered then B is covered.
1472 InverseDependencies;
1473 for (auto &BB : F) {
1474 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1475 // If Dep is covered then BB is covered.
1476 InverseDependencies[Dep].insert(&BB);
1477 }
1478 }
1479
1480 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1481 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1482 for (auto &[BB, IsCovered] : Coverage)
1483 if (IsCovered)
1484 CoveredBlocksToProcess.push(BB);
1485
1486 while (!CoveredBlocksToProcess.empty()) {
1487 auto *CoveredBlock = CoveredBlocksToProcess.top();
1488 assert(Coverage[CoveredBlock]);
1489 CoveredBlocksToProcess.pop();
1490 for (auto *BB : InverseDependencies[CoveredBlock]) {
1491 // If CoveredBlock is covered then BB is covered.
1492 if (Coverage[BB])
1493 continue;
1494 Coverage[BB] = true;
1495 CoveredBlocksToProcess.push(BB);
1496 }
1497 }
1498
1499 // Annotate block coverage.
1500 MDBuilder MDB(F.getContext());
1501 // We set the entry count to 10000 if the entry block is covered so that BFI
1502 // can propagate a fraction of this count to the other covered blocks.
1503 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1504 for (auto &BB : F) {
1505 // For a block A and its successor B, we set the edge weight as follows:
1506 // If A is covered and B is covered, set weight=1.
1507 // If A is covered and B is uncovered, set weight=0.
1508 // If A is uncovered, set weight=1.
1509 // This setup will allow BFI to give nonzero profile counts to only covered
1510 // blocks.
1512 for (auto *Succ : successors(&BB))
1513 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1514 if (Weights.size() >= 2)
1515 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1516 /*IsExpected=*/false);
1517 }
1518
1519 unsigned NumCorruptCoverage = 0;
1520 DominatorTree DT(F);
1521 LoopInfo LI(DT);
1522 BranchProbabilityInfo BPI(F, LI);
1523 BlockFrequencyInfo BFI(F, BPI, LI);
1524 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1525 if (auto C = BFI.getBlockProfileCount(&BB))
1526 return C == 0;
1527 return {};
1528 };
1529 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1530 for (auto &BB : F) {
1531 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1532 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1533 << "\n");
1534 // In some cases it is possible to find a covered block that has no covered
1535 // successors, e.g., when a block calls a function that may call exit(). In
1536 // those cases, BFI could find its successor to be covered while BCI could
1537 // find its successor to be dead.
1538 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1539 LLVM_DEBUG(
1540 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1541 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1542 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1543 ++NumCorruptCoverage;
1544 }
1545 if (Coverage[&BB])
1546 ++NumCoveredBlocks;
1547 }
1548 if (PGOVerifyBFI && NumCorruptCoverage) {
1549 auto &Ctx = M->getContext();
1550 Ctx.diagnose(DiagnosticInfoPGOProfile(
1551 M->getName().data(),
1552 Twine("Found inconsistent block coverage for function ") + F.getName() +
1553 " in " + Twine(NumCorruptCoverage) + " blocks.",
1554 DS_Warning));
1555 }
1557 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1558}
1559
1560// Populate the counters from instrumented BBs to all BBs.
1561// In the end of this operation, all BBs should have a valid count value.
1562void PGOUseFunc::populateCounters() {
1563 bool Changes = true;
1564 unsigned NumPasses = 0;
1565 while (Changes) {
1566 NumPasses++;
1567 Changes = false;
1568
1569 // For efficient traversal, it's better to start from the end as most
1570 // of the instrumented edges are at the end.
1571 for (auto &BB : reverse(F)) {
1572 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1573 if (UseBBInfo == nullptr)
1574 continue;
1575 if (!UseBBInfo->Count) {
1576 if (UseBBInfo->UnknownCountOutEdge == 0) {
1577 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1578 Changes = true;
1579 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1580 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1581 Changes = true;
1582 }
1583 }
1584 if (UseBBInfo->Count) {
1585 if (UseBBInfo->UnknownCountOutEdge == 1) {
1586 uint64_t Total = 0;
1587 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1588 // If the one of the successor block can early terminate (no-return),
1589 // we can end up with situation where out edge sum count is larger as
1590 // the source BB's count is collected by a post-dominated block.
1591 if (*UseBBInfo->Count > OutSum)
1592 Total = *UseBBInfo->Count - OutSum;
1593 setEdgeCount(UseBBInfo->OutEdges, Total);
1594 Changes = true;
1595 }
1596 if (UseBBInfo->UnknownCountInEdge == 1) {
1597 uint64_t Total = 0;
1598 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1599 if (*UseBBInfo->Count > InSum)
1600 Total = *UseBBInfo->Count - InSum;
1601 setEdgeCount(UseBBInfo->InEdges, Total);
1602 Changes = true;
1603 }
1604 }
1605 }
1606 }
1607
1608 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1609 (void)NumPasses;
1610#ifndef NDEBUG
1611 // Assert every BB has a valid counter.
1612 for (auto &BB : F) {
1613 auto BI = findBBInfo(&BB);
1614 if (BI == nullptr)
1615 continue;
1616 assert(BI->Count && "BB count is not valid");
1617 }
1618#endif
1619 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1620 uint64_t FuncMaxCount = FuncEntryCount;
1621 for (auto &BB : F) {
1622 auto BI = findBBInfo(&BB);
1623 if (BI == nullptr)
1624 continue;
1625 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1626 }
1627
1628 // Fix the obviously inconsistent entry count.
1629 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1630 FuncEntryCount = 1;
1632 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1633
1634 // Now annotate select instructions
1635 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1636 assert(CountPosition == ProfileCountSize);
1637
1638 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1639}
1640
1641// Assign the scaled count values to the BB with multiple out edges.
1642void PGOUseFunc::setBranchWeights() {
1643 // Generate MD_prof metadata for every branch instruction.
1644 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1645 << " IsCS=" << IsCS << "\n");
1646 for (auto &BB : F) {
1647 Instruction *TI = BB.getTerminator();
1648 if (TI->getNumSuccessors() < 2)
1649 continue;
1650 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1651 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1652 isa<CallBrInst>(TI)))
1653 continue;
1654
1655 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1656 if (!*BBCountInfo.Count)
1657 continue;
1658
1659 // We have a non-zero Branch BB.
1660 unsigned Size = BBCountInfo.OutEdges.size();
1661 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1662 uint64_t MaxCount = 0;
1663 for (unsigned s = 0; s < Size; s++) {
1664 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1665 const BasicBlock *SrcBB = E->SrcBB;
1666 const BasicBlock *DestBB = E->DestBB;
1667 if (DestBB == nullptr)
1668 continue;
1669 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1670 uint64_t EdgeCount = *E->Count;
1671 if (EdgeCount > MaxCount)
1672 MaxCount = EdgeCount;
1673 EdgeCounts[SuccNum] = EdgeCount;
1674 }
1675
1676 if (MaxCount)
1677 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1678 else {
1679 // A zero MaxCount can come about when we have a BB with a positive
1680 // count, and whose successor blocks all have 0 count. This can happen
1681 // when there is no exit block and the code exits via a noreturn function.
1682 auto &Ctx = M->getContext();
1683 Ctx.diagnose(DiagnosticInfoPGOProfile(
1684 M->getName().data(),
1685 Twine("Profile in ") + F.getName().str() +
1686 Twine(" partially ignored") +
1687 Twine(", possibly due to the lack of a return path."),
1688 DS_Warning));
1689 }
1690 }
1691}
1692
1694 for (BasicBlock *Pred : predecessors(BB)) {
1695 if (isa<IndirectBrInst>(Pred->getTerminator()))
1696 return true;
1697 }
1698 return false;
1699}
1700
1701void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1702 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1703 // Find irr loop headers
1704 for (auto &BB : F) {
1705 // As a heuristic also annotate indrectbr targets as they have a high chance
1706 // to become an irreducible loop header after the indirectbr tail
1707 // duplication.
1708 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1709 Instruction *TI = BB.getTerminator();
1710 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1711 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1712 }
1713 }
1714}
1715
1716void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1717 Module *M = F.getParent();
1718 IRBuilder<> Builder(&SI);
1719 Type *Int64Ty = Builder.getInt64Ty();
1720 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1721 auto *NormalizedFuncNameVarPtr =
1723 FuncNameVar, PointerType::get(M->getContext(), 0));
1724 Builder.CreateCall(
1725 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1726 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1727 Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
1728 ++(*CurCtrIdx);
1729}
1730
1731void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1732 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1733 assert(*CurCtrIdx < CountFromProfile.size() &&
1734 "Out of bound access of counters");
1735 uint64_t SCounts[2];
1736 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1737 ++(*CurCtrIdx);
1738 uint64_t TotalCount = 0;
1739 auto BI = UseFunc->findBBInfo(SI.getParent());
1740 if (BI != nullptr)
1741 TotalCount = *BI->Count;
1742 // False Count
1743 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1744 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1745 if (MaxCount)
1746 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1747}
1748
1749void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1750 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1751 return;
1752 // FIXME: do not handle this yet.
1753 if (SI.getCondition()->getType()->isVectorTy())
1754 return;
1755
1756 switch (Mode) {
1757 case VM_counting:
1758 NSIs++;
1759 return;
1760 case VM_instrument:
1761 instrumentOneSelectInst(SI);
1762 return;
1763 case VM_annotate:
1764 annotateOneSelectInst(SI);
1765 return;
1766 }
1767
1768 llvm_unreachable("Unknown visiting mode");
1769}
1770
1772 if (ValueProfKind == IPVK_MemOPSize)
1774 if (ValueProfKind == llvm::IPVK_VTableTarget)
1776 return MaxNumAnnotations;
1777}
1778
1779// Traverse all valuesites and annotate the instructions for all value kind.
1780void PGOUseFunc::annotateValueSites() {
1782 return;
1783
1784 // Create the PGOFuncName meta data.
1785 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1786
1787 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1788 annotateValueSites(Kind);
1789}
1790
1791// Annotate the instructions for a specific value kind.
1792void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1793 assert(Kind <= IPVK_Last);
1794 unsigned ValueSiteIndex = 0;
1795
1796 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1797
1798 // Since there isn't a reliable or fast way for profile reader to tell if a
1799 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1800 // value profile collector over the function IR to find the instrumented sites
1801 // iff function profile records shows the number of instrumented vtable sites
1802 // is not zero. Function cfg already takes the number of instrumented
1803 // indirect call sites into account so it doesn't hash the number of
1804 // instrumented vtables; as a side effect it makes it easier to enable
1805 // profiling and profile use in two steps if needed.
1806 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1807 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1808 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1810 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1811 auto &ValueSites = FuncInfo.ValueSites[Kind];
1812 if (NumValueSites != ValueSites.size()) {
1813 auto &Ctx = M->getContext();
1814 Ctx.diagnose(DiagnosticInfoPGOProfile(
1815 M->getName().data(),
1816 Twine("Inconsistent number of value sites for ") +
1817 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1818 F.getName().str() +
1819 Twine("\", possibly due to the use of a stale profile."),
1820 DS_Warning));
1821 return;
1822 }
1823
1824 for (VPCandidateInfo &I : ValueSites) {
1825 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1826 << "): Index = " << ValueSiteIndex << " out of "
1827 << NumValueSites << "\n");
1829 *M, *I.AnnotatedInst, ProfileRecord,
1830 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1831 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1832 ValueSiteIndex++;
1833 }
1834}
1835
1836// Collect the set of members for each Comdat in module M and store
1837// in ComdatMembers.
1839 Module &M,
1840 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1841 if (!DoComdatRenaming)
1842 return;
1843 for (Function &F : M)
1844 if (Comdat *C = F.getComdat())
1845 ComdatMembers.insert(std::make_pair(C, &F));
1846 for (GlobalVariable &GV : M.globals())
1847 if (Comdat *C = GV.getComdat())
1848 ComdatMembers.insert(std::make_pair(C, &GV));
1849 for (GlobalAlias &GA : M.aliases())
1850 if (Comdat *C = GA.getComdat())
1851 ComdatMembers.insert(std::make_pair(C, &GA));
1852}
1853
1854// Return true if we should not find instrumentation data for this function
1855static bool skipPGOUse(const Function &F) {
1856 if (F.isDeclaration())
1857 return true;
1858 // If there are too many critical edges, PGO might cause
1859 // compiler time problem. Skip PGO if the number of
1860 // critical edges execeed the threshold.
1861 unsigned NumCriticalEdges = 0;
1862 for (auto &BB : F) {
1863 const Instruction *TI = BB.getTerminator();
1864 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1865 if (isCriticalEdge(TI, I))
1866 NumCriticalEdges++;
1867 }
1868 }
1869 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1870 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1871 << ", NumCriticalEdges=" << NumCriticalEdges
1872 << " exceed the threshold. Skip PGO.\n");
1873 return true;
1874 }
1875 return false;
1876}
1877
1878// Return true if we should not instrument this function
1879static bool skipPGOGen(const Function &F) {
1880 if (skipPGOUse(F))
1881 return true;
1882 if (F.hasFnAttribute(llvm::Attribute::Naked))
1883 return true;
1884 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1885 return true;
1886 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1887 return true;
1888 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1889 return true;
1890 return false;
1891}
1892
1894 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1897 PGOInstrumentationType InstrumentationType) {
1898 // For the context-sensitive instrumentation, we should have a separate pass
1899 // (before LTO/ThinLTO linking) to create these variables.
1900 if (InstrumentationType == PGOInstrumentationType::FDO)
1901 createIRLevelProfileFlagVar(M, InstrumentationType);
1902
1903 Triple TT(M.getTargetTriple());
1904 LLVMContext &Ctx = M.getContext();
1905 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1907 M.getName().data(),
1908 Twine("VTable value profiling is presently not "
1909 "supported for non-ELF object formats"),
1910 DS_Warning));
1911 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1912 collectComdatMembers(M, ComdatMembers);
1913
1914 for (auto &F : M) {
1915 if (skipPGOGen(F))
1916 continue;
1917 auto &TLI = LookupTLI(F);
1918 auto *BPI = LookupBPI(F);
1919 auto *BFI = LookupBFI(F);
1920 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI,
1921 InstrumentationType);
1922 FI.instrument();
1923 }
1924 return true;
1925}
1926
1929 createProfileFileNameVar(M, CSInstrName);
1930 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1931 // will be retained.
1934 if (ProfileSampling)
1939 return PA;
1940}
1941
1944 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1945 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1947 };
1948 auto LookupBPI = [&FAM](Function &F) {
1950 };
1951 auto LookupBFI = [&FAM](Function &F) {
1953 };
1954
1955 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI,
1956 InstrumentationType))
1957 return PreservedAnalyses::all();
1958
1959 return PreservedAnalyses::none();
1960}
1961
1962// Using the ratio b/w sums of profile count values and BFI count values to
1963// adjust the func entry count.
1964static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1965 BranchProbabilityInfo &NBPI) {
1966 Function &F = Func.getFunc();
1967 BlockFrequencyInfo NBFI(F, NBPI, LI);
1968#ifndef NDEBUG
1969 auto BFIEntryCount = F.getEntryCount();
1970 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1971 "Invalid BFI Entrycount");
1972#endif
1973 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1974 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1975 for (auto &BBI : F) {
1976 uint64_t CountValue = 0;
1977 uint64_t BFICountValue = 0;
1978 if (!Func.findBBInfo(&BBI))
1979 continue;
1980 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1981 CountValue = *Func.getBBInfo(&BBI).Count;
1982 BFICountValue = *BFICount;
1983 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1984 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1985 }
1986 if (SumCount.isZero())
1987 return;
1988
1989 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1990 "Incorrect sum of BFI counts");
1991 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1992 return;
1993 double Scale = (SumCount / SumBFICount).convertToDouble();
1994 if (Scale < 1.001 && Scale > 0.999)
1995 return;
1996
1997 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1998 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1999 if (NewEntryCount == 0)
2000 NewEntryCount = 1;
2001 if (NewEntryCount != FuncEntryCount) {
2002 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
2003 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2004 << ", entry_count " << FuncEntryCount << " --> "
2005 << NewEntryCount << "\n");
2006 }
2007}
2008
2009// Compare the profile count values with BFI count values, and print out
2010// the non-matching ones.
2011static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2013 uint64_t HotCountThreshold,
2015 Function &F = Func.getFunc();
2016 BlockFrequencyInfo NBFI(F, NBPI, LI);
2017 // bool PrintFunc = false;
2018 bool HotBBOnly = PGOVerifyHotBFI;
2019 StringRef Msg;
2021
2022 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2023 for (auto &BBI : F) {
2024 uint64_t CountValue = 0;
2025 uint64_t BFICountValue = 0;
2026
2027 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
2028
2029 BBNum++;
2030 if (CountValue)
2031 NonZeroBBNum++;
2032 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2033 if (BFICount)
2034 BFICountValue = *BFICount;
2035
2036 if (HotBBOnly) {
2037 bool rawIsHot = CountValue >= HotCountThreshold;
2038 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2039 bool rawIsCold = CountValue <= ColdCountThreshold;
2040 bool ShowCount = false;
2041 if (rawIsHot && !BFIIsHot) {
2042 Msg = "raw-Hot to BFI-nonHot";
2043 ShowCount = true;
2044 } else if (rawIsCold && BFIIsHot) {
2045 Msg = "raw-Cold to BFI-Hot";
2046 ShowCount = true;
2047 }
2048 if (!ShowCount)
2049 continue;
2050 } else {
2051 if ((CountValue < PGOVerifyBFICutoff) &&
2052 (BFICountValue < PGOVerifyBFICutoff))
2053 continue;
2054 uint64_t Diff = (BFICountValue >= CountValue)
2055 ? BFICountValue - CountValue
2056 : CountValue - BFICountValue;
2057 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2058 continue;
2059 }
2060 BBMisMatchNum++;
2061
2062 ORE.emit([&]() {
2064 F.getSubprogram(), &BBI);
2065 Remark << "BB " << ore::NV("Block", BBI.getName())
2066 << " Count=" << ore::NV("Count", CountValue)
2067 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2068 if (!Msg.empty())
2069 Remark << " (" << Msg << ")";
2070 return Remark;
2071 });
2072 }
2073 if (BBMisMatchNum)
2074 ORE.emit([&]() {
2075 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2076 F.getSubprogram(), &F.getEntryBlock())
2077 << "In Func " << ore::NV("Function", F.getName())
2078 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2079 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2080 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2081 });
2082}
2083
2085 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2086 vfs::FileSystem &FS,
2087 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2090 ProfileSummaryInfo *PSI, bool IsCS) {
2091 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2092 auto &Ctx = M.getContext();
2093 // Read the counter array from file.
2094 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2095 ProfileRemappingFileName);
2096 if (Error E = ReaderOrErr.takeError()) {
2097 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2098 Ctx.diagnose(
2099 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2100 });
2101 return false;
2102 }
2103
2104 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2105 std::move(ReaderOrErr.get());
2106 if (!PGOReader) {
2107 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2108 StringRef("Cannot get PGOReader")));
2109 return false;
2110 }
2111 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2112 return false;
2113
2114 // TODO: might need to change the warning once the clang option is finalized.
2115 if (!PGOReader->isIRLevelProfile()) {
2116 Ctx.diagnose(DiagnosticInfoPGOProfile(
2117 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2118 return false;
2119 }
2120 if (PGOReader->functionEntryOnly()) {
2121 Ctx.diagnose(DiagnosticInfoPGOProfile(
2122 ProfileFileName.data(),
2123 "Function entry profiles are not yet supported for optimization"));
2124 return false;
2125 }
2126
2128 for (GlobalVariable &G : M.globals()) {
2129 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2130 continue;
2131
2132 // Create the PGOFuncName meta data.
2133 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2134 }
2135 }
2136
2137 // Add the profile summary (read from the header of the indexed summary) here
2138 // so that we can use it below when reading counters (which checks if the
2139 // function should be marked with a cold or inlinehint attribute).
2140 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2143 PSI->refresh();
2144
2145 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2146 collectComdatMembers(M, ComdatMembers);
2147 std::vector<Function *> HotFunctions;
2148 std::vector<Function *> ColdFunctions;
2149
2150 // If the profile is marked as always instrumenting the entry BB, do the
2151 // same. Note this can be overridden by the internal option in CFGMST.h
2152 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2153 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2154 InstrumentFuncEntry = PGOInstrumentEntry;
2155
2156 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2157 for (auto &F : M) {
2158 if (skipPGOUse(F))
2159 continue;
2160 auto &TLI = LookupTLI(F);
2161 auto *BPI = LookupBPI(F);
2162 auto *BFI = LookupBFI(F);
2163 if (!HasSingleByteCoverage) {
2164 // Split indirectbr critical edges here before computing the MST rather
2165 // than later in getInstrBB() to avoid invalidating it.
2166 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2167 BFI);
2168 }
2169 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2170 InstrumentFuncEntry, HasSingleByteCoverage);
2171 if (HasSingleByteCoverage) {
2172 Func.populateCoverage(PGOReader.get());
2173 continue;
2174 }
2175 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2176 // it means the profile for the function is unrepresentative and this
2177 // function is actually hot / warm. We will reset the function hot / cold
2178 // attribute and drop all the profile counters.
2180 bool AllZeros = false;
2181 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2182 continue;
2183 if (AllZeros) {
2184 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2185 if (Func.getProgramMaxCount() != 0)
2186 ColdFunctions.push_back(&F);
2187 continue;
2188 }
2189 if (PseudoKind != InstrProfRecord::NotPseudo) {
2190 // Clear function attribute cold.
2191 if (F.hasFnAttribute(Attribute::Cold))
2192 F.removeFnAttr(Attribute::Cold);
2193 // Set function attribute as hot.
2194 if (PseudoKind == InstrProfRecord::PseudoHot)
2195 F.addFnAttr(Attribute::Hot);
2196 continue;
2197 }
2198 Func.populateCounters();
2199 Func.setBranchWeights();
2200 Func.annotateValueSites();
2201 Func.annotateIrrLoopHeaderWeights();
2202 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2203 if (FreqAttr == PGOUseFunc::FFA_Cold)
2204 ColdFunctions.push_back(&F);
2205 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2206 HotFunctions.push_back(&F);
2207 if (PGOViewCounts != PGOVCT_None &&
2208 (ViewBlockFreqFuncName.empty() ||
2209 F.getName() == ViewBlockFreqFuncName)) {
2211 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2212 std::make_unique<BranchProbabilityInfo>(F, LI);
2213 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2214 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2216 NewBFI->view();
2217 else if (PGOViewCounts == PGOVCT_Text) {
2218 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2219 NewBFI->print(dbgs());
2220 }
2221 }
2223 (ViewBlockFreqFuncName.empty() ||
2224 F.getName() == ViewBlockFreqFuncName)) {
2226 if (ViewBlockFreqFuncName.empty())
2227 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2228 else
2229 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2230 else if (PGOViewRawCounts == PGOVCT_Text) {
2231 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2232 Func.dumpInfo();
2233 }
2234 }
2235
2238 BranchProbabilityInfo NBPI(F, LI);
2239
2240 // Fix func entry count.
2241 if (PGOFixEntryCount)
2242 fixFuncEntryCount(Func, LI, NBPI);
2243
2244 // Verify BlockFrequency information.
2245 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2246 if (PGOVerifyHotBFI) {
2247 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2249 }
2250 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2251 }
2252 }
2253
2254 // Set function hotness attribute from the profile.
2255 // We have to apply these attributes at the end because their presence
2256 // can affect the BranchProbabilityInfo of any callers, resulting in an
2257 // inconsistent MST between prof-gen and prof-use.
2258 for (auto &F : HotFunctions) {
2259 F->addFnAttr(Attribute::InlineHint);
2260 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2261 << "\n");
2262 }
2263 for (auto &F : ColdFunctions) {
2264 // Only set when there is no Attribute::Hot set by the user. For Hot
2265 // attribute, user's annotation has the precedence over the profile.
2266 if (F->hasFnAttribute(Attribute::Hot)) {
2267 auto &Ctx = M.getContext();
2268 std::string Msg = std::string("Function ") + F->getName().str() +
2269 std::string(" is annotated as a hot function but"
2270 " the profile is cold");
2271 Ctx.diagnose(
2272 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2273 continue;
2274 }
2275 F->addFnAttr(Attribute::Cold);
2276 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2277 << "\n");
2278 }
2279 return true;
2280}
2281
2283 std::string Filename, std::string RemappingFilename, bool IsCS,
2285 : ProfileFileName(std::move(Filename)),
2286 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2287 FS(std::move(VFS)) {
2288 if (!PGOTestProfileFile.empty())
2289 ProfileFileName = PGOTestProfileFile;
2290 if (!PGOTestProfileRemappingFile.empty())
2291 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2292 if (!FS)
2294}
2295
2298
2299 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2300 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2302 };
2303 auto LookupBPI = [&FAM](Function &F) {
2305 };
2306 auto LookupBFI = [&FAM](Function &F) {
2308 };
2309
2310 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2311 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2312 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2313 return PreservedAnalyses::all();
2314
2315 return PreservedAnalyses::none();
2316}
2317
2318static std::string getSimpleNodeName(const BasicBlock *Node) {
2319 if (!Node->getName().empty())
2320 return Node->getName().str();
2321
2322 std::string SimpleNodeName;
2323 raw_string_ostream OS(SimpleNodeName);
2324 Node->printAsOperand(OS, false);
2325 return SimpleNodeName;
2326}
2327
2329 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2330 assert(MaxCount > 0 && "Bad max count");
2331 uint64_t Scale = calculateCountScale(MaxCount);
2333 for (const auto &ECI : EdgeCounts)
2334 Weights.push_back(scaleBranchCount(ECI, Scale));
2335
2336 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2337 : Weights) {
2338 dbgs() << W << " ";
2339 } dbgs() << "\n";);
2340
2341 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2342
2343 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2345 std::string BrCondStr = getBranchCondString(TI);
2346 if (BrCondStr.empty())
2347 return;
2348
2349 uint64_t WSum =
2350 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2351 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2352 uint64_t TotalCount =
2353 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2354 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2355 Scale = calculateCountScale(WSum);
2356 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2357 scaleBranchCount(WSum, Scale));
2358 std::string BranchProbStr;
2359 raw_string_ostream OS(BranchProbStr);
2360 OS << BP;
2361 OS << " (total count : " << TotalCount << ")";
2362 OS.flush();
2363 Function *F = TI->getParent()->getParent();
2365 ORE.emit([&]() {
2366 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2367 << BrCondStr << " is true with probability : " << BranchProbStr;
2368 });
2369 }
2370}
2371
2372namespace llvm {
2373
2375 MDBuilder MDB(M->getContext());
2376 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2377 MDB.createIrrLoopHeaderWeight(Count));
2378}
2379
2380template <> struct GraphTraits<PGOUseFunc *> {
2381 using NodeRef = const BasicBlock *;
2384
2385 static NodeRef getEntryNode(const PGOUseFunc *G) {
2386 return &G->getFunc().front();
2387 }
2388
2390 return succ_begin(N);
2391 }
2392
2393 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2394
2395 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2396 return nodes_iterator(G->getFunc().begin());
2397 }
2398
2399 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2400 return nodes_iterator(G->getFunc().end());
2401 }
2402};
2403
2404template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2405 explicit DOTGraphTraits(bool isSimple = false)
2407
2408 static std::string getGraphName(const PGOUseFunc *G) {
2409 return std::string(G->getFunc().getName());
2410 }
2411
2412 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2413 std::string Result;
2414 raw_string_ostream OS(Result);
2415
2416 OS << getSimpleNodeName(Node) << ":\\l";
2417 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2418 OS << "Count : ";
2419 if (BI && BI->Count)
2420 OS << *BI->Count << "\\l";
2421 else
2422 OS << "Unknown\\l";
2423
2424 if (!PGOInstrSelect)
2425 return Result;
2426
2427 for (const Instruction &I : *Node) {
2428 if (!isa<SelectInst>(&I))
2429 continue;
2430 // Display scaled counts for SELECT instruction:
2431 OS << "SELECT : { T = ";
2432 uint64_t TC, FC;
2433 bool HasProf = extractBranchWeights(I, TC, FC);
2434 if (!HasProf)
2435 OS << "Unknown, F = Unknown }\\l";
2436 else
2437 OS << TC << ", F = " << FC << " }\\l";
2438 }
2439 return Result;
2440 }
2441};
2442
2443} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, PGOInstrumentationType InstrumentationType)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:990
Class for arbitrary precision integers.
Definition: APInt.h:77
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
A union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1236
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2242
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:296
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:550
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:417
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1069
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
Tuple of metadata.
Definition: Metadata.h:1472
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1499
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:204
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:379
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1423
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:368
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1427
PGOInstrumentationType
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:482
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1297
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:56
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:275
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1479
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1502
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1856
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:253
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:831
std::vector< uint64_t > Counts
Definition: InstrProf.h:832
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:929
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1031
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1012