LLVM  16.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 // Issue 3, pp 313-322
14 // The idea of the algorithm is based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotate the indirect call instructions.
32 //
33 // To get the precise counter information, these two passes need to be invoked
34 // at the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37 // the profile is opened at module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44 // from PGOEdge and BBInfo, respectively. They contain extra data structures
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
51 #include "CFGMST.h"
52 #include "ValueProfileCollector.h"
53 #include "llvm/ADT/APInt.h"
54 #include "llvm/ADT/ArrayRef.h"
55 #include "llvm/ADT/STLExtras.h"
56 #include "llvm/ADT/SmallVector.h"
57 #include "llvm/ADT/Statistic.h"
58 #include "llvm/ADT/StringRef.h"
59 #include "llvm/ADT/Triple.h"
60 #include "llvm/ADT/Twine.h"
61 #include "llvm/ADT/iterator.h"
65 #include "llvm/Analysis/CFG.h"
67 #include "llvm/Analysis/LoopInfo.h"
73 #include "llvm/IR/Attributes.h"
74 #include "llvm/IR/BasicBlock.h"
75 #include "llvm/IR/CFG.h"
76 #include "llvm/IR/Comdat.h"
77 #include "llvm/IR/Constant.h"
78 #include "llvm/IR/Constants.h"
79 #include "llvm/IR/DiagnosticInfo.h"
80 #include "llvm/IR/Dominators.h"
81 #include "llvm/IR/Function.h"
82 #include "llvm/IR/GlobalAlias.h"
83 #include "llvm/IR/GlobalValue.h"
84 #include "llvm/IR/GlobalVariable.h"
85 #include "llvm/IR/IRBuilder.h"
86 #include "llvm/IR/InstVisitor.h"
87 #include "llvm/IR/InstrTypes.h"
88 #include "llvm/IR/Instruction.h"
89 #include "llvm/IR/Instructions.h"
90 #include "llvm/IR/IntrinsicInst.h"
91 #include "llvm/IR/Intrinsics.h"
92 #include "llvm/IR/LLVMContext.h"
93 #include "llvm/IR/MDBuilder.h"
94 #include "llvm/IR/Module.h"
95 #include "llvm/IR/PassManager.h"
96 #include "llvm/IR/ProfDataUtils.h"
97 #include "llvm/IR/ProfileSummary.h"
98 #include "llvm/IR/Type.h"
99 #include "llvm/IR/Value.h"
102 #include "llvm/Support/BLAKE3.h"
104 #include "llvm/Support/CRC.h"
105 #include "llvm/Support/Casting.h"
108 #include "llvm/Support/Debug.h"
109 #include "llvm/Support/Error.h"
118 #include <algorithm>
119 #include <cassert>
120 #include <cstdint>
121 #include <map>
122 #include <memory>
123 #include <numeric>
124 #include <set>
125 #include <string>
126 #include <unordered_map>
127 #include <utility>
128 #include <vector>
129 
130 using namespace llvm;
131 using namespace llvm::memprof;
134 
135 #define DEBUG_TYPE "pgo-instrumentation"
136 
// Pass statistics. The NumOfPGO* counters are bumped during regular
// instrumentation; the NumOfCSPGO* counters are used instead when the
// function is processed in context-sensitive (CS) mode.
137 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
138 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
139 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
140 STATISTIC(NumOfPGOEdge, "Number of edges.");
141 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
142 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
143 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
144 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
145 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
146 STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
147 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
148 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
149 STATISTIC(NumOfCSPGOSelectInsts,
150  "Number of select instruction instrumented in CSPGO.");
151 STATISTIC(NumOfCSPGOMemIntrinsics,
152  "Number of mem intrinsics instrumented in CSPGO.");
153 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
154 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
155 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
156 STATISTIC(NumOfCSPGOFunc,
157  "Number of functions having valid profile counts in CSPGO.");
158 STATISTIC(NumOfCSPGOMismatch,
159  "Number of functions having mismatch profile in CSPGO.");
160 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
161 
162 // Command line option (-pgo-test-profile-file) to specify the file to read
163 // the profile from. It defaults to the empty string and is mainly used for
    // testing. The two description literals below are concatenated by the
    // compiler, so the first one must end with a space.
165  PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
166  cl::value_desc("filename"),
167  cl::desc("Specify the path of profile data file. This is "
168  "mainly for test purpose."));
170  "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
171  cl::value_desc("filename"),
172  cl::desc("Specify the path of profile remapping file. This is mainly for "
173  "test purpose."));
174 
175 // Command line option (-disable-vp) to disable value profiling. It defaults
176 // to false, i.e. value profiling stays enabled unless the flag is given.
    // This is for debug purpose.
177 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
178  cl::Hidden,
179  cl::desc("Disable Value Profiling"));
180 
181 // Command line option to set the maximum number of VP annotations to write to
182 // the metadata for a single indirect call callsite.
184  "icp-max-annotations", cl::init(3), cl::Hidden,
185  cl::desc("Max number of annotations for a single indirect "
186  "call callsite"));
187 
188 // Command line option (-memop-max-annotations, default 4) to set the maximum
189 // number of value annotations to write to the metadata for a single memop
    // intrinsic. The two description literals below are concatenated by the
    // compiler, so the first one must end with a space.
191  "memop-max-annotations", cl::init(4), cl::Hidden,
192  cl::desc("Max number of precise value annotations for a single memop "
193  "intrinsic"));
194 
195 // Command line option to control appending FunctionHash to the name of a COMDAT
196 // function. This is to avoid the hash mismatch caused by the preinliner.
198  "do-comdat-renaming", cl::init(false), cl::Hidden,
199  cl::desc("Append function hash to the name of COMDAT function to avoid "
200  "function hash mismatch due to the preinliner"));
201 
202 // Command line option (-pgo-warn-missing-function) to enable/disable the
203 // warning about missing profile information for functions. Off by default.
204 static cl::opt<bool>
205  PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
206  cl::desc("Use this option to turn on/off "
207  "warnings about missing profile data for "
208  "functions."));
209 
210 namespace llvm {
211 // Command line option to enable/disable the warning about a hash mismatch in
212 // the profile data.
214  NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
215  cl::desc("Use this option to turn off/on "
216  "warnings about profile cfg mismatch."));
217 } // namespace llvm
218 
219 // Command line option to enable/disable the warning about a hash mismatch in
220 // the profile data for Comdat functions, which often turns out to be false
221 // positive due to the pre-instrumentation inline.
223  "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
224  cl::desc("The option is used to turn on/off "
225  "warnings about hash mismatch for comdat "
226  "or weak functions."));
227 
228 // Command line option (-pgo-instr-select) to enable/disable select
    // instruction instrumentation. On by default.
229 static cl::opt<bool>
230  PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
231  cl::desc("Use this option to turn on/off SELECT "
232  "instruction instrumentation. "));
233 
234 // Command line option to turn on CFG dot or text dump of raw profile counts
236  "pgo-view-raw-counts", cl::Hidden,
237  cl::desc("A boolean option to show CFG dag or text "
238  "with raw profile counts from "
239  "profile data. See also option "
240  "-pgo-view-counts. To limit graph "
241  "display to only one function, use "
242  "filtering option -view-bfi-func-name."),
243  cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
244  clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
245  clEnumValN(PGOVCT_Text, "text", "show in text.")));
246 
247 // Command line option (-pgo-instr-memop) to enable/disable size profiling of
    // memory intrinsic calls. On by default.
248 static cl::opt<bool>
249  PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
250  cl::desc("Use this option to turn on/off "
251  "memory intrinsic size profiling."));
252 
253 // Command line option (-pgo-emit-branch-prob) to emit the annotated branch
    // probabilities as optimization remarks. Off by default.
254 static cl::opt<bool>
255  EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
256  cl::desc("When this option is on, the annotated "
257  "branch probability will be emitted as "
258  "optimization remarks: -{Rpass|"
259  "pass-remarks}=pgo-instrumentation"));
260 
262  "pgo-instrument-entry", cl::init(false), cl::Hidden,
263  cl::desc("Force to instrument function entry basicblock."));
264 
266  "pgo-function-entry-coverage", cl::Hidden,
267  cl::desc(
268  "Use this option to enable function entry coverage instrumentation."));
269 
// Command line option (-pgo-fix-entry-count) to fix the function entry count
// in profile use. On by default.
270 static cl::opt<bool>
271  PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
272  cl::desc("Fix function entry count in profile use."));
273 
275  "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
276  cl::desc("Print out the non-match BFI count if a hot raw profile count "
277  "becomes non-hot, or a cold raw profile count becomes hot. "
278  "The print is enabled under -Rpass-analysis=pgo, or "
279  "internal option -pass-remakrs-analysis=pgo."));
280 
282  "pgo-verify-bfi", cl::init(false), cl::Hidden,
283  cl::desc("Print out mismatched BFI counts after setting profile metadata "
284  "The print is enabled under -Rpass-analysis=pgo, or "
285  "internal option -pass-remakrs-analysis=pgo."));
286 
288  "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
289  cl::desc("Set the threshold for pgo-verify-bfi: only print out "
290  "mismatched BFI if the difference percentage is greater than "
291  "this value (in percentage)."));
292 
294  "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
295  cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
296  "profile count value is below."));
297 
299  "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
300  cl::value_desc("function name"),
301  cl::desc("Trace the hash of the function with this name."));
302 
304  "pgo-function-size-threshold", cl::Hidden,
305  cl::desc("Do not instrument functions smaller than this threshold."));
306 
308  "pgo-match-memprof", cl::init(true), cl::Hidden,
309  cl::desc("Perform matching and annotation of memprof profiles."));
310 
312  "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
313  cl::desc("Do not instrument functions with the number of critical edges "
314  " greater than this threshold."));
315 
316 namespace llvm {
317 // Command line option to turn on CFG dot dump after profile annotation.
318 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
320 
321 // Command line option to specify the name of the function for CFG dump
322 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
324 
326 } // namespace llvm
327 
// Command line option (-pgo-instr-old-cfg-hashing) to select the old CFG
// function hash layout in computeCFGHash(). Off by default.
328 static cl::opt<bool>
329  PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
330  cl::desc("Use the old CFG function hashing"));
331 
332 // Return a string describing the branch condition of terminator \p TI that
333 // can be used in static branch probability heuristics.
    //
    // The result is empty unless TI is a conditional BranchInst whose condition
    // is an ICmpInst. Otherwise it is "<predicate name>_<type of operand 0>",
    // followed by "_Zero", "_One", "_MinusOne" or "_Const" when operand 1 is a
    // ConstantInt (no suffix is added for a non-constant operand 1).
334 static std::string getBranchCondString(Instruction *TI) {
335  BranchInst *BI = dyn_cast<BranchInst>(TI);
336  if (!BI || !BI->isConditional())
337  return std::string();
338 
339  Value *Cond = BI->getCondition();
340  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
341  if (!CI)
342  return std::string();
343 
344  std::string result;
     // NOTE(review): the declaration of OS is on a line missing from this
     // listing; presumably it is a stream that writes into `result`.
346  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
347  CI->getOperand(0)->getType()->print(OS, true);
348 
349  Value *RHS = CI->getOperand(1);
350  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
     // Classify a constant right-hand side; the exact value is not recorded
     // unless it is 0, 1 or -1.
351  if (CV) {
352  if (CV->isZero())
353  OS << "_Zero";
354  else if (CV->isOne())
355  OS << "_One";
356  else if (CV->isMinusOne())
357  OS << "_MinusOne";
358  else
359  OS << "_Const";
360  }
361  OS.flush();
362  return result;
363 }
364 
// Description strings for value profile kinds: every VALUE_PROF_KIND entry
// expands to its Descr argument. NOTE(review): the line that supplies the
// entries (between the #define and the closing brace) is missing from this
// listing.
365 static const char *ValueProfKindDescr[] = {
366 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
368 };
369 
370 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
371 // aware this is an ir_level profile so it can set the version flag.
    //
    // The variable is a constant i64 holding INSTR_PROF_RAW_VERSION combined
    // with VARIANT_MASK_* flags, and is returned to the caller.
    // NOTE(review): the function signature is on a line missing from this
    // listing; the body reads a module `M` and a flag `IsCS`.
373  const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
374  Type *IntTy64 = Type::getInt64Ty(M.getContext());
375  uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
     // Add one variant bit per enabled instrumentation flavour.
376  if (IsCS)
377  ProfileVersion |= VARIANT_MASK_CSIR_PROF;
378  if (PGOInstrumentEntry)
379  ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
380  if (DebugInfoCorrelate)
381  ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
     // NOTE(review): the condition guarding the next statement is on a line
     // missing from this listing (presumably the function-entry-coverage
     // option); confirm against the real file.
383  ProfileVersion |=
384  VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
     // Constant, weak-any, hidden global initialized with the version word.
385  auto IRLevelVersionVariable = new GlobalVariable(
386  M, IntTy64, true, GlobalValue::WeakAnyLinkage,
387  Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
388  IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
389  Triple TT(M.getTargetTriple());
     // On targets with COMDAT support, use external linkage and put the
     // variable into a comdat named after the variable.
390  if (TT.supportsCOMDAT()) {
391  IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
392  IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
393  }
394  return IRLevelVersionVariable;
395 }
396 
397 namespace {
398 
399 /// The select instruction visitor plays three roles specified
400 /// by the mode. In \c VM_counting mode, it simply counts the number of
401 /// select instructions. In \c VM_instrument mode, it inserts code to count
402 /// the number times TrueValue of select is taken. In \c VM_annotate mode,
403 /// it reads the profile data and annotate the select instruction with metadata.
404 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
405 class PGOUseFunc;
406 
407 /// Instruction Visitor class to visit select instructions.
    ///
    /// The visitor is driven through countSelects(), instrumentSelects() or
    /// annotateSelects(); each one records its arguments in the members below,
    /// sets \c Mode and then walks the function with visit().
408 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
     // Function this visitor was created for.
409  Function &F;
410  unsigned NSIs = 0; // Number of select instructions instrumented.
411  VisitMode Mode = VM_counting; // Visiting mode.
412  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
413  unsigned TotalNumCtrs = 0; // Total number of counters
     // PGO function name variable and function hash; set by
     // instrumentSelects().
414  GlobalVariable *FuncNameVar = nullptr;
415  uint64_t FuncHash = 0;
     // Profile-use state; set by annotateSelects().
416  PGOUseFunc *UseFunc = nullptr;
417 
418  SelectInstVisitor(Function &Func) : F(Func) {}
419 
     // Reset the select counter and walk \p Func in counting mode.
420  void countSelects(Function &Func) {
421  NSIs = 0;
422  Mode = VM_counting;
423  visit(Func);
424  }
425 
426  // Visit the IR stream and instrument all select instructions. \p
427  // Ind is a pointer to the counter index variable; \p TotalNC
428  // is the total number of counters; \p FNV is the pointer to the
429  // PGO function name var; \p FHash is the function hash.
430  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
431  GlobalVariable *FNV, uint64_t FHash) {
432  Mode = VM_instrument;
433  CurCtrIdx = Ind;
434  TotalNumCtrs = TotalNC;
435  FuncHash = FHash;
436  FuncNameVar = FNV;
437  visit(Func);
438  }
439 
440  // Visit the IR stream and annotate all select instructions. \p UF is the
     // profile-use object and \p Ind points to the counter index variable.
441  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
442  Mode = VM_annotate;
443  UseFunc = UF;
444  CurCtrIdx = Ind;
445  visit(Func);
446  }
447 
     // Per-instruction workers for the instrument and annotate modes; they are
     // defined outside this struct.
448  void instrumentOneSelectInst(SelectInst &SI);
449  void annotateOneSelectInst(SelectInst &SI);
450 
451  // Visit \p SI instruction and perform tasks according to visit mode.
452  void visitSelectInst(SelectInst &SI);
453 
454  // Return the number of select instructions. This needs to be called after
455  // countSelects().
456  unsigned getNumOfSelectInsts() const { return NSIs; }
457 };
458 
459 } // end anonymous namespace
460 
461 namespace {
462 
463 // An MST based instrumentation for PGO
464 //
465 // The types in this namespace implement a Minimum Spanning Tree (MST) based
466 // instrumentation for PGO in the function level.

    /// A CFG edge together with the auxiliary state used by the MST based
    /// instrumentation.
467 struct PGOEdge {
468  // This class implements the CFG edges. Note the CFG can be a multi-graph.
469  // So there might be multiple edges with same SrcBB and DestBB.
470  const BasicBlock *SrcBB;
471  const BasicBlock *DestBB;
472  uint64_t Weight;
     // True when the edge is on the spanning tree.
473  bool InMST = false;
     // True when the edge has been removed from the graph (getInstrBB() sets
     // this after splitting a critical edge).
474  bool Removed = false;
     // True when the edge is a critical edge.
475  bool IsCritical = false;
476 
     // Build an edge from \p Src to \p Dest with weight \p W (1 by default).
477  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
478  : SrcBB(Src), DestBB(Dest), Weight(W) {}
479 
480  // Return the information string of an edge: "-" if removed, "*" if the edge
     // is not in the MST, "c" if critical (a blank otherwise in each position),
     // followed by " W=<Weight>".
481  std::string infoString() const {
482  return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
483  (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
484  }
485 };
486 
487 // This class stores the auxiliary information for each BB.
488 struct BBInfo {
     // Group starts out pointing at this object and Rank starts at 0.
     // NOTE(review): presumably these are the union-find fields used by the
     // MST construction in CFGMST.h; confirm there.
489  BBInfo *Group;
490  uint32_t Index;
491  uint32_t Rank = 0;
492 
     // Create the info for the BB with index \p IX, in a group of its own.
493  BBInfo(unsigned IX) : Group(this), Index(IX) {}
494 
495  // Return the information string of this object, formatted "Index=<Index>".
496  std::string infoString() const {
497  return (Twine("Index=") + Twine(Index)).str();
498  }
499 
500  // Empty function -- only applicable to UseBBInfo.
501  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
502 
503  // Empty function -- only applicable to UseBBInfo.
504  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
505 };
506 
507 // Per-function PGO state, parameterized over the edge and BB-info types. The
    // constructor counts select instructions, collects value profiling sites,
    // computes the function name and CFG hash, and updates the pass
    // statistics.
    // NOTE(review): the declarations of the members `VPC` and `MST` used below
    // are on lines missing from this listing.
508 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
509 private:
510  Function &F;
511 
512  // Whether this is context-sensitive instrumentation.
513  bool IsCS;
514 
515  // A map that stores the Comdat group in function F.
516  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
517 
519 
     // Compute FunctionHash from the CFG and the instrumentation site counts.
520  void computeCFGHash();
     // Append the CFG hash to the name of a renamable Comdat function.
521  void renameComdatFunction();
522 
523 public:
524  const TargetLibraryInfo &TLI;
     // Value profiling candidates, indexed by value profile kind.
525  std::vector<std::vector<VPCandidateInfo>> ValueSites;
526  SelectInstVisitor SIVisitor;
     // PGO name of the function, as returned by getPGOFuncName().
527  std::string FuncName;
     // NOTE(review): only assigned when the constructor is called with
     // CreateGlobalVar == true; there is no default initializer.
528  GlobalVariable *FuncNameVar;
529 
530  // CFG hash value for this function.
531  uint64_t FunctionHash = 0;
532 
533  // The Minimum Spanning Tree of function CFG.
535 
536  // Collect all the BBs that will be instrumented, and store them in
537  // InstrumentBBs.
538  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
539 
540  // Given an edge, find the BB that will be instrumented.
541  // Return nullptr if there is no BB to be instrumented.
542  BasicBlock *getInstrBB(Edge *E);
543 
544  // Return the auxiliary BB information.
545  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
546 
547  // Return the auxiliary BB information if available.
548  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
549 
550  // Dump edges and BB information. \p Str is appended to the dump header.
551  void dumpInfo(std::string Str = "") const {
552  MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
553  Twine(FunctionHash) + "\t" + Str);
554  }
555 
     // Build the per-function state for \p Func. \p CreateGlobalVar requests
     // creation of the PGO function name variable; \p BPI, \p BFI and
     // \p InstrumentFuncEntry are forwarded to the MST; \p IsCS selects the
     // context-sensitive statistics and skips collection of indirect call
     // sites.
556  FuncPGOInstrumentation(
557  Function &Func, TargetLibraryInfo &TLI,
558  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
559  bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
560  BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
561  bool InstrumentFuncEntry = true)
562  : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
563  TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
564  MST(F, InstrumentFuncEntry, BPI, BFI) {
565  // This should be done before CFG hash computation.
566  SIVisitor.countSelects(Func);
567  ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
568  if (!IsCS) {
569  NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
570  NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
571  NumOfPGOBB += MST.BBInfos.size();
572  ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
573  } else {
574  NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
575  NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
576  NumOfCSPGOBB += MST.BBInfos.size();
577  }
578 
579  FuncName = getPGOFuncName(F);
580  computeCFGHash();
581  if (!ComdatMembers.empty())
582  renameComdatFunction();
583  LLVM_DEBUG(dumpInfo("after CFGMST"));
584 
     // Count the live edges, and those off the spanning tree (the ones that
     // get instrumented).
585  for (auto &E : MST.AllEdges) {
586  if (E->Removed)
587  continue;
588  IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
589  if (!E->InMST)
590  IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
591  }
592 
593  if (CreateGlobalVar)
594  FuncNameVar = createPGOFuncNameVar(F, FuncName);
595  }
596 };
597 
598 } // end anonymous namespace
599 
600 // Compute Hash value for the CFG. The low part is the CRC32 of the index
601 // value of each successor BB in the CFG. In the default scheme the high part
602 // is the CRC32 of the numbers of selects, indirect calls, mem ops and edges,
    // shifted left by 28 bits and added to the low CRC. With
    // -pgo-instr-old-cfg-hashing the select, indirect call and edge counts are
    // packed directly at bits 56, 48 and 32. In both schemes bits 60-63 are
    // cleared afterwards.
603 template <class Edge, class BBInfo>
604 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
605  std::vector<uint8_t> Indexes;
606  JamCRC JC;
     // Serialize the index of every successor with BB info as 4 little-endian
     // bytes; successors without BB info are skipped.
607  for (auto &BB : F) {
608  const Instruction *TI = BB.getTerminator();
609  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
610  BasicBlock *Succ = TI->getSuccessor(I);
611  auto BI = findBBInfo(Succ);
612  if (BI == nullptr)
613  continue;
614  uint32_t Index = BI->Index;
615  for (int J = 0; J < 4; J++)
616  Indexes.push_back((uint8_t)(Index >> (J * 8)));
617  }
618  }
619  JC.update(Indexes);
620 
621  JamCRC JCH;
622  if (PGOOldCFGHashing) {
623  // Hash format for context sensitive profile. Reserve 4 bits for other
624  // information.
625  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
626  (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
627  //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
628  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
629  } else {
630  // The high part: a CRC over the four counts, each fed as 8 little-endian
     // bytes.
631  auto updateJCH = [&JCH](uint64_t Num) {
632  uint8_t Data[8];
633  support::endian::write64le(Data, Num);
634  JCH.update(Data);
635  };
636  updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
637  updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
638  updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
639  updateJCH((uint64_t)MST.AllEdges.size());
640 
641  // Hash format for context sensitive profile. Reserve 4 bits for other
642  // information.
643  FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
644  }
645 
646  // Reserve bit 60-63 for other information purpose.
647  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
     // NOTE(review): the statement guarded by this `if` is on a line missing
     // from this listing; as shown, the `if` would wrongly appear to guard the
     // debug output below.
648  if (IsCS)
650  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
651  << " CRC = " << JC.getCRC()
652  << ", Selects = " << SIVisitor.getNumOfSelectInsts()
653  << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
654  << ValueSites[IPVK_IndirectCallTarget].size());
655  if (!PGOOldCFGHashing) {
656  LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
657  << ", High32 CRC = " << JCH.getCRC());
658  }
659  LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
660 
     // Unconditional trace for functions whose name contains the string given
     // to -pgo-trace-func-hash ("-" disables it).
661  if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
662  dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
663  << " in building " << F.getParent()->getSourceFileName() << "\n";
664 }
665 
666 // Check if we can safely rename this Comdat function. Returns false when
    // Comdat renaming is disabled, when canRenameComdatFunc() rejects \p F, or
    // when \p ComdatMembers records any member other than \p F for F's Comdat.
667 static bool canRenameComdat(
668  Function &F,
669  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
670  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
671  return false;
672 
673  // FIXME: Currently only handles those Comdat groups that contain only one
674  // function.
675  // (1) For a Comdat group containing multiple functions, we need to have a
676  // unique postfix based on the hashes for each function. There is a
677  // non-trivial code refactoring to do this efficiently.
678  // (2) Variables can not be renamed, so we can not rename Comdat function in a
679  // group including global vars.
680  Comdat *C = F.getComdat();
681  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
682  assert(!isa<GlobalAlias>(CM.second));
     // A non-function member yields a null FM, which also differs from &F.
683  Function *FM = dyn_cast<Function>(CM.second);
684  if (FM != &F)
685  return false;
686  }
687  return true;
688 }
689 
690 // Append the CFGHash to the Comdat function name. Nothing is done unless
    // canRenameComdat() accepts the function. FuncName receives the same
    // ".<FunctionHash>" suffix as the function itself.
    // NOTE(review): three statements of this function are on lines missing
    // from this listing (after setName and inside the !hasComdat branch).
691 template <class Edge, class BBInfo>
692 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
693  if (!canRenameComdat(F, ComdatMembers))
694  return;
695  std::string OrigName = F.getName().str();
696  std::string NewFuncName =
697  Twine(F.getName() + "." + Twine(FunctionHash)).str();
698  F.setName(Twine(NewFuncName));
700  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
701  Comdat *NewComdat;
702  Module *M = F.getParent();
703  // For AvailableExternallyLinkage functions, change the linkage to
704  // LinkOnceODR and put them into comdat. This is because after renaming, there
705  // is no backup external copy available for the function.
706  if (!F.hasComdat()) {
708  NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
710  F.setComdat(NewComdat);
711  return;
712  }
713 
714  // This function belongs to a single function Comdat group. Create a Comdat
     // named "<original comdat name>.<FunctionHash>" with the same selection
     // kind and move the recorded members over to it.
715  Comdat *OrigComdat = F.getComdat();
716  std::string NewComdatName =
717  Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
718  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
719  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
720 
721  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
722  // Must be a function.
723  cast<Function>(CM.second)->setComdat(NewComdat);
724  }
725 }
726 
727 // Collect all the BBs that will be instrumented and return them in
728 // InstrumentBBs, then set up InEdges/OutEdges for UseBBInfo.
729 template <class Edge, class BBInfo>
730 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
731  std::vector<BasicBlock *> &InstrumentBBs) {
732  // Use a worklist as we will update the vector during the iteration:
     // getInstrBB() may add edges to the MST when it splits a critical edge.
733  std::vector<Edge *> EdgeList;
734  EdgeList.reserve(MST.AllEdges.size());
735  for (auto &E : MST.AllEdges)
736  EdgeList.push_back(E.get());
737 
     // Edges for which getInstrBB() returns nullptr contribute no BB.
738  for (auto &E : EdgeList) {
739  BasicBlock *InstrBB = getInstrBB(E);
740  if (InstrBB)
741  InstrumentBBs.push_back(InstrBB);
742  }
743 
744  // Set up InEdges/OutEdges for all BBs. This walks the edge list again, so
     // edges added above are included and removed edges are skipped.
745  for (auto &E : MST.AllEdges) {
746  if (E->Removed)
747  continue;
748  const BasicBlock *SrcBB = E->SrcBB;
749  const BasicBlock *DestBB = E->DestBB;
750  BBInfo &SrcInfo = getBBInfo(SrcBB);
751  BBInfo &DestInfo = getBBInfo(DestBB);
752  SrcInfo.addOutEdge(E.get());
753  DestInfo.addInEdge(E.get());
754  }
755 }
756 
757 // Given a CFG edge E to be instrumented, find which BB to place the
758 // instrumented code. The function will split the critical edge if necessary.
    // Returns nullptr when the edge needs no instrumentation (it is in the MST
    // or removed) or when no instrumentable BB can be found.
    // NOTE(review): the line carrying the function signature is missing from
    // this listing.
759 template <class Edge, class BBInfo>
761  if (E->InMST || E->Removed)
762  return nullptr;
763 
764  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
765  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
766  // For a fake edge, instrument the real BB.
767  if (SrcBB == nullptr)
768  return DestBB;
769  if (DestBB == nullptr)
770  return SrcBB;
771 
772  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
773  // There are basic blocks (such as catchswitch) that cannot be instrumented.
774  // If the returned first insertion point is the end of BB, skip this BB.
775  if (BB->getFirstInsertionPt() == BB->end())
776  return nullptr;
777  return BB;
778  };
779 
780  // Instrument the SrcBB if it has a single successor,
781  // otherwise, the DestBB if this is not a critical edge.
782  Instruction *TI = SrcBB->getTerminator();
783  if (TI->getNumSuccessors() <= 1)
784  return canInstrument(SrcBB);
785  if (!E->IsCritical)
786  return canInstrument(DestBB);
787 
788  // Some IndirectBr critical edges cannot be split by the previous
789  // SplitIndirectBrCriticalEdges call. Bail out.
790  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
791  BasicBlock *InstrBB =
792  isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
793  if (!InstrBB) {
794  LLVM_DEBUG(
795  dbgs() << "Fail to split critical edge: not instrument this edge.\n");
796  return nullptr;
797  }
798  // For a critical edge, we have to split. Instrument the newly
799  // created BB.
800  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
801  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
802  << " --> " << getBBInfo(DestBB).Index << "\n");
803  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
804  MST.addEdge(SrcBB, InstrBB, 0);
805  // Second one: Add new edge of InstrBB->DestBB. It is marked as being in the
     // MST, and the original edge is marked removed.
806  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
807  NewEdge1.InMST = true;
808  E->Removed = true;
809 
810  return canInstrument(InstrBB);
811 }
812 
813 // When generating value profiling calls on Windows routines that make use of
814 // handler funclets for exception processing an operand bundle needs to be
815 // attached to the called function. This routine will set \p OpBundles to
816 // contain the funclet information, if any is needed, that should be placed on
817 // the generated value profiling call for the value profile candidate call.
    // NOTE(review): the lines carrying the function name and parameter list
    // are missing from this listing; the body reads `Cand`, `BlockColors` and
    // `OpBundles`.
818 static void
822  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
     // Candidates that are not calls get no bundle.
823  if (!OrigCall)
824  return;
825 
826  if (!isa<IntrinsicInst>(OrigCall)) {
827  // The instrumentation call should belong to the same funclet as a
828  // non-intrinsic call, so just copy the operand bundle, if any exists.
829  Optional<OperandBundleUse> ParentFunclet =
830  OrigCall->getOperandBundle(LLVMContext::OB_funclet);
831  if (ParentFunclet)
832  OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
833  } else {
834  // Intrinsics or other instructions do not get funclet information from the
835  // front-end. Need to use the BlockColors that was computed by the routine
836  // colorEHFunclets to determine whether a funclet is needed.
837  if (!BlockColors.empty()) {
     // NOTE(review): the find() result is dereferenced without checking it
     // against end(); this relies on the call's parent block having an entry.
838  const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
839  assert(CV.size() == 1 && "non-unique color for block!");
840  Instruction *EHPad = CV.front()->getFirstNonPHI();
841  if (EHPad->isEHPad())
842  OpBundles.emplace_back("funclet", EHPad);
843  }
844  }
845 }
846 
847 // Visit all edges and instrument the edges not in MST, and do value
848 // profiling. Critical edges will be split.
849 static void instrumentOneFunc(
// NOTE(review): listing lines 850-851 (the leading parameters) are absent
// here. The body uses F, M, TLI, BPI and BFI, which are presumably declared
// on those lines -- confirm against the full source.
852  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
853  bool IsCS) {
854  // Split indirectbr critical edges here before computing the MST rather than
855  // later in getInstrBB() to avoid invalidating it.
856  SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
857 
858  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
859  F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
860 
  // Name pointer and CFG hash are shared by all instrumentation calls emitted
  // below for the counters.
861  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
862  auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
863  auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
864  FuncInfo.FunctionHash);
// NOTE(review): listing line 865 is absent here; the block below is closed by
// a `}` on line 874, so the missing line presumably opens a conditional that
// selects this entry-coverage-only path.
866  auto &EntryBB = F.getEntryBlock();
867  IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
868  // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
869  // i32 <index>)
870  Builder.CreateCall(
871  Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
872  {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
873  return;
874  }
875 
  // One counter per instrumented BB plus one per select instruction.
876  std::vector<BasicBlock *> InstrumentBBs;
877  FuncInfo.getInstrumentBBs(InstrumentBBs);
878  unsigned NumCounters =
879  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
880 
  // I is the running counter index; it is shared with the select
  // instrumentation below so that indices stay contiguous.
881  uint32_t I = 0;
882  for (auto *InstrBB : InstrumentBBs) {
883  IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
884  assert(Builder.GetInsertPoint() != InstrBB->end() &&
885  "Cannot get the Instrumentation point");
886  // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
887  // i32 <index>)
888  Builder.CreateCall(
889  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
890  {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
891  }
892 
893  // Now instrument select instructions:
894  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
895  FuncInfo.FunctionHash);
896  assert(I == NumCounters);
897 
// NOTE(review): listing line 898 is absent here; the `return` below is
// presumably guarded by a condition that disables value profiling.
899  return;
900 
901  NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
902 
903  // Intrinsic function calls do not have funclet operand bundles needed for
904  // Windows exception handling attached to them. However, if value profiling is
905  // inserted for one of these calls, then a funclet value will need to be set
906  // on the instrumentation call based on the funclet coloring.
// NOTE(review): listing line 907 is absent here; it presumably declares
// BlockColors, which is assigned and used below.
908  if (F.hasPersonalityFn() &&
909  isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
910  BlockColors = colorEHFunclets(F);
911 
912  // For each VP Kind, walk the VP candidates and instrument each one.
913  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
  // Site indices restart at zero for every value kind.
914  unsigned SiteIndex = 0;
915  if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
916  continue;
917 
918  for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
919  LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
920  << " site: CallSite Index = " << SiteIndex << "\n");
921 
922  IRBuilder<> Builder(Cand.InsertPt);
923  assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
924  "Cannot get the Instrumentation point");
925 
  // The profiled value is always passed as an i64: integers are
  // zero-extended or truncated, pointers are converted with ptrtoint.
926  Value *ToProfile = nullptr;
927  if (Cand.V->getType()->isIntegerTy())
928  ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
929  else if (Cand.V->getType()->isPointerTy())
930  ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
931  assert(ToProfile && "value profiling Value is of unexpected type");
932 
// NOTE(review): listing line 933 is absent here; it presumably declares
// OpBundles, which is filled in and passed to CreateCall below.
934  populateEHOperandBundle(Cand, BlockColors, OpBundles);
935  Builder.CreateCall(
936  Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
937  {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
938  Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
939  Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
940  OpBundles);
941  }
942  } // IPVK_First <= Kind <= IPVK_Last
943 }
944 
945 namespace {
946 
947 // This class represents a CFG edge in profile use compilation.
948 struct PGOUseEdge : public PGOEdge {
949  bool CountValid = false;
950  uint64_t CountValue = 0;
951 
952  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
953  : PGOEdge(Src, Dest, W) {}
954 
955  // Set edge count value
956  void setEdgeCount(uint64_t Value) {
957  CountValue = Value;
958  CountValid = true;
959  }
960 
961  // Return the information string for this object.
962  std::string infoString() const {
963  if (!CountValid)
964  return PGOEdge::infoString();
965  return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
966  .str();
967  }
968 };
969 
970 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
971 
972 // This class stores the auxiliary information for each BB.
973 struct UseBBInfo : public BBInfo {
974  uint64_t CountValue = 0;
975  bool CountValid;
976  int32_t UnknownCountInEdge = 0;
977  int32_t UnknownCountOutEdge = 0;
978  DirectEdges InEdges;
979  DirectEdges OutEdges;
980 
981  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
982 
983  UseBBInfo(unsigned IX, uint64_t C)
984  : BBInfo(IX), CountValue(C), CountValid(true) {}
985 
986  // Set the profile count value for this BB.
987  void setBBInfoCount(uint64_t Value) {
988  CountValue = Value;
989  CountValid = true;
990  }
991 
992  // Return the information string of this object.
993  std::string infoString() const {
994  if (!CountValid)
995  return BBInfo::infoString();
996  return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
997  }
998 
999  // Add an OutEdge and update the edge count.
1000  void addOutEdge(PGOUseEdge *E) {
1001  OutEdges.push_back(E);
1002  UnknownCountOutEdge++;
1003  }
1004 
1005  // Add an InEdge and update the edge count.
1006  void addInEdge(PGOUseEdge *E) {
1007  InEdges.push_back(E);
1008  UnknownCountInEdge++;
1009  }
1010 };
1011 
1012 } // end anonymous namespace
1013 
1014 // Sum up the count values for all the edges.
1016  uint64_t Total = 0;
1017  for (const auto &E : Edges) {
1018  if (E->Removed)
1019  continue;
1020  Total += E->CountValue;
1021  }
1022  return Total;
1023 }
1024 
1025 namespace {
1026 
// Per-function driver of the profile-use compilation: reads the counters for
// one function from the profile, propagates them over the CFG and annotates
// the IR with the result.
1027 class PGOUseFunc {
1028 public:
1029  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1030  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
// NOTE(review): listing line 1031 is absent here; it presumably declares the
// BPI and BFIin parameters used in the initializer list below.
1032  ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1033  : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1034  FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1035  InstrumentFuncEntry),
1036  FreqAttr(FFA_Normal), IsCS(IsCS) {}
1037 
1038  // Read counts for the instrumented BB from profile.
1039  bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1040  InstrProfRecord::CountPseudoKind &PseudoKind);
1041 
1042  // Read memprof data for the instrumented function from profile.
1043  bool readMemprof(IndexedInstrProfReader *PGOReader);
1044 
1045  // Populate the counts for all BBs.
1046  void populateCounters();
1047 
1048  // Set the branch weights based on the count values.
1049  void setBranchWeights();
1050 
1051  // Annotate the value profile call sites for all value kinds.
1052  void annotateValueSites();
1053 
1054  // Annotate the value profile call sites for one value kind.
1055  void annotateValueSites(uint32_t Kind);
1056 
1057  // Annotate the irreducible loop header weights.
1058  void annotateIrrLoopHeaderWeights();
1059 
1060  // The hotness of the function from the profile count.
1061  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1062 
1063  // Return the function hotness from the profile.
1064  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1065 
1066  // Return the function hash.
1067  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1068 
1069  // Return the profile record for this function.
1070  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1071 
1072  // Return the auxiliary BB information.
1073  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1074  return FuncInfo.getBBInfo(BB);
1075  }
1076 
1077  // Return the auxiliary BB information if available.
1078  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1079  return FuncInfo.findBBInfo(BB);
1080  }
1081 
1082  Function &getFunc() const { return F; }
1083 
  // Dump the shared function info, forwarding Str to FuncInfo.dumpInfo().
1084  void dumpInfo(std::string Str = "") const {
1085  FuncInfo.dumpInfo(Str);
1086  }
1087 
  // Return the maximum function count recorded by readCounters().
1088  uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1089 private:
1090  Function &F;
1091  Module *M;
// NOTE(review): listing line 1092 is absent here; it presumably declares the
// BFI member that the constructor initializes from BFIin.
1093  ProfileSummaryInfo *PSI;
1094 
1095  // This member stores the shared information with class PGOGenFunc.
1096  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1097 
1098  // The maximum count value in the profile. This is only used in PGO use
1099  // compilation.
  // NOTE(review): not initialized by the constructor above; the only visible
  // assignment is at the end of readCounters(). Confirm that no caller reads
  // it before that point.
1100  uint64_t ProgramMaxCount;
1101 
1102  // Position of counter that remains to be read.
1103  uint32_t CountPosition = 0;
1104 
1105  // Total size of the profile count for this function.
1106  uint32_t ProfileCountSize = 0;
1107 
1108  // ProfileRecord for this function.
1109  InstrProfRecord ProfileRecord;
1110 
1111  // Function hotness info derived from profile.
1112  FuncFreqAttr FreqAttr;
1113 
1114  // Whether the context sensitive profile is used.
1115  bool IsCS;
1116 
1117  // Find the Instrumented BB and set the value. Return false on error.
1118  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1119 
1120  // Set the edge counter value for the unknown edge -- there should be only
1121  // one unknown edge.
1122  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1123 
1124  // Return FuncName string;
1125  std::string getFuncName() const { return FuncInfo.FuncName; }
1126 
1127  // Set the hot/cold inline hints based on the count values.
1128  // FIXME: This function should be removed once the functionality in
1129  // the inliner is implemented.
  // Hot is decided from the entry count; cold is only considered when the
  // function is not hot and is decided from the maximum count.
1130  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1131  if (PSI->isHotCount(EntryCount))
1132  FreqAttr = FFA_Hot;
1133  else if (PSI->isColdCount(MaxCount))
1134  FreqAttr = FFA_Cold;
1135  }
1136 };
1137 
1138 } // end anonymous namespace
1139 
1140 // Visit all the edges and assign the count value for the instrumented
1141 // edges and the BB. Return false on error.
1142 bool PGOUseFunc::setInstrumentedCounts(
1143  const std::vector<uint64_t> &CountFromProfile) {
1144 
1145  std::vector<BasicBlock *> InstrumentBBs;
1146  FuncInfo.getInstrumentBBs(InstrumentBBs);
1147  unsigned NumCounters =
1148  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1149  // The number of counters here should match the number of counters
1150  // in profile. Return if they mismatch.
1151  if (NumCounters != CountFromProfile.size()) {
1152  return false;
1153  }
1154  auto *FuncEntry = &*F.begin();
1155 
1156  // Set the profile count to the Instrumented BBs.
1157  uint32_t I = 0;
1158  for (BasicBlock *InstrBB : InstrumentBBs) {
1159  uint64_t CountValue = CountFromProfile[I++];
1160  UseBBInfo &Info = getBBInfo(InstrBB);
1161  // If we reach here, we know that we have some nonzero count
1162  // values in this function. The entry count should not be 0.
1163  // Fix it if necessary.
1164  if (InstrBB == FuncEntry && CountValue == 0)
1165  CountValue = 1;
1166  Info.setBBInfoCount(CountValue);
1167  }
1168  ProfileCountSize = CountFromProfile.size();
1169  CountPosition = I;
1170 
1171  // Set the edge count and update the count of unknown edges for BBs.
1172  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1173  E->setEdgeCount(Value);
1174  this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1175  this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1176  };
1177 
1178  // Set the profile count the Instrumented edges. There are BBs that not in
1179  // MST but not instrumented. Need to set the edge count value so that we can
1180  // populate the profile counts later.
1181  for (auto &E : FuncInfo.MST.AllEdges) {
1182  if (E->Removed || E->InMST)
1183  continue;
1184  const BasicBlock *SrcBB = E->SrcBB;
1185  UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1186 
1187  // If only one out-edge, the edge profile count should be the same as BB
1188  // profile count.
1189  if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1190  setEdgeCount(E.get(), SrcInfo.CountValue);
1191  else {
1192  const BasicBlock *DestBB = E->DestBB;
1193  UseBBInfo &DestInfo = getBBInfo(DestBB);
1194  // If only one in-edge, the edge profile count should be the same as BB
1195  // profile count.
1196  if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1197  setEdgeCount(E.get(), DestInfo.CountValue);
1198  }
1199  if (E->CountValid)
1200  continue;
1201  // E's count should have been set from profile. If not, this meenas E skips
1202  // the instrumentation. We set the count to 0.
1203  setEdgeCount(E.get(), 0);
1204  }
1205  return true;
1206 }
1207 
1208 // Set the count value for the unknown edge. There should be one and only one
1209 // unknown edge in Edges vector.
1210 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1211  for (auto &E : Edges) {
1212  if (E->CountValid)
1213  continue;
1214  E->setEdgeCount(Value);
1215 
1216  getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1217  getBBInfo(E->DestBB).UnknownCountInEdge--;
1218  return;
1219  }
1220  llvm_unreachable("Cannot find the unknown count edge");
1221 }
1222 
1223 // Emit function metadata indicating PGO profile mismatch.
// NOTE(review): listing line 1224 (the start of the signature) is absent
// here. The visible call site invokes this routine as
// annotateFunctionWithHashMismatch(F, M->getContext()).
1225  LLVMContext &ctx) {
1226  const char MetadataName[] = "instr_prof_hash_mismatch";
// NOTE(review): listing line 1227 is absent here; it presumably declares
// Names, the list of annotation operands built up below.
1228  // If this metadata already exists, ignore.
1229  auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1230  if (Existing) {
1231  MDTuple *Tuple = cast<MDTuple>(Existing);
  // Carry over the existing annotation strings; bail out if the mismatch
  // marker is already among them.
1232  for (const auto &N : Tuple->operands()) {
1233  if (cast<MDString>(N.get())->getString() == MetadataName)
1234  return;
1235  Names.push_back(N.get());
1236  }
1237  }
1238 
  // Append the marker and replace the function's annotation tuple.
1239  MDBuilder MDB(ctx);
1240  Names.push_back(MDB.createString(MetadataName));
1241  MDNode *MD = MDTuple::get(ctx, Names);
1242  F.setMetadata(LLVMContext::MD_annotation, MD);
1243 }
1244 
1246  std::vector<uint64_t> &InlinedCallStack,
1247  LLVMContext &Ctx) {
1248  I.setMetadata(LLVMContext::MD_callsite,
1249  buildCallstackMetadata(InlinedCallStack, Ctx));
1250 }
1251 
// NOTE(review): listing line 1252 (the start of this signature) is absent
// here. The visible call sites invoke computeStackId(guid, offset, column)
// and expect a uint64_t stack id.
1253  uint32_t Column) {
// NOTE(review): listing line 1254 (the type of HashBuilder) is absent here.
1255  HashBuilder;
1256  HashBuilder.add(Function, LineOffset, Column);
// NOTE(review): listing line 1257 is absent here; it presumably produces
// Hash from HashBuilder.
  // The id is the raw hash bytes reinterpreted as a 64-bit integer.
  // NOTE(review): the copy size is sizeof(Hash); confirm that it equals
  // sizeof(Id).
1258  uint64_t Id;
1259  std::memcpy(&Id, Hash.data(), sizeof(Hash));
1260  return Id;
1261 }
1262 
// NOTE(review): listing lines 1263-1264 are absent here; the closing brace
// below presumably ends the computeStackId overload that takes a single
// stack frame, which is the form used by the callers in this file.
1265 }
1266 
1267 static void addCallStack(CallStackTrie &AllocTrie,
1268  const AllocationInfo *AllocInfo) {
1269  SmallVector<uint64_t> StackIds;
1270  for (auto StackFrame : AllocInfo->CallStack)
1271  StackIds.push_back(computeStackId(StackFrame));
1272  auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
1273  AllocInfo->Info.getMinSize(),
1274  AllocInfo->Info.getMinLifetime());
1275  AllocTrie.addCallStack(AllocType, StackIds);
1276 }
1277 
1278 // Helper to compare the InlinedCallStack computed from an instruction's debug
1279 // info to a list of Frames from profile data (either the allocation data or a
1280 // callsite). For callsites, the StartIndex to use in the Frame array may be
1281 // non-zero.
1282 static bool
// NOTE(review): listing line 1283 (the function name and its first
// parameter) is absent here. The body reads that parameter as
// ProfileCallStack.
1284  ArrayRef<uint64_t> InlinedCallStack,
1285  unsigned StartIndex = 0) {
  // Walk both sequences in lock step, starting at StartIndex in the profile
  // frames, and stop at the first stack id that differs.
1286  auto StackFrame = ProfileCallStack.begin() + StartIndex;
1287  auto InlCallStackIter = InlinedCallStack.begin();
1288  for (; StackFrame != ProfileCallStack.end() &&
1289  InlCallStackIter != InlinedCallStack.end();
1290  ++StackFrame, ++InlCallStackIter) {
1291  uint64_t StackId = computeStackId(*StackFrame);
1292  if (StackId != *InlCallStackIter)
1293  return false;
1294  }
1295  // Return true if we found and matched all stack ids from the call
1296  // instruction.
1297  return InlCallStackIter == InlinedCallStack.end();
1298 }
1299 
// Match the memprof record of this function against its call instructions
// and attach memprof/callsite metadata to the ones that match. Returns true
// right away when MatchMemProf is off, false when the memprof record cannot
// be read, and true after the instructions have been walked.
1300 bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
1301  if (!MatchMemProf)
1302  return true;
1303 
1304  auto &Ctx = M->getContext();
1305 
1306  auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
1307  Expected<memprof::MemProfRecord> MemProfResult =
1308  PGOReader->getMemProfRecord(FuncGUID);
1309  if (Error E = MemProfResult.takeError()) {
1310  handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1311  auto Err = IPE.get();
1312  bool SkipWarning = false;
1313  LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1314  << FuncInfo.FuncName << ": ");
1315  if (Err == instrprof_error::unknown_function) {
1316  NumOfMemProfMissing++;
1317  SkipWarning = !PGOWarnMissing;
1318  LLVM_DEBUG(dbgs() << "unknown function");
1319  } else if (Err == instrprof_error::hash_mismatch) {
1320  SkipWarning =
// NOTE(review): listing lines 1321-1322 (the start of this expression) are
// absent here.
1323  (F.hasComdat() ||
1324  F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1325  LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1326  }
1327 
1328  if (SkipWarning)
1329  return;
1330 
1331  std::string Msg =
1332  (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
1333  std::to_string(FuncInfo.FunctionHash))
1334  .str();
1335 
1336  Ctx.diagnose(
1337  DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1338  });
1339  return false;
1340  }
1341 
1342  // Build maps of the location hash to all profile data with that leaf location
1343  // (allocation info and the callsites).
1344  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
1345  // For the callsites we need to record the index of the associated frame in
1346  // the frame array (see comments below where the map entries are added).
1347  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
1348  LocHashToCallSites;
  // The maps above store pointers into MemProfRec, so it has to stay alive
  // for the rest of this function.
1349  const auto MemProfRec = std::move(MemProfResult.get());
1350  for (auto &AI : MemProfRec.AllocSites) {
1351  // Associate the allocation info with the leaf frame. The later matching
1352  // code will match any inlined call sequences in the IR with a longer prefix
1353  // of call stack frames.
  // NOTE(review): indexes CallStack[0] without an emptiness check; this
  // assumes every allocation site has at least one frame -- confirm.
1354  uint64_t StackId = computeStackId(AI.CallStack[0]);
1355  LocHashToAllocInfo[StackId].insert(&AI);
1356  }
1357  for (auto &CS : MemProfRec.CallSites) {
1358  // Need to record all frames from leaf up to and including this function,
1359  // as any of these may or may not have been inlined at this point.
1360  unsigned Idx = 0;
1361  for (auto &StackFrame : CS) {
1362  uint64_t StackId = computeStackId(StackFrame);
1363  LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
1364  // Once we find this function, we can stop recording.
1365  if (StackFrame.Function == FuncGUID)
1366  break;
1367  }
1368  assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
1369  }
1370 
  // Line offset of a location relative to the first line of its enclosing
  // subprogram, truncated to 16 bits.
1371  auto GetOffset = [](const DILocation *DIL) {
1372  return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
1373  0xffff;
1374  };
1375 
1376  // Now walk the instructions, looking up the associated profile data using
1377  // debug locations.
1378  for (auto &BB : F) {
1379  for (auto &I : BB) {
1380  if (I.isDebugOrPseudoInst())
1381  continue;
1382  // We are only interested in calls (allocation or interior call stack
1383  // context calls).
1384  auto *CI = dyn_cast<CallBase>(&I);
1385  if (!CI)
1386  continue;
1387  auto *CalledFunction = CI->getCalledFunction();
1388  if (CalledFunction && CalledFunction->isIntrinsic())
1389  continue;
1390  // List of call stack ids computed from the location hashes on debug
1391  // locations (leaf to inlined at root).
1392  std::vector<uint64_t> InlinedCallStack;
1393  // Was the leaf location found in one of the profile maps?
1394  bool LeafFound = false;
1395  // If leaf was found in a map, iterators pointing to its location in both
1396  // of the maps. It might exist in neither, one, or both (the latter case
1397  // can happen because we don't currently have discriminators to
1398  // distinguish the case when a single line/col maps to both an allocation
1399  // and another callsite).
1400  std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
1401  AllocInfoIter;
1402  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
1403  unsigned>>>::iterator CallSitesIter;
1404  for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
1405  DIL = DIL->getInlinedAt()) {
1406  // Use C++ linkage name if possible. Need to compile with
1407  // -fdebug-info-for-profiling to get linkage name.
1408  StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
1409  if (Name.empty())
1410  Name = DIL->getScope()->getSubprogram()->getName();
1411  auto CalleeGUID = Function::getGUID(Name);
1412  auto StackId =
1413  computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
1414  // LeafFound will only be false on the first iteration, since we either
1415  // set it true or break out of the loop below.
1416  if (!LeafFound) {
1417  AllocInfoIter = LocHashToAllocInfo.find(StackId);
1418  CallSitesIter = LocHashToCallSites.find(StackId);
1419  // Check if the leaf is in one of the maps. If not, no need to look
1420  // further at this call.
1421  if (AllocInfoIter == LocHashToAllocInfo.end() &&
1422  CallSitesIter == LocHashToCallSites.end())
1423  break;
1424  LeafFound = true;
1425  }
1426  InlinedCallStack.push_back(StackId);
1427  }
1428  // If leaf not in either of the maps, skip inst.
1429  if (!LeafFound)
1430  continue;
1431 
1432  // First add !memprof metadata from allocation info, if we found the
1433  // instruction's leaf location in that map, and if the rest of the
1434  // instruction's locations match the prefix Frame locations on an
1435  // allocation context with the same leaf.
1436  if (AllocInfoIter != LocHashToAllocInfo.end()) {
1437  // Only consider allocations via new, to reduce unnecessary metadata,
1438  // since those are the only allocations that will be targeted initially.
1439  if (!isNewLikeFn(CI, &FuncInfo.TLI))
1440  continue;
1441  // We may match this instruction's location list to multiple MIB
1442  // contexts. Add them to a Trie specialized for trimming the contexts to
1443  // the minimal needed to disambiguate contexts with unique behavior.
1444  CallStackTrie AllocTrie;
1445  for (auto *AllocInfo : AllocInfoIter->second) {
1446  // Check the full inlined call stack against this one.
1447  // If we found and thus matched all frames on the call, include
1448  // this MIB.
// NOTE(review): listing line 1449 (the start of the condition guarding the
// call below) is absent here.
1450  InlinedCallStack))
1451  addCallStack(AllocTrie, AllocInfo);
1452  }
1453  // We might not have matched any to the full inlined call stack.
1454  // But if we did, create and attach metadata, or a function attribute if
1455  // all contexts have identical profiled behavior.
1456  if (!AllocTrie.empty()) {
1457  // MemprofMDAttached will be false if a function attribute was
1458  // attached.
1459  bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
1460  assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
1461  if (MemprofMDAttached) {
1462  // Add callsite metadata for the instruction's location list so that
1463  // it is simpler later on to identify which part of the MIB contexts
1464  // are from this particular instruction (including during inlining,
1465  // when the callsite metadata will be updated appropriately).
1466  // FIXME: can this be changed to strip out the matching stack
1467  // context ids from the MIB contexts and not add any callsite
1468  // metadata here to save space?
1469  addCallsiteMetadata(I, InlinedCallStack, Ctx);
1470  }
1471  }
1472  continue;
1473  }
1474 
1475  // Otherwise, add callsite metadata. If we reach here then we found the
1476  // instruction's leaf location in the callsites map and not the allocation
1477  // map.
1478  assert(CallSitesIter != LocHashToCallSites.end());
1479  for (auto CallStackIdx : CallSitesIter->second) {
1480  // If we found and thus matched all frames on the call, create and
1481  // attach call stack metadata.
// NOTE(review): listing line 1482 (the start of the condition opened below)
// is absent here.
1483  *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
1484  addCallsiteMetadata(I, InlinedCallStack, Ctx);
1485  // Only need to find one with a matching call stack and add a single
1486  // callsite metadata.
1487  break;
1488  }
1489  }
1490  }
1491  }
1492 
1493  return true;
1494 }
1495 
1496 // Read the profile from ProfileFileName and assign the value to the
1497 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1498 // Return true if the profile is successfully read, and false on errors.
// \p AllZeros is set to whether all counters of the record sum to zero and
// \p PseudoKind to the pseudo kind of the record; neither is written when
// reading the record fails.
1499 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1500  InstrProfRecord::CountPseudoKind &PseudoKind) {
1501  auto &Ctx = M->getContext();
1502  uint64_t MismatchedFuncSum = 0;
// NOTE(review): listing line 1503 is absent here; it presumably declares
// Result from a lookup on PGOReader that takes the arguments on the next
// line.
1504  FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
1505  if (Error E = Result.takeError()) {
1506  handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1507  auto Err = IPE.get();
1508  bool SkipWarning = false;
1509  LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1510  << FuncInfo.FuncName << ": ");
1511  if (Err == instrprof_error::unknown_function) {
1512  IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1513  SkipWarning = !PGOWarnMissing;
1514  LLVM_DEBUG(dbgs() << "unknown function");
1515  } else if (Err == instrprof_error::hash_mismatch ||
1516  Err == instrprof_error::malformed) {
1517  IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1518  SkipWarning =
// NOTE(review): listing lines 1519-1520 (the start of this expression) are
// absent here.
1521  (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1522  F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1523  LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1524  << " skip=" << SkipWarning << ")");
1525  // Emit function metadata indicating PGO profile mismatch.
1526  annotateFunctionWithHashMismatch(F, M->getContext());
1527  }
1528 
1529  LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1530  if (SkipWarning)
1531  return;
1532 
1533  std::string Msg =
1534  IPE.message() + std::string(" ") + F.getName().str() +
1535  std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1536  std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1537  std::string(" count discarded");
1538 
1539  Ctx.diagnose(
1540  DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1541  });
1542  return false;
1543  }
1544  ProfileRecord = std::move(Result.get());
1545  PseudoKind = ProfileRecord.getCountPseudoKind();
  // Pseudo-count records carry no counters to assign. Note that this early
  // return happens before AllZeros and ProgramMaxCount are written.
1546  if (PseudoKind != InstrProfRecord::NotPseudo) {
1547  return true;
1548  }
1549  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1550 
1551  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1552  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1553 
1554  uint64_t ValueSum = 0;
1555  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1556  LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1557  ValueSum += CountFromProfile[I];
1558  }
1559  AllZeros = (ValueSum == 0);
1560 
1561  LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1562 
  // NOTE(review): the info looked up with a null BB presumably belongs to a
  // fake entry/exit node; setting its unknown-edge counts to 2 keeps the
  // ==0 and ==1 checks in populateCounters() from firing for it -- confirm.
1563  getBBInfo(nullptr).UnknownCountOutEdge = 2;
1564  getBBInfo(nullptr).UnknownCountInEdge = 2;
1565 
1566  if (!setInstrumentedCounts(CountFromProfile)) {
1567  LLVM_DEBUG(
1568  dbgs() << "Inconsistent number of counts, skipping this function");
1569  Ctx.diagnose(DiagnosticInfoPGOProfile(
1570  M->getName().data(),
1571  Twine("Inconsistent number of counts in ") + F.getName().str()
1572  + Twine(": the profile may be stale or there is a function name collision."),
1573  DS_Warning));
1574  return false;
1575  }
1576  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1577  return true;
1578 }
1579 
1580 // Populate the counters from instrumented BBs to all BBs.
1581 // In the end of this operation, all BBs should have a valid count value.
1582 void PGOUseFunc::populateCounters() {
1583  bool Changes = true;
1584  unsigned NumPasses = 0;
1585  while (Changes) {
1586  NumPasses++;
1587  Changes = false;
1588 
1589  // For efficient traversal, it's better to start from the end as most
1590  // of the instrumented edges are at the end.
1591  for (auto &BB : reverse(F)) {
1592  UseBBInfo *Count = findBBInfo(&BB);
1593  if (Count == nullptr)
1594  continue;
1595  if (!Count->CountValid) {
1596  if (Count->UnknownCountOutEdge == 0) {
1597  Count->CountValue = sumEdgeCount(Count->OutEdges);
1598  Count->CountValid = true;
1599  Changes = true;
1600  } else if (Count->UnknownCountInEdge == 0) {
1601  Count->CountValue = sumEdgeCount(Count->InEdges);
1602  Count->CountValid = true;
1603  Changes = true;
1604  }
1605  }
1606  if (Count->CountValid) {
1607  if (Count->UnknownCountOutEdge == 1) {
1608  uint64_t Total = 0;
1609  uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1610  // If the one of the successor block can early terminate (no-return),
1611  // we can end up with situation where out edge sum count is larger as
1612  // the source BB's count is collected by a post-dominated block.
1613  if (Count->CountValue > OutSum)
1614  Total = Count->CountValue - OutSum;
1615  setEdgeCount(Count->OutEdges, Total);
1616  Changes = true;
1617  }
1618  if (Count->UnknownCountInEdge == 1) {
1619  uint64_t Total = 0;
1620  uint64_t InSum = sumEdgeCount(Count->InEdges);
1621  if (Count->CountValue > InSum)
1622  Total = Count->CountValue - InSum;
1623  setEdgeCount(Count->InEdges, Total);
1624  Changes = true;
1625  }
1626  }
1627  }
1628  }
1629 
1630  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1631  (void) NumPasses;
1632 #ifndef NDEBUG
1633  // Assert every BB has a valid counter.
1634  for (auto &BB : F) {
1635  auto BI = findBBInfo(&BB);
1636  if (BI == nullptr)
1637  continue;
1638  assert(BI->CountValid && "BB count is not valid");
1639  }
1640 #endif
1641  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1642  uint64_t FuncMaxCount = FuncEntryCount;
1643  for (auto &BB : F) {
1644  auto BI = findBBInfo(&BB);
1645  if (BI == nullptr)
1646  continue;
1647  FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1648  }
1649 
1650  // Fix the obviously inconsistent entry count.
1651  if (FuncMaxCount > 0 && FuncEntryCount == 0)
1652  FuncEntryCount = 1;
1653  F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1654  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1655 
1656  // Now annotate select instructions
1657  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1658  assert(CountPosition == ProfileCountSize);
1659 
1660  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1661 }
1662 
1663 // Assign the scaled count values to the BB with multiple out edges.
1665  // Generate MD_prof metadata for every branch instruction.
1666  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1667  << " IsCS=" << IsCS << "\n");
1668  for (auto &BB : F) {
1669  Instruction *TI = BB.getTerminator();
1670  if (TI->getNumSuccessors() < 2)
1671  continue;
1672  if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1673  isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1674  isa<CallBrInst>(TI)))
1675  continue;
1676 
1677  if (getBBInfo(&BB).CountValue == 0)
1678  continue;
1679 
1680  // We have a non-zero Branch BB.
1681  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1682  unsigned Size = BBCountInfo.OutEdges.size();
1683  SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1684  uint64_t MaxCount = 0;
1685  for (unsigned s = 0; s < Size; s++) {
1686  const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1687  const BasicBlock *SrcBB = E->SrcBB;
1688  const BasicBlock *DestBB = E->DestBB;
1689  if (DestBB == nullptr)
1690  continue;
1691  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1692  uint64_t EdgeCount = E->CountValue;
1693  if (EdgeCount > MaxCount)
1694  MaxCount = EdgeCount;
1695  EdgeCounts[SuccNum] = EdgeCount;
1696  }
1697 
1698  if (MaxCount)
1699  setProfMetadata(M, TI, EdgeCounts, MaxCount);
1700  else {
1701  // A zero MaxCount can come about when we have a BB with a positive
1702  // count, and whose successor blocks all have 0 count. This can happen
1703  // when there is no exit block and the code exits via a noreturn function.
1704  auto &Ctx = M->getContext();
1705  Ctx.diagnose(DiagnosticInfoPGOProfile(
1706  M->getName().data(),
1707  Twine("Profile in ") + F.getName().str() +
1708  Twine(" partially ignored") +
1709  Twine(", possibly due to the lack of a return path."),
1710  DS_Warning));
1711  }
1712  }
1713 }
1714 
1716  for (BasicBlock *Pred : predecessors(BB)) {
1717  if (isa<IndirectBrInst>(Pred->getTerminator()))
1718  return true;
1719  }
1720  return false;
1721 }
1722 
1723 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1724  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1725  // Find irr loop headers
1726  for (auto &BB : F) {
1727  // As a heuristic also annotate indrectbr targets as they have a high chance
1728  // to become an irreducible loop header after the indirectbr tail
1729  // duplication.
1730  if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1731  Instruction *TI = BB.getTerminator();
1732  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1733  setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1734  }
1735  }
1736 }
1737 
1738 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1740  return;
1741  Module *M = F.getParent();
1743  Type *Int64Ty = Builder.getInt64Ty();
1744  Type *I8PtrTy = Builder.getInt8PtrTy();
1745  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1746  Builder.CreateCall(
1747  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1748  {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1749  Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1750  Builder.getInt32(*CurCtrIdx), Step});
1751  ++(*CurCtrIdx);
1752 }
1753 
1754 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1755  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1756  assert(*CurCtrIdx < CountFromProfile.size() &&
1757  "Out of bound access of counters");
1758  uint64_t SCounts[2];
1759  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1760  ++(*CurCtrIdx);
1761  uint64_t TotalCount = 0;
1762  auto BI = UseFunc->findBBInfo(SI.getParent());
1763  if (BI != nullptr)
1764  TotalCount = BI->CountValue;
1765  // False Count
1766  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1767  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1768  if (MaxCount)
1769  setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1770 }
1771 
1772 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1773  if (!PGOInstrSelect)
1774  return;
1775  // FIXME: do not handle this yet.
1776  if (SI.getCondition()->getType()->isVectorTy())
1777  return;
1778 
1779  switch (Mode) {
1780  case VM_counting:
1781  NSIs++;
1782  return;
1783  case VM_instrument:
1784  instrumentOneSelectInst(SI);
1785  return;
1786  case VM_annotate:
1787  annotateOneSelectInst(SI);
1788  return;
1789  }
1790 
1791  llvm_unreachable("Unknown visiting mode");
1792 }
1793 
1794 // Traverse all valuesites and annotate the instructions for all value kind.
1795 void PGOUseFunc::annotateValueSites() {
1797  return;
1798 
1799  // Create the PGOFuncName meta data.
1800  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1801 
1802  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1803  annotateValueSites(Kind);
1804 }
1805 
1806 // Annotate the instructions for a specific value kind.
1807 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1808  assert(Kind <= IPVK_Last);
1809  unsigned ValueSiteIndex = 0;
1810  auto &ValueSites = FuncInfo.ValueSites[Kind];
1811  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1812  if (NumValueSites != ValueSites.size()) {
1813  auto &Ctx = M->getContext();
1814  Ctx.diagnose(DiagnosticInfoPGOProfile(
1815  M->getName().data(),
1816  Twine("Inconsistent number of value sites for ") +
1817  Twine(ValueProfKindDescr[Kind]) +
1818  Twine(" profiling in \"") + F.getName().str() +
1819  Twine("\", possibly due to the use of a stale profile."),
1820  DS_Warning));
1821  return;
1822  }
1823 
1824  for (VPCandidateInfo &I : ValueSites) {
1825  LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1826  << "): Index = " << ValueSiteIndex << " out of "
1827  << NumValueSites << "\n");
1828  annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1829  static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1830  Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1831  : MaxNumAnnotations);
1832  ValueSiteIndex++;
1833  }
1834 }
1835 
1836 // Collect the set of members for each Comdat in module M and store
1837 // in ComdatMembers.
1839  Module &M,
1840  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1841  if (!DoComdatRenaming)
1842  return;
1843  for (Function &F : M)
1844  if (Comdat *C = F.getComdat())
1845  ComdatMembers.insert(std::make_pair(C, &F));
1846  for (GlobalVariable &GV : M.globals())
1847  if (Comdat *C = GV.getComdat())
1848  ComdatMembers.insert(std::make_pair(C, &GV));
1849  for (GlobalAlias &GA : M.aliases())
1850  if (Comdat *C = GA.getComdat())
1851  ComdatMembers.insert(std::make_pair(C, &GA));
1852 }
1853 
1854 // Don't perform PGO instrumeatnion / profile-use.
1855 static bool skipPGO(const Function &F) {
1856  if (F.isDeclaration())
1857  return true;
1858  if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1859  return true;
1860  if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1861  return true;
1862  if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1863  return true;
1864 
1865  // If there are too many critical edges, PGO might cause
1866  // compiler time problem. Skip PGO if the number of
1867  // critical edges execeed the threshold.
1868  unsigned NumCriticalEdges = 0;
1869  for (auto &BB : F) {
1870  const Instruction *TI = BB.getTerminator();
1871  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1872  if (isCriticalEdge(TI, I))
1873  NumCriticalEdges++;
1874  }
1875  }
1876  if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1877  LLVM_DEBUG(dbgs() << "In func " << F.getName()
1878  << ", NumCriticalEdges=" << NumCriticalEdges
1879  << " exceed the threshold. Skip PGO.\n");
1880  return true;
1881  }
1882 
1883  return false;
1884 }
1885 
1887  Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1889  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1890  // For the context-sensitive instrumentation, we should have a separate pass
1891  // (before LTO/ThinLTO linking) to create these variables.
1892  if (!IsCS)
1893  createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1894  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1895  collectComdatMembers(M, ComdatMembers);
1896 
1897  for (auto &F : M) {
1898  if (skipPGO(F))
1899  continue;
1900  auto &TLI = LookupTLI(F);
1901  auto *BPI = LookupBPI(F);
1902  auto *BFI = LookupBFI(F);
1903  instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1904  }
1905  return true;
1906 }
1907 
1910  createProfileFileNameVar(M, CSInstrName);
1911  // The variable in a comdat may be discarded by LTO. Ensure the declaration
1912  // will be retained.
1914  return PreservedAnalyses::all();
1915 }
1916 
1918  ModuleAnalysisManager &AM) {
1919  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1920  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1922  };
1923  auto LookupBPI = [&FAM](Function &F) {
1925  };
1926  auto LookupBFI = [&FAM](Function &F) {
1928  };
1929 
1930  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1931  return PreservedAnalyses::all();
1932 
1933  return PreservedAnalyses::none();
1934 }
1935 
1936 // Using the ratio b/w sums of profile count values and BFI count values to
1937 // adjust the func entry count.
1938 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1939  BranchProbabilityInfo &NBPI) {
1940  Function &F = Func.getFunc();
1941  BlockFrequencyInfo NBFI(F, NBPI, LI);
1942 #ifndef NDEBUG
1943  auto BFIEntryCount = F.getEntryCount();
1944  assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1945  "Invalid BFI Entrycount");
1946 #endif
1947  auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1948  auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1949  for (auto &BBI : F) {
1950  uint64_t CountValue = 0;
1951  uint64_t BFICountValue = 0;
1952  if (!Func.findBBInfo(&BBI))
1953  continue;
1954  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1955  CountValue = Func.getBBInfo(&BBI).CountValue;
1956  BFICountValue = *BFICount;
1957  SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1958  SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1959  }
1960  if (SumCount.isZero())
1961  return;
1962 
1963  assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1964  "Incorrect sum of BFI counts");
1965  if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1966  return;
1967  double Scale = (SumCount / SumBFICount).convertToDouble();
1968  if (Scale < 1.001 && Scale > 0.999)
1969  return;
1970 
1971  uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1972  uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1973  if (NewEntryCount == 0)
1974  NewEntryCount = 1;
1975  if (NewEntryCount != FuncEntryCount) {
1976  F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1977  LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1978  << ", entry_count " << FuncEntryCount << " --> "
1979  << NewEntryCount << "\n");
1980  }
1981 }
1982 
1983 // Compare the profile count values with BFI count values, and print out
1984 // the non-matching ones.
1985 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1986  BranchProbabilityInfo &NBPI,
1987  uint64_t HotCountThreshold,
1989  Function &F = Func.getFunc();
1990  BlockFrequencyInfo NBFI(F, NBPI, LI);
1991  // bool PrintFunc = false;
1992  bool HotBBOnly = PGOVerifyHotBFI;
1993  std::string Msg;
1995 
1996  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1997  for (auto &BBI : F) {
1998  uint64_t CountValue = 0;
1999  uint64_t BFICountValue = 0;
2000 
2001  if (Func.getBBInfo(&BBI).CountValid)
2002  CountValue = Func.getBBInfo(&BBI).CountValue;
2003 
2004  BBNum++;
2005  if (CountValue)
2006  NonZeroBBNum++;
2007  auto BFICount = NBFI.getBlockProfileCount(&BBI);
2008  if (BFICount)
2009  BFICountValue = *BFICount;
2010 
2011  if (HotBBOnly) {
2012  bool rawIsHot = CountValue >= HotCountThreshold;
2013  bool BFIIsHot = BFICountValue >= HotCountThreshold;
2014  bool rawIsCold = CountValue <= ColdCountThreshold;
2015  bool ShowCount = false;
2016  if (rawIsHot && !BFIIsHot) {
2017  Msg = "raw-Hot to BFI-nonHot";
2018  ShowCount = true;
2019  } else if (rawIsCold && BFIIsHot) {
2020  Msg = "raw-Cold to BFI-Hot";
2021  ShowCount = true;
2022  }
2023  if (!ShowCount)
2024  continue;
2025  } else {
2026  if ((CountValue < PGOVerifyBFICutoff) &&
2027  (BFICountValue < PGOVerifyBFICutoff))
2028  continue;
2029  uint64_t Diff = (BFICountValue >= CountValue)
2030  ? BFICountValue - CountValue
2031  : CountValue - BFICountValue;
2032  if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2033  continue;
2034  }
2035  BBMisMatchNum++;
2036 
2037  ORE.emit([&]() {
2039  F.getSubprogram(), &BBI);
2040  Remark << "BB " << ore::NV("Block", BBI.getName())
2041  << " Count=" << ore::NV("Count", CountValue)
2042  << " BFI_Count=" << ore::NV("Count", BFICountValue);
2043  if (!Msg.empty())
2044  Remark << " (" << Msg << ")";
2045  return Remark;
2046  });
2047  }
2048  if (BBMisMatchNum)
2049  ORE.emit([&]() {
2050  return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2051  F.getSubprogram(), &F.getEntryBlock())
2052  << "In Func " << ore::NV("Function", F.getName())
2053  << ": Num_of_BB=" << ore::NV("Count", BBNum)
2054  << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2055  << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2056  });
2057 }
2058 
2060  Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2061  function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2063  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2064  ProfileSummaryInfo *PSI, bool IsCS) {
2065  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2066  auto &Ctx = M.getContext();
2067  // Read the counter array from file.
2068  auto ReaderOrErr =
2069  IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
2070  if (Error E = ReaderOrErr.takeError()) {
2071  handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2072  Ctx.diagnose(
2073  DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2074  });
2075  return false;
2076  }
2077 
2078  std::unique_ptr<IndexedInstrProfReader> PGOReader =
2079  std::move(ReaderOrErr.get());
2080  if (!PGOReader) {
2081  Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2082  StringRef("Cannot get PGOReader")));
2083  return false;
2084  }
2085  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2086  return false;
2087 
2088  // TODO: might need to change the warning once the clang option is finalized.
2089  if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) {
2090  Ctx.diagnose(DiagnosticInfoPGOProfile(
2091  ProfileFileName.data(), "Not an IR level instrumentation profile"));
2092  return false;
2093  }
2094  if (PGOReader->hasSingleByteCoverage()) {
2095  Ctx.diagnose(DiagnosticInfoPGOProfile(
2096  ProfileFileName.data(),
2097  "Cannot use coverage profiles for optimization"));
2098  return false;
2099  }
2100  if (PGOReader->functionEntryOnly()) {
2101  Ctx.diagnose(DiagnosticInfoPGOProfile(
2102  ProfileFileName.data(),
2103  "Function entry profiles are not yet supported for optimization"));
2104  return false;
2105  }
2106 
2107  // Add the profile summary (read from the header of the indexed summary) here
2108  // so that we can use it below when reading counters (which checks if the
2109  // function should be marked with a cold or inlinehint attribute).
2110  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2113  PSI->refresh();
2114 
2115  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2116  collectComdatMembers(M, ComdatMembers);
2117  std::vector<Function *> HotFunctions;
2118  std::vector<Function *> ColdFunctions;
2119 
2120  // If the profile is marked as always instrumenting the entry BB, do the
2121  // same. Note this can be overridden by the internal option in CFGMST.h
2122  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2124  InstrumentFuncEntry = PGOInstrumentEntry;
2125  for (auto &F : M) {
2126  if (skipPGO(F))
2127  continue;
2128  auto &TLI = LookupTLI(F);
2129  auto *BPI = LookupBPI(F);
2130  auto *BFI = LookupBFI(F);
2131  // Split indirectbr critical edges here before computing the MST rather than
2132  // later in getInstrBB() to avoid invalidating it.
2133  SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
2134  PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2135  InstrumentFuncEntry);
2136  // Read and match memprof first since we do this via debug info and can
2137  // match even if there is an IR mismatch detected for regular PGO below.
2138  if (PGOReader->hasMemoryProfile())
2139  Func.readMemprof(PGOReader.get());
2140 
2141  if (!PGOReader->isIRLevelProfile())
2142  continue;
2143 
2144  // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2145  // it means the profile for the function is unrepresentative and this
2146  // function is actually hot / warm. We will reset the function hot / cold
2147  // attribute and drop all the profile counters.
2149  bool AllZeros = false;
2150  if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2151  continue;
2152  if (AllZeros) {
2153  F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2154  if (Func.getProgramMaxCount() != 0)
2155  ColdFunctions.push_back(&F);
2156  continue;
2157  }
2158  if (PseudoKind != InstrProfRecord::NotPseudo) {
2159  // Clear function attribute cold.
2160  if (F.hasFnAttribute(Attribute::Cold))
2161  F.removeFnAttr(Attribute::Cold);
2162  // Set function attribute as hot.
2163  if (PseudoKind == InstrProfRecord::PseudoHot)
2164  F.addFnAttr(Attribute::Hot);
2165  continue;
2166  }
2167  Func.populateCounters();
2168  Func.setBranchWeights();
2169  Func.annotateValueSites();
2170  Func.annotateIrrLoopHeaderWeights();
2171  PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2172  if (FreqAttr == PGOUseFunc::FFA_Cold)
2173  ColdFunctions.push_back(&F);
2174  else if (FreqAttr == PGOUseFunc::FFA_Hot)
2175  HotFunctions.push_back(&F);
2176  if (PGOViewCounts != PGOVCT_None &&
2177  (ViewBlockFreqFuncName.empty() ||
2178  F.getName().equals(ViewBlockFreqFuncName))) {
2179  LoopInfo LI{DominatorTree(F)};
2180  std::unique_ptr<BranchProbabilityInfo> NewBPI =
2181  std::make_unique<BranchProbabilityInfo>(F, LI);
2182  std::unique_ptr<BlockFrequencyInfo> NewBFI =
2183  std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2184  if (PGOViewCounts == PGOVCT_Graph)
2185  NewBFI->view();
2186  else if (PGOViewCounts == PGOVCT_Text) {
2187  dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2188  NewBFI->print(dbgs());
2189  }
2190  }
2191  if (PGOViewRawCounts != PGOVCT_None &&
2192  (ViewBlockFreqFuncName.empty() ||
2193  F.getName().equals(ViewBlockFreqFuncName))) {
2195  if (ViewBlockFreqFuncName.empty())
2196  WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2197  else
2198  ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2199  else if (PGOViewRawCounts == PGOVCT_Text) {
2200  dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2201  Func.dumpInfo();
2202  }
2203  }
2204 
2206  LoopInfo LI{DominatorTree(F)};
2207  BranchProbabilityInfo NBPI(F, LI);
2208 
2209  // Fix func entry count.
2210  if (PGOFixEntryCount)
2211  fixFuncEntryCount(Func, LI, NBPI);
2212 
2213  // Verify BlockFrequency information.
2214  uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2215  if (PGOVerifyHotBFI) {
2216  HotCountThreshold = PSI->getOrCompHotCountThreshold();
2218  }
2219  verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2220  }
2221  }
2222 
2223  // Set function hotness attribute from the profile.
2224  // We have to apply these attributes at the end because their presence
2225  // can affect the BranchProbabilityInfo of any callers, resulting in an
2226  // inconsistent MST between prof-gen and prof-use.
2227  for (auto &F : HotFunctions) {
2228  F->addFnAttr(Attribute::InlineHint);
2229  LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2230  << "\n");
2231  }
2232  for (auto &F : ColdFunctions) {
2233  // Only set when there is no Attribute::Hot set by the user. For Hot
2234  // attribute, user's annotation has the precedence over the profile.
2235  if (F->hasFnAttribute(Attribute::Hot)) {
2236  auto &Ctx = M.getContext();
2237  std::string Msg = std::string("Function ") + F->getName().str() +
2238  std::string(" is annotated as a hot function but"
2239  " the profile is cold");
2240  Ctx.diagnose(
2241  DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2242  continue;
2243  }
2244  F->addFnAttr(Attribute::Cold);
2245  LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2246  << "\n");
2247  }
2248  return true;
2249 }
2250 
2252  std::string RemappingFilename,
2253  bool IsCS)
2254  : ProfileFileName(std::move(Filename)),
2255  ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
2256  if (!PGOTestProfileFile.empty())
2257  ProfileFileName = PGOTestProfileFile;
2258  if (!PGOTestProfileRemappingFile.empty())
2259  ProfileRemappingFileName = PGOTestProfileRemappingFile;
2260 }
2261 
2263  ModuleAnalysisManager &AM) {
2264 
2265  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2266  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2268  };
2269  auto LookupBPI = [&FAM](Function &F) {
2271  };
2272  auto LookupBFI = [&FAM](Function &F) {
2274  };
2275 
2276  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
2277 
2278  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
2279  LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2280  return PreservedAnalyses::all();
2281 
2282  return PreservedAnalyses::none();
2283 }
2284 
2285 static std::string getSimpleNodeName(const BasicBlock *Node) {
2286  if (!Node->getName().empty())
2287  return std::string(Node->getName());
2288 
2289  std::string SimpleNodeName;
2290  raw_string_ostream OS(SimpleNodeName);
2291  Node->printAsOperand(OS, false);
2292  return OS.str();
2293 }
2294 
2296  ArrayRef<uint64_t> EdgeCounts,
2297  uint64_t MaxCount) {
2298  MDBuilder MDB(M->getContext());
2299  assert(MaxCount > 0 && "Bad max count");
2300  uint64_t Scale = calculateCountScale(MaxCount);
2301  SmallVector<unsigned, 4> Weights;
2302  for (const auto &ECI : EdgeCounts)
2303  Weights.push_back(scaleBranchCount(ECI, Scale));
2304 
2305  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2306  : Weights) {
2307  dbgs() << W << " ";
2308  } dbgs() << "\n";);
2309 
2310  misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2311 
2312  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
2313  if (EmitBranchProbability) {
2314  std::string BrCondStr = getBranchCondString(TI);
2315  if (BrCondStr.empty())
2316  return;
2317 
2318  uint64_t WSum =
2319  std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2320  [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2321  uint64_t TotalCount =
2322  std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2323  [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2324  Scale = calculateCountScale(WSum);
2325  BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2326  scaleBranchCount(WSum, Scale));
2327  std::string BranchProbStr;
2328  raw_string_ostream OS(BranchProbStr);
2329  OS << BP;
2330  OS << " (total count : " << TotalCount << ")";
2331  OS.flush();
2332  Function *F = TI->getParent()->getParent();
2334  ORE.emit([&]() {
2335  return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2336  << BrCondStr << " is true with probability : " << BranchProbStr;
2337  });
2338  }
2339 }
2340 
2341 namespace llvm {
2342 
2344  MDBuilder MDB(M->getContext());
2345  TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2346  MDB.createIrrLoopHeaderWeight(Count));
2347 }
2348 
2349 template <> struct GraphTraits<PGOUseFunc *> {
2350  using NodeRef = const BasicBlock *;
2353 
2354  static NodeRef getEntryNode(const PGOUseFunc *G) {
2355  return &G->getFunc().front();
2356  }
2357 
2359  return succ_begin(N);
2360  }
2361 
2362  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2363 
2364  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2365  return nodes_iterator(G->getFunc().begin());
2366  }
2367 
2368  static nodes_iterator nodes_end(const PGOUseFunc *G) {
2369  return nodes_iterator(G->getFunc().end());
2370  }
2371 };
2372 
2373 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2374  explicit DOTGraphTraits(bool isSimple = false)
2376 
2377  static std::string getGraphName(const PGOUseFunc *G) {
2378  return std::string(G->getFunc().getName());
2379  }
2380 
2381  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2382  std::string Result;
2383  raw_string_ostream OS(Result);
2384 
2385  OS << getSimpleNodeName(Node) << ":\\l";
2386  UseBBInfo *BI = Graph->findBBInfo(Node);
2387  OS << "Count : ";
2388  if (BI && BI->CountValid)
2389  OS << BI->CountValue << "\\l";
2390  else
2391  OS << "Unknown\\l";
2392 
2393  if (!PGOInstrSelect)
2394  return Result;
2395 
2396  for (const Instruction &I : *Node) {
2397  if (!isa<SelectInst>(&I))
2398  continue;
2399  // Display scaled counts for SELECT instruction:
2400  OS << "SELECT : { T = ";
2401  uint64_t TC, FC;
2402  bool HasProf = extractBranchWeights(I, TC, FC);
2403  if (!HasProf)
2404  OS << "Unknown, F = Unknown }\\l";
2405  else
2406  OS << TC << ", F = " << FC << " }\\l";
2407  }
2408  return Result;
2409  }
2410 };
2411 
2412 } // end namespace llvm
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
DoComdatRenaming
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
llvm::SuccIterator
Definition: CFG.h:138
llvm::isNewLikeFn
bool isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory via new.
Definition: MemoryBuiltins.cpp:309
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:459
Instrumentation.h
llvm::createPGOFuncNameMetadata
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1122
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:308
llvm::PGOInstrumentationUse::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:2262
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
Definition: Instruction.cpp:814
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
fixFuncEntryCount
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
Definition: PGOInstrumentation.cpp:1938
llvm::DOTGraphTraits< PGOUseFunc * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: PGOInstrumentation.cpp:2374
llvm::APFloatBase::cmpGreaterThan
@ cmpGreaterThan
Definition: APFloat.h:193
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
c2
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps c2(%esp) ... xorps %xmm0
Comdat.h
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::Comdat::getName
StringRef getName() const
Definition: Comdat.cpp:28
llvm::JamCRC::update
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
ProfileCount
Function::ProfileCount ProfileCount
Definition: PGOInstrumentation.cpp:132
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::memprof::Frame::LineOffset
uint32_t LineOffset
Definition: MemProf.h:150
llvm::InstrProfError::message
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:256
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
llvm::InstrProfRecord::getCountPseudoKind
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:836
llvm::GlobalValue::HiddenVisibility
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
llvm::CFGMST::getBBInfo
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:83
llvm::Function
Definition: Function.h:60
ProfileSummary.h
StringRef.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:629
llvm::memprof::getAllocType
AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize, uint64_t MinLifetime)
Return the allocation type for a given set of memory profile values.
Definition: MemoryProfileInfo.cpp:34
llvm::DOTGraphTraits< PGOUseFunc * >::getNodeLabel
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
Definition: PGOInstrumentation.cpp:2381
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::ProfileSummaryInfo::getOrCompHotCountThreshold
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:319
PGOTestProfileRemappingFile
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
llvm::getPGOFuncName
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Return the modified name for function F suitable to be used the key for profile lookup.
Definition: InstrProf.cpp:300
sumEdgeCount
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
Definition: PGOInstrumentation.cpp:1015
ErrorHandling.h
llvm::InstrProfError::get
instrprof_error get() const
Definition: InstrProf.h:353
llvm::IRBuilder<>
PGOVerifyBFI
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::CallingConv::Cold
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2202
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
llvm::GlobalAlias
Definition: GlobalAlias.h:28
Error.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createPGOFuncNameVar
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:379
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:72
llvm::coverage::accessors::getFuncHash
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
Definition: CoverageMapping.h:774
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
APInt.h
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1595
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
PGOInstrSelect
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1793
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::SplitCriticalEdge
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition: BreakCriticalEdges.cpp:101
Module.h
MemoryBuiltins.h
llvm::GraphTraits< PGOUseFunc * >::nodes_end
static nodes_iterator nodes_end(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2368
EHPersonalities.h
llvm::memprof::CallStackTrie::buildAndAttachMIBMetadata
bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Definition: MemoryProfileInfo.cpp:210
llvm::CFGMST::AllEdges
std::vector< std::unique_ptr< Edge > > AllEdges
Definition: CFGMST.h:45
llvm::TinyPtrVector::front
EltTy front() const
Definition: TinyPtrVector.h:230
llvm::Optional
Definition: APInt.h:33
llvm::LLVMContext::OB_funclet
@ OB_funclet
Definition: LLVMContext.h:90
llvm::InstrProfRecord::PseudoHot
@ PseudoHot
Definition: InstrProf.h:829
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::JamCRC
Definition: CRC.h:45
llvm::GraphTraits< PGOUseFunc * >::child_end
static ChildIteratorType child_end(const NodeRef N)
Definition: PGOInstrumentation.cpp:2362
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::isCriticalEdge
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then clean up.
Definition: GraphWriter.h:427
llvm::Expected
Tagged union holding either a T or an Error.
Definition: APFloat.h:41
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::createProfileFileNameVar
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1205
llvm::CmpInst::getPredicateName
static StringRef getPredicateName(Predicate P)
Definition: Instructions.cpp:4096
CFGMST.h
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
llvm::PGOVCT_Text
@ PGOVCT_Text
Definition: BlockFrequencyInfo.h:33
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::InstrProfRecord
Profiling information for a single function.
Definition: InstrProf.h:730
F
#define F(x, y, z)
Definition: MD5.cpp:55
InstrProfData.inc
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1456
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
llvm::memprof::CallStackTrie::empty
bool empty() const
Definition: MemoryProfileInfo.h:80
PGOWarnMissing
static cl::opt< bool > PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about missing profile data for " "functions."))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:22
llvm::memprof::AllocationInfo
Definition: MemProf.h:293
Instruction.h
PGOInstrumentEntry
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
CommandLine.h
llvm::APFloat::getZero
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:900
llvm::IndexedInstrProfReader::getInstrProfRecord
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Definition: InstrProfReader.cpp:1031
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::PGOVCT_Graph
@ PGOVCT_Graph
Definition: BlockFrequencyInfo.h:33
addCallsiteMetadata
static void addCallsiteMetadata(Instruction &I, std::vector< uint64_t > &InlinedCallStack, LLVMContext &Ctx)
Definition: PGOInstrumentation.cpp:1245
llvm::BranchProbabilityAnalysis
Analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:412
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::ProfileSummaryInfo::isColdCount
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
Definition: ProfileSummaryInfo.cpp:295
GlobalValue.h
llvm::calculateCountScale
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
Definition: Instrumentation.h:164
PGOViewRawCounts
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Constants.h
llvm::ValueProfileCollector::CandidateInfo::AnnotatedInst
Instruction * AnnotatedInst
Definition: ValueProfileCollector.h:62
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
getBranchCondString
static std::string getBranchCondString(Instruction *TI)
Definition: PGOInstrumentation.cpp:334
llvm::support::little
@ little
Definition: Endian.h:27
llvm::PGOInstrumentationGen::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1917
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::setIrrLoopHeaderMetadata
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
Definition: PGOInstrumentation.cpp:2343
Twine.h
InstrTypes.h
llvm::Type::print
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Definition: AsmWriter.cpp:4651
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:113
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::MDNode::operands
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1290
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::MDTuple
Tuple of metadata.
Definition: Metadata.h:1329
llvm::canRenameComdatFunc
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1182
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::InstrProfRecord::getNumValueSites
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:945
MatchMemProf
static cl::opt< bool > MatchMemProf("pgo-match-memprof", cl::init(true), cl::Hidden, cl::desc("Perform matching and annotation of memprof profiles."))
TargetLibraryInfo.h
CRC.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::HashBuilderBase::final
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition: HashBuilder.h:66
llvm::Instruction
Definition: Instruction.h:42
ColdCountThreshold
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
InstrProf.h
MDBuilder.h
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:111
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
PGOFixEntryCount
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
llvm::HashBuilderImpl::add
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilderImpl & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Definition: HashBuilder.h:109
llvm::OperandBundleDef
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
HashBuilder.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::CFGMST
A union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
llvm::Comdat
Definition: Comdat.h:33
llvm::scaleBranchCount
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
Definition: Instrumentation.h:174
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::GraphTraits< PGOUseFunc * >::getEntryNode
static NodeRef getEntryNode(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2354
PGOFunctionCriticalEdgeThreshold
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::ErrorInfoBase
Base class for error info classes.
Definition: Error.h:46
llvm::ErrorInfoBase::message
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:54
Type.h
BranchProbability.h
CFG.h
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:40
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:166
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3213
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
getSimpleNodeName
static std::string getSimpleNodeName(const BasicBlock *Node)
Definition: PGOInstrumentation.cpp:2285
PGOVerifyBFICutoff
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
EmitBranchProbability
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
instrumentOneFunc
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
Definition: PGOInstrumentation.cpp:849
llvm::MDBuilder::createString
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
PGOVerifyHotBFI
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
llvm::pointer_iterator
Definition: iterator.h:344
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::APFloat
Definition: APFloat.h:716
InstrProfReader.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:826
llvm::memprof::Frame
Definition: MemProf.h:142
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
BranchProbabilityInfo.h
llvm::PGOViewCounts
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Definition: PGOInstrumentation.cpp:319
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
llvm::ProfileSummaryInfo::isHotCount
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Definition: ProfileSummaryInfo.cpp:291
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
MaxNumMemOPAnnotations
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
uint64_t
llvm::colorEHFunclets
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
Definition: EHPersonalities.cpp:85
ProfileSummaryInfo.h
PGOOldCFGHashing
static cl::opt< bool > PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, cl::desc("Use the old CFG function hashing"))
setBranchWeights
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
Definition: SimplifyCFG.cpp:864
MisExpect.h
llvm::IndexedInstrProfReader
Reader for the indexed binary instrprof format.
Definition: InstrProfReader.h:579
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::GlobalValue::WeakAnyLinkage
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
llvm::InstrProfError
Definition: InstrProf.h:338
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
ProfDataUtils.h
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
PGOInstrumentation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::InstrProfRecord::NotPseudo
@ NotPseudo
Definition: InstrProf.h:828
iterator.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
PGOVerifyBFIRatio
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
llvm::HashBuilder
HashBuilderImpl< HasherT,(Endianness==support::endianness::native ? support::endian::system_endianness() :Endianness)> HashBuilder
Interface to help hash various types through a hasher type.
Definition: HashBuilder.h:412
ArrayRef.h
llvm::misexpect::checkExpectAnnotations
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
PGOFunctionEntryCoverage
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::DiagnosticInfoPGOProfile
Diagnostic information for the PGO profiler.
Definition: DiagnosticInfo.h:274
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1861
llvm::PGOVCT_None
@ PGOVCT_None
Definition: BlockFrequencyInfo.h:33
llvm::instrprof_error::unknown_function
@ unknown_function
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::SplitIndirectBrCriticalEdges
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Definition: BreakCriticalEdges.cpp:338
PGOTraceFuncHash
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
llvm::WriteGraph
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1735
iterator_range.h
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:264
llvm::MDTuple::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1357
annotateAllFunctions
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
Definition: PGOInstrumentation.cpp:2059
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:66
llvm::InstrProfValueKind
InstrProfValueKind
Definition: InstrProf.h:244
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
bool empty() const
Definition: DenseMap.h:98
llvm::IndexedInstrProfReader::create
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Definition: InstrProfReader.cpp:119
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOInstrumentation.cpp:135
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
Triple.h
llvm::GetSuccessorNumber
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1715
CFG.h
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:212
PGOFunctionSizeThreshold
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
skipPGO
static bool skipPGO(const Function &F)
Definition: PGOInstrumentation.cpp:1855
llvm::HashBuilderImpl
Implementation of the HashBuilder interface.
Definition: HashBuilder.h:94
llvm::ProfileSummaryInfo::refresh
void refresh()
If no summary is present, attempt to refresh.
Definition: ProfileSummaryInfo.cpp:59
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::DOTGraphTraits< PGOUseFunc * >::getGraphName
static std::string getGraphName(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2377
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::NamedInstrProfRecord::setCSFlagInHash
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:926
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:204
llvm::TinyPtrVector::size
unsigned size() const
Definition: TinyPtrVector.h:172
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
InstVisitor.h
PGOInstrMemOP
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:194
computeStackId
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
Definition: PGOInstrumentation.cpp:1252
llvm::Expected::get
reference get()
Returns a reference to the stored T value.
Definition: Error.h:566
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::PGOInstrumentationUse::PGOInstrumentationUse
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false)
Definition: PGOInstrumentation.cpp:2251
llvm::ConstantInt::isMinusOne
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:206
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:78
llvm::DebugInfoCorrelate
cl::opt< bool > DebugInfoCorrelate("debug-info-correlate", cl::desc("Use debug info to correlate profiles."), cl::init(false))
Definition: PGOInstrumentation.cpp:325
BlockFrequencyInfo.h
llvm::ProfileSummary::PSK_CSInstr
@ PSK_CSInstr
Definition: ProfileSummary.h:47
llvm::GlobalValue::AvailableExternallyLinkage
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:432
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::instrprof_error::hash_mismatch
@ hash_mismatch
llvm::NoPGOWarnMismatch
cl::opt< bool > NoPGOWarnMismatch
llvm::GraphTraits< PGOUseFunc * >::nodes_begin
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2364
llvm::PGOInstrumentationGenCreateVar::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1909
llvm::memprof::Frame::Function
GlobalValue::GUID Function
Definition: MemProf.h:145
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1481
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1017
llvm::memprof::buildCallstackMetadata
MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
Definition: MemoryProfileInfo.cpp:44
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
annotateFunctionWithHashMismatch
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
Definition: PGOInstrumentation.cpp:1224
llvm::memprof::Frame::Column
uint32_t Column
Definition: MemProf.h:153
MaxNumAnnotations
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
llvm::memprof
Definition: MemoryProfileInfo.h:24
Attributes.h
llvm::ValueProfileCollector
Utility analysis that determines what values are worth profiling.
Definition: ValueProfileCollector.h:57
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
ValueProfKindDescr
static const char * ValueProfKindDescr[]
Definition: PGOInstrumentation.cpp:365
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
GraphWriter.h
std
Definition: BitVector.h:851
ValueProfileCollector.h
llvm::BLAKE3Result
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition: BLAKE3.h:35
llvm::JamCRC::getCRC
uint32_t getCRC() const
Definition: CRC.h:52
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:511
MemoryProfileInfo.h
canRenameComdat
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:667
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:152
BLAKE3.h
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::ProfileSummaryInfo::getOrCompColdCountThreshold
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:323
GlobalVariable.h
llvm::GlobalValue::getGUID
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:591
Casting.h
llvm::IndexedInstrProfReader::getMemProfRecord
Expected< memprof::MemProfRecord > getMemProfRecord(uint64_t FuncNameHash)
Return the memprof record for the function identified by llvm::md5(Name).
Definition: InstrProfReader.cpp:1077
DiagnosticInfo.h
Function.h
DOTGraphTraits.h
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
llvm::ValueProfileCollector::CandidateInfo
Definition: ValueProfileCollector.h:59
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:669
getInstrBB
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
Definition: GCOVProfiling.cpp:702
llvm::cl::value_desc
Definition: CommandLine.h:422
llvm::CFGMST::BBInfos
DenseMap< const BasicBlock *, std::unique_ptr< BBInfo > > BBInfos
Definition: CFGMST.h:48
llvm::CFGMST::addEdge
Edge & addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:260
isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:787
stackFrameIncludesInlinedCallStack
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack, unsigned StartIndex=0)
Definition: PGOInstrumentation.cpp:1283
GlobalAlias.h
AllocType
AllocType
Definition: MemoryBuiltins.cpp:55
llvm::MDBuilder
Definition: MDBuilder.h:36
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::Comdat::setSelectionKind
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
llvm::Expected::takeError
Error takeError()
Take ownership of the stored error.
Definition: Error.h:596
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:200
NoPGOWarnMismatchComdatWeak
static cl::opt< bool > NoPGOWarnMismatchComdatWeak("no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "or weak functions."))
Instructions.h
llvm::Constant::getIntegerValue
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value.
Definition: Constants.cpp:378
llvm::support::endian::write64le
void write64le(void *P, uint64_t V)
Definition: Endian.h:417
SmallVector.h
llvm::GraphTraits< PGOUseFunc * >::child_begin
static ChildIteratorType child_begin(const NodeRef N)
Definition: PGOInstrumentation.cpp:2358
llvm::memprof::CallStackTrie
Class to build a trie of call stack contexts for a particular profiled allocation call,...
Definition: MemoryProfileInfo.h:45
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:208
Dominators.h
DisableValueProfiling
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
ModuleUtils.h
llvm::ProfileSummary::PSK_Instr
@ PSK_Instr
Definition: ProfileSummary.h:47
N
#define N
createIRLevelProfileFlagVar
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
Definition: PGOInstrumentation.cpp:372
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:810
PGOTestProfileFile
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:85
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:2295
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:119
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits methods.
Definition: DOTGraphTraits.h:28
llvm::instrprof_error::malformed
@ malformed
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
verifyFuncBFI
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
Definition: PGOInstrumentation.cpp:1985
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR unit.
Definition: PassManager.h:931
llvm::ConstantInt::isOne
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
llvm::InstrProfRecord::CountPseudoKind
CountPseudoKind
Definition: InstrProf.h:827
populateEHOperandBundle
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
Definition: PGOInstrumentation.cpp:819
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition: ProfDataUtils.cpp:104
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ViewBlockFreqFuncName
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
Definition: MachineBlockFrequencyInfo.cpp:66
llvm::AllocInfo
Summary of memprof metadata on allocations.
Definition: ModuleSummaryIndex.h:341
isIndirectBrTarget
static bool isIndirectBrTarget(BasicBlock *BB)
Definition: PGOInstrumentation.cpp:1715
llvm::GraphTraits
Definition: GraphTraits.h:37
LLVMContext.h
llvm::const_succ_iterator
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
llvm::IndexedInstrProfReader::getMaximumFunctionCount
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Definition: InstrProfReader.h:670
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::memprof::CallStackTrie::addCallStack
void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds)
Add a call stack context with the given allocation type to the Trie.
Definition: MemoryProfileInfo.cpp:100
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
llvm::Comdat::getSelectionKind
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
raw_ostream.h
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:241
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:192
BasicBlockUtils.h
llvm::GlobalValue::LinkOnceODRLinkage
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
collectComdatMembers
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:1838
Value.h
llvm::raw_string_ostream::str
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:647
addCallStack
static void addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo)
Definition: PGOInstrumentation.cpp:1267
llvm::CFGMST::dumpEdges
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:241
llvm::handleAllErrors
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:965
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::CFGMST::findBBInfo
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:90
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:450
llvm::InstrProfRecord::Counts
std::vector< uint64_t > Counts
Definition: InstrProf.h:731
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:153
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3211
llvm::ValueProfileCollector::get
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
Definition: ValueProfileCollector.cpp:73
llvm::isFuncletEHPersonality
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it).
Definition: EHPersonalities.h:64
llvm::MDBuilder::createIrrLoopHeaderWeight
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:330
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
InstrumentAllFunctions
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
Definition: PGOInstrumentation.cpp:1886