LLVM  15.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 // Issue 3, pp 313-322
14 // The idea of the algorithm based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotate the indirect call instructions.
32 //
33 // To get the precise counter information, These two passes need to invoke at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37 // the profile is opened in module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
51 #include "CFGMST.h"
52 #include "ValueProfileCollector.h"
53 #include "llvm/ADT/APInt.h"
54 #include "llvm/ADT/ArrayRef.h"
55 #include "llvm/ADT/STLExtras.h"
56 #include "llvm/ADT/SmallVector.h"
57 #include "llvm/ADT/Statistic.h"
58 #include "llvm/ADT/StringRef.h"
59 #include "llvm/ADT/Triple.h"
60 #include "llvm/ADT/Twine.h"
61 #include "llvm/ADT/iterator.h"
65 #include "llvm/Analysis/CFG.h"
67 #include "llvm/Analysis/LoopInfo.h"
71 #include "llvm/IR/Attributes.h"
72 #include "llvm/IR/BasicBlock.h"
73 #include "llvm/IR/CFG.h"
74 #include "llvm/IR/Comdat.h"
75 #include "llvm/IR/Constant.h"
76 #include "llvm/IR/Constants.h"
77 #include "llvm/IR/DiagnosticInfo.h"
78 #include "llvm/IR/Dominators.h"
79 #include "llvm/IR/Function.h"
80 #include "llvm/IR/GlobalAlias.h"
81 #include "llvm/IR/GlobalValue.h"
82 #include "llvm/IR/GlobalVariable.h"
83 #include "llvm/IR/IRBuilder.h"
84 #include "llvm/IR/InstVisitor.h"
85 #include "llvm/IR/InstrTypes.h"
86 #include "llvm/IR/Instruction.h"
87 #include "llvm/IR/Instructions.h"
88 #include "llvm/IR/IntrinsicInst.h"
89 #include "llvm/IR/Intrinsics.h"
90 #include "llvm/IR/LLVMContext.h"
91 #include "llvm/IR/MDBuilder.h"
92 #include "llvm/IR/Module.h"
93 #include "llvm/IR/PassManager.h"
94 #include "llvm/IR/ProfileSummary.h"
95 #include "llvm/IR/Type.h"
96 #include "llvm/IR/Value.h"
100 #include "llvm/Support/CRC.h"
101 #include "llvm/Support/Casting.h"
104 #include "llvm/Support/Debug.h"
105 #include "llvm/Support/Error.h"
113 #include <algorithm>
114 #include <cassert>
115 #include <cstdint>
116 #include <memory>
117 #include <numeric>
118 #include <string>
119 #include <unordered_map>
120 #include <utility>
121 #include <vector>
122 
123 using namespace llvm;
126 
127 #define DEBUG_TYPE "pgo-instrumentation"
128 
129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
132 STATISTIC(NumOfPGOEdge, "Number of edges.");
133 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
137 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
140 STATISTIC(NumOfCSPGOSelectInsts,
141  "Number of select instruction instrumented in CSPGO.");
142 STATISTIC(NumOfCSPGOMemIntrinsics,
143  "Number of mem intrinsics instrumented in CSPGO.");
144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
147 STATISTIC(NumOfCSPGOFunc,
148  "Number of functions having valid profile counts in CSPGO.");
149 STATISTIC(NumOfCSPGOMismatch,
150  "Number of functions having mismatch profile in CSPGO.");
151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
152 
153 // Command line option to specify the file to read profile from. This is
154 // mainly used for testing.
156  PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
157  cl::value_desc("filename"),
158  cl::desc("Specify the path of profile data file. This is"
159  "mainly for test purpose."));
161  "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
162  cl::value_desc("filename"),
163  cl::desc("Specify the path of profile remapping file. This is mainly for "
164  "test purpose."));
165 
166 // Command line option to disable value profiling. The default is false:
167 // i.e. value profiling is enabled by default. This is for debug purpose.
168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
169  cl::Hidden,
170  cl::desc("Disable Value Profiling"));
171 
172 // Command line option to set the maximum number of VP annotations to write to
173 // the metadata for a single indirect call callsite.
175  "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
176  cl::desc("Max number of annotations for a single indirect "
177  "call callsite"));
178 
179 // Command line option to set the maximum number of value annotations
180 // to write to the metadata for a single memop intrinsic.
182  "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
183  cl::desc("Max number of preicise value annotations for a single memop"
184  "intrinsic"));
185 
186 // Command line option to control appending FunctionHash to the name of a COMDAT
187 // function. This is to avoid the hash mismatch caused by the preinliner.
189  "do-comdat-renaming", cl::init(false), cl::Hidden,
190  cl::desc("Append function hash to the name of COMDAT function to avoid "
191  "function hash mismatch due to the preinliner"));
192 
193 // Command line option to enable/disable the warning about missing profile
194 // information.
195 static cl::opt<bool>
196  PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
197  cl::desc("Use this option to turn on/off "
198  "warnings about missing profile data for "
199  "functions."));
200 
201 namespace llvm {
202 // Command line option to enable/disable the warning about a hash mismatch in
203 // the profile data.
205  NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
206  cl::desc("Use this option to turn off/on "
207  "warnings about profile cfg mismatch."));
208 } // namespace llvm
209 
210 // Command line option to enable/disable the warning about a hash mismatch in
211 // the profile data for Comdat functions, which often turns out to be false
212 // positive due to the pre-instrumentation inline.
213 static cl::opt<bool>
214  NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
215  cl::Hidden,
216  cl::desc("The option is used to turn on/off "
217  "warnings about hash mismatch for comdat "
218  "functions."));
219 
220 // Command line option to enable/disable select instruction instrumentation.
221 static cl::opt<bool>
222  PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
223  cl::desc("Use this option to turn on/off SELECT "
224  "instruction instrumentation. "));
225 
226 // Command line option to turn on CFG dot or text dump of raw profile counts
228  "pgo-view-raw-counts", cl::Hidden,
229  cl::desc("A boolean option to show CFG dag or text "
230  "with raw profile counts from "
231  "profile data. See also option "
232  "-pgo-view-counts. To limit graph "
233  "display to only one function, use "
234  "filtering option -view-bfi-func-name."),
235  cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
236  clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
237  clEnumValN(PGOVCT_Text, "text", "show in text.")));
238 
239 // Command line option to enable/disable memop intrinsic call.size profiling.
240 static cl::opt<bool>
241  PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
242  cl::desc("Use this option to turn on/off "
243  "memory intrinsic size profiling."));
244 
245 // Emit branch probability as optimization remarks.
246 static cl::opt<bool>
247  EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
248  cl::desc("When this option is on, the annotated "
249  "branch probability will be emitted as "
250  "optimization remarks: -{Rpass|"
251  "pass-remarks}=pgo-instrumentation"));
252 
254  "pgo-instrument-entry", cl::init(false), cl::Hidden,
255  cl::desc("Force to instrument function entry basicblock."));
256 
258  "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
259  cl::desc(
260  "Use this option to enable function entry coverage instrumentation."));
261 
262 static cl::opt<bool>
263  PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
264  cl::desc("Fix function entry count in profile use."));
265 
267  "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
268  cl::desc("Print out the non-match BFI count if a hot raw profile count "
269  "becomes non-hot, or a cold raw profile count becomes hot. "
270  "The print is enabled under -Rpass-analysis=pgo, or "
271  "internal option -pass-remakrs-analysis=pgo."));
272 
274  "pgo-verify-bfi", cl::init(false), cl::Hidden,
275  cl::desc("Print out mismatched BFI counts after setting profile metadata "
276  "The print is enabled under -Rpass-analysis=pgo, or "
277  "internal option -pass-remakrs-analysis=pgo."));
278 
280  "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
281  cl::desc("Set the threshold for pgo-verify-bfi: only print out "
282  "mismatched BFI if the difference percentage is greater than "
283  "this value (in percentage)."));
284 
286  "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
287  cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
288  "profile count value is below."));
289 
290 namespace llvm {
291 // Command line option to turn on CFG dot dump after profile annotation.
292 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
294 
295 // Command line option to specify the name of the function for CFG dump
296 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
298 
300 } // namespace llvm
301 
302 static cl::opt<bool>
303  PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
304  cl::desc("Use the old CFG function hashing"));
305 
306 // Return a string describing the branch condition that can be
307 // used in static branch probability heuristics:
308 static std::string getBranchCondString(Instruction *TI) {
309  BranchInst *BI = dyn_cast<BranchInst>(TI);
310  if (!BI || !BI->isConditional())
311  return std::string();
312 
313  Value *Cond = BI->getCondition();
314  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
315  if (!CI)
316  return std::string();
317 
318  std::string result;
320  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
321  CI->getOperand(0)->getType()->print(OS, true);
322 
323  Value *RHS = CI->getOperand(1);
324  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
325  if (CV) {
326  if (CV->isZero())
327  OS << "_Zero";
328  else if (CV->isOne())
329  OS << "_One";
330  else if (CV->isMinusOne())
331  OS << "_MinusOne";
332  else
333  OS << "_Const";
334  }
335  OS.flush();
336  return result;
337 }
338 
339 static const char *ValueProfKindDescr[] = {
340 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
342 };
343 
344 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
345 // aware this is an ir_level profile so it can set the version flag.
347  const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
348  Type *IntTy64 = Type::getInt64Ty(M.getContext());
349  uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
350  if (IsCS)
351  ProfileVersion |= VARIANT_MASK_CSIR_PROF;
352  if (PGOInstrumentEntry)
353  ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
354  if (DebugInfoCorrelate)
355  ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
357  ProfileVersion |=
358  VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
359  auto IRLevelVersionVariable = new GlobalVariable(
360  M, IntTy64, true, GlobalValue::WeakAnyLinkage,
361  Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
362  IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
363  Triple TT(M.getTargetTriple());
364  if (TT.supportsCOMDAT()) {
365  IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
366  IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
367  }
368  return IRLevelVersionVariable;
369 }
370 
371 namespace {
372 
373 /// The select instruction visitor plays three roles specified
374 /// by the mode. In \c VM_counting mode, it simply counts the number of
375 /// select instructions. In \c VM_instrument mode, it inserts code to count
376 /// the number times TrueValue of select is taken. In \c VM_annotate mode,
377 /// it reads the profile data and annotate the select instruction with metadata.
378 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
379 class PGOUseFunc;
380 
381 /// Instruction Visitor class to visit select instructions.
382 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
383  Function &F;
384  unsigned NSIs = 0; // Number of select instructions instrumented.
385  VisitMode Mode = VM_counting; // Visiting mode.
386  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
387  unsigned TotalNumCtrs = 0; // Total number of counters
388  GlobalVariable *FuncNameVar = nullptr;
389  uint64_t FuncHash = 0;
390  PGOUseFunc *UseFunc = nullptr;
391 
392  SelectInstVisitor(Function &Func) : F(Func) {}
393 
394  void countSelects(Function &Func) {
395  NSIs = 0;
396  Mode = VM_counting;
397  visit(Func);
398  }
399 
400  // Visit the IR stream and instrument all select instructions. \p
401  // Ind is a pointer to the counter index variable; \p TotalNC
402  // is the total number of counters; \p FNV is the pointer to the
403  // PGO function name var; \p FHash is the function hash.
404  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
405  GlobalVariable *FNV, uint64_t FHash) {
406  Mode = VM_instrument;
407  CurCtrIdx = Ind;
408  TotalNumCtrs = TotalNC;
409  FuncHash = FHash;
410  FuncNameVar = FNV;
411  visit(Func);
412  }
413 
414  // Visit the IR stream and annotate all select instructions.
415  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
416  Mode = VM_annotate;
417  UseFunc = UF;
418  CurCtrIdx = Ind;
419  visit(Func);
420  }
421 
422  void instrumentOneSelectInst(SelectInst &SI);
423  void annotateOneSelectInst(SelectInst &SI);
424 
425  // Visit \p SI instruction and perform tasks according to visit mode.
426  void visitSelectInst(SelectInst &SI);
427 
428  // Return the number of select instructions. This needs be called after
429  // countSelects().
430  unsigned getNumOfSelectInsts() const { return NSIs; }
431 };
432 
433 } // end anonymous namespace
434 
435 namespace {
436 
437 /// An MST based instrumentation for PGO
438 ///
439 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
440 /// in the function level.
441 struct PGOEdge {
442  // This class implements the CFG edges. Note the CFG can be a multi-graph.
443  // So there might be multiple edges with same SrcBB and DestBB.
444  const BasicBlock *SrcBB;
445  const BasicBlock *DestBB;
446  uint64_t Weight;
447  bool InMST = false;
448  bool Removed = false;
449  bool IsCritical = false;
450 
451  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
452  : SrcBB(Src), DestBB(Dest), Weight(W) {}
453 
454  // Return the information string of an edge.
455  std::string infoString() const {
456  return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
457  (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
458  }
459 };
460 
461 // This class stores the auxiliary information for each BB.
462 struct BBInfo {
463  BBInfo *Group;
464  uint32_t Index;
465  uint32_t Rank = 0;
466 
467  BBInfo(unsigned IX) : Group(this), Index(IX) {}
468 
469  // Return the information string of this object.
470  std::string infoString() const {
471  return (Twine("Index=") + Twine(Index)).str();
472  }
473 
474  // Empty function -- only applicable to UseBBInfo.
475  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
476 
477  // Empty function -- only applicable to UseBBInfo.
478  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
479 };
480 
481 // This class implements the CFG edges. Note the CFG can be a multi-graph.
482 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
483 private:
484  Function &F;
485 
486  // Is this is context-sensitive instrumentation.
487  bool IsCS;
488 
489  // A map that stores the Comdat group in function F.
490  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
491 
493 
494  void computeCFGHash();
495  void renameComdatFunction();
496 
497 public:
498  std::vector<std::vector<VPCandidateInfo>> ValueSites;
499  SelectInstVisitor SIVisitor;
500  std::string FuncName;
501  GlobalVariable *FuncNameVar;
502 
503  // CFG hash value for this function.
504  uint64_t FunctionHash = 0;
505 
506  // The Minimum Spanning Tree of function CFG.
508 
509  // Collect all the BBs that will be instrumented, and store them in
510  // InstrumentBBs.
511  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
512 
513  // Give an edge, find the BB that will be instrumented.
514  // Return nullptr if there is no BB to be instrumented.
515  BasicBlock *getInstrBB(Edge *E);
516 
517  // Return the auxiliary BB information.
518  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
519 
520  // Return the auxiliary BB information if available.
521  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
522 
523  // Dump edges and BB information.
524  void dumpInfo(std::string Str = "") const {
525  MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
526  Twine(FunctionHash) + "\t" + Str);
527  }
528 
529  FuncPGOInstrumentation(
530  Function &Func, TargetLibraryInfo &TLI,
531  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
532  bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
533  BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
534  bool InstrumentFuncEntry = true)
535  : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
536  ValueSites(IPVK_Last + 1), SIVisitor(Func),
537  MST(F, InstrumentFuncEntry, BPI, BFI) {
538  // This should be done before CFG hash computation.
539  SIVisitor.countSelects(Func);
540  ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
541  if (!IsCS) {
542  NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
543  NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
544  NumOfPGOBB += MST.BBInfos.size();
545  ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
546  } else {
547  NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
548  NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
549  NumOfCSPGOBB += MST.BBInfos.size();
550  }
551 
552  FuncName = getPGOFuncName(F);
553  computeCFGHash();
554  if (!ComdatMembers.empty())
555  renameComdatFunction();
556  LLVM_DEBUG(dumpInfo("after CFGMST"));
557 
558  for (auto &E : MST.AllEdges) {
559  if (E->Removed)
560  continue;
561  IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
562  if (!E->InMST)
563  IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
564  }
565 
566  if (CreateGlobalVar)
567  FuncNameVar = createPGOFuncNameVar(F, FuncName);
568  }
569 };
570 
571 } // end anonymous namespace
572 
573 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
574 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
575 // of selects, indirect calls, mem ops and edges.
576 template <class Edge, class BBInfo>
577 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
578  std::vector<uint8_t> Indexes;
579  JamCRC JC;
580  for (auto &BB : F) {
581  const Instruction *TI = BB.getTerminator();
582  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
583  BasicBlock *Succ = TI->getSuccessor(I);
584  auto BI = findBBInfo(Succ);
585  if (BI == nullptr)
586  continue;
587  uint32_t Index = BI->Index;
588  for (int J = 0; J < 4; J++)
589  Indexes.push_back((uint8_t)(Index >> (J * 8)));
590  }
591  }
592  JC.update(Indexes);
593 
594  JamCRC JCH;
595  if (PGOOldCFGHashing) {
596  // Hash format for context sensitive profile. Reserve 4 bits for other
597  // information.
598  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
599  (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
600  //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
601  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
602  } else {
603  // The higher 32 bits.
604  auto updateJCH = [&JCH](uint64_t Num) {
605  uint8_t Data[8];
607  JCH.update(Data);
608  };
609  updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
610  updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
611  updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
612  updateJCH((uint64_t)MST.AllEdges.size());
613 
614  // Hash format for context sensitive profile. Reserve 4 bits for other
615  // information.
616  FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
617  }
618 
619  // Reserve bit 60-63 for other information purpose.
620  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
621  if (IsCS)
623  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
624  << " CRC = " << JC.getCRC()
625  << ", Selects = " << SIVisitor.getNumOfSelectInsts()
626  << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
627  << ValueSites[IPVK_IndirectCallTarget].size());
628  if (!PGOOldCFGHashing) {
629  LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
630  << ", High32 CRC = " << JCH.getCRC());
631  }
632  LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
633 }
634 
635 // Check if we can safely rename this Comdat function.
636 static bool canRenameComdat(
637  Function &F,
638  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
639  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
640  return false;
641 
642  // FIXME: Current only handle those Comdat groups that only containing one
643  // function.
644  // (1) For a Comdat group containing multiple functions, we need to have a
645  // unique postfix based on the hashes for each function. There is a
646  // non-trivial code refactoring to do this efficiently.
647  // (2) Variables can not be renamed, so we can not rename Comdat function in a
648  // group including global vars.
649  Comdat *C = F.getComdat();
650  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
651  assert(!isa<GlobalAlias>(CM.second));
652  Function *FM = dyn_cast<Function>(CM.second);
653  if (FM != &F)
654  return false;
655  }
656  return true;
657 }
658 
659 // Append the CFGHash to the Comdat function name.
660 template <class Edge, class BBInfo>
661 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
662  if (!canRenameComdat(F, ComdatMembers))
663  return;
664  std::string OrigName = F.getName().str();
665  std::string NewFuncName =
666  Twine(F.getName() + "." + Twine(FunctionHash)).str();
667  F.setName(Twine(NewFuncName));
669  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
670  Comdat *NewComdat;
671  Module *M = F.getParent();
672  // For AvailableExternallyLinkage functions, change the linkage to
673  // LinkOnceODR and put them into comdat. This is because after renaming, there
674  // is no backup external copy available for the function.
675  if (!F.hasComdat()) {
677  NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
679  F.setComdat(NewComdat);
680  return;
681  }
682 
683  // This function belongs to a single function Comdat group.
684  Comdat *OrigComdat = F.getComdat();
685  std::string NewComdatName =
686  Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
687  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
688  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
689 
690  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
691  // Must be a function.
692  cast<Function>(CM.second)->setComdat(NewComdat);
693  }
694 }
695 
696 // Collect all the BBs that will be instruments and return them in
697 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
698 template <class Edge, class BBInfo>
699 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
700  std::vector<BasicBlock *> &InstrumentBBs) {
701  // Use a worklist as we will update the vector during the iteration.
702  std::vector<Edge *> EdgeList;
703  EdgeList.reserve(MST.AllEdges.size());
704  for (auto &E : MST.AllEdges)
705  EdgeList.push_back(E.get());
706 
707  for (auto &E : EdgeList) {
708  BasicBlock *InstrBB = getInstrBB(E);
709  if (InstrBB)
710  InstrumentBBs.push_back(InstrBB);
711  }
712 
713  // Set up InEdges/OutEdges for all BBs.
714  for (auto &E : MST.AllEdges) {
715  if (E->Removed)
716  continue;
717  const BasicBlock *SrcBB = E->SrcBB;
718  const BasicBlock *DestBB = E->DestBB;
719  BBInfo &SrcInfo = getBBInfo(SrcBB);
720  BBInfo &DestInfo = getBBInfo(DestBB);
721  SrcInfo.addOutEdge(E.get());
722  DestInfo.addInEdge(E.get());
723  }
724 }
725 
726 // Given a CFG E to be instrumented, find which BB to place the instrumented
727 // code. The function will split the critical edge if necessary.
728 template <class Edge, class BBInfo>
730  if (E->InMST || E->Removed)
731  return nullptr;
732 
733  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
734  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
735  // For a fake edge, instrument the real BB.
736  if (SrcBB == nullptr)
737  return DestBB;
738  if (DestBB == nullptr)
739  return SrcBB;
740 
741  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
742  // There are basic blocks (such as catchswitch) cannot be instrumented.
743  // If the returned first insertion point is the end of BB, skip this BB.
744  if (BB->getFirstInsertionPt() == BB->end())
745  return nullptr;
746  return BB;
747  };
748 
749  // Instrument the SrcBB if it has a single successor,
750  // otherwise, the DestBB if this is not a critical edge.
751  Instruction *TI = SrcBB->getTerminator();
752  if (TI->getNumSuccessors() <= 1)
753  return canInstrument(SrcBB);
754  if (!E->IsCritical)
755  return canInstrument(DestBB);
756 
757  // Some IndirectBr critical edges cannot be split by the previous
758  // SplitIndirectBrCriticalEdges call. Bail out.
759  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
760  BasicBlock *InstrBB =
761  isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
762  if (!InstrBB) {
763  LLVM_DEBUG(
764  dbgs() << "Fail to split critical edge: not instrument this edge.\n");
765  return nullptr;
766  }
767  // For a critical edge, we have to split. Instrument the newly
768  // created BB.
769  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
770  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
771  << " --> " << getBBInfo(DestBB).Index << "\n");
772  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
773  MST.addEdge(SrcBB, InstrBB, 0);
774  // Second one: Add new edge of InstrBB->DestBB.
775  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
776  NewEdge1.InMST = true;
777  E->Removed = true;
778 
779  return canInstrument(InstrBB);
780 }
781 
782 // When generating value profiling calls on Windows routines that make use of
783 // handler funclets for exception processing an operand bundle needs to attached
784 // to the called function. This routine will set \p OpBundles to contain the
785 // funclet information, if any is needed, that should be placed on the generated
786 // value profiling call for the value profile candidate call.
787 static void
791  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
792  if (!OrigCall)
793  return;
794 
795  if (!isa<IntrinsicInst>(OrigCall)) {
796  // The instrumentation call should belong to the same funclet as a
797  // non-intrinsic call, so just copy the operand bundle, if any exists.
798  Optional<OperandBundleUse> ParentFunclet =
799  OrigCall->getOperandBundle(LLVMContext::OB_funclet);
800  if (ParentFunclet)
801  OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
802  } else {
803  // Intrinsics or other instructions do not get funclet information from the
804  // front-end. Need to use the BlockColors that was computed by the routine
805  // colorEHFunclets to determine whether a funclet is needed.
806  if (!BlockColors.empty()) {
807  const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
808  assert(CV.size() == 1 && "non-unique color for block!");
809  Instruction *EHPad = CV.front()->getFirstNonPHI();
810  if (EHPad->isEHPad())
811  OpBundles.emplace_back("funclet", EHPad);
812  }
813  }
814 }
815 
816 // Visit all edge and instrument the edges not in MST, and do value profiling.
817 // Critical edges will be split.
818 static void instrumentOneFunc(
821  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
822  bool IsCS) {
823  // Split indirectbr critical edges here before computing the MST rather than
824  // later in getInstrBB() to avoid invalidating it.
825  SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
826 
827  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
828  F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
829 
830  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
831  auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
832  auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
833  FuncInfo.FunctionHash);
835  assert(!IsCS &&
836  "entry coverge does not support context-sensitive instrumentation");
837  auto &EntryBB = F.getEntryBlock();
838  IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
839  // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
840  // i32 <index>)
841  Builder.CreateCall(
842  Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
843  {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
844  return;
845  }
846 
847  std::vector<BasicBlock *> InstrumentBBs;
848  FuncInfo.getInstrumentBBs(InstrumentBBs);
849  unsigned NumCounters =
850  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
851 
852  uint32_t I = 0;
853  for (auto *InstrBB : InstrumentBBs) {
854  IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
855  assert(Builder.GetInsertPoint() != InstrBB->end() &&
856  "Cannot get the Instrumentation point");
857  // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
858  // i32 <index>)
859  Builder.CreateCall(
860  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
861  {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
862  }
863 
864  // Now instrument select instructions:
865  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
866  FuncInfo.FunctionHash);
867  assert(I == NumCounters);
868 
870  return;
871 
872  NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
873 
874  // Intrinsic function calls do not have funclet operand bundles needed for
875  // Windows exception handling attached to them. However, if value profiling is
876  // inserted for one of these calls, then a funclet value will need to be set
877  // on the instrumentation call based on the funclet coloring.
879  if (F.hasPersonalityFn() &&
880  isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
881  BlockColors = colorEHFunclets(F);
882 
883  // For each VP Kind, walk the VP candidates and instrument each one.
884  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
885  unsigned SiteIndex = 0;
886  if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
887  continue;
888 
889  for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
890  LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
891  << " site: CallSite Index = " << SiteIndex << "\n");
892 
893  IRBuilder<> Builder(Cand.InsertPt);
894  assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
895  "Cannot get the Instrumentation point");
896 
897  Value *ToProfile = nullptr;
898  if (Cand.V->getType()->isIntegerTy())
899  ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
900  else if (Cand.V->getType()->isPointerTy())
901  ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
902  assert(ToProfile && "value profiling Value is of unexpected type");
903 
905  populateEHOperandBundle(Cand, BlockColors, OpBundles);
906  Builder.CreateCall(
907  Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
908  {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
909  Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
910  Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
911  OpBundles);
912  }
913  } // IPVK_First <= Kind <= IPVK_Last
914 }
915 
916 namespace {
917 
918 // This class represents a CFG edge in profile use compilation.
919 struct PGOUseEdge : public PGOEdge {
920  bool CountValid = false;
921  uint64_t CountValue = 0;
922 
923  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
924  : PGOEdge(Src, Dest, W) {}
925 
926  // Set edge count value
927  void setEdgeCount(uint64_t Value) {
928  CountValue = Value;
929  CountValid = true;
930  }
931 
932  // Return the information string for this object.
933  std::string infoString() const {
934  if (!CountValid)
935  return PGOEdge::infoString();
936  return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
937  .str();
938  }
939 };
940 
941 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
942 
943 // This class stores the auxiliary information for each BB.
944 struct UseBBInfo : public BBInfo {
945  uint64_t CountValue = 0;
946  bool CountValid;
947  int32_t UnknownCountInEdge = 0;
948  int32_t UnknownCountOutEdge = 0;
949  DirectEdges InEdges;
950  DirectEdges OutEdges;
951 
952  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
953 
954  UseBBInfo(unsigned IX, uint64_t C)
955  : BBInfo(IX), CountValue(C), CountValid(true) {}
956 
957  // Set the profile count value for this BB.
958  void setBBInfoCount(uint64_t Value) {
959  CountValue = Value;
960  CountValid = true;
961  }
962 
963  // Return the information string of this object.
964  std::string infoString() const {
965  if (!CountValid)
966  return BBInfo::infoString();
967  return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
968  }
969 
970  // Add an OutEdge and update the edge count.
971  void addOutEdge(PGOUseEdge *E) {
972  OutEdges.push_back(E);
973  UnknownCountOutEdge++;
974  }
975 
976  // Add an InEdge and update the edge count.
977  void addInEdge(PGOUseEdge *E) {
978  InEdges.push_back(E);
979  UnknownCountInEdge++;
980  }
981 };
982 
983 } // end anonymous namespace
984 
985 // Sum up the count values for all the edges.
987  uint64_t Total = 0;
988  for (auto &E : Edges) {
989  if (E->Removed)
990  continue;
991  Total += E->CountValue;
992  }
993  return Total;
994 }
995 
996 namespace {
997 
998 class PGOUseFunc {
999 public:
1000  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1001  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1003  ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1004  : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1005  FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1006  InstrumentFuncEntry),
1007  FreqAttr(FFA_Normal), IsCS(IsCS) {}
1008 
1009  // Read counts for the instrumented BB from profile.
1010  bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1011  bool &AllMinusOnes);
1012 
1013  // Populate the counts for all BBs.
1014  void populateCounters();
1015 
1016  // Set the branch weights based on the count values.
1017  void setBranchWeights();
1018 
1019  // Annotate the value profile call sites for all value kind.
1020  void annotateValueSites();
1021 
1022  // Annotate the value profile call sites for one value kind.
1023  void annotateValueSites(uint32_t Kind);
1024 
1025  // Annotate the irreducible loop header weights.
1026  void annotateIrrLoopHeaderWeights();
1027 
1028  // The hotness of the function from the profile count.
1029  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1030 
1031  // Return the function hotness from the profile.
1032  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1033 
1034  // Return the function hash.
1035  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1036 
1037  // Return the profile record for this function;
1038  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1039 
1040  // Return the auxiliary BB information.
1041  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1042  return FuncInfo.getBBInfo(BB);
1043  }
1044 
1045  // Return the auxiliary BB information if available.
1046  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1047  return FuncInfo.findBBInfo(BB);
1048  }
1049 
1050  Function &getFunc() const { return F; }
1051 
1052  void dumpInfo(std::string Str = "") const {
1053  FuncInfo.dumpInfo(Str);
1054  }
1055 
1056  uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1057 private:
1058  Function &F;
1059  Module *M;
1061  ProfileSummaryInfo *PSI;
1062 
1063  // This member stores the shared information with class PGOGenFunc.
1064  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1065 
1066  // The maximum count value in the profile. This is only used in PGO use
1067  // compilation.
1068  uint64_t ProgramMaxCount;
1069 
1070  // Position of counter that remains to be read.
1071  uint32_t CountPosition = 0;
1072 
1073  // Total size of the profile count for this function.
1074  uint32_t ProfileCountSize = 0;
1075 
1076  // ProfileRecord for this function.
1077  InstrProfRecord ProfileRecord;
1078 
1079  // Function hotness info derived from profile.
1080  FuncFreqAttr FreqAttr;
1081 
1082  // Is to use the context sensitive profile.
1083  bool IsCS;
1084 
1085  // Find the Instrumented BB and set the value. Return false on error.
1086  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1087 
1088  // Set the edge counter value for the unknown edge -- there should be only
1089  // one unknown edge.
1090  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1091 
1092  // Return FuncName string;
1093  std::string getFuncName() const { return FuncInfo.FuncName; }
1094 
1095  // Set the hot/cold inline hints based on the count values.
1096  // FIXME: This function should be removed once the functionality in
1097  // the inliner is implemented.
1098  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1099  if (PSI->isHotCount(EntryCount))
1100  FreqAttr = FFA_Hot;
1101  else if (PSI->isColdCount(MaxCount))
1102  FreqAttr = FFA_Cold;
1103  }
1104 };
1105 
1106 } // end anonymous namespace
1107 
1108 // Visit all the edges and assign the count value for the instrumented
1109 // edges and the BB. Return false on error.
1110 bool PGOUseFunc::setInstrumentedCounts(
1111  const std::vector<uint64_t> &CountFromProfile) {
1112 
1113  std::vector<BasicBlock *> InstrumentBBs;
1114  FuncInfo.getInstrumentBBs(InstrumentBBs);
1115  unsigned NumCounters =
1116  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1117  // The number of counters here should match the number of counters
1118  // in profile. Return if they mismatch.
1119  if (NumCounters != CountFromProfile.size()) {
1120  return false;
1121  }
1122  auto *FuncEntry = &*F.begin();
1123 
1124  // Set the profile count to the Instrumented BBs.
1125  uint32_t I = 0;
1126  for (BasicBlock *InstrBB : InstrumentBBs) {
1127  uint64_t CountValue = CountFromProfile[I++];
1128  UseBBInfo &Info = getBBInfo(InstrBB);
1129  // If we reach here, we know that we have some nonzero count
1130  // values in this function. The entry count should not be 0.
1131  // Fix it if necessary.
1132  if (InstrBB == FuncEntry && CountValue == 0)
1133  CountValue = 1;
1134  Info.setBBInfoCount(CountValue);
1135  }
1136  ProfileCountSize = CountFromProfile.size();
1137  CountPosition = I;
1138 
1139  // Set the edge count and update the count of unknown edges for BBs.
1140  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1141  E->setEdgeCount(Value);
1142  this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1143  this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1144  };
1145 
1146  // Set the profile count the Instrumented edges. There are BBs that not in
1147  // MST but not instrumented. Need to set the edge count value so that we can
1148  // populate the profile counts later.
1149  for (auto &E : FuncInfo.MST.AllEdges) {
1150  if (E->Removed || E->InMST)
1151  continue;
1152  const BasicBlock *SrcBB = E->SrcBB;
1153  UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1154 
1155  // If only one out-edge, the edge profile count should be the same as BB
1156  // profile count.
1157  if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1158  setEdgeCount(E.get(), SrcInfo.CountValue);
1159  else {
1160  const BasicBlock *DestBB = E->DestBB;
1161  UseBBInfo &DestInfo = getBBInfo(DestBB);
1162  // If only one in-edge, the edge profile count should be the same as BB
1163  // profile count.
1164  if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1165  setEdgeCount(E.get(), DestInfo.CountValue);
1166  }
1167  if (E->CountValid)
1168  continue;
1169  // E's count should have been set from profile. If not, this meenas E skips
1170  // the instrumentation. We set the count to 0.
1171  setEdgeCount(E.get(), 0);
1172  }
1173  return true;
1174 }
1175 
1176 // Set the count value for the unknown edge. There should be one and only one
1177 // unknown edge in Edges vector.
1178 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1179  for (auto &E : Edges) {
1180  if (E->CountValid)
1181  continue;
1182  E->setEdgeCount(Value);
1183 
1184  getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1185  getBBInfo(E->DestBB).UnknownCountInEdge--;
1186  return;
1187  }
1188  llvm_unreachable("Cannot find the unknown count edge");
1189 }
1190 
1191 // Emit function metadata indicating PGO profile mismatch.
1193  LLVMContext &ctx) {
1194  const char MetadataName[] = "instr_prof_hash_mismatch";
1196  // If this metadata already exists, ignore.
1197  auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1198  if (Existing) {
1199  MDTuple *Tuple = cast<MDTuple>(Existing);
1200  for (auto &N : Tuple->operands()) {
1201  if (cast<MDString>(N.get())->getString() == MetadataName)
1202  return;
1203  Names.push_back(N.get());
1204  }
1205  }
1206 
1207  MDBuilder MDB(ctx);
1208  Names.push_back(MDB.createString(MetadataName));
1209  MDNode *MD = MDTuple::get(ctx, Names);
1210  F.setMetadata(LLVMContext::MD_annotation, MD);
1211 }
1212 
1213 // Read the profile from ProfileFileName and assign the value to the
1214 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1215 // Return true if the profile are successfully read, and false on errors.
1216 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1217  bool &AllMinusOnes) {
1218  auto &Ctx = M->getContext();
1220  PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1221  if (Error E = Result.takeError()) {
1222  handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1223  auto Err = IPE.get();
1224  bool SkipWarning = false;
1225  LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1226  << FuncInfo.FuncName << ": ");
1227  if (Err == instrprof_error::unknown_function) {
1228  IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1229  SkipWarning = !PGOWarnMissing;
1230  LLVM_DEBUG(dbgs() << "unknown function");
1231  } else if (Err == instrprof_error::hash_mismatch ||
1232  Err == instrprof_error::malformed) {
1233  IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1234  SkipWarning =
1237  (F.hasComdat() ||
1238  F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1239  LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1240  // Emit function metadata indicating PGO profile mismatch.
1241  annotateFunctionWithHashMismatch(F, M->getContext());
1242  }
1243 
1244  LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1245  if (SkipWarning)
1246  return;
1247 
1248  std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1249  std::string(" Hash = ") +
1250  std::to_string(FuncInfo.FunctionHash);
1251 
1252  Ctx.diagnose(
1253  DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1254  });
1255  return false;
1256  }
1257  ProfileRecord = std::move(Result.get());
1258  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1259 
1260  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1261  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1262  AllMinusOnes = (CountFromProfile.size() > 0);
1263  uint64_t ValueSum = 0;
1264  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1265  LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1266  ValueSum += CountFromProfile[I];
1267  if (CountFromProfile[I] != (uint64_t)-1)
1268  AllMinusOnes = false;
1269  }
1270  AllZeros = (ValueSum == 0);
1271 
1272  LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1273 
1274  getBBInfo(nullptr).UnknownCountOutEdge = 2;
1275  getBBInfo(nullptr).UnknownCountInEdge = 2;
1276 
1277  if (!setInstrumentedCounts(CountFromProfile)) {
1278  LLVM_DEBUG(
1279  dbgs() << "Inconsistent number of counts, skipping this function");
1280  Ctx.diagnose(DiagnosticInfoPGOProfile(
1281  M->getName().data(),
1282  Twine("Inconsistent number of counts in ") + F.getName().str()
1283  + Twine(": the profile may be stale or there is a function name collision."),
1284  DS_Warning));
1285  return false;
1286  }
1287  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1288  return true;
1289 }
1290 
1291 // Populate the counters from instrumented BBs to all BBs.
1292 // In the end of this operation, all BBs should have a valid count value.
1293 void PGOUseFunc::populateCounters() {
1294  bool Changes = true;
1295  unsigned NumPasses = 0;
1296  while (Changes) {
1297  NumPasses++;
1298  Changes = false;
1299 
1300  // For efficient traversal, it's better to start from the end as most
1301  // of the instrumented edges are at the end.
1302  for (auto &BB : reverse(F)) {
1303  UseBBInfo *Count = findBBInfo(&BB);
1304  if (Count == nullptr)
1305  continue;
1306  if (!Count->CountValid) {
1307  if (Count->UnknownCountOutEdge == 0) {
1308  Count->CountValue = sumEdgeCount(Count->OutEdges);
1309  Count->CountValid = true;
1310  Changes = true;
1311  } else if (Count->UnknownCountInEdge == 0) {
1312  Count->CountValue = sumEdgeCount(Count->InEdges);
1313  Count->CountValid = true;
1314  Changes = true;
1315  }
1316  }
1317  if (Count->CountValid) {
1318  if (Count->UnknownCountOutEdge == 1) {
1319  uint64_t Total = 0;
1320  uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1321  // If the one of the successor block can early terminate (no-return),
1322  // we can end up with situation where out edge sum count is larger as
1323  // the source BB's count is collected by a post-dominated block.
1324  if (Count->CountValue > OutSum)
1325  Total = Count->CountValue - OutSum;
1326  setEdgeCount(Count->OutEdges, Total);
1327  Changes = true;
1328  }
1329  if (Count->UnknownCountInEdge == 1) {
1330  uint64_t Total = 0;
1331  uint64_t InSum = sumEdgeCount(Count->InEdges);
1332  if (Count->CountValue > InSum)
1333  Total = Count->CountValue - InSum;
1334  setEdgeCount(Count->InEdges, Total);
1335  Changes = true;
1336  }
1337  }
1338  }
1339  }
1340 
1341  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1342  (void) NumPasses;
1343 #ifndef NDEBUG
1344  // Assert every BB has a valid counter.
1345  for (auto &BB : F) {
1346  auto BI = findBBInfo(&BB);
1347  if (BI == nullptr)
1348  continue;
1349  assert(BI->CountValid && "BB count is not valid");
1350  }
1351 #endif
1352  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1353  uint64_t FuncMaxCount = FuncEntryCount;
1354  for (auto &BB : F) {
1355  auto BI = findBBInfo(&BB);
1356  if (BI == nullptr)
1357  continue;
1358  FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1359  }
1360 
1361  // Fix the obviously inconsistent entry count.
1362  if (FuncMaxCount > 0 && FuncEntryCount == 0)
1363  FuncEntryCount = 1;
1364  F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1365  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1366 
1367  // Now annotate select instructions
1368  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1369  assert(CountPosition == ProfileCountSize);
1370 
1371  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1372 }
1373 
1374 // Assign the scaled count values to the BB with multiple out edges.
1376  // Generate MD_prof metadata for every branch instruction.
1377  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1378  << " IsCS=" << IsCS << "\n");
1379  for (auto &BB : F) {
1380  Instruction *TI = BB.getTerminator();
1381  if (TI->getNumSuccessors() < 2)
1382  continue;
1383  if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1384  isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
1385  continue;
1386 
1387  if (getBBInfo(&BB).CountValue == 0)
1388  continue;
1389 
1390  // We have a non-zero Branch BB.
1391  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1392  unsigned Size = BBCountInfo.OutEdges.size();
1393  SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1394  uint64_t MaxCount = 0;
1395  for (unsigned s = 0; s < Size; s++) {
1396  const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1397  const BasicBlock *SrcBB = E->SrcBB;
1398  const BasicBlock *DestBB = E->DestBB;
1399  if (DestBB == nullptr)
1400  continue;
1401  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1402  uint64_t EdgeCount = E->CountValue;
1403  if (EdgeCount > MaxCount)
1404  MaxCount = EdgeCount;
1405  EdgeCounts[SuccNum] = EdgeCount;
1406  }
1407  setProfMetadata(M, TI, EdgeCounts, MaxCount);
1408  }
1409 }
1410 
1412  for (BasicBlock *Pred : predecessors(BB)) {
1413  if (isa<IndirectBrInst>(Pred->getTerminator()))
1414  return true;
1415  }
1416  return false;
1417 }
1418 
1419 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1420  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1421  // Find irr loop headers
1422  for (auto &BB : F) {
1423  // As a heuristic also annotate indrectbr targets as they have a high chance
1424  // to become an irreducible loop header after the indirectbr tail
1425  // duplication.
1426  if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1427  Instruction *TI = BB.getTerminator();
1428  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1429  setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1430  }
1431  }
1432 }
1433 
1434 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1436  return;
1437  Module *M = F.getParent();
1439  Type *Int64Ty = Builder.getInt64Ty();
1440  Type *I8PtrTy = Builder.getInt8PtrTy();
1441  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1442  Builder.CreateCall(
1443  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1444  {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1445  Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1446  Builder.getInt32(*CurCtrIdx), Step});
1447  ++(*CurCtrIdx);
1448 }
1449 
1450 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1451  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1452  assert(*CurCtrIdx < CountFromProfile.size() &&
1453  "Out of bound access of counters");
1454  uint64_t SCounts[2];
1455  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1456  ++(*CurCtrIdx);
1457  uint64_t TotalCount = 0;
1458  auto BI = UseFunc->findBBInfo(SI.getParent());
1459  if (BI != nullptr)
1460  TotalCount = BI->CountValue;
1461  // False Count
1462  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1463  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1464  if (MaxCount)
1465  setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1466 }
1467 
1468 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1469  if (!PGOInstrSelect)
1470  return;
1471  // FIXME: do not handle this yet.
1472  if (SI.getCondition()->getType()->isVectorTy())
1473  return;
1474 
1475  switch (Mode) {
1476  case VM_counting:
1477  NSIs++;
1478  return;
1479  case VM_instrument:
1480  instrumentOneSelectInst(SI);
1481  return;
1482  case VM_annotate:
1483  annotateOneSelectInst(SI);
1484  return;
1485  }
1486 
1487  llvm_unreachable("Unknown visiting mode");
1488 }
1489 
1490 // Traverse all valuesites and annotate the instructions for all value kind.
1491 void PGOUseFunc::annotateValueSites() {
1493  return;
1494 
1495  // Create the PGOFuncName meta data.
1496  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1497 
1498  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1499  annotateValueSites(Kind);
1500 }
1501 
1502 // Annotate the instructions for a specific value kind.
1503 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1504  assert(Kind <= IPVK_Last);
1505  unsigned ValueSiteIndex = 0;
1506  auto &ValueSites = FuncInfo.ValueSites[Kind];
1507  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1508  if (NumValueSites != ValueSites.size()) {
1509  auto &Ctx = M->getContext();
1510  Ctx.diagnose(DiagnosticInfoPGOProfile(
1511  M->getName().data(),
1512  Twine("Inconsistent number of value sites for ") +
1514  Twine(" profiling in \"") + F.getName().str() +
1515  Twine("\", possibly due to the use of a stale profile."),
1516  DS_Warning));
1517  return;
1518  }
1519 
1520  for (VPCandidateInfo &I : ValueSites) {
1521  LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1522  << "): Index = " << ValueSiteIndex << " out of "
1523  << NumValueSites << "\n");
1524  annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1525  static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1526  Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1527  : MaxNumAnnotations);
1528  ValueSiteIndex++;
1529  }
1530 }
1531 
1532 // Collect the set of members for each Comdat in module M and store
1533 // in ComdatMembers.
1535  Module &M,
1536  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1537  if (!DoComdatRenaming)
1538  return;
1539  for (Function &F : M)
1540  if (Comdat *C = F.getComdat())
1541  ComdatMembers.insert(std::make_pair(C, &F));
1542  for (GlobalVariable &GV : M.globals())
1543  if (Comdat *C = GV.getComdat())
1544  ComdatMembers.insert(std::make_pair(C, &GV));
1545  for (GlobalAlias &GA : M.aliases())
1546  if (Comdat *C = GA.getComdat())
1547  ComdatMembers.insert(std::make_pair(C, &GA));
1548 }
1549 
1551  Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1553  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1554  // For the context-sensitve instrumentation, we should have a separated pass
1555  // (before LTO/ThinLTO linking) to create these variables.
1556  if (!IsCS)
1557  createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1558  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1559  collectComdatMembers(M, ComdatMembers);
1560 
1561  for (auto &F : M) {
1562  if (F.isDeclaration())
1563  continue;
1564  if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1565  continue;
1566  auto &TLI = LookupTLI(F);
1567  auto *BPI = LookupBPI(F);
1568  auto *BFI = LookupBFI(F);
1569  instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1570  }
1571  return true;
1572 }
1573 
1576  createProfileFileNameVar(M, CSInstrName);
1577  // The variable in a comdat may be discarded by LTO. Ensure the declaration
1578  // will be retained.
1580  return PreservedAnalyses::all();
1581 }
1582 
1584  ModuleAnalysisManager &AM) {
1585  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1586  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1588  };
1589  auto LookupBPI = [&FAM](Function &F) {
1591  };
1592  auto LookupBFI = [&FAM](Function &F) {
1594  };
1595 
1596  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1597  return PreservedAnalyses::all();
1598 
1599  return PreservedAnalyses::none();
1600 }
1601 
1602 // Using the ratio b/w sums of profile count values and BFI count values to
1603 // adjust the func entry count.
1604 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1605  BranchProbabilityInfo &NBPI) {
1606  Function &F = Func.getFunc();
1607  BlockFrequencyInfo NBFI(F, NBPI, LI);
1608 #ifndef NDEBUG
1609  auto BFIEntryCount = F.getEntryCount();
1610  assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) &&
1611  "Invalid BFI Entrycount");
1612 #endif
1613  auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1614  auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1615  for (auto &BBI : F) {
1616  uint64_t CountValue = 0;
1617  uint64_t BFICountValue = 0;
1618  if (!Func.findBBInfo(&BBI))
1619  continue;
1620  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1621  CountValue = Func.getBBInfo(&BBI).CountValue;
1622  BFICountValue = BFICount.getValue();
1623  SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1624  SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1625  }
1626  if (SumCount.isZero())
1627  return;
1628 
1629  assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1630  "Incorrect sum of BFI counts");
1631  if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1632  return;
1633  double Scale = (SumCount / SumBFICount).convertToDouble();
1634  if (Scale < 1.001 && Scale > 0.999)
1635  return;
1636 
1637  uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1638  uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1639  if (NewEntryCount == 0)
1640  NewEntryCount = 1;
1641  if (NewEntryCount != FuncEntryCount) {
1642  F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1643  LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1644  << ", entry_count " << FuncEntryCount << " --> "
1645  << NewEntryCount << "\n");
1646  }
1647 }
1648 
1649 // Compare the profile count values with BFI count values, and print out
1650 // the non-matching ones.
1651 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1652  BranchProbabilityInfo &NBPI,
1653  uint64_t HotCountThreshold,
1655  Function &F = Func.getFunc();
1656  BlockFrequencyInfo NBFI(F, NBPI, LI);
1657  // bool PrintFunc = false;
1658  bool HotBBOnly = PGOVerifyHotBFI;
1659  std::string Msg;
1661 
1662  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1663  for (auto &BBI : F) {
1664  uint64_t CountValue = 0;
1665  uint64_t BFICountValue = 0;
1666 
1667  if (Func.getBBInfo(&BBI).CountValid)
1668  CountValue = Func.getBBInfo(&BBI).CountValue;
1669 
1670  BBNum++;
1671  if (CountValue)
1672  NonZeroBBNum++;
1673  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1674  if (BFICount)
1675  BFICountValue = BFICount.getValue();
1676 
1677  if (HotBBOnly) {
1678  bool rawIsHot = CountValue >= HotCountThreshold;
1679  bool BFIIsHot = BFICountValue >= HotCountThreshold;
1680  bool rawIsCold = CountValue <= ColdCountThreshold;
1681  bool ShowCount = false;
1682  if (rawIsHot && !BFIIsHot) {
1683  Msg = "raw-Hot to BFI-nonHot";
1684  ShowCount = true;
1685  } else if (rawIsCold && BFIIsHot) {
1686  Msg = "raw-Cold to BFI-Hot";
1687  ShowCount = true;
1688  }
1689  if (!ShowCount)
1690  continue;
1691  } else {
1692  if ((CountValue < PGOVerifyBFICutoff) &&
1693  (BFICountValue < PGOVerifyBFICutoff))
1694  continue;
1695  uint64_t Diff = (BFICountValue >= CountValue)
1696  ? BFICountValue - CountValue
1697  : CountValue - BFICountValue;
1698  if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1699  continue;
1700  }
1701  BBMisMatchNum++;
1702 
1703  ORE.emit([&]() {
1705  F.getSubprogram(), &BBI);
1706  Remark << "BB " << ore::NV("Block", BBI.getName())
1707  << " Count=" << ore::NV("Count", CountValue)
1708  << " BFI_Count=" << ore::NV("Count", BFICountValue);
1709  if (!Msg.empty())
1710  Remark << " (" << Msg << ")";
1711  return Remark;
1712  });
1713  }
1714  if (BBMisMatchNum)
1715  ORE.emit([&]() {
1716  return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1717  F.getSubprogram(), &F.getEntryBlock())
1718  << "In Func " << ore::NV("Function", F.getName())
1719  << ": Num_of_BB=" << ore::NV("Count", BBNum)
1720  << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1721  << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1722  });
1723 }
1724 
1726  Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1727  function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1729  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1730  ProfileSummaryInfo *PSI, bool IsCS) {
1731  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1732  auto &Ctx = M.getContext();
1733  // Read the counter array from file.
1734  auto ReaderOrErr =
1735  IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
1736  if (Error E = ReaderOrErr.takeError()) {
1737  handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1738  Ctx.diagnose(
1739  DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1740  });
1741  return false;
1742  }
1743 
1744  std::unique_ptr<IndexedInstrProfReader> PGOReader =
1745  std::move(ReaderOrErr.get());
1746  if (!PGOReader) {
1747  Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1748  StringRef("Cannot get PGOReader")));
1749  return false;
1750  }
1751  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1752  return false;
1753 
1754  // TODO: might need to change the warning once the clang option is finalized.
1755  if (!PGOReader->isIRLevelProfile()) {
1756  Ctx.diagnose(DiagnosticInfoPGOProfile(
1757  ProfileFileName.data(), "Not an IR level instrumentation profile"));
1758  return false;
1759  }
1760  if (PGOReader->hasSingleByteCoverage()) {
1761  Ctx.diagnose(DiagnosticInfoPGOProfile(
1762  ProfileFileName.data(),
1763  "Cannot use coverage profiles for optimization"));
1764  return false;
1765  }
1766  if (PGOReader->functionEntryOnly()) {
1767  Ctx.diagnose(DiagnosticInfoPGOProfile(
1768  ProfileFileName.data(),
1769  "Function entry profiles are not yet supported for optimization"));
1770  return false;
1771  }
1772 
1773  // Add the profile summary (read from the header of the indexed summary) here
1774  // so that we can use it below when reading counters (which checks if the
1775  // function should be marked with a cold or inlinehint attribute).
1776  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1779  PSI->refresh();
1780 
1781  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1782  collectComdatMembers(M, ComdatMembers);
1783  std::vector<Function *> HotFunctions;
1784  std::vector<Function *> ColdFunctions;
1785 
1786  // If the profile marked as always instrument the entry BB, do the
1787  // same. Note this can be overwritten by the internal option in CFGMST.h
1788  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
1790  InstrumentFuncEntry = PGOInstrumentEntry;
1791  for (auto &F : M) {
1792  if (F.isDeclaration())
1793  continue;
1794  auto &TLI = LookupTLI(F);
1795  auto *BPI = LookupBPI(F);
1796  auto *BFI = LookupBFI(F);
1797  // Split indirectbr critical edges here before computing the MST rather than
1798  // later in getInstrBB() to avoid invalidating it.
1799  SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
1800  PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
1801  InstrumentFuncEntry);
1802  // When AllMinusOnes is true, it means the profile for the function
1803  // is unrepresentative and this function is actually hot. Set the
1804  // entry count of the function to be multiple times of hot threshold
1805  // and drop all its internal counters.
1806  bool AllMinusOnes = false;
1807  bool AllZeros = false;
1808  if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
1809  continue;
1810  if (AllZeros) {
1811  F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1812  if (Func.getProgramMaxCount() != 0)
1813  ColdFunctions.push_back(&F);
1814  continue;
1815  }
1816  const unsigned MultiplyFactor = 3;
1817  if (AllMinusOnes) {
1818  uint64_t HotThreshold = PSI->getHotCountThreshold();
1819  if (HotThreshold)
1820  F.setEntryCount(
1821  ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
1822  HotFunctions.push_back(&F);
1823  continue;
1824  }
1825  Func.populateCounters();
1826  Func.setBranchWeights();
1827  Func.annotateValueSites();
1828  Func.annotateIrrLoopHeaderWeights();
1829  PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1830  if (FreqAttr == PGOUseFunc::FFA_Cold)
1831  ColdFunctions.push_back(&F);
1832  else if (FreqAttr == PGOUseFunc::FFA_Hot)
1833  HotFunctions.push_back(&F);
1834  if (PGOViewCounts != PGOVCT_None &&
1835  (ViewBlockFreqFuncName.empty() ||
1836  F.getName().equals(ViewBlockFreqFuncName))) {
1837  LoopInfo LI{DominatorTree(F)};
1838  std::unique_ptr<BranchProbabilityInfo> NewBPI =
1839  std::make_unique<BranchProbabilityInfo>(F, LI);
1840  std::unique_ptr<BlockFrequencyInfo> NewBFI =
1841  std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1842  if (PGOViewCounts == PGOVCT_Graph)
1843  NewBFI->view();
1844  else if (PGOViewCounts == PGOVCT_Text) {
1845  dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1846  NewBFI->print(dbgs());
1847  }
1848  }
1849  if (PGOViewRawCounts != PGOVCT_None &&
1850  (ViewBlockFreqFuncName.empty() ||
1851  F.getName().equals(ViewBlockFreqFuncName))) {
1853  if (ViewBlockFreqFuncName.empty())
1854  WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1855  else
1856  ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1857  else if (PGOViewRawCounts == PGOVCT_Text) {
1858  dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1859  Func.dumpInfo();
1860  }
1861  }
1862 
1864  LoopInfo LI{DominatorTree(F)};
1865  BranchProbabilityInfo NBPI(F, LI);
1866 
1867  // Fix func entry count.
1868  if (PGOFixEntryCount)
1869  fixFuncEntryCount(Func, LI, NBPI);
1870 
1871  // Verify BlockFrequency information.
1872  uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
1873  if (PGOVerifyHotBFI) {
1874  HotCountThreshold = PSI->getOrCompHotCountThreshold();
1876  }
1877  verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
1878  }
1879  }
1880 
1881  // Set function hotness attribute from the profile.
1882  // We have to apply these attributes at the end because their presence
1883  // can affect the BranchProbabilityInfo of any callers, resulting in an
1884  // inconsistent MST between prof-gen and prof-use.
1885  for (auto &F : HotFunctions) {
1886  F->addFnAttr(Attribute::InlineHint);
1887  LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1888  << "\n");
1889  }
1890  for (auto &F : ColdFunctions) {
1891  // Only set when there is no Attribute::Hot set by the user. For Hot
1892  // attribute, user's annotation has the precedence over the profile.
1893  if (F->hasFnAttribute(Attribute::Hot)) {
1894  auto &Ctx = M.getContext();
1895  std::string Msg = std::string("Function ") + F->getName().str() +
1896  std::string(" is annotated as a hot function but"
1897  " the profile is cold");
1898  Ctx.diagnose(
1899  DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1900  continue;
1901  }
1902  F->addFnAttr(Attribute::Cold);
1903  LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
1904  << "\n");
1905  }
1906  return true;
1907 }
1908 
1910  std::string RemappingFilename,
1911  bool IsCS)
1912  : ProfileFileName(std::move(Filename)),
1913  ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
1914  if (!PGOTestProfileFile.empty())
1915  ProfileFileName = PGOTestProfileFile;
1916  if (!PGOTestProfileRemappingFile.empty())
1917  ProfileRemappingFileName = PGOTestProfileRemappingFile;
1918 }
1919 
1921  ModuleAnalysisManager &AM) {
1922 
1923  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1924  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1926  };
1927  auto LookupBPI = [&FAM](Function &F) {
1929  };
1930  auto LookupBFI = [&FAM](Function &F) {
1932  };
1933 
1934  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1935 
1936  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
1937  LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
1938  return PreservedAnalyses::all();
1939 
1940  return PreservedAnalyses::none();
1941 }
1942 
1943 static std::string getSimpleNodeName(const BasicBlock *Node) {
1944  if (!Node->getName().empty())
1945  return std::string(Node->getName());
1946 
1947  std::string SimpleNodeName;
1948  raw_string_ostream OS(SimpleNodeName);
1949  Node->printAsOperand(OS, false);
1950  return OS.str();
1951 }
1952 
1954  ArrayRef<uint64_t> EdgeCounts,
1955  uint64_t MaxCount) {
1956  MDBuilder MDB(M->getContext());
1957  assert(MaxCount > 0 && "Bad max count");
1958  uint64_t Scale = calculateCountScale(MaxCount);
1959  SmallVector<unsigned, 4> Weights;
1960  for (const auto &ECI : EdgeCounts)
1961  Weights.push_back(scaleBranchCount(ECI, Scale));
1962 
1963  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
1964  : Weights) {
1965  dbgs() << W << " ";
1966  } dbgs() << "\n";);
1967 
1968  misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1969 
1970  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1971  if (EmitBranchProbability) {
1972  std::string BrCondStr = getBranchCondString(TI);
1973  if (BrCondStr.empty())
1974  return;
1975 
1976  uint64_t WSum =
1977  std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
1978  [](uint64_t w1, uint64_t w2) { return w1 + w2; });
1979  uint64_t TotalCount =
1980  std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
1981  [](uint64_t c1, uint64_t c2) { return c1 + c2; });
1982  Scale = calculateCountScale(WSum);
1983  BranchProbability BP(scaleBranchCount(Weights[0], Scale),
1984  scaleBranchCount(WSum, Scale));
1985  std::string BranchProbStr;
1986  raw_string_ostream OS(BranchProbStr);
1987  OS << BP;
1988  OS << " (total count : " << TotalCount << ")";
1989  OS.flush();
1990  Function *F = TI->getParent()->getParent();
1992  ORE.emit([&]() {
1993  return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
1994  << BrCondStr << " is true with probability : " << BranchProbStr;
1995  });
1996  }
1997 }
1998 
1999 namespace llvm {
2000 
2002  MDBuilder MDB(M->getContext());
2003  TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2004  MDB.createIrrLoopHeaderWeight(Count));
2005 }
2006 
2007 template <> struct GraphTraits<PGOUseFunc *> {
2008  using NodeRef = const BasicBlock *;
2011 
2012  static NodeRef getEntryNode(const PGOUseFunc *G) {
2013  return &G->getFunc().front();
2014  }
2015 
2017  return succ_begin(N);
2018  }
2019 
2020  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2021 
2022  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2023  return nodes_iterator(G->getFunc().begin());
2024  }
2025 
2026  static nodes_iterator nodes_end(const PGOUseFunc *G) {
2027  return nodes_iterator(G->getFunc().end());
2028  }
2029 };
2030 
2031 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2032  explicit DOTGraphTraits(bool isSimple = false)
2034 
2035  static std::string getGraphName(const PGOUseFunc *G) {
2036  return std::string(G->getFunc().getName());
2037  }
2038 
2039  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2040  std::string Result;
2041  raw_string_ostream OS(Result);
2042 
2043  OS << getSimpleNodeName(Node) << ":\\l";
2044  UseBBInfo *BI = Graph->findBBInfo(Node);
2045  OS << "Count : ";
2046  if (BI && BI->CountValid)
2047  OS << BI->CountValue << "\\l";
2048  else
2049  OS << "Unknown\\l";
2050 
2051  if (!PGOInstrSelect)
2052  return Result;
2053 
2054  for (const Instruction &I : *Node) {
2055  if (!isa<SelectInst>(&I))
2056  continue;
2057  // Display scaled counts for SELECT instruction:
2058  OS << "SELECT : { T = ";
2059  uint64_t TC, FC;
2060  bool HasProf = I.extractProfMetadata(TC, FC);
2061  if (!HasProf)
2062  OS << "Unknown, F = Unknown }\\l";
2063  else
2064  OS << TC << ", F = " << FC << " }\\l";
2065  }
2066  return Result;
2067  }
2068 };
2069 
2070 } // end namespace llvm
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
DoComdatRenaming
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
llvm::SuccIterator
Definition: CFG.h:138
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:425
Instrumentation.h
llvm::createPGOFuncNameMetadata
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1099
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:299
llvm::PGOInstrumentationUse::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1920
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
fixFuncEntryCount
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
Definition: PGOInstrumentation.cpp:1604
llvm::DOTGraphTraits< PGOUseFunc * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: PGOInstrumentation.cpp:2032
llvm::APFloatBase::cmpGreaterThan
@ cmpGreaterThan
Definition: APFloat.h:183
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
c2
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps c2(%esp) ... xorps %xmm0
Comdat.h
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::Comdat::getName
StringRef getName() const
Definition: Comdat.cpp:28
llvm::JamCRC::update
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
ProfileCount
Function::ProfileCount ProfileCount
Definition: PGOInstrumentation.cpp:124
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1410
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::InstrProfError::message
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:258
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
PGOFunctionEntryCoverage
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Use this option to enable function entry coverage instrumentation."))
llvm::CFGMST::getBBInfo
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:83
llvm::Function
Definition: Function.h:60
ProfileSummary.h
StringRef.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::DOTGraphTraits< PGOUseFunc * >::getNodeLabel
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
Definition: PGOInstrumentation.cpp:2039
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::ProfileSummaryInfo::getOrCompHotCountThreshold
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:317
PGOTestProfileRemappingFile
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
llvm::getPGOFuncName
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Return the modified name for function F suitable to be used the key for profile lookup.
Definition: InstrProf.cpp:302
sumEdgeCount
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
Definition: PGOInstrumentation.cpp:986
ErrorHandling.h
llvm::InstrProfError::get
instrprof_error get() const
Definition: InstrProf.h:348
llvm::IRBuilder<>
PGOVerifyBFI
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2258
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
llvm::GlobalAlias
Definition: GlobalAlias.h:28
Error.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createPGOFuncNameVar
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:381
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::coverage::accessors::getFuncHash
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
Definition: CoverageMapping.h:773
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
APInt.h
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
PGOInstrSelect
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1909
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::SplitCriticalEdge
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition: BreakCriticalEdges.cpp:101
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::GraphTraits< PGOUseFunc * >::nodes_end
static nodes_iterator nodes_end(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2026
EHPersonalities.h
llvm::CFGMST::AllEdges
std::vector< std::unique_ptr< Edge > > AllEdges
Definition: CFGMST.h:45
llvm::TinyPtrVector::front
EltTy front() const
Definition: TinyPtrVector.h:230
llvm::Optional
Definition: APInt.h:33
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::JamCRC
Definition: CRC.h:45
MaxNumMemOPAnnotations
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
llvm::GraphTraits< PGOUseFunc * >::child_end
static ChildIteratorType child_end(const NodeRef N)
Definition: PGOInstrumentation.cpp:2020
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:41
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::createProfileFileNameVar
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1182
llvm::CmpInst::getPredicateName
static StringRef getPredicateName(Predicate P)
Definition: Instructions.cpp:3935
CFGMST.h
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:182
llvm::PGOVCT_Text
@ PGOVCT_Text
Definition: BlockFrequencyInfo.h:33
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::InstrProfRecord
Profiling information for a single function.
Definition: InstrProf.h:725
F
#define F(x, y, z)
Definition: MD5.cpp:55
InstrProfData.inc
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1202
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1368
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
MaxNumAnnotations
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of annotations for a single indirect " "call callsite"))
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
PGOWarnMissing
static cl::opt< bool > PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about missing profile data for " "functions."))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
Instruction.h
PGOInstrumentEntry
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
CommandLine.h
llvm::APFloat::getZero
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:885
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:777
llvm::PGOVCT_Graph
@ PGOVCT_Graph
Definition: BlockFrequencyInfo.h:33
llvm::BranchProbabilityAnalysis
Analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:412
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::ProfileSummaryInfo::isColdCount
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
Definition: ProfileSummaryInfo.cpp:293
GlobalValue.h
llvm::calculateCountScale
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
Definition: Instrumentation.h:171
PGOViewRawCounts
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Constants.h
llvm::ValueProfileCollector::CandidateInfo::AnnotatedInst
Instruction * AnnotatedInst
Definition: ValueProfileCollector.h:62
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
getBranchCondString
static std::string getBranchCondString(Instruction *TI)
Definition: PGOInstrumentation.cpp:308
llvm::PGOInstrumentationGen::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1583
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::setIrrLoopHeaderMetadata
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
Definition: PGOInstrumentation.cpp:2001
Twine.h
InstrTypes.h
llvm::Type::print
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Definition: AsmWriter.cpp:4553
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:113
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::MDTuple
Tuple of metadata.
Definition: Metadata.h:1241
llvm::canRenameComdatFunc
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1159
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::InstrProfRecord::getNumValueSites
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:916
TargetLibraryInfo.h
CRC.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Instruction
Definition: Instruction.h:42
ColdCountThreshold
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
InstrProf.h
MDBuilder.h
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:109
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:187
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
PGOFixEntryCount
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
llvm::OperandBundleDef
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::CFGMST
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
llvm::Comdat
Definition: Comdat.h:33
llvm::scaleBranchCount
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
Definition: Instrumentation.h:181
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::GraphTraits< PGOUseFunc * >::getEntryNode
static NodeRef getEntryNode(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2012
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::ErrorInfoBase
Base class for error info classes.
Definition: Error.h:46
llvm::ErrorInfoBase::message
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:54
Type.h
BranchProbability.h
CFG.h
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:166
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3180
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
getSimpleNodeName
static std::string getSimpleNodeName(const BasicBlock *Node)
Definition: PGOInstrumentation.cpp:1943
PGOVerifyBFICutoff
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
EmitBranchProbability
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
instrumentOneFunc
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
Definition: PGOInstrumentation.cpp:818
llvm::MDBuilder::createString
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
PGOVerifyHotBFI
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
llvm::pointer_iterator
Definition: iterator.h:344
llvm::HighlightColor::Remark
@ Remark
llvm::IndexedInstrProfReader::getInstrProfRecord
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Definition: InstrProfReader.cpp:1030
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::LLVMContext::OB_funclet
@ OB_funclet
Definition: LLVMContext.h:91
llvm::APFloat
Definition: APFloat.h:701
InstrProfReader.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
BranchProbabilityInfo.h
llvm::PGOViewCounts
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Definition: PGOInstrumentation.cpp:293
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
llvm::ProfileSummaryInfo::isHotCount
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Definition: ProfileSummaryInfo.cpp:289
uint64_t
llvm::colorEHFunclets
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
Definition: EHPersonalities.cpp:81
ProfileSummaryInfo.h
PGOOldCFGHashing
static cl::opt< bool > PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, cl::desc("Use the old CFG function hashing"))
setBranchWeights
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
Definition: SimplifyCFG.cpp:856
MisExpect.h
llvm::IndexedInstrProfReader
Reader for the indexed binary instrprof format.
Definition: InstrProfReader.h:552
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::GlobalValue::WeakAnyLinkage
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
llvm::InstrProfError
Definition: InstrProf.h:333
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
PGOInstrumentation.h
llvm::DenseMap
Definition: DenseMap.h:716
iterator.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
PGOVerifyBFIRatio
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
ArrayRef.h
llvm::misexpect::checkExpectAnnotations
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:237
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::DiagnosticInfoPGOProfile
Diagnostic information for the PGO profiler.
Definition: DiagnosticInfo.h:330
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1665
llvm::PGOVCT_None
@ PGOVCT_None
Definition: BlockFrequencyInfo.h:33
llvm::instrprof_error::unknown_function
@ unknown_function
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::SplitIndirectBrCriticalEdges
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Definition: BreakCriticalEdges.cpp:342
llvm::WriteGraph
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1737
iterator_range.h
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::MDTuple::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1267
annotateAllFunctions
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
Definition: PGOInstrumentation.cpp:1725
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::InstrProfValueKind
InstrProfValueKind
Definition: InstrProf.h:239
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
llvm::IndexedInstrProfReader::create
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Definition: InstrProfReader.cpp:119
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOInstrumentation.cpp:127
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
Triple.h
llvm::GetSuccessorNumber
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1588
CFG.h
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::ProfileSummaryInfo::refresh
void refresh()
If no summary is present, attempt to refresh.
Definition: ProfileSummaryInfo.cpp:57
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::DOTGraphTraits< PGOUseFunc * >::getGraphName
static std::string getGraphName(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2035
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::NamedInstrProfRecord::setCSFlagInHash
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:897
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:173
llvm::TinyPtrVector::size
unsigned size() const
Definition: TinyPtrVector.h:172
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
InstVisitor.h
PGOInstrMemOP
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:29
llvm::PGOInstrumentationUse::PGOInstrumentationUse
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false)
Definition: PGOInstrumentation.cpp:1909
llvm::ConstantInt::isMinusOne
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:206
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:660
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:78
BlockFrequencyInfo.h
llvm::ProfileSummary::PSK_CSInstr
@ PSK_CSInstr
Definition: ProfileSummary.h:47
llvm::GlobalValue::AvailableExternallyLinkage
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::instrprof_error::hash_mismatch
@ hash_mismatch
llvm::NoPGOWarnMismatch
cl::opt< bool > NoPGOWarnMismatch
llvm::GraphTraits< PGOUseFunc * >::nodes_begin
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2022
llvm::PGOInstrumentationGenCreateVar::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1575
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:994
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:98
annotateFunctionWithHashMismatch
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
Definition: PGOInstrumentation.cpp:1192
Attributes.h
llvm::ValueProfileCollector
Utility analysis that determines what values are worth profiling.
Definition: ValueProfileCollector.h:57
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
ValueProfKindDescr
static const char * ValueProfKindDescr[]
Definition: PGOInstrumentation.cpp:339
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
GraphWriter.h
std
Definition: BitVector.h:851
ValueProfileCollector.h
llvm::JamCRC::getCRC
uint32_t getCRC() const
Definition: CRC.h:52
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:480
canRenameComdat
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:636
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::ProfileSummaryInfo::getOrCompColdCountThreshold
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:321
GlobalVariable.h
Casting.h
DiagnosticInfo.h
Function.h
DOTGraphTraits.h
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
llvm::ValueProfileCollector::CandidateInfo
Definition: ValueProfileCollector.h:59
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:662
getInstrBB
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
Definition: GCOVProfiling.cpp:702
llvm::cl::value_desc
Definition: CommandLine.h:414
llvm::CFGMST::BBInfos
DenseMap< const BasicBlock *, std::unique_ptr< BBInfo > > BBInfos
Definition: CFGMST.h:48
llvm::CFGMST::addEdge
Edge & addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:260
isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:664
GlobalAlias.h
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::Comdat::setSelectionKind
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
Instructions.h
llvm::Constant::getIntegerValue
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:378
llvm::support::endian::write64le
void write64le(void *P, uint64_t V)
Definition: Endian.h:417
SmallVector.h
llvm::GraphTraits< PGOUseFunc * >::child_begin
static ChildIteratorType child_begin(const NodeRef N)
Definition: PGOInstrumentation.cpp:2016
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:208
Dominators.h
DisableValueProfiling
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
ModuleUtils.h
llvm::ProfileSummary::PSK_Instr
@ PSK_Instr
Definition: ProfileSummary.h:47
N
#define N
createIRLevelProfileFlagVar
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
Definition: PGOInstrumentation.cpp:346
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:809
PGOTestProfileFile
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:86
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:1953
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::ProfileSummaryInfo::getHotCountThreshold
uint64_t getHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.h:172
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Definition: DOTGraphTraits.h:28
llvm::instrprof_error::malformed
@ malformed
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
verifyFuncBFI
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
Definition: PGOInstrumentation.cpp:1651
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::ConstantInt::isOne
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
populateEHOperandBundle
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
Definition: PGOInstrumentation.cpp:788
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ViewBlockFreqFuncName
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
Definition: MachineBlockFrequencyInfo.cpp:66
isIndirectBrTarget
static bool isIndirectBrTarget(BasicBlock *BB)
Definition: PGOInstrumentation.cpp:1411
llvm::GraphTraits
Definition: GraphTraits.h:37
LLVMContext.h
llvm::const_succ_iterator
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
llvm::IndexedInstrProfReader::getMaximumFunctionCount
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Definition: InstrProfReader.h:636
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
llvm::Comdat::getSelectionKind
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
raw_ostream.h
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:182
BasicBlockUtils.h
llvm::GlobalValue::LinkOnceODRLinkage
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
collectComdatMembers
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:1534
Value.h
llvm::raw_string_ostream::str
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:650
llvm::CFGMST::dumpEdges
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:241
llvm::handleAllErrors
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:966
llvm::DebugInfoCorrelate
cl::opt< bool > DebugInfoCorrelate("debug-info-correlate", cl::ZeroOrMore, cl::desc("Use debug info to correlate profiles."), cl::init(false))
Definition: PGOInstrumentation.cpp:299
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::CFGMST::findBBInfo
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:90
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::InstrProfRecord::Counts
std::vector< uint64_t > Counts
Definition: InstrProf.h:726
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3178
llvm::ValueProfileCollector::get
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
Definition: ValueProfileCollector.cpp:73
NoPGOWarnMismatchComdat
static cl::opt< bool > NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "functions."))
llvm::isFuncletEHPersonality
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
Definition: EHPersonalities.h:64
llvm::MDBuilder::createIrrLoopHeaderWeight
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:301
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
InstrumentAllFunctions
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
Definition: PGOInstrumentation.cpp:1550