LLVM  13.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 // Issue 3, pp 313-322
14 // The idea of the algorithm is based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotate the indirect call instructions.
32 //
33 // To get precise counter information, these two passes need to be invoked at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37 // the profile is opened at module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are the
44 // derived classes of PGOEdge and BBInfo, respectively. They contain extra data
45 // structures used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
51 #include "CFGMST.h"
52 #include "ValueProfileCollector.h"
53 #include "llvm/ADT/APInt.h"
54 #include "llvm/ADT/ArrayRef.h"
55 #include "llvm/ADT/MapVector.h"
56 #include "llvm/ADT/STLExtras.h"
57 #include "llvm/ADT/SmallVector.h"
58 #include "llvm/ADT/Statistic.h"
59 #include "llvm/ADT/StringRef.h"
60 #include "llvm/ADT/Triple.h"
61 #include "llvm/ADT/Twine.h"
62 #include "llvm/ADT/iterator.h"
66 #include "llvm/Analysis/CFG.h"
68 #include "llvm/Analysis/LoopInfo.h"
71 #include "llvm/IR/Attributes.h"
72 #include "llvm/IR/BasicBlock.h"
73 #include "llvm/IR/CFG.h"
74 #include "llvm/IR/Comdat.h"
75 #include "llvm/IR/Constant.h"
76 #include "llvm/IR/Constants.h"
77 #include "llvm/IR/DiagnosticInfo.h"
78 #include "llvm/IR/Dominators.h"
79 #include "llvm/IR/Function.h"
80 #include "llvm/IR/GlobalAlias.h"
81 #include "llvm/IR/GlobalValue.h"
82 #include "llvm/IR/GlobalVariable.h"
83 #include "llvm/IR/IRBuilder.h"
84 #include "llvm/IR/InstVisitor.h"
85 #include "llvm/IR/InstrTypes.h"
86 #include "llvm/IR/Instruction.h"
87 #include "llvm/IR/Instructions.h"
88 #include "llvm/IR/IntrinsicInst.h"
89 #include "llvm/IR/Intrinsics.h"
90 #include "llvm/IR/LLVMContext.h"
91 #include "llvm/IR/MDBuilder.h"
92 #include "llvm/IR/Module.h"
93 #include "llvm/IR/PassManager.h"
94 #include "llvm/IR/ProfileSummary.h"
95 #include "llvm/IR/Type.h"
96 #include "llvm/IR/Value.h"
97 #include "llvm/InitializePasses.h"
98 #include "llvm/Pass.h"
102 #include "llvm/Support/CRC.h"
103 #include "llvm/Support/Casting.h"
106 #include "llvm/Support/Debug.h"
107 #include "llvm/Support/Error.h"
113 #include <algorithm>
114 #include <cassert>
115 #include <cstdint>
116 #include <memory>
117 #include <numeric>
118 #include <string>
119 #include <unordered_map>
120 #include <utility>
121 #include <vector>
122 
123 using namespace llvm;
126 
127 #define DEBUG_TYPE "pgo-instrumentation"
128 
129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
132 STATISTIC(NumOfPGOEdge, "Number of edges.");
133 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
137 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
140 STATISTIC(NumOfCSPGOSelectInsts,
141  "Number of select instruction instrumented in CSPGO.");
142 STATISTIC(NumOfCSPGOMemIntrinsics,
143  "Number of mem intrinsics instrumented in CSPGO.");
144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
147 STATISTIC(NumOfCSPGOFunc,
148  "Number of functions having valid profile counts in CSPGO.");
149 STATISTIC(NumOfCSPGOMismatch,
150  "Number of functions having mismatch profile in CSPGO.");
151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
152 
153 // Command line option to specify the file to read profile from. This is
154 // mainly used for testing.
156  PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
157  cl::value_desc("filename"),
158  cl::desc("Specify the path of profile data file. This is"
159  "mainly for test purpose."));
161  "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
162  cl::value_desc("filename"),
163  cl::desc("Specify the path of profile remapping file. This is mainly for "
164  "test purpose."));
165 
166 // Command line option to disable value profiling. The default is false:
167 // i.e. value profiling is enabled by default. This is for debug purpose.
168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
169  cl::Hidden,
170  cl::desc("Disable Value Profiling"));
171 
172 // Command line option to set the maximum number of VP annotations to write to
173 // the metadata for a single indirect call callsite.
175  "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
176  cl::desc("Max number of annotations for a single indirect "
177  "call callsite"));
178 
179 // Command line option to set the maximum number of value annotations
180 // to write to the metadata for a single memop intrinsic.
182  "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
183  cl::desc("Max number of preicise value annotations for a single memop"
184  "intrinsic"));
185 
186 // Command line option to control appending FunctionHash to the name of a COMDAT
187 // function. This is to avoid the hash mismatch caused by the preinliner.
189  "do-comdat-renaming", cl::init(false), cl::Hidden,
190  cl::desc("Append function hash to the name of COMDAT function to avoid "
191  "function hash mismatch due to the preinliner"));
192 
193 // Command line option to enable/disable the warning about missing profile
194 // information.
195 static cl::opt<bool>
196  PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
197  cl::desc("Use this option to turn on/off "
198  "warnings about missing profile data for "
199  "functions."));
200 
201 // Command line option to enable/disable the warning about a hash mismatch in
202 // the profile data.
203 static cl::opt<bool>
204  NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
205  cl::desc("Use this option to turn off/on "
206  "warnings about profile cfg mismatch."));
207 
208 // Command line option to enable/disable the warning about a hash mismatch in
209 // the profile data for Comdat functions, which often turns out to be false
210 // positive due to the pre-instrumentation inline.
211 static cl::opt<bool>
212  NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
213  cl::Hidden,
214  cl::desc("The option is used to turn on/off "
215  "warnings about hash mismatch for comdat "
216  "functions."));
217 
218 // Command line option to enable/disable select instruction instrumentation.
219 static cl::opt<bool>
220  PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
221  cl::desc("Use this option to turn on/off SELECT "
222  "instruction instrumentation. "));
223 
224 // Command line option to turn on CFG dot or text dump of raw profile counts
226  "pgo-view-raw-counts", cl::Hidden,
227  cl::desc("A boolean option to show CFG dag or text "
228  "with raw profile counts from "
229  "profile data. See also option "
230  "-pgo-view-counts. To limit graph "
231  "display to only one function, use "
232  "filtering option -view-bfi-func-name."),
233  cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
234  clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
235  clEnumValN(PGOVCT_Text, "text", "show in text.")));
236 
237 // Command line option to enable/disable memop intrinsic call.size profiling.
238 static cl::opt<bool>
239  PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
240  cl::desc("Use this option to turn on/off "
241  "memory intrinsic size profiling."));
242 
243 // Emit branch probability as optimization remarks.
244 static cl::opt<bool>
245  EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
246  cl::desc("When this option is on, the annotated "
247  "branch probability will be emitted as "
248  "optimization remarks: -{Rpass|"
249  "pass-remarks}=pgo-instrumentation"));
250 
252  "pgo-instrument-entry", cl::init(false), cl::Hidden,
253  cl::desc("Force to instrument function entry basicblock."));
254 
255 static cl::opt<bool>
256  PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
257  cl::desc("Fix function entry count in profile use."));
258 
260  "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
261  cl::desc("Print out the non-match BFI count if a hot raw profile count "
262  "becomes non-hot, or a cold raw profile count becomes hot. "
263  "The print is enabled under -Rpass-analysis=pgo, or "
264  "internal option -pass-remakrs-analysis=pgo."));
265 
267  "pgo-verify-bfi", cl::init(false), cl::Hidden,
268  cl::desc("Print out mismatched BFI counts after setting profile metadata "
269  "The print is enabled under -Rpass-analysis=pgo, or "
270  "internal option -pass-remakrs-analysis=pgo."));
271 
273  "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
274  cl::desc("Set the threshold for pgo-verify-big -- only print out "
275  "mismatched BFI if the difference percentage is greater than "
276  "this value (in percentage)."));
277 
279  "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
280  cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
281  "profile count value is below."));
282 
283 // Command line option to turn on CFG dot dump after profile annotation.
284 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
286 
287 // Command line option to specify the name of the function for CFG dump
288 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
290 
291 static cl::opt<bool>
292  PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
293  cl::desc("Use the old CFG function hashing"));
294 
295 // Return a string describing the branch condition that can be
296 // used in static branch probability heuristics:
297 static std::string getBranchCondString(Instruction *TI) {
298  BranchInst *BI = dyn_cast<BranchInst>(TI);
299  if (!BI || !BI->isConditional())
300  return std::string();
301 
302  Value *Cond = BI->getCondition();
303  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
304  if (!CI)
305  return std::string();
306 
307  std::string result;
309  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
310  CI->getOperand(0)->getType()->print(OS, true);
311 
312  Value *RHS = CI->getOperand(1);
313  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
314  if (CV) {
315  if (CV->isZero())
316  OS << "_Zero";
317  else if (CV->isOne())
318  OS << "_One";
319  else if (CV->isMinusOne())
320  OS << "_MinusOne";
321  else
322  OS << "_Const";
323  }
324  OS.flush();
325  return result;
326 }
327 
328 static const char *ValueProfKindDescr[] = {
329 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
331 };
332 
333 namespace {
334 
335 /// The select instruction visitor plays three roles specified
336 /// by the mode. In \c VM_counting mode, it simply counts the number of
337 /// select instructions. In \c VM_instrument mode, it inserts code to count
338 /// the number of times TrueValue of select is taken. In \c VM_annotate mode,
339 /// it reads the profile data and annotates the select instruction with metadata.
340 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
341 class PGOUseFunc;
342 
343 /// Instruction Visitor class to visit select instructions.
344 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
345  Function &F;
346  unsigned NSIs = 0; // Number of select instructions instrumented.
347  VisitMode Mode = VM_counting; // Visiting mode.
348  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
349  unsigned TotalNumCtrs = 0; // Total number of counters
350  GlobalVariable *FuncNameVar = nullptr;
351  uint64_t FuncHash = 0;
352  PGOUseFunc *UseFunc = nullptr;
353 
354  SelectInstVisitor(Function &Func) : F(Func) {}
355 
356  void countSelects(Function &Func) {
357  NSIs = 0;
358  Mode = VM_counting;
359  visit(Func);
360  }
361 
362  // Visit the IR stream and instrument all select instructions. \p
363  // Ind is a pointer to the counter index variable; \p TotalNC
364  // is the total number of counters; \p FNV is the pointer to the
365  // PGO function name var; \p FHash is the function hash.
366  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
367  GlobalVariable *FNV, uint64_t FHash) {
368  Mode = VM_instrument;
369  CurCtrIdx = Ind;
370  TotalNumCtrs = TotalNC;
371  FuncHash = FHash;
372  FuncNameVar = FNV;
373  visit(Func);
374  }
375 
376  // Visit the IR stream and annotate all select instructions.
377  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
378  Mode = VM_annotate;
379  UseFunc = UF;
380  CurCtrIdx = Ind;
381  visit(Func);
382  }
383 
384  void instrumentOneSelectInst(SelectInst &SI);
385  void annotateOneSelectInst(SelectInst &SI);
386 
387  // Visit \p SI instruction and perform tasks according to visit mode.
388  void visitSelectInst(SelectInst &SI);
389 
390  // Return the number of select instructions. This needs be called after
391  // countSelects().
392  unsigned getNumOfSelectInsts() const { return NSIs; }
393 };
394 
395 
396 class PGOInstrumentationGenLegacyPass : public ModulePass {
397 public:
398  static char ID;
399 
400  PGOInstrumentationGenLegacyPass(bool IsCS = false)
401  : ModulePass(ID), IsCS(IsCS) {
404  }
405 
406  StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
407 
408 private:
409  // Is this is context-sensitive instrumentation.
410  bool IsCS;
411  bool runOnModule(Module &M) override;
412 
413  void getAnalysisUsage(AnalysisUsage &AU) const override {
416  }
417 };
418 
419 class PGOInstrumentationUseLegacyPass : public ModulePass {
420 public:
421  static char ID;
422 
423  // Provide the profile filename as the parameter.
424  PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
425  : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
426  if (!PGOTestProfileFile.empty())
427  ProfileFileName = PGOTestProfileFile;
430  }
431 
432  StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
433 
434 private:
435  std::string ProfileFileName;
436  // Is this is context-sensitive instrumentation use.
437  bool IsCS;
438 
439  bool runOnModule(Module &M) override;
440 
441  void getAnalysisUsage(AnalysisUsage &AU) const override {
445  }
446 };
447 
448 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
449 public:
450  static char ID;
451  StringRef getPassName() const override {
452  return "PGOInstrumentationGenCreateVarPass";
453  }
454  PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
455  : ModulePass(ID), InstrProfileOutput(CSInstrName) {
458  }
459 
460 private:
461  bool runOnModule(Module &M) override {
462  createProfileFileNameVar(M, InstrProfileOutput);
463  createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
464  return false;
465  }
466  std::string InstrProfileOutput;
467 };
468 
469 } // end anonymous namespace
470 
472 
473 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
474  "PGO instrumentation.", false, false)
478 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
480 
482  return new PGOInstrumentationGenLegacyPass(IsCS);
483 }
484 
486 
487 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
488  "Read PGO instrumentation profile.", false, false)
492 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
494 
496  bool IsCS) {
497  return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
498 }
499 
501 
502 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
503  "pgo-instr-gen-create-var",
504  "Create PGO instrumentation version variable for CSPGO.", false,
505  false)
506 
507 ModulePass *
509  return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName));
510 }
511 
512 namespace {
513 
514 /// An MST based instrumentation for PGO
515 ///
516 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
517 /// in the function level.
518 struct PGOEdge {
519  // This class implements the CFG edges. Note the CFG can be a multi-graph.
520  // So there might be multiple edges with same SrcBB and DestBB.
521  const BasicBlock *SrcBB;
522  const BasicBlock *DestBB;
523  uint64_t Weight;
524  bool InMST = false;
525  bool Removed = false;
526  bool IsCritical = false;
527 
528  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
529  : SrcBB(Src), DestBB(Dest), Weight(W) {}
530 
531  // Return the information string of an edge.
532  std::string infoString() const {
533  return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
534  (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
535  }
536 };
537 
538 // This class stores the auxiliary information for each BB.
539 struct BBInfo {
540  BBInfo *Group;
541  uint32_t Index;
542  uint32_t Rank = 0;
543 
544  BBInfo(unsigned IX) : Group(this), Index(IX) {}
545 
546  // Return the information string of this object.
547  std::string infoString() const {
548  return (Twine("Index=") + Twine(Index)).str();
549  }
550 
551  // Empty function -- only applicable to UseBBInfo.
552  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
553 
554  // Empty function -- only applicable to UseBBInfo.
555  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
556 };
557 
558 // Per-function PGO state shared by instrumentation generation and profile use: the CFG MST, the CFG hash, the value sites and the select visitor.
559 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
560 private:
561  Function &F;
562 
563  // Is this is context-sensitive instrumentation.
564  bool IsCS;
565 
566  // A map that stores the Comdat group in function F.
567  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
568 
570 
571  void computeCFGHash();
572  void renameComdatFunction();
573 
574 public:
575  std::vector<std::vector<VPCandidateInfo>> ValueSites;
576  SelectInstVisitor SIVisitor;
577  std::string FuncName;
578  GlobalVariable *FuncNameVar;
579 
580  // CFG hash value for this function.
581  uint64_t FunctionHash = 0;
582 
583  // The Minimum Spanning Tree of function CFG.
585 
586  // Collect all the BBs that will be instrumented, and store them in
587  // InstrumentBBs.
588  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
589 
590  // Give an edge, find the BB that will be instrumented.
591  // Return nullptr if there is no BB to be instrumented.
592  BasicBlock *getInstrBB(Edge *E);
593 
594  // Return the auxiliary BB information.
595  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
596 
597  // Return the auxiliary BB information if available.
598  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
599 
600  // Dump edges and BB information.
601  void dumpInfo(std::string Str = "") const {
602  MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
603  Twine(FunctionHash) + "\t" + Str);
604  }
605 
606  FuncPGOInstrumentation(
607  Function &Func, TargetLibraryInfo &TLI,
608  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
609  bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
610  BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
611  bool InstrumentFuncEntry = true)
612  : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
613  ValueSites(IPVK_Last + 1), SIVisitor(Func),
614  MST(F, InstrumentFuncEntry, BPI, BFI) {
615  // This should be done before CFG hash computation.
616  SIVisitor.countSelects(Func);
617  ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
618  if (!IsCS) {
619  NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
620  NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
621  NumOfPGOBB += MST.BBInfos.size();
622  ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
623  } else {
624  NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
625  NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
626  NumOfCSPGOBB += MST.BBInfos.size();
627  }
628 
629  FuncName = getPGOFuncName(F);
630  computeCFGHash();
631  if (!ComdatMembers.empty())
632  renameComdatFunction();
633  LLVM_DEBUG(dumpInfo("after CFGMST"));
634 
635  for (auto &E : MST.AllEdges) {
636  if (E->Removed)
637  continue;
638  IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
639  if (!E->InMST)
640  IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
641  }
642 
643  if (CreateGlobalVar)
644  FuncNameVar = createPGOFuncNameVar(F, FuncName);
645  }
646 };
647 
648 } // end anonymous namespace
649 
650 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
651 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
652 // of selects, indirect calls, mem ops and edges.
653 template <class Edge, class BBInfo>
654 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
655  std::vector<uint8_t> Indexes;
656  JamCRC JC;
657  for (auto &BB : F) {
658  const Instruction *TI = BB.getTerminator();
659  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
660  BasicBlock *Succ = TI->getSuccessor(I);
661  auto BI = findBBInfo(Succ);
662  if (BI == nullptr)
663  continue;
664  uint32_t Index = BI->Index;
665  for (int J = 0; J < 4; J++)
666  Indexes.push_back((uint8_t)(Index >> (J * 8)));
667  }
668  }
669  JC.update(Indexes);
670 
671  JamCRC JCH;
672  if (PGOOldCFGHashing) {
673  // Hash format for context sensitive profile. Reserve 4 bits for other
674  // information.
675  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
676  (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
677  //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
678  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
679  } else {
680  // The higher 32 bits.
681  auto updateJCH = [&JCH](uint64_t Num) {
682  uint8_t Data[8];
684  JCH.update(Data);
685  };
686  updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
687  updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
688  updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
689  updateJCH((uint64_t)MST.AllEdges.size());
690 
691  // Hash format for context sensitive profile. Reserve 4 bits for other
692  // information.
693  FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
694  }
695 
696  // Reserve bit 60-63 for other information purpose.
697  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
698  if (IsCS)
700  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
701  << " CRC = " << JC.getCRC()
702  << ", Selects = " << SIVisitor.getNumOfSelectInsts()
703  << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
704  << ValueSites[IPVK_IndirectCallTarget].size());
705  if (!PGOOldCFGHashing) {
706  LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
707  << ", High32 CRC = " << JCH.getCRC());
708  }
709  LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
710 }
711 
712 // Check if we can safely rename this Comdat function.
713 static bool canRenameComdat(
714  Function &F,
715  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
716  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
717  return false;
718 
719  // FIXME: Current only handle those Comdat groups that only containing one
720  // function.
721  // (1) For a Comdat group containing multiple functions, we need to have a
722  // unique postfix based on the hashes for each function. There is a
723  // non-trivial code refactoring to do this efficiently.
724  // (2) Variables can not be renamed, so we can not rename Comdat function in a
725  // group including global vars.
726  Comdat *C = F.getComdat();
727  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
728  assert(!isa<GlobalAlias>(CM.second));
729  Function *FM = dyn_cast<Function>(CM.second);
730  if (FM != &F)
731  return false;
732  }
733  return true;
734 }
735 
736 // Append the CFGHash to the Comdat function name.
737 template <class Edge, class BBInfo>
738 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
739  if (!canRenameComdat(F, ComdatMembers))
740  return;
741  std::string OrigName = F.getName().str();
742  std::string NewFuncName =
743  Twine(F.getName() + "." + Twine(FunctionHash)).str();
744  F.setName(Twine(NewFuncName));
746  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
747  Comdat *NewComdat;
748  Module *M = F.getParent();
749  // For AvailableExternallyLinkage functions, change the linkage to
750  // LinkOnceODR and put them into comdat. This is because after renaming, there
751  // is no backup external copy available for the function.
752  if (!F.hasComdat()) {
754  NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
756  F.setComdat(NewComdat);
757  return;
758  }
759 
760  // This function belongs to a single function Comdat group.
761  Comdat *OrigComdat = F.getComdat();
762  std::string NewComdatName =
763  Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
764  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
765  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
766 
767  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
768  // Must be a function.
769  cast<Function>(CM.second)->setComdat(NewComdat);
770  }
771 }
772 
773 // Collect all the BBs that will be instruments and return them in
774 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
775 template <class Edge, class BBInfo>
776 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
777  std::vector<BasicBlock *> &InstrumentBBs) {
778  // Use a worklist as we will update the vector during the iteration.
779  std::vector<Edge *> EdgeList;
780  EdgeList.reserve(MST.AllEdges.size());
781  for (auto &E : MST.AllEdges)
782  EdgeList.push_back(E.get());
783 
784  for (auto &E : EdgeList) {
785  BasicBlock *InstrBB = getInstrBB(E);
786  if (InstrBB)
787  InstrumentBBs.push_back(InstrBB);
788  }
789 
790  // Set up InEdges/OutEdges for all BBs.
791  for (auto &E : MST.AllEdges) {
792  if (E->Removed)
793  continue;
794  const BasicBlock *SrcBB = E->SrcBB;
795  const BasicBlock *DestBB = E->DestBB;
796  BBInfo &SrcInfo = getBBInfo(SrcBB);
797  BBInfo &DestInfo = getBBInfo(DestBB);
798  SrcInfo.addOutEdge(E.get());
799  DestInfo.addInEdge(E.get());
800  }
801 }
802 
803 // Given a CFG E to be instrumented, find which BB to place the instrumented
804 // code. The function will split the critical edge if necessary.
805 template <class Edge, class BBInfo>
807  if (E->InMST || E->Removed)
808  return nullptr;
809 
810  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
811  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
812  // For a fake edge, instrument the real BB.
813  if (SrcBB == nullptr)
814  return DestBB;
815  if (DestBB == nullptr)
816  return SrcBB;
817 
818  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
819  // There are basic blocks (such as catchswitch) cannot be instrumented.
820  // If the returned first insertion point is the end of BB, skip this BB.
821  if (BB->getFirstInsertionPt() == BB->end())
822  return nullptr;
823  return BB;
824  };
825 
826  // Instrument the SrcBB if it has a single successor,
827  // otherwise, the DestBB if this is not a critical edge.
828  Instruction *TI = SrcBB->getTerminator();
829  if (TI->getNumSuccessors() <= 1)
830  return canInstrument(SrcBB);
831  if (!E->IsCritical)
832  return canInstrument(DestBB);
833 
834  // Some IndirectBr critical edges cannot be split by the previous
835  // SplitIndirectBrCriticalEdges call. Bail out.
836  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
837  BasicBlock *InstrBB =
838  isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
839  if (!InstrBB) {
840  LLVM_DEBUG(
841  dbgs() << "Fail to split critical edge: not instrument this edge.\n");
842  return nullptr;
843  }
844  // For a critical edge, we have to split. Instrument the newly
845  // created BB.
846  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
847  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
848  << " --> " << getBBInfo(DestBB).Index << "\n");
849  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
850  MST.addEdge(SrcBB, InstrBB, 0);
851  // Second one: Add new edge of InstrBB->DestBB.
852  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
853  NewEdge1.InMST = true;
854  E->Removed = true;
855 
856  return canInstrument(InstrBB);
857 }
858 
859 // When generating value profiling calls in Windows routines that make use of
860 // handler funclets for exception processing, an operand bundle needs to be attached
861 // to the called function. This routine will set \p OpBundles to contain the
862 // funclet information, if any is needed, that should be placed on the generated
863 // value profiling call for the value profile candidate call.
// NOTE(review): the line(s) holding this function's name and parameter list are
// absent from this listing; the visible call site passes
// (Cand, BlockColors, OpBundles).
864 static void
868  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
869  if (OrigCall && !isa<IntrinsicInst>(OrigCall)) {
870  // The instrumentation call should belong to the same funclet as a
871  // non-intrinsic call, so just copy the operand bundle, if any exists.
872  Optional<OperandBundleUse> ParentFunclet =
873  OrigCall->getOperandBundle(LLVMContext::OB_funclet);
874  if (ParentFunclet)
875  OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
876  } else {
877  // Intrinsics or other instructions do not get funclet information from the
878  // front-end. Need to use the BlockColors that was computed by the routine
879  // colorEHFunclets to determine whether a funclet is needed.
// NOTE(review): this branch is also reached when the dyn_cast above yields
// null, in which case OrigCall->getParent() below dereferences a null pointer.
// The result of find() is likewise used without an end() check. Confirm that
// every candidate is a CallBase whose block has been colored.
880  if (!BlockColors.empty()) {
881  const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
882  assert(CV.size() == 1 && "non-unique color for block!");
// The first non-PHI instruction of the (single) color block decides whether
// the candidate lives inside a funclet.
883  Instruction *EHPad = CV.front()->getFirstNonPHI();
884  if (EHPad->isEHPad())
885  OpBundles.emplace_back("funclet", EHPad);
886  }
887  }
888 }
889 
890 // Visit all edges and instrument the edges not in the MST, and do value
891 // profiling. Critical edges will be split.
// NOTE(review): several lines of this function (part of the parameter list and
// a few statements/declarations) are absent from this listing; comments below
// describe only what is visible.
892 static void instrumentOneFunc(
895  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
896  bool IsCS) {
897  // Split indirectbr critical edges here before computing the MST rather than
898  // later in getInstrBB() to avoid invalidating it.
900 
901  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
902  F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
903  std::vector<BasicBlock *> InstrumentBBs;
904  FuncInfo.getInstrumentBBs(InstrumentBBs);
// Total counters = one per instrumented block + one per counted select.
905  unsigned NumCounters =
906  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
907 
// I is the running counter index; it is shared with the select
// instrumentation below so that indices are contiguous.
908  uint32_t I = 0;
909  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
910  for (auto *InstrBB : InstrumentBBs) {
911  IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
912  assert(Builder.GetInsertPoint() != InstrBB->end() &&
913  "Cannot get the Instrumentation point");
// Emit instrprof_increment(name, hash, NumCounters, index) at the first
// insertion point of the block.
914  Builder.CreateCall(
915  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
916  {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
917  Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
918  Builder.getInt32(I++)});
919  }
920 
921  // Now instrument select instructions:
922  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
923  FuncInfo.FunctionHash);
// Every counter slot must have been handed out exactly once.
924  assert(I == NumCounters);
925 
// NOTE(review): the condition guarding this early return is not visible in
// this listing.
927  return;
928 
929  NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
930 
931  // Intrinsic function calls do not have funclet operand bundles needed for
932  // Windows exception handling attached to them. However, if value profiling is
933  // inserted for one of these calls, then a funclet value will need to be set
934  // on the instrumentation call based on the funclet coloring.
// NOTE(review): the declaration of BlockColors is not visible in this listing.
936  if (F.hasPersonalityFn() &&
937  isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
938  BlockColors = colorEHFunclets(F);
939 
940  // For each VP Kind, walk the VP candidates and instrument each one.
941  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at 0 for every value kind.
942  unsigned SiteIndex = 0;
943  if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
944  continue;
945 
946  for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
947  LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
948  << " site: CallSite Index = " << SiteIndex << "\n");
949 
950  IRBuilder<> Builder(Cand.InsertPt);
951  assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
952  "Cannot get the Instrumentation point");
953 
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint. Other types are rejected
// by the assert below.
954  Value *ToProfile = nullptr;
955  if (Cand.V->getType()->isIntegerTy())
956  ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
957  else if (Cand.V->getType()->isPointerTy())
958  ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
959  assert(ToProfile && "value profiling Value is of unexpected type");
960 
// NOTE(review): the declaration of OpBundles is not visible in this listing.
962  populateEHOperandBundle(Cand, BlockColors, OpBundles);
963  Builder.CreateCall(
964  Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
965  {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
966  Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
967  Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
968  OpBundles);
969  }
970  } // IPVK_First <= Kind <= IPVK_Last
971 }
972 
973 namespace {
974 
975 // This class represents a CFG edge in profile use compilation.
976 struct PGOUseEdge : public PGOEdge {
977  bool CountValid = false;
978  uint64_t CountValue = 0;
979 
980  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
981  : PGOEdge(Src, Dest, W) {}
982 
983  // Set edge count value
984  void setEdgeCount(uint64_t Value) {
985  CountValue = Value;
986  CountValid = true;
987  }
988 
989  // Return the information string for this object.
990  std::string infoString() const {
991  if (!CountValid)
992  return PGOEdge::infoString();
993  return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
994  .str();
995  }
996 };
997 
998 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
999 
1000 // This class stores the auxiliary information for each BB.
1001 struct UseBBInfo : public BBInfo {
1002  uint64_t CountValue = 0;
1003  bool CountValid;
1004  int32_t UnknownCountInEdge = 0;
1005  int32_t UnknownCountOutEdge = 0;
1006  DirectEdges InEdges;
1007  DirectEdges OutEdges;
1008 
1009  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
1010 
1011  UseBBInfo(unsigned IX, uint64_t C)
1012  : BBInfo(IX), CountValue(C), CountValid(true) {}
1013 
1014  // Set the profile count value for this BB.
1015  void setBBInfoCount(uint64_t Value) {
1016  CountValue = Value;
1017  CountValid = true;
1018  }
1019 
1020  // Return the information string of this object.
1021  std::string infoString() const {
1022  if (!CountValid)
1023  return BBInfo::infoString();
1024  return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
1025  }
1026 
1027  // Add an OutEdge and update the edge count.
1028  void addOutEdge(PGOUseEdge *E) {
1029  OutEdges.push_back(E);
1030  UnknownCountOutEdge++;
1031  }
1032 
1033  // Add an InEdge and update the edge count.
1034  void addInEdge(PGOUseEdge *E) {
1035  InEdges.push_back(E);
1036  UnknownCountInEdge++;
1037  }
1038 };
1039 
1040 } // end anonymous namespace
1041 
1042 // Sum up the count values for all the edges.
1043 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
1044  uint64_t Total = 0;
1045  for (auto &E : Edges) {
1046  if (E->Removed)
1047  continue;
1048  Total += E->CountValue;
1049  }
1050  return Total;
1051 }
1052 
1053 namespace {
1054 
// Per-function driver for the profile-use compilation. It owns the shared
// FuncPGOInstrumentation state, the profile record read for the function, and
// the methods that turn that record into IR annotations.
// NOTE(review): one constructor parameter line and one data member line are
// absent from this listing (the init list refers to BPI/BFIin and a BFI
// member).
1055 class PGOUseFunc {
1056 public:
1057  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1058  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1060  ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1061  : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1062  FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1063  InstrumentFuncEntry),
1064  FreqAttr(FFA_Normal), IsCS(IsCS) {}
1065 
1066  // Read counts for the instrumented BB from profile.
1067  bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1068  bool &AllMinusOnes);
1069 
1070  // Populate the counts for all BBs.
1071  void populateCounters();
1072 
1073  // Set the branch weights based on the count values.
1074  void setBranchWeights();
1075 
1076  // Annotate the value profile call sites for all value kinds.
1077  void annotateValueSites();
1078 
1079  // Annotate the value profile call sites for one value kind.
1080  void annotateValueSites(uint32_t Kind);
1081 
1082  // Annotate the irreducible loop header weights.
1083  void annotateIrrLoopHeaderWeights();
1084 
1085  // The hotness of the function from the profile count.
1086  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1087 
1088  // Return the function hotness from the profile.
1089  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1090 
1091  // Return the function hash.
1092  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1093 
1094  // Return the profile record for this function.
1095  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1096 
1097  // Return the auxiliary BB information.
1098  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1099  return FuncInfo.getBBInfo(BB);
1100  }
1101 
1102  // Return the auxiliary BB information if available.
1103  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1104  return FuncInfo.findBBInfo(BB);
1105  }
1106 
// Return the function being annotated.
1107  Function &getFunc() const { return F; }
1108 
// Forward to FuncInfo.dumpInfo with the given label.
1109  void dumpInfo(std::string Str = "") const {
1110  FuncInfo.dumpInfo(Str);
1111  }
1112 
// Return the maximum count value in the profile (see ProgramMaxCount).
1113  uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1114 private:
1115  Function &F;
1116  Module *M;
1118  ProfileSummaryInfo *PSI;
1119 
1120  // This member stores the shared information with class PGOGenFunc.
1121  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1122 
1123  // The maximum count value in the profile. This is only used in PGO use
1124  // compilation.
// NOTE(review): there is no in-class initializer and the visible constructor
// init list does not set this member; it is assigned in readCounters() on the
// success path only.
1125  uint64_t ProgramMaxCount;
1126 
1127  // Position of counter that remains to be read.
1128  uint32_t CountPosition = 0;
1129 
1130  // Total size of the profile count for this function.
1131  uint32_t ProfileCountSize = 0;
1132 
1133  // ProfileRecord for this function.
1134  InstrProfRecord ProfileRecord;
1135 
1136  // Function hotness info derived from profile.
1137  FuncFreqAttr FreqAttr;
1138 
1139  // Whether the context-sensitive profile is used.
1140  bool IsCS;
1141 
1142  // Find the Instrumented BB and set the value. Return false on error.
1143  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1144 
1145  // Set the edge counter value for the unknown edge -- there should be only
1146  // one unknown edge.
1147  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1148 
1149  // Return the function name string.
1150  std::string getFuncName() const { return FuncInfo.FuncName; }
1151 
1152  // Set the hot/cold inline hints based on the count values.
1153  // FIXME: This function should be removed once the functionality in
1154  // the inliner is implemented.
// Hot is decided from the entry count; cold is decided from the maximum count
// and only considered when the entry count is not hot.
1155  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1156  if (PSI->isHotCount(EntryCount))
1157  FreqAttr = FFA_Hot;
1158  else if (PSI->isColdCount(MaxCount))
1159  FreqAttr = FFA_Cold;
1160  }
1161 };
1162 
1163 } // end anonymous namespace
1164 
1165 // Visit all the edges and assign the count value for the instrumented
1166 // edges and the BB. Return false on error.
1167 bool PGOUseFunc::setInstrumentedCounts(
1168  const std::vector<uint64_t> &CountFromProfile) {
1169 
1170  std::vector<BasicBlock *> InstrumentBBs;
1171  FuncInfo.getInstrumentBBs(InstrumentBBs);
1172  unsigned NumCounters =
1173  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1174  // The number of counters here should match the number of counters
1175  // in profile. Return if they mismatch.
1176  if (NumCounters != CountFromProfile.size()) {
1177  return false;
1178  }
1179  auto *FuncEntry = &*F.begin();
1180 
1181  // Set the profile count to the Instrumented BBs.
1182  uint32_t I = 0;
1183  for (BasicBlock *InstrBB : InstrumentBBs) {
1184  uint64_t CountValue = CountFromProfile[I++];
1185  UseBBInfo &Info = getBBInfo(InstrBB);
1186  // If we reach here, we know that we have some nonzero count
1187  // values in this function. The entry count should not be 0.
1188  // Fix it if necessary.
1189  if (InstrBB == FuncEntry && CountValue == 0)
1190  CountValue = 1;
1191  Info.setBBInfoCount(CountValue);
1192  }
1193  ProfileCountSize = CountFromProfile.size();
1194  CountPosition = I;
1195 
1196  // Set the edge count and update the count of unknown edges for BBs.
1197  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1198  E->setEdgeCount(Value);
1199  this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1200  this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1201  };
1202 
1203  // Set the profile count the Instrumented edges. There are BBs that not in
1204  // MST but not instrumented. Need to set the edge count value so that we can
1205  // populate the profile counts later.
1206  for (auto &E : FuncInfo.MST.AllEdges) {
1207  if (E->Removed || E->InMST)
1208  continue;
1209  const BasicBlock *SrcBB = E->SrcBB;
1210  UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1211 
1212  // If only one out-edge, the edge profile count should be the same as BB
1213  // profile count.
1214  if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1215  setEdgeCount(E.get(), SrcInfo.CountValue);
1216  else {
1217  const BasicBlock *DestBB = E->DestBB;
1218  UseBBInfo &DestInfo = getBBInfo(DestBB);
1219  // If only one in-edge, the edge profile count should be the same as BB
1220  // profile count.
1221  if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1222  setEdgeCount(E.get(), DestInfo.CountValue);
1223  }
1224  if (E->CountValid)
1225  continue;
1226  // E's count should have been set from profile. If not, this meenas E skips
1227  // the instrumentation. We set the count to 0.
1228  setEdgeCount(E.get(), 0);
1229  }
1230  return true;
1231 }
1232 
1233 // Set the count value for the unknown edge. There should be one and only one
1234 // unknown edge in Edges vector.
1235 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1236  for (auto &E : Edges) {
1237  if (E->CountValid)
1238  continue;
1239  E->setEdgeCount(Value);
1240 
1241  getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1242  getBBInfo(E->DestBB).UnknownCountInEdge--;
1243  return;
1244  }
1245  llvm_unreachable("Cannot find the unknown count edge");
1246 }
1247 
1248 // Emit function metadata indicating PGO profile mismatch.
// The marker string is appended to the function's MD_annotation tuple; any
// annotations already present are preserved.
// NOTE(review): the first line of the signature and the declaration of Names
// are absent from this listing; the visible call site passes
// (F, M->getContext()).
1250  LLVMContext &ctx) {
1251  const char MetadataName[] = "instr_prof_hash_mismatch";
1253  // If this metadata already exists, ignore.
1254  auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1255  if (Existing) {
1256  MDTuple *Tuple = cast<MDTuple>(Existing);
1257  for (auto &N : Tuple->operands()) {
// Already tagged: leave the function's metadata untouched.
1258  if (cast<MDString>(N.get())->getString() == MetadataName)
1259  return;
// Carry existing annotations over into the new tuple.
1260  Names.push_back(N.get());
1261  }
1262  }
1263 
1264  MDBuilder MDB(ctx);
1265  Names.push_back(MDB.createString(MetadataName));
1266  MDNode *MD = MDTuple::get(ctx, Names);
1267  F.setMetadata(LLVMContext::MD_annotation, MD);
1268 }
1269 
1270 // Read the profile record for this function from \p PGOReader and assign the
1271 // values to the instrumented BBs and the edges. This function also updates
1272 // ProgramMaxCount. Return true if the profile is successfully read, and false on errors.
// On success, \p AllZeros is set when the counters sum to zero and
// \p AllMinusOnes when there is at least one counter and every counter equals
// (uint64_t)-1. Neither flag is written on the early error return.
// NOTE(review): the declaration of Result and the first part of the
// SkipWarning expression are absent from this listing.
1273 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1274  bool &AllMinusOnes) {
1275  auto &Ctx = M->getContext();
1277  PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1278  if (Error E = Result.takeError()) {
1279  handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1280  auto Err = IPE.get();
1281  bool SkipWarning = false;
1282  LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1283  << FuncInfo.FuncName << ": ");
1284  if (Err == instrprof_error::unknown_function) {
// No record for this function: count it and warn only if PGOWarnMissing.
1285  IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1286  SkipWarning = !PGOWarnMissing;
1287  LLVM_DEBUG(dbgs() << "unknown function");
1288  } else if (Err == instrprof_error::hash_mismatch ||
1289  Err == instrprof_error::malformed) {
// Hash mismatch and malformed records are both counted as mismatches.
1290  IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1291  SkipWarning =
1294  (F.hasComdat() ||
1295  F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1296  LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1297  // Emit function metadata indicating PGO profile mismatch.
1298  annotateFunctionWithHashMismatch(F, M->getContext());
1299  }
1300 
1301  LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1302  if (SkipWarning)
1303  return;
1304 
1305  std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1306  std::string(" Hash = ") +
1307  std::to_string(FuncInfo.FunctionHash);
1308 
1309  Ctx.diagnose(
1310  DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1311  });
1312  return false;
1313  }
1314  ProfileRecord = std::move(Result.get());
1315  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1316 
1317  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1318  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
// Start from "true" only when there is at least one counter; any counter that
// is not (uint64_t)-1 clears the flag in the loop below.
1319  AllMinusOnes = (CountFromProfile.size() > 0);
1320  uint64_t ValueSum = 0;
1321  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1322  LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1323  ValueSum += CountFromProfile[I];
1324  if (CountFromProfile[I] != (uint64_t)-1)
1325  AllMinusOnes = false;
1326  }
1327  AllZeros = (ValueSum == 0);
1328 
1329  LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1330 
// Set both unknown-edge tallies of the info entry keyed by nullptr to 2.
// NOTE(review): presumably this entry is a fake entry/exit node that should
// never be resolved by the propagation -- confirm.
1331  getBBInfo(nullptr).UnknownCountOutEdge = 2;
1332  getBBInfo(nullptr).UnknownCountInEdge = 2;
1333 
1334  if (!setInstrumentedCounts(CountFromProfile)) {
1335  LLVM_DEBUG(
1336  dbgs() << "Inconsistent number of counts, skipping this function");
1337  Ctx.diagnose(DiagnosticInfoPGOProfile(
1338  M->getName().data(),
1339  Twine("Inconsistent number of counts in ") + F.getName().str()
1340  + Twine(": the profile may be stale or there is a function name collision."),
1341  DS_Warning));
1342  return false;
1343  }
1344  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1345  return true;
1346 }
1347 
1348 // Populate the counters from instrumented BBs to all BBs.
1349 // In the end of this operation, all BBs should have a valid count value.
1350 void PGOUseFunc::populateCounters() {
1351  bool Changes = true;
1352  unsigned NumPasses = 0;
1353  while (Changes) {
1354  NumPasses++;
1355  Changes = false;
1356 
1357  // For efficient traversal, it's better to start from the end as most
1358  // of the instrumented edges are at the end.
1359  for (auto &BB : reverse(F)) {
1360  UseBBInfo *Count = findBBInfo(&BB);
1361  if (Count == nullptr)
1362  continue;
1363  if (!Count->CountValid) {
1364  if (Count->UnknownCountOutEdge == 0) {
1365  Count->CountValue = sumEdgeCount(Count->OutEdges);
1366  Count->CountValid = true;
1367  Changes = true;
1368  } else if (Count->UnknownCountInEdge == 0) {
1369  Count->CountValue = sumEdgeCount(Count->InEdges);
1370  Count->CountValid = true;
1371  Changes = true;
1372  }
1373  }
1374  if (Count->CountValid) {
1375  if (Count->UnknownCountOutEdge == 1) {
1376  uint64_t Total = 0;
1377  uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1378  // If the one of the successor block can early terminate (no-return),
1379  // we can end up with situation where out edge sum count is larger as
1380  // the source BB's count is collected by a post-dominated block.
1381  if (Count->CountValue > OutSum)
1382  Total = Count->CountValue - OutSum;
1383  setEdgeCount(Count->OutEdges, Total);
1384  Changes = true;
1385  }
1386  if (Count->UnknownCountInEdge == 1) {
1387  uint64_t Total = 0;
1388  uint64_t InSum = sumEdgeCount(Count->InEdges);
1389  if (Count->CountValue > InSum)
1390  Total = Count->CountValue - InSum;
1391  setEdgeCount(Count->InEdges, Total);
1392  Changes = true;
1393  }
1394  }
1395  }
1396  }
1397 
1398  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1399 #ifndef NDEBUG
1400  // Assert every BB has a valid counter.
1401  for (auto &BB : F) {
1402  auto BI = findBBInfo(&BB);
1403  if (BI == nullptr)
1404  continue;
1405  assert(BI->CountValid && "BB count is not valid");
1406  }
1407 #endif
1408  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1409  uint64_t FuncMaxCount = FuncEntryCount;
1410  for (auto &BB : F) {
1411  auto BI = findBBInfo(&BB);
1412  if (BI == nullptr)
1413  continue;
1414  FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1415  }
1416 
1417  // Fix the obviously inconsistent entry count.
1418  if (FuncMaxCount > 0 && FuncEntryCount == 0)
1419  FuncEntryCount = 1;
1420  F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1421  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1422 
1423  // Now annotate select instructions
1424  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1425  assert(CountPosition == ProfileCountSize);
1426 
1427  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1428 }
1429 
1430 // Assign the scaled count values to the BB with multiple out edges.
// NOTE(review): the signature line of this function is absent from this
// listing.
1432  // Generate MD_prof metadata for every branch instruction.
1433  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1434  << " IsCS=" << IsCS << "\n");
1435  for (auto &BB : F) {
1436  Instruction *TI = BB.getTerminator();
// Only terminators with at least two successors carry branch weights.
1437  if (TI->getNumSuccessors() < 2)
1438  continue;
// Restrict to br / switch / indirectbr / invoke terminators.
1439  if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1440  isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
1441  continue;
1442 
// Blocks that never executed get no metadata.
1443  if (getBBInfo(&BB).CountValue == 0)
1444  continue;
1445 
1446  // We have a non-zero Branch BB.
1447  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
// NOTE(review): EdgeCounts is sized by the number of out-edges but indexed by
// successor number below; this assumes the two agree -- confirm.
1448  unsigned Size = BBCountInfo.OutEdges.size();
1449  SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1450  uint64_t MaxCount = 0;
1451  for (unsigned s = 0; s < Size; s++) {
1452  const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1453  const BasicBlock *SrcBB = E->SrcBB;
1454  const BasicBlock *DestBB = E->DestBB;
// Edges without a destination block do not correspond to a successor.
1455  if (DestBB == nullptr)
1456  continue;
1457  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1458  uint64_t EdgeCount = E->CountValue;
1459  if (EdgeCount > MaxCount)
1460  MaxCount = EdgeCount;
1461  EdgeCounts[SuccNum] = EdgeCount;
1462  }
1463  setProfMetadata(M, TI, EdgeCounts, MaxCount);
1464  }
1465 }
1466 
// Return true if any predecessor of BB ends in an indirectbr instruction.
// NOTE(review): the signature line of this function is absent from this
// listing; the visible call site passes a basic block pointer.
1468  for (BasicBlock *Pred : predecessors(BB)) {
1469  if (isa<IndirectBrInst>(Pred->getTerminator()))
1470  return true;
1471  }
1472  return false;
1473 }
1474 
1475 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1476  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1477  // Find irr loop headers
1478  for (auto &BB : F) {
1479  // As a heuristic also annotate indrectbr targets as they have a high chance
1480  // to become an irreducible loop header after the indirectbr tail
1481  // duplication.
1482  if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1483  Instruction *TI = BB.getTerminator();
1484  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1485  setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1486  }
1487  }
1488 }
1489 
// Instrument one select instruction: emit an instrprof_increment_step call
// whose step is the zero-extended select condition, using the counter slot
// *CurCtrIdx, then advance *CurCtrIdx.
// NOTE(review): the declaration of Builder is absent from this listing, so the
// insertion point cannot be confirmed from here.
1490 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1491  Module *M = F.getParent();
1493  Type *Int64Ty = Builder.getInt64Ty();
1494  Type *I8PtrTy = Builder.getInt8PtrTy();
// Step is the condition widened to i64.
1495  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1496  Builder.CreateCall(
1497  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1498  {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1499  Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1500  Builder.getInt32(*CurCtrIdx), Step});
1501  ++(*CurCtrIdx);
1502 }
1503 
1504 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1505  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1506  assert(*CurCtrIdx < CountFromProfile.size() &&
1507  "Out of bound access of counters");
1508  uint64_t SCounts[2];
1509  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1510  ++(*CurCtrIdx);
1511  uint64_t TotalCount = 0;
1512  auto BI = UseFunc->findBBInfo(SI.getParent());
1513  if (BI != nullptr)
1514  TotalCount = BI->CountValue;
1515  // False Count
1516  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1517  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1518  if (MaxCount)
1519  setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1520 }
1521 
1522 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1523  if (!PGOInstrSelect)
1524  return;
1525  // FIXME: do not handle this yet.
1526  if (SI.getCondition()->getType()->isVectorTy())
1527  return;
1528 
1529  switch (Mode) {
1530  case VM_counting:
1531  NSIs++;
1532  return;
1533  case VM_instrument:
1534  instrumentOneSelectInst(SI);
1535  return;
1536  case VM_annotate:
1537  annotateOneSelectInst(SI);
1538  return;
1539  }
1540 
1541  llvm_unreachable("Unknown visiting mode");
1542 }
1543 
1544 // Traverse all value sites and annotate the instructions for all value kinds.
1545 void PGOUseFunc::annotateValueSites() {
// NOTE(review): the condition guarding this early return is absent from this
// listing.
1547  return;
1548 
1549  // Create the PGOFuncName meta data.
1550  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1551 
// Delegate to the per-kind overload for every value kind.
1552  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1553  annotateValueSites(Kind);
1554 }
1555 
1556 // Annotate the instructions for a specific value kind.
// If the number of value sites recorded in the profile differs from the
// number found in the function, a warning is emitted and nothing is
// annotated for this kind.
// NOTE(review): one operand line of the warning message is absent from this
// listing.
1557 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1558  assert(Kind <= IPVK_Last);
1559  unsigned ValueSiteIndex = 0;
1560  auto &ValueSites = FuncInfo.ValueSites[Kind];
1561  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1562  if (NumValueSites != ValueSites.size()) {
1563  auto &Ctx = M->getContext();
1564  Ctx.diagnose(DiagnosticInfoPGOProfile(
1565  M->getName().data(),
1566  Twine("Inconsistent number of value sites for ") +
1568  Twine(" profiling in \"") + F.getName().str() +
1569  Twine("\", possibly due to the use of a stale profile."),
1570  DS_Warning));
1571  return;
1572  }
1573 
// Sites are matched to profile data purely by their position in ValueSites.
1574  for (VPCandidateInfo &I : ValueSites) {
1575  LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1576  << "): Index = " << ValueSiteIndex << " out of "
1577  << NumValueSites << "\n");
// MemOP size sites use their own annotation limit.
1578  annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1579  static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1580  Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1581  : MaxNumAnnotations);
1582  ValueSiteIndex++;
1583  }
1584 }
1585 
1586 // Collect the set of members for each Comdat in module M and store
1587 // in ComdatMembers.
// Functions, global variables and aliases are scanned, in that order. Nothing
// is collected when DoComdatRenaming is off.
// NOTE(review): the first line of the signature is absent from this listing.
1589  Module &M,
1590  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1591  if (!DoComdatRenaming)
1592  return;
1593  for (Function &F : M)
1594  if (Comdat *C = F.getComdat())
1595  ComdatMembers.insert(std::make_pair(C, &F));
1596  for (GlobalVariable &GV : M.globals())
1597  if (Comdat *C = GV.getComdat())
1598  ComdatMembers.insert(std::make_pair(C, &GV));
1599  for (GlobalAlias &GA : M.aliases())
1600  if (Comdat *C = GA.getComdat())
1601  ComdatMembers.insert(std::make_pair(C, &GA));
1602 }
1603 
// Instrument every function definition in M that is not marked NoProfile,
// using the supplied callbacks to obtain the per-function analyses. Always
// returns true.
// NOTE(review): the first line of the signature and one parameter line (the
// LookupBPI callback used below) are absent from this listing.
1605  Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1607  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1608  // For the context-sensitive instrumentation, we should have a separate pass
1609  // (before LTO/ThinLTO linking) to create these variables.
1610  if (!IsCS)
1611  createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
1612  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1613  collectComdatMembers(M, ComdatMembers);
1614 
1615  for (auto &F : M) {
// Declarations have no body to instrument.
1616  if (F.isDeclaration())
1617  continue;
// Functions carrying the NoProfile attribute are left alone.
1618  if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1619  continue;
1620  auto &TLI = LookupTLI(F);
1621  auto *BPI = LookupBPI(F);
1622  auto *BFI = LookupBFI(F);
1623  instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1624  }
1625  return true;
1626 }
1627 
// NOTE(review): the signature and the opening statements of this definition
// are absent from this listing. The visible part creates the profile file
// name variable from CSInstrName and reports all analyses as preserved.
1630  createProfileFileNameVar(M, CSInstrName);
1632  return PreservedAnalyses::all();
1633 }
1634 
1635 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
1636  if (skipModule(M))
1637  return false;
1638 
1639  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
1640  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1641  };
1642  auto LookupBPI = [this](Function &F) {
1643  return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
1644  };
1645  auto LookupBFI = [this](Function &F) {
1646  return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1647  };
1648  return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
1649 }
1650 
// New pass manager entry point: instrument all functions of M, fetching the
// per-function analyses from the function analysis manager obtained through
// the module proxy.
// NOTE(review): the first line of the signature is absent from this listing.
1652  ModuleAnalysisManager &AM) {
1653  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1654  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1655  return FAM.getResult<TargetLibraryAnalysis>(F);
1656  };
1657  auto LookupBPI = [&FAM](Function &F) {
1658  return &FAM.getResult<BranchProbabilityAnalysis>(F);
1659  };
1660  auto LookupBFI = [&FAM](Function &F) {
1661  return &FAM.getResult<BlockFrequencyAnalysis>(F);
1662  };
1663 
// Everything is preserved only if InstrumentAllFunctions reports failure.
1664  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1665  return PreservedAnalyses::all();
1666 
1667  return PreservedAnalyses::none();
1668 }
1669 
1670 // Using the ratio b/w sums of profile count values and BFI count values to
1671 // adjust the func entry count.
1672 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1673  BranchProbabilityInfo &NBPI) {
1674  Function &F = Func.getFunc();
1675  BlockFrequencyInfo NBFI(F, NBPI, LI);
1676 #ifndef NDEBUG
1677  auto BFIEntryCount = F.getEntryCount();
1678  assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
1679  "Invalid BFI Entrycount");
1680 #endif
1681  auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1682  auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1683  for (auto &BBI : F) {
1684  uint64_t CountValue = 0;
1685  uint64_t BFICountValue = 0;
1686  if (!Func.findBBInfo(&BBI))
1687  continue;
1688  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1689  CountValue = Func.getBBInfo(&BBI).CountValue;
1690  BFICountValue = BFICount.getValue();
1691  SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1692  SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1693  }
1694  if (SumCount.isZero())
1695  return;
1696 
1697  assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1698  "Incorrect sum of BFI counts");
1699  if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1700  return;
1701  double Scale = (SumCount / SumBFICount).convertToDouble();
1702  if (Scale < 1.001 && Scale > 0.999)
1703  return;
1704 
1705  uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1706  uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1707  if (NewEntryCount == 0)
1708  NewEntryCount = 1;
1709  if (NewEntryCount != FuncEntryCount) {
1710  F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1711  LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1712  << ", entry_count " << FuncEntryCount << " --> "
1713  << NewEntryCount << "\n");
1714  }
1715 }
1716 
1717 // Compare the profile count values with BFI count values, and print out
1718 // the non-matching ones.
// One remark is emitted per mismatching block, followed by a per-function
// summary remark when at least one block mismatched.
// NOTE(review): the declarations of ORE and of Remark are absent from this
// listing.
1719 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1720  BranchProbabilityInfo &NBPI,
1721  uint64_t HotCountThreshold,
1722  uint64_t ColdCountThreshold) {
1723  Function &F = Func.getFunc();
1724  BlockFrequencyInfo NBFI(F, NBPI, LI);
1725  // bool PrintFunc = false;
1726  bool HotBBOnly = PGOVerifyHotBFI;
1727  std::string Msg;
1729 
1730  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1731  for (auto &BBI : F) {
1732  uint64_t CountValue = 0;
1733  uint64_t BFICountValue = 0;
1734 
// A block whose profile count is not valid is treated as having count 0.
1735  if (Func.getBBInfo(&BBI).CountValid)
1736  CountValue = Func.getBBInfo(&BBI).CountValue;
1737 
1738  BBNum++;
1739  if (CountValue)
1740  NonZeroBBNum++;
// A missing BFI count is likewise treated as 0.
1741  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1742  if (BFICount)
1743  BFICountValue = BFICount.getValue();
1744 
1745  if (HotBBOnly) {
// Hot-only mode: report only blocks whose hot/cold classification flips
// between the raw profile and BFI.
1746  bool rawIsHot = CountValue >= HotCountThreshold;
1747  bool BFIIsHot = BFICountValue >= HotCountThreshold;
1748  bool rawIsCold = CountValue <= ColdCountThreshold;
1749  bool ShowCount = false;
1750  if (rawIsHot && !BFIIsHot) {
1751  Msg = "raw-Hot to BFI-nonHot";
1752  ShowCount = true;
1753  } else if (rawIsCold && BFIIsHot) {
1754  Msg = "raw-Cold to BFI-Hot";
1755  ShowCount = true;
1756  }
1757  if (!ShowCount)
1758  continue;
1759  } else {
// General mode: skip blocks where both counts are below the cutoff, and
// blocks whose absolute difference is below the ratio threshold.
1760  if ((CountValue < PGOVerifyBFICutoff) &&
1761  (BFICountValue < PGOVerifyBFICutoff))
1762  continue;
1763  uint64_t Diff = (BFICountValue >= CountValue)
1764  ? BFICountValue - CountValue
1765  : CountValue - BFICountValue;
// NOTE(review): CountValue / 100 is an integer division performed before the
// multiplication, so the threshold is 0 for CountValue < 100 -- confirm this
// is intended.
1766  if (Diff < CountValue / 100 * PGOVerifyBFIRatio)
1767  continue;
1768  }
1769  BBMisMatchNum++;
1770 
1771  ORE.emit([&]() {
1773  F.getSubprogram(), &BBI);
1774  Remark << "BB " << ore::NV("Block", BBI.getName())
1775  << " Count=" << ore::NV("Count", CountValue)
1776  << " BFI_Count=" << ore::NV("Count", BFICountValue);
1777  if (!Msg.empty())
1778  Remark << " (" << Msg << ")";
1779  return Remark;
1780  });
1781  }
1782  if (BBMisMatchNum)
1783  ORE.emit([&]() {
1784  return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1785  F.getSubprogram(), &F.getEntryBlock())
1786  << "In Func " << ore::NV("Function", F.getName())
1787  << ": Num_of_BB=" << ore::NV("Count", BBNum)
1788  << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1789  << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1790  });
1791 }
1792 
// Applies the profile stored in ProfileFileName to every function definition
// in M: loads the counters, sets entry counts, branch weights, value-site and
// irreducible-loop-header annotations, and finally marks functions with
// InlineHint or Cold. Returns false when the profile cannot be opened, when
// IsCS is set but the profile has no context-sensitive IR-level data, or when
// it is not an IR-level profile; returns true otherwise.
// NOTE(review): the line that opens this definition (listing line 1793) and
// the LookupBPI parameter (listing line 1796) are not present in this text;
// the callers further down pass these arguments to annotateAllFunctions --
// confirm against the original file.
1794  Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1795  function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1797  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1798  ProfileSummaryInfo *PSI, bool IsCS) {
1799  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1800  auto &Ctx = M.getContext();
1801  // Read the counter array from file.
1802  auto ReaderOrErr =
1803  IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
// Every error from creating the reader is reported as a PGO profile
// diagnostic before giving up.
1804  if (Error E = ReaderOrErr.takeError()) {
1805  handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1806  Ctx.diagnose(
1807  DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1808  });
1809  return false;
1810  }
1811 
1812  std::unique_ptr<IndexedInstrProfReader> PGOReader =
1813  std::move(ReaderOrErr.get());
1814  if (!PGOReader) {
1815  Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1816  StringRef("Cannot get PGOReader")));
1817  return false;
1818  }
// A context-sensitive use pass has nothing to do without a CS profile; this
// case returns without emitting a diagnostic.
1819  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1820  return false;
1821 
1822  // TODO: might need to change the warning once the clang option is finalized.
1823  if (!PGOReader->isIRLevelProfile()) {
1824  Ctx.diagnose(DiagnosticInfoPGOProfile(
1825  ProfileFileName.data(), "Not an IR level instrumentation profile"));
1826  return false;
1827  }
1828 
1829  // Add the profile summary (read from the header of the indexed summary) here
1830  // so that we can use it below when reading counters (which checks if the
1831  // function should be marked with a cold or inlinehint attribute).
// NOTE(review): the remaining argument(s) of this call (listing lines
// 1833-1834) are not present in this text -- confirm against the original.
1832  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1835  PSI->refresh();
1836 
1837  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1838  collectComdatMembers(M, ComdatMembers);
// Attribute changes are deferred: functions are collected here and tagged
// after the main loop (see the comment ahead of the final loops).
1839  std::vector<Function *> HotFunctions;
1840  std::vector<Function *> ColdFunctions;
1841 
1842  // If the profile marked as always instrument the entry BB, do the
1843  // same. Note this can be overwritten by the internal option in CFGMST.h
1844  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
// NOTE(review): listing line 1845 is not present in this text; presumably it
// guards the override below -- as shown here the assignment is unconditional.
1846  InstrumentFuncEntry = PGOInstrumentEntry;
1847  for (auto &F : M) {
1848  if (F.isDeclaration())
1849  continue;
1850  auto &TLI = LookupTLI(F);
1851  auto *BPI = LookupBPI(F);
1852  auto *BFI = LookupBFI(F);
1853  // Split indirectbr critical edges here before computing the MST rather than
1854  // later in getInstrBB() to avoid invalidating it.
// NOTE(review): the statement the comment above describes (listing line 1855)
// is not present in this text.
1856  PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
1857  InstrumentFuncEntry);
1858  // When AllMinusOnes is true, it means the profile for the function
1859  // is unrepresentative and this function is actually hot. Set the
1860  // entry count of the function to be multiple times of hot threshold
1861  // and drop all its internal counters.
1862  bool AllMinusOnes = false;
1863  bool AllZeros = false;
// Functions for which readCounters fails are left unannotated.
1864  if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
1865  continue;
// All-zero counters: record a zero entry count, and queue the function as
// cold only when Func.getProgramMaxCount() is non-zero.
1866  if (AllZeros) {
1867  F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1868  if (Func.getProgramMaxCount() != 0)
1869  ColdFunctions.push_back(&F);
1870  continue;
1871  }
1872  const unsigned MultiplyFactor = 3;
1873  if (AllMinusOnes) {
1874  uint64_t HotThreshold = PSI->getHotCountThreshold();
// The entry count is only set when the hot threshold is non-zero; the
// function is queued as hot either way.
1875  if (HotThreshold)
1876  F.setEntryCount(
1877  ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
1878  HotFunctions.push_back(&F);
1879  continue;
1880  }
// Regular case: propagate the counters and attach the profile metadata.
1881  Func.populateCounters();
1882  Func.setBranchWeights();
1883  Func.annotateValueSites();
1884  Func.annotateIrrLoopHeaderWeights();
1885  PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1886  if (FreqAttr == PGOUseFunc::FFA_Cold)
1887  ColdFunctions.push_back(&F);
1888  else if (FreqAttr == PGOUseFunc::FFA_Hot)
1889  HotFunctions.push_back(&F);
// Optional dump of block frequencies recomputed from the annotated IR,
// limited to ViewBlockFreqFuncName when that option is non-empty.
1890  if (PGOViewCounts != PGOVCT_None &&
1891  (ViewBlockFreqFuncName.empty() ||
1892  F.getName().equals(ViewBlockFreqFuncName))) {
1893  LoopInfo LI{DominatorTree(F)};
1894  std::unique_ptr<BranchProbabilityInfo> NewBPI =
1895  std::make_unique<BranchProbabilityInfo>(F, LI);
1896  std::unique_ptr<BlockFrequencyInfo> NewBFI =
1897  std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1898  if (PGOViewCounts == PGOVCT_Graph)
1899  NewBFI->view();
1900  else if (PGOViewCounts == PGOVCT_Text) {
1901  dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1902  NewBFI->print(dbgs());
1903  }
1904  }
// Optional dump of the raw profile counts, with the same function filter.
1905  if (PGOViewRawCounts != PGOVCT_None &&
1906  (ViewBlockFreqFuncName.empty() ||
1907  F.getName().equals(ViewBlockFreqFuncName))) {
// NOTE(review): the `if` that pairs with the `else if` a few lines below
// (listing line 1908) is not present in this text.
1909  if (ViewBlockFreqFuncName.empty())
1910  WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1911  else
1912  ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1913  else if (PGOViewRawCounts == PGOVCT_Text) {
1914  dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1915  Func.dumpInfo();
1916  }
1917  }
1918 
// NOTE(review): the line that opens this scope (listing line 1919) is not
// present in this text; its closing brace follows the verifyFuncBFI call.
1920  LoopInfo LI{DominatorTree(F)};
1921  BranchProbabilityInfo NBPI(F, LI);
1922 
1923  // Fix func entry count.
1924  if (PGOFixEntryCount)
1925  fixFuncEntryCount(Func, LI, NBPI);
1926 
1927  // Verify BlockFrequency information.
1928  uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
1929  if (PGOVerifyHotBFI) {
1930  HotCountThreshold = PSI->getOrCompHotCountThreshold();
// NOTE(review): listing line 1931 is not present in this text; as shown,
// ColdCountThreshold keeps its initial value of 0 -- confirm.
1932  }
1933  verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
1934  }
1935  }
1936 
1937  // Set function hotness attribute from the profile.
1938  // We have to apply these attributes at the end because their presence
1939  // can affect the BranchProbabilityInfo of any callers, resulting in an
1940  // inconsistent MST between prof-gen and prof-use.
1941  for (auto &F : HotFunctions) {
1942  F->addFnAttr(Attribute::InlineHint);
1943  LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1944  << "\n");
1945  }
1946  for (auto &F : ColdFunctions) {
1947  // Only set when there is no Attribute::Hot set by the user. For Hot
1948  // attribute, user's annotation has the precedence over the profile.
1949  if (F->hasFnAttribute(Attribute::Hot)) {
1950  auto &Ctx = M.getContext();
1951  std::string Msg = std::string("Function ") + F->getName().str() +
1952  std::string(" is annotated as a hot function but"
1953  " the profile is cold");
// The conflict is reported as a warning and the Cold attribute is not added.
1954  Ctx.diagnose(
1955  DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1956  continue;
1957  }
1958  F->addFnAttr(Attribute::Cold);
1959  LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
1960  << "\n");
1961  }
1962  return true;
1963 }
1964 
// NOTE(review): the line that opens this definition (listing line 1965) is not
// present in this text; the member initializers below indicate a constructor
// taking Filename, RemappingFilename and IsCS -- confirm against the original.
//
// Stores the profile file name, the remapping file name and the IsCS flag.
// When the PGOTestProfileFile or PGOTestProfileRemappingFile option is
// non-empty, its value replaces the corresponding constructor argument.
1966  std::string RemappingFilename,
1967  bool IsCS)
1968  : ProfileFileName(std::move(Filename)),
1969  ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
1970  if (!PGOTestProfileFile.empty())
1971  ProfileFileName = PGOTestProfileFile;
1972  if (!PGOTestProfileRemappingFile.empty())
1973  ProfileRemappingFileName = PGOTestProfileRemappingFile;
1974 }
1975 
// NOTE(review): the line that opens this definition (listing line 1976) is not
// present in this text; the body uses a module M and returns
// PreservedAnalyses -- confirm the exact signature against the original.
//
// Obtains the per-function TargetLibraryInfo, BranchProbabilityInfo and
// BlockFrequencyInfo through the function analysis manager proxy, plus the
// module's ProfileSummaryInfo, and hands them to annotateAllFunctions.
// Returns PreservedAnalyses::all() when annotateAllFunctions returns false
// and PreservedAnalyses::none() otherwise.
1977  ModuleAnalysisManager &AM) {
1978 
1979  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
// The lookups are lambdas so each analysis is requested per function, when
// annotateAllFunctions asks for it.
1980  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1981  return FAM.getResult<TargetLibraryAnalysis>(F);
1982  };
1983  auto LookupBPI = [&FAM](Function &F) {
1984  return &FAM.getResult<BranchProbabilityAnalysis>(F);
1985  };
1986  auto LookupBFI = [&FAM](Function &F) {
1987  return &FAM.getResult<BlockFrequencyAnalysis>(F);
1988  };
1989 
1990  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1991 
1992  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
1993  LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
1994  return PreservedAnalyses::all();
1995 
1996  return PreservedAnalyses::none();
1997 }
1998 
1999 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
2000  if (skipModule(M))
2001  return false;
2002 
2003  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
2004  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2005  };
2006  auto LookupBPI = [this](Function &F) {
2007  return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
2008  };
2009  auto LookupBFI = [this](Function &F) {
2010  return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2011  };
2012 
2013  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2014  return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
2015  LookupBFI, PSI, IsCS);
2016 }
2017 
2018 static std::string getSimpleNodeName(const BasicBlock *Node) {
2019  if (!Node->getName().empty())
2020  return std::string(Node->getName());
2021 
2022  std::string SimpleNodeName;
2023  raw_string_ostream OS(SimpleNodeName);
2024  Node->printAsOperand(OS, false);
2025  return OS.str();
2026 }
2027 
// NOTE(review): the line that opens this definition (listing line 2028) is not
// present in this text; the body uses M and TI, which are presumably its
// leading parameters -- confirm against the original file.
//
// Scales every value in EdgeCounts by a factor derived from MaxCount and
// attaches the results to TI as MD_prof branch weights. When
// EmitBranchProbability is set and TI has a printable branch condition, an
// optimization remark with the probability of the first edge is also emitted.
// MaxCount must be greater than zero (asserted).
2029  ArrayRef<uint64_t> EdgeCounts,
2030  uint64_t MaxCount) {
2031  MDBuilder MDB(M->getContext());
2032  assert(MaxCount > 0 && "Bad max count");
2033  uint64_t Scale = calculateCountScale(MaxCount);
2034  SmallVector<unsigned, 4> Weights;
2035  for (const auto &ECI : EdgeCounts)
2036  Weights.push_back(scaleBranchCount(ECI, Scale));
2037 
2038  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2039  : Weights) {
2040  dbgs() << W << " ";
2041  } dbgs() << "\n";);
2042 
2043  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
2044  if (EmitBranchProbability) {
2045  std::string BrCondStr = getBranchCondString(TI);
// No condition string means there is nothing to report for this terminator.
2046  if (BrCondStr.empty())
2047  return;
2048 
2049  uint64_t WSum =
2050  std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2051  [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2052  uint64_t TotalCount =
2053  std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2054  [](uint64_t c1, uint64_t c2) { return c1 + c2; });
// Rescale against the weight sum before building the probability of the
// first edge (Weights[0] over WSum).
2055  Scale = calculateCountScale(WSum);
2056  BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2057  scaleBranchCount(WSum, Scale));
2058  std::string BranchProbStr;
2059  raw_string_ostream OS(BranchProbStr);
2060  OS << BP;
2061  OS << " (total count : " << TotalCount << ")";
// Flush so BranchProbStr holds the complete text before it is read below.
2062  OS.flush();
2063  Function *F = TI->getParent()->getParent();
// NOTE(review): ORE is used below but its declaration (listing line 2064) is
// not present in this text.
2065  ORE.emit([&]() {
2066  return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2067  << BrCondStr << " is true with probability : " << BranchProbStr;
2068  });
2069  }
2070 }
2071 
2072 namespace llvm {
2073 
2074 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
2075  MDBuilder MDB(M->getContext());
2076  TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2077  MDB.createIrrLoopHeaderWeight(Count));
2078 }
2079 
// GraphTraits specialization that exposes the CFG of the function wrapped by
// a PGOUseFunc: nodes are the function's basic blocks, the entry node is the
// function's first block, and a node's children are its successors.
// NOTE(review): the ChildIteratorType and nodes_iterator aliases used below
// (listing lines 2082-2083) are not present in this text.
2080 template <> struct GraphTraits<PGOUseFunc *> {
2081  using NodeRef = const BasicBlock *;
2084 
2085  static NodeRef getEntryNode(const PGOUseFunc *G) {
2086  return &G->getFunc().front();
2087  }
2088 
// NOTE(review): the opening line of this member (listing line 2089) is not
// present in this text; by symmetry with child_end it is presumably
// child_begin -- confirm against the original file.
2090  return succ_begin(N);
2091  }
2092 
2093  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2094 
// Node iteration covers every basic block of the wrapped function.
2095  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2096  return nodes_iterator(G->getFunc().begin());
2097  }
2098 
2099  static nodes_iterator nodes_end(const PGOUseFunc *G) {
2100  return nodes_iterator(G->getFunc().end());
2101  }
2102 };
2103 
2104 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2105  explicit DOTGraphTraits(bool isSimple = false)
2107 
2108  static std::string getGraphName(const PGOUseFunc *G) {
2109  return std::string(G->getFunc().getName());
2110  }
2111 
2112  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2113  std::string Result;
2114  raw_string_ostream OS(Result);
2115 
2116  OS << getSimpleNodeName(Node) << ":\\l";
2117  UseBBInfo *BI = Graph->findBBInfo(Node);
2118  OS << "Count : ";
2119  if (BI && BI->CountValid)
2120  OS << BI->CountValue << "\\l";
2121  else
2122  OS << "Unknown\\l";
2123 
2124  if (!PGOInstrSelect)
2125  return Result;
2126 
2127  for (const Instruction &I : *Node) {
2128  if (!isa<SelectInst>(&I))
2129  continue;
2130  // Display scaled counts for SELECT instruction:
2131  OS << "SELECT : { T = ";
2132  uint64_t TC, FC;
2133  bool HasProf = I.extractProfMetadata(TC, FC);
2134  if (!HasProf)
2135  OS << "Unknown, F = Unknown }\\l";
2136  else
2137  OS << TC << ", F = " << FC << " }\\l";
2138  }
2139  return Result;
2140  }
2141 };
2142 
2143 } // end namespace llvm
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
DoComdatRenaming
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
llvm::SuccIterator
Definition: CFG.h:139
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:487
Instrumentation.h
llvm::createPGOFuncNameMetadata
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1059
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::PGOInstrumentationUse::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1976
llvm
Definition: AllocatorList.h:23
fixFuncEntryCount
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
Definition: PGOInstrumentation.cpp:1672
llvm::DOTGraphTraits< PGOUseFunc * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: PGOInstrumentation.cpp:2105
llvm::APFloatBase::cmpGreaterThan
@ cmpGreaterThan
Definition: APFloat.h:183
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
use
pgo instr use
Definition: PGOInstrumentation.cpp:492
c2
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps c2(%esp) ... xorps %xmm0
Comdat.h
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::Comdat::getName
StringRef getName() const
Definition: Comdat.cpp:26
llvm::JamCRC::update
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
ProfileCount
Function::ProfileCount ProfileCount
Definition: PGOInstrumentation.cpp:124
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1295
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:256
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::InstrProfError::message
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:218
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:785
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
llvm::CFGMST::getBBInfo
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:83
llvm::Function
Definition: Function.h:61
ProfileSummary.h
StringRef.h
Pass.h
llvm::BlockFrequencyInfoWrapperPass
Legacy analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:138
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:614
llvm::DOTGraphTraits< PGOUseFunc * >::getNodeLabel
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
Definition: PGOInstrumentation.cpp:2112
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:130
llvm::ProfileSummaryInfo::getOrCompHotCountThreshold
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:358
PGOTestProfileRemappingFile
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
llvm::getPGOFuncName
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Return the modified name for function F suitable to be used the key for profile lookup.
Definition: InstrProf.cpp:262
sumEdgeCount
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
Definition: PGOInstrumentation.cpp:1043
ErrorHandling.h
llvm::InstrProfError::get
instrprof_error get() const
Definition: InstrProf.h:323
llvm::IRBuilder<>
PGOVerifyBFI
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
MapVector.h
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2207
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
llvm::GlobalAlias
Definition: GlobalAlias.h:27
llvm::createPGOInstrumentationUseLegacyPass
ModulePass * createPGOInstrumentationUseLegacyPass(StringRef Filename=StringRef(""), bool IsCS=false)
Definition: PGOInstrumentation.cpp:495
Error.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::createPGOFuncNameVar
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:341
PGOVerifyBFIRatio
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-big -- only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
llvm::coverage::accessors::getFuncHash
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
Definition: CoverageMapping.h:772
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
APInt.h
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
PGOInstrSelect
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::SplitCriticalEdge
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition: BreakCriticalEdges.cpp:103
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::GraphTraits< PGOUseFunc * >::nodes_end
static nodes_iterator nodes_end(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2099
EHPersonalities.h
llvm::CFGMST::AllEdges
std::vector< std::unique_ptr< Edge > > AllEdges
Definition: CFGMST.h:45
llvm::TinyPtrVector::front
EltTy front() const
Definition: TinyPtrVector.h:230
llvm::Optional
Definition: APInt.h:34
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
llvm::JamCRC
Definition: CRC.h:45
MaxNumMemOPAnnotations
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
llvm::GraphTraits< PGOUseFunc * >::child_end
static ChildIteratorType child_end(const NodeRef N)
Definition: PGOInstrumentation.cpp:2093
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:374
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
STLExtras.h
llvm::createProfileFileNameVar
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1160
llvm::CmpInst::getPredicateName
static StringRef getPredicateName(Predicate P)
Definition: Instructions.cpp:3700
CFGMST.h
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:188
llvm::PGOVCT_Text
@ PGOVCT_Text
Definition: BlockFrequencyInfo.h:33
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::InstrProfRecord
Profiling information for a single function.
Definition: InstrProf.h:691
llvm::OperandBundleDef
OperandBundleDefT< Value * > OperandBundleDef
Definition: InstrTypes.h:1143
F
#define F(x, y, z)
Definition: MD5.cpp:56
InstrProfData.inc
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1100
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1330
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
MaxNumAnnotations
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of annotations for a single indirect " "call callsite"))
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
PGOWarnMissing
static cl::opt< bool > PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about missing profile data for " "functions."))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
Instruction.h
PGOInstrumentEntry
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
CommandLine.h
llvm::APFloat::getZero
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:885
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:736
llvm::PGOVCT_Graph
@ PGOVCT_Graph
Definition: BlockFrequencyInfo.h:33
llvm::BranchProbabilityAnalysis
Analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:420
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::ProfileSummaryInfo::isColdCount
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
Definition: ProfileSummaryInfo.cpp:334
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::calculateCountScale
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
Definition: Instrumentation.h:180
PGOViewRawCounts
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Constants.h
llvm::ValueProfileCollector::CandidateInfo::AnnotatedInst
Instruction * AnnotatedInst
Definition: ValueProfileCollector.h:62
llvm::BranchProbabilityInfoWrapperPass
Legacy analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:446
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:46
getBranchCondString
static std::string getBranchCondString(Instruction *TI)
Definition: PGOInstrumentation.cpp:297
profile
pgo instr Read PGO instrumentation profile
Definition: PGOInstrumentation.cpp:493
llvm::PGOInstrumentationGen::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1651
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::setIrrLoopHeaderMetadata
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
Definition: PGOInstrumentation.cpp:2074
Twine.h
InstrTypes.h
llvm::Type::print
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Definition: AsmWriter.cpp:4588
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:115
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MDTuple
Tuple of metadata.
Definition: Metadata.h:1139
llvm::canRenameComdatFunc
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1115
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::createPGOInstrumentationGenCreateVarLegacyPass
ModulePass * createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName=StringRef(""))
llvm::InstrProfRecord::getNumValueSites
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:882
CRC.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
false
Definition: StackSlotColoring.cpp:142
llvm::Instruction
Definition: Instruction.h:45
ColdCountThreshold
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
InstrProf.h
MDBuilder.h
INITIALIZE_PASS
INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, "pgo-instr-gen-create-var", "Create PGO instrumentation version variable for CSPGO.", false, false) ModulePass *llvm
Definition: PGOInstrumentation.cpp:502
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:183
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:401
PGOFixEntryCount
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:277
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:748
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::CFGMST
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
PGOViewCounts
cl::opt< PGOViewCountsType > PGOViewCounts
llvm::Comdat
Definition: Comdat.h:31
llvm::scaleBranchCount
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
Definition: Instrumentation.h:190
llvm::initializePGOInstrumentationGenCreateVarLegacyPassPass
void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry &)
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::GraphTraits< PGOUseFunc * >::getEntryNode
static NodeRef getEntryNode(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2085
llvm::ErrorInfoBase
Base class for error info classes.
Definition: Error.h:48
llvm::ErrorInfoBase::message
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:56
Type.h
BranchProbability.h
CFG.h
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:161
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3086
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
instrumentation
pgo instr PGO instrumentation
Definition: PGOInstrumentation.cpp:479
getSimpleNodeName
static std::string getSimpleNodeName(const BasicBlock *Node)
Definition: PGOInstrumentation.cpp:2018
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:117
EmitBranchProbability
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:176
instrumentOneFunc
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
Definition: PGOInstrumentation.cpp:892
llvm::MDBuilder::createString
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
PGOVerifyHotBFI
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
llvm::pointer_iterator
Definition: iterator.h:320
llvm::HighlightColor::Remark
@ Remark
llvm::IndexedInstrProfReader::getInstrProfRecord
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Definition: InstrProfReader.cpp:879
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::APFloat
Definition: APFloat.h:701
InstrProfReader.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:46
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:696
BranchProbabilityInfo.h
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1178
llvm::ProfileSummaryInfo::isHotCount
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Definition: ProfileSummaryInfo.cpp:330
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:446
llvm::colorEHFunclets
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
Definition: EHPersonalities.cpp:81
ProfileSummaryInfo.h
PGOOldCFGHashing
static cl::opt< bool > PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, cl::desc("Use the old CFG function hashing"))
setBranchWeights
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
Definition: SimplifyCFG.cpp:819
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::IndexedInstrProfReader
Reader for the indexed binary instrprof format.
Definition: InstrProfReader.h:451
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::GlobalValue::WeakAnyLinkage
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
llvm::InstrProfError
Definition: InstrProf.h:309
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
PGOInstrumentation.h
llvm::DenseMap
Definition: DenseMap.h:714
iterator.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::SplitIndirectBrCriticalEdges
bool SplitIndirectBrCriticalEdges(Function &F, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Definition: BreakCriticalEdges.cpp:351
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:188
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass
ArrayRef.h
llvm::createPGOInstrumentationGenLegacyPass
ModulePass * createPGOInstrumentationGenLegacyPass(bool IsCS=false)
Definition: PGOInstrumentation.cpp:481
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::DiagnosticInfoPGOProfile
Diagnostic information for the PGO profiler.
Definition: DiagnosticInfo.h:324
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1563
llvm::elfabi::ELFSymbolType::Func
@ Func
llvm::PGOVCT_None
@ PGOVCT_None
Definition: BlockFrequencyInfo.h:33
llvm::instrprof_error::unknown_function
@ unknown_function
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::WriteGraph
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:307
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1715
iterator_range.h
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::MDTuple::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1165
annotateAllFunctions
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
Definition: PGOInstrumentation.cpp:1793
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::InstrProfValueKind
InstrProfValueKind
Definition: InstrProf.h:238
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::IndexedInstrProfReader::create
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Definition: InstrProfReader.cpp:93
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOInstrumentation.cpp:127
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
PGOVerifyBFICutoff
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose " "profile count value is below."))
Triple.h
llvm::GetSuccessorNumber
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1486
CFG.h
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:206
llvm::ProfileSummaryInfo::refresh
void refresh()
If no summary is present, attempt to refresh.
Definition: ProfileSummaryInfo.cpp:94
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::DOTGraphTraits< PGOUseFunc * >::getGraphName
static std::string getGraphName(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2108
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::NamedInstrProfRecord::setCSFlagInHash
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:863
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:166
llvm::TinyPtrVector::size
unsigned size() const
Definition: TinyPtrVector.h:172
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
InstVisitor.h
PGOInstrMemOP
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:192
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::PGOInstrumentationUse::PGOInstrumentationUse
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false)
Definition: PGOInstrumentation.cpp:1965
llvm::ConstantInt::isMinusOne
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:204
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:671
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
llvm::initializePGOInstrumentationUseLegacyPassPass
void initializePGOInstrumentationUseLegacyPassPass(PassRegistry &)
BlockFrequencyInfo.h
llvm::ProfileSummary::PSK_CSInstr
@ PSK_CSInstr
Definition: ProfileSummary.h:47
llvm::GlobalValue::AvailableExternallyLinkage
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:419
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:770
llvm::instrprof_error::hash_mismatch
@ hash_mismatch
llvm::GraphTraits< PGOUseFunc * >::nodes_begin
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2095
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::PGOInstrumentationGenCreateVar::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1629
llvm::createIRLevelProfileFlagVar
void createIRLevelProfileFlagVar(Module &M, bool IsCS, bool InstrEntryBBEnabled)
Definition: InstrProf.cpp:1139
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:954
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
annotateFunctionWithHashMismatch
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
Definition: PGOInstrumentation.cpp:1249
Attributes.h
llvm::ValueProfileCollector
Utility analysis that determines what values are worth profiling.
Definition: ValueProfileCollector.h:57
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
ValueProfKindDescr
static const char * ValueProfKindDescr[]
Definition: PGOInstrumentation.cpp:328
GraphWriter.h
std
Definition: BitVector.h:838
ValueProfileCollector.h
llvm::JamCRC::getCRC
uint32_t getCRC() const
Definition: CRC.h:52
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:485
canRenameComdat
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:713
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
ViewBlockFreqFuncName
cl::opt< std::string > ViewBlockFreqFuncName
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::ProfileSummaryInfo::getOrCompColdCountThreshold
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:362
GlobalVariable.h
Casting.h
DiagnosticInfo.h
Function.h
DOTGraphTraits.h
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
llvm::ValueProfileCollector::CandidateInfo
Definition: ValueProfileCollector.h:59
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:644
getInstrBB
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
Definition: GCOVProfiling.cpp:755
llvm::cl::value_desc
Definition: CommandLine.h:421
llvm::CFGMST::BBInfos
DenseMap< const BasicBlock *, std::unique_ptr< BBInfo > > BBInfos
Definition: CFGMST.h:48
llvm::CFGMST::addEdge
Edge & addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:260
isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:524
GlobalAlias.h
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::Comdat::setSelectionKind
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:45
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:684
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
Instructions.h
llvm::support::endian::write64le
void write64le(void *P, uint64_t V)
Definition: Endian.h:417
SmallVector.h
llvm::GraphTraits< PGOUseFunc * >::child_begin
static ChildIteratorType child_begin(const NodeRef N)
Definition: PGOInstrumentation.cpp:2089
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:207
Dominators.h
DisableValueProfiling
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
llvm::ProfileSummary::PSK_Instr
@ PSK_Instr
Definition: ProfileSummary.h:47
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:799
PGOTestProfileFile
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
NoPGOWarnMismatch
static cl::opt< bool > NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on " "warnings about profile cfg mismatch."))
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:2028
llvm::ProfileSummaryInfo::getHotCountThreshold
uint64_t getHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.h:167
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:152
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Definition: DOTGraphTraits.h:28
llvm::instrprof_error::malformed
@ malformed
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:43
verifyFuncBFI
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
Definition: PGOInstrumentation.cpp:1719
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:945
llvm::ConstantInt::isOne
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:198
populateEHOperandBundle
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
Definition: PGOInstrumentation.cpp:865
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
isIndirectBrTarget
static bool isIndirectBrTarget(BasicBlock *BB)
Definition: PGOInstrumentation.cpp:1467
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::GraphTraits
Definition: GraphTraits.h:35
LLVMContext.h
llvm::const_succ_iterator
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:244
llvm::IndexedInstrProfReader::getMaximumFunctionCount
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Definition: InstrProfReader.h:512
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:282
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:411
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
llvm::Comdat::getSelectionKind
SelectionKind getSelectionKind() const
Definition: Comdat.h:44
raw_ostream.h
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::LLVMContext::OB_funclet
@ OB_funclet
Definition: LLVMContext.h:91
llvm::initializePGOInstrumentationGenLegacyPassPass
void initializePGOInstrumentationGenLegacyPassPass(PassRegistry &)
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:182
BasicBlockUtils.h
llvm::GlobalValue::LinkOnceODRLinkage
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1789
collectComdatMembers
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:1588
Value.h
llvm::raw_string_ostream::str
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:632
llvm::CFGMST::dumpEdges
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:241
InitializePasses.h
llvm::handleAllErrors
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:929
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::CFGMST::findBBInfo
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:90
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:421
llvm::InstrProfRecord::Counts
std::vector< uint64_t > Counts
Definition: InstrProf.h:692
gen
pgo instr gen
Definition: PGOInstrumentation.cpp:478
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3084
llvm::ValueProfileCollector::get
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
Definition: ValueProfileCollector.cpp:76
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:280
NoPGOWarnMismatchComdat
static cl::opt< bool > NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "functions."))
llvm::isFuncletEHPersonality
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
Definition: EHPersonalities.h:65
llvm::MDBuilder::createIrrLoopHeaderWeight
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:301
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
InstrumentAllFunctions
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
Definition: PGOInstrumentation.cpp:1604