LLVM  14.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 // Issue 3, pp 313-322
14 // The idea of the algorithm is based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotates the indirect call instructions.
32 //
33 // To get the precise counter information, these two passes need to be invoked at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37 // the profile is opened in module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44 // from PGOEdge and BBInfo, respectively. They contain extra data structures
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
51 #include "CFGMST.h"
52 #include "ValueProfileCollector.h"
53 #include "llvm/ADT/APInt.h"
54 #include "llvm/ADT/ArrayRef.h"
55 #include "llvm/ADT/MapVector.h"
56 #include "llvm/ADT/STLExtras.h"
57 #include "llvm/ADT/SmallVector.h"
58 #include "llvm/ADT/Statistic.h"
59 #include "llvm/ADT/StringRef.h"
60 #include "llvm/ADT/Triple.h"
61 #include "llvm/ADT/Twine.h"
62 #include "llvm/ADT/iterator.h"
66 #include "llvm/Analysis/CFG.h"
68 #include "llvm/Analysis/LoopInfo.h"
71 #include "llvm/IR/Attributes.h"
72 #include "llvm/IR/BasicBlock.h"
73 #include "llvm/IR/CFG.h"
74 #include "llvm/IR/Comdat.h"
75 #include "llvm/IR/Constant.h"
76 #include "llvm/IR/Constants.h"
77 #include "llvm/IR/DiagnosticInfo.h"
78 #include "llvm/IR/Dominators.h"
79 #include "llvm/IR/Function.h"
80 #include "llvm/IR/GlobalAlias.h"
81 #include "llvm/IR/GlobalValue.h"
82 #include "llvm/IR/GlobalVariable.h"
83 #include "llvm/IR/IRBuilder.h"
84 #include "llvm/IR/InstVisitor.h"
85 #include "llvm/IR/InstrTypes.h"
86 #include "llvm/IR/Instruction.h"
87 #include "llvm/IR/Instructions.h"
88 #include "llvm/IR/IntrinsicInst.h"
89 #include "llvm/IR/Intrinsics.h"
90 #include "llvm/IR/LLVMContext.h"
91 #include "llvm/IR/MDBuilder.h"
92 #include "llvm/IR/Module.h"
93 #include "llvm/IR/PassManager.h"
94 #include "llvm/IR/ProfileSummary.h"
95 #include "llvm/IR/Type.h"
96 #include "llvm/IR/Value.h"
97 #include "llvm/InitializePasses.h"
98 #include "llvm/Pass.h"
102 #include "llvm/Support/CRC.h"
103 #include "llvm/Support/Casting.h"
106 #include "llvm/Support/Debug.h"
107 #include "llvm/Support/Error.h"
114 #include <algorithm>
115 #include <cassert>
116 #include <cstdint>
117 #include <memory>
118 #include <numeric>
119 #include <string>
120 #include <unordered_map>
121 #include <utility>
122 #include <vector>
123 
124 using namespace llvm;
127 
128 #define DEBUG_TYPE "pgo-instrumentation"
129 // Pass statistics. All counters except NumOfPGOICall have a NumOfCSPGO* twin for CSPGO.
130 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
131 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
132 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
133 STATISTIC(NumOfPGOEdge, "Number of edges.");
134 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
135 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
136 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
137 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
138 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
139 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
140 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
141 STATISTIC(NumOfCSPGOSelectInsts,
142  "Number of select instruction instrumented in CSPGO.");
143 STATISTIC(NumOfCSPGOMemIntrinsics,
144  "Number of mem intrinsics instrumented in CSPGO.");
145 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
146 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
147 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
148 STATISTIC(NumOfCSPGOFunc,
149  "Number of functions having valid profile counts in CSPGO.");
150 STATISTIC(NumOfCSPGOMismatch,
151  "Number of functions having mismatch profile in CSPGO.");
152 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
153 
154 // Command line option to specify the file to read the profile from. This is
155 // mainly used for testing. Hidden; defaults to the empty string.
157  PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
158  cl::value_desc("filename"),
159  cl::desc("Specify the path of profile data file. This is "
160  "mainly for test purpose."));
162  "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, // hidden, empty by default
163  cl::value_desc("filename"),
164  cl::desc("Specify the path of profile remapping file. This is mainly for "
165  "test purpose."));
166 
167 // Command line option to disable value profiling. The default is false,
168 // i.e. value profiling is enabled by default. This is for debugging purposes.
169 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
170  cl::Hidden,
171  cl::desc("Disable Value Profiling"));
172 
173 // Command line option to set the maximum number of VP annotations to write to
174 // the metadata for a single indirect call callsite. Defaults to 3.
176  "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
177  cl::desc("Max number of annotations for a single indirect "
178  "call callsite"));
179 
180 // Command line option to set the maximum number of value annotations
181 // to write to the metadata for a single memop intrinsic. Defaults to 4.
183  "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
184  cl::desc("Max number of precise value annotations for a single memop "
185  "intrinsic"));
186 
187 // Command line option to control appending FunctionHash to the name of a COMDAT
188 // function. This is to avoid the hash mismatch caused by the preinliner.
190  "do-comdat-renaming", cl::init(false), cl::Hidden, // defaults to false
191  cl::desc("Append function hash to the name of COMDAT function to avoid "
192  "function hash mismatch due to the preinliner"));
193 
194 // Command line option to enable/disable the warning about missing profile
195 // information. The option defaults to false.
196 static cl::opt<bool>
197  PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
198  cl::desc("Use this option to turn on/off "
199  "warnings about missing profile data for "
200  "functions."));
201 
202 namespace llvm {
203 // Command line option to enable/disable the warning about a hash mismatch in
204 // the profile data. The option defaults to false.
206  NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
207  cl::desc("Use this option to turn off/on "
208  "warnings about profile cfg mismatch."));
209 } // namespace llvm
210 
211 // Command line option to enable/disable the warning about a hash mismatch in
212 // the profile data for Comdat functions, which often turns out to be a false
213 // positive due to pre-instrumentation inlining. The option defaults to true.
214 static cl::opt<bool>
215  NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
216  cl::Hidden,
217  cl::desc("The option is used to turn on/off "
218  "warnings about hash mismatch for comdat "
219  "functions."));
220 
221 // Command line option to enable/disable select instruction instrumentation.
// Enabled by default.
222 static cl::opt<bool>
223  PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
224  cl::desc("Use this option to turn on/off SELECT "
225  "instruction instrumentation. "));
226 
227 // Command line option to turn on CFG dot or text dump of raw profile counts.
// Accepted values are "none", "graph" and "text" (PGOVCT_None/Graph/Text).
// NOTE(review): the help text calls this "a boolean option" although it takes
// one of three enumerated values — consider rewording the description.
229  "pgo-view-raw-counts", cl::Hidden,
230  cl::desc("A boolean option to show CFG dag or text "
231  "with raw profile counts from "
232  "profile data. See also option "
233  "-pgo-view-counts. To limit graph "
234  "display to only one function, use "
235  "filtering option -view-bfi-func-name."),
236  cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
237  clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
238  clEnumValN(PGOVCT_Text, "text", "show in text.")));
239 
240 // Command line option to enable/disable memop intrinsic call size profiling.
// Enabled by default.
241 static cl::opt<bool>
242  PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
243  cl::desc("Use this option to turn on/off "
244  "memory intrinsic size profiling."));
245 
246 // Emit branch probability as optimization remarks. Disabled by default.
247 static cl::opt<bool>
248  EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
249  cl::desc("When this option is on, the annotated "
250  "branch probability will be emitted as "
251  "optimization remarks: -{Rpass|"
252  "pass-remarks}=pgo-instrumentation"));
253 // Command line option to force instrumentation of the function entry basic block (off by default).
255  "pgo-instrument-entry", cl::init(false), cl::Hidden,
256  cl::desc("Force to instrument function entry basicblock."));
257 // Command line option to fix the function entry count in profile use (on by default).
258 static cl::opt<bool>
259  PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
260  cl::desc("Fix function entry count in profile use."));
261 // Command line option to report BFI counts whose hot/cold classification differs from the raw profile count (off by default).
263  "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
264  cl::desc("Print out the non-match BFI count if a hot raw profile count "
265  "becomes non-hot, or a cold raw profile count becomes hot. "
266  "The print is enabled under -Rpass-analysis=pgo, or "
267  "internal option -pass-remarks-analysis=pgo."));
268 // Command line option to print mismatched BFI counts after profile metadata has been set (off by default).
270  "pgo-verify-bfi", cl::init(false), cl::Hidden,
271  cl::desc("Print out mismatched BFI counts after setting profile metadata. "
272  "The print is enabled under -Rpass-analysis=pgo, or "
273  "internal option -pass-remarks-analysis=pgo."));
274 // Command line option giving the percentage threshold used by pgo-verify-bfi (default 5).
276  "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
277  cl::desc("Set the threshold for pgo-verify-bfi -- only print out "
278  "mismatched BFI if the difference percentage is greater than "
279  "this value (in percentage)."));
280 // Command line option giving the profile count cutoff below which pgo-verify-bfi skips a count (default 1).
282  "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
283  cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
284  "profile count value is below."));
285 
286 namespace llvm {
287 // Command line option to turn on CFG dot dump after profile annotation.
288 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
// NOTE(review): the declaration this comment describes is not visible in this listing.
290 
291 // Command line option to specify the name of the function for CFG dump
292 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
// NOTE(review): the declaration this comment describes is not visible in this listing.
294 } // namespace llvm
295 // Command line option to select the old CFG function hashing in computeCFGHash() (off by default).
296 static cl::opt<bool>
297  PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
298  cl::desc("Use the old CFG function hashing"));
299 
300 // Return a string describing the branch condition that can be
301 // used in static branch probability heuristics:
//   "<icmp predicate name>_<printed type of operand 0>"
// followed, when operand 1 is a ConstantInt, by one of "_Zero", "_One",
// "_MinusOne" or "_Const". An empty string is returned when TI is not a
// conditional BranchInst or when its condition is not an ICmpInst.
302 static std::string getBranchCondString(Instruction *TI) {
303  BranchInst *BI = dyn_cast<BranchInst>(TI);
304  if (!BI || !BI->isConditional())
305  return std::string();
306 
307  Value *Cond = BI->getCondition();
308  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
309  if (!CI)
310  return std::string();
311 
312  std::string result;
// NOTE(review): `OS` is used below but its declaration is not visible in this
// listing (the embedded numbering skips 313); presumably a stream that writes
// into `result` — confirm against the full source.
314  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
315  CI->getOperand(0)->getType()->print(OS, true);
316 
317  Value *RHS = CI->getOperand(1);
318  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
319  if (CV) { // Classify a constant right-hand side; non-constants add no suffix.
320  if (CV->isZero())
321  OS << "_Zero";
322  else if (CV->isOne())
323  OS << "_One";
324  else if (CV->isMinusOne())
325  OS << "_MinusOne";
326  else
327  OS << "_Const";
328  }
329  OS.flush();
330  return result;
331 }
332 // Table of description strings: VALUE_PROF_KIND is defined to emit only its `Descr` argument.
333 static const char *ValueProfKindDescr[] = {
334 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
// NOTE(review): the line that expands VALUE_PROF_KIND (presumably an #include
// of the value-kind list) is not visible in this listing — confirm.
336 };
337 
338 namespace {
339 
340 /// The select instruction visitor plays three roles specified
341 /// by the mode. In \c VM_counting mode, it simply counts the number of
342 /// select instructions. In \c VM_instrument mode, it inserts code to count
343 /// the number of times TrueValue of select is taken. In \c VM_annotate mode,
344 /// it reads the profile data and annotates the select instruction with metadata.
345 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
346 class PGOUseFunc; // Forward declaration; SelectInstVisitor holds a pointer to it.
347 
348 /// Instruction Visitor class to visit select instructions.
/// The same object can be driven in each VisitMode through countSelects(),
/// instrumentSelects() and annotateSelects(); each entry point stores the
/// state its mode needs in the fields below and then calls visit(Func).
349 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
350  Function &F;
351  unsigned NSIs = 0; // Number of select instructions instrumented.
352  VisitMode Mode = VM_counting; // Visiting mode.
353  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
354  unsigned TotalNumCtrs = 0; // Total number of counters
355  GlobalVariable *FuncNameVar = nullptr; // PGO function name var (set by instrumentSelects()).
356  uint64_t FuncHash = 0; // Function hash (set by instrumentSelects()).
357  PGOUseFunc *UseFunc = nullptr; // Profile-use state (set by annotateSelects()).
358 
359  SelectInstVisitor(Function &Func) : F(Func) {}
360 
// Reset the select count and visit \p Func in VM_counting mode.
361  void countSelects(Function &Func) {
362  NSIs = 0;
363  Mode = VM_counting;
364  visit(Func);
365  }
366 
367  // Visit the IR stream and instrument all select instructions. \p
368  // Ind is a pointer to the counter index variable; \p TotalNC
369  // is the total number of counters; \p FNV is the pointer to the
370  // PGO function name var; \p FHash is the function hash.
371  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
372  GlobalVariable *FNV, uint64_t FHash) {
373  Mode = VM_instrument;
374  CurCtrIdx = Ind;
375  TotalNumCtrs = TotalNC;
376  FuncHash = FHash;
377  FuncNameVar = FNV;
378  visit(Func);
379  }
380 
381  // Visit the IR stream and annotate all select instructions. \p UF is the
// PGOUseFunc to use and \p Ind points to the counter index variable.
382  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
383  Mode = VM_annotate;
384  UseFunc = UF;
385  CurCtrIdx = Ind;
386  visit(Func);
387  }
388 
// Per-mode workers; only declared here.
389  void instrumentOneSelectInst(SelectInst &SI);
390  void annotateOneSelectInst(SelectInst &SI);
391 
392  // Visit \p SI instruction and perform tasks according to visit mode.
393  void visitSelectInst(SelectInst &SI);
394 
395  // Return the number of select instructions. This needs to be called after
396  // countSelects().
397  unsigned getNumOfSelectInsts() const { return NSIs; }
398 };
399 
400 /// ModulePass wrapper for PGO instrumentation generation; runOnModule() is only declared here.
401 class PGOInstrumentationGenLegacyPass : public ModulePass {
402 public:
403  static char ID;
404 
// \p IsCS selects context-sensitive instrumentation (default: false).
405  PGOInstrumentationGenLegacyPass(bool IsCS = false)
406  : ModulePass(ID), IsCS(IsCS) {
// NOTE(review): the constructor body is not visible in this listing (the
// embedded numbering skips 407-408).
409  }
410 
411  StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
412 
413 private:
414  // Whether this is context-sensitive instrumentation.
415  bool IsCS;
416  bool runOnModule(Module &M) override;
417 
418  void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the analysis requirements are not visible in this listing (the
// embedded numbering skips 419-420).
421  }
422 };
423 /// ModulePass wrapper for PGO profile use; runOnModule() is only declared here.
424 class PGOInstrumentationUseLegacyPass : public ModulePass {
425 public:
426  static char ID;
427 
428  // Provide the profile filename as the parameter. A non-empty
// -pgo-test-profile-file value overrides \p Filename.
429  PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
430  : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
431  if (!PGOTestProfileFile.empty())
432  ProfileFileName = PGOTestProfileFile;
// NOTE(review): the rest of the constructor body is not visible in this listing
// (the embedded numbering skips 433-434).
435  }
436 
437  StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
438 
439 private:
440  std::string ProfileFileName; // Path of the profile to read.
441  // Whether this is context-sensitive instrumentation use.
442  bool IsCS;
443 
444  bool runOnModule(Module &M) override;
445 
446  void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the analysis requirements are not visible in this listing (the
// embedded numbering skips 447-449).
450  }
451 };
452 /// ModulePass that creates the profile file name variable and the IR-level profile flag variable.
453 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
454 public:
455  static char ID;
456  StringRef getPassName() const override {
457  return "PGOInstrumentationGenCreateVarPass";
458  }
// \p CSInstrName is stored in InstrProfileOutput and later passed to
// createProfileFileNameVar().
459  PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
460  : ModulePass(ID), InstrProfileOutput(CSInstrName) {
// NOTE(review): the constructor body is not visible in this listing (the
// embedded numbering skips 461-462).
463  }
464 
465 private:
// Always returns false.
466  bool runOnModule(Module &M) override {
467  createProfileFileNameVar(M, InstrProfileOutput);
468  // The variable in a comdat may be discarded by LTO. Ensure the
469  // declaration will be retained.
// NOTE(review): the callee receiving the two arguments below is not visible in
// this listing (the embedded numbering skips 470).
471  M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
472  return false;
473  }
474  std::string InstrProfileOutput;
475 };
476 
477 } // end anonymous namespace
478 
480 // Registration of PGOInstrumentationGenLegacyPass under the name "pgo-instr-gen", followed by its factory.
481 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
482  "PGO instrumentation.", false, false)
486 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
488 
490  return new PGOInstrumentationGenLegacyPass(IsCS);
491 }
492 
494 // Registration of PGOInstrumentationUseLegacyPass under the name "pgo-instr-use", followed by its factory.
495 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
496  "Read PGO instrumentation profile.", false, false)
500 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
502 
504  bool IsCS) {
505  return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
506 }
507 
509 // Registration of PGOInstrumentationGenCreateVarLegacyPass under the name "pgo-instr-gen-create-var", followed by its factory.
510 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
511  "pgo-instr-gen-create-var",
512  "Create PGO instrumentation version variable for CSPGO.", false,
513  false)
514 
515 ModulePass *
517  return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName));
518 }
519 
520 namespace {
521 
522 /// A CFG edge plus the bookkeeping flags used by the MST-based PGO
523 /// instrumentation.
524 /// Weight defaults to 1; InMST, Removed and IsCritical all start out false
525 /// and are set later (for example, getInstrBB() marks a split edge Removed).
526 struct PGOEdge {
527  // This class implements the CFG edges. Note the CFG can be a multi-graph.
528  // So there might be multiple edges with same SrcBB and DestBB.
529  const BasicBlock *SrcBB;
530  const BasicBlock *DestBB;
531  uint64_t Weight;
532  bool InMST = false;
533  bool Removed = false;
534  bool IsCritical = false;
535 
536  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
537  : SrcBB(Src), DestBB(Dest), Weight(W) {}
538 
539  // Return the information string of an edge: "-" if Removed, "*" if not in
// the MST, "c" if critical (a space otherwise in each case), then " W=<Weight>".
540  std::string infoString() const {
541  return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
542  (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
543  }
544 };
545 
546 // This class stores the auxiliary information for each BB.
547 struct BBInfo {
548  BBInfo *Group; // Initially points at this object itself.
549  uint32_t Index; // Index supplied at construction; hashed by computeCFGHash().
550  uint32_t Rank = 0;
// NOTE(review): Group/Rank look like disjoint-set bookkeeping for the MST
// construction — confirm in CFGMST.h.
551 
552  BBInfo(unsigned IX) : Group(this), Index(IX) {}
553 
554  // Return the information string of this object ("Index=<Index>").
555  std::string infoString() const {
556  return (Twine("Index=") + Twine(Index)).str();
557  }
558 
559  // Empty function -- only applicable to UseBBInfo.
560  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
561 
562  // Empty function -- only applicable to UseBBInfo.
563  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
564 };
565 
566 // Per-function state shared by PGOInstrumentationGen and PGOInstrumentationUse.
// The constructor counts selects, collects value-profiling sites, computes the
// CFG hash, optionally renames a COMDAT function and updates the statistics.
// NOTE(review): members `VPC` and `MST` are used below but their declarations
// are not visible in this listing (the embedded numbering skips 577 and 592).
567 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
568 private:
569  Function &F;
570 
571  // Whether this is context-sensitive instrumentation.
572  bool IsCS;
573 
574  // A map that stores the Comdat group in function F.
575  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
576 
578 
579  void computeCFGHash();
580  void renameComdatFunction();
581 
582 public:
// Value-profiling candidates, indexed by value kind (sized IPVK_Last + 1).
583  std::vector<std::vector<VPCandidateInfo>> ValueSites;
584  SelectInstVisitor SIVisitor;
585  std::string FuncName;
// NOTE(review): only assigned when CreateGlobalVar is true and has no default
// initializer, so it is otherwise left uninitialized.
586  GlobalVariable *FuncNameVar;
587 
588  // CFG hash value for this function.
589  uint64_t FunctionHash = 0;
590 
591  // The Minimum Spanning Tree of function CFG.
593 
594  // Collect all the BBs that will be instrumented, and store them in
595  // InstrumentBBs.
596  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
597 
598  // Given an edge, find the BB that will be instrumented.
599  // Return nullptr if there is no BB to be instrumented.
600  BasicBlock *getInstrBB(Edge *E);
601 
602  // Return the auxiliary BB information.
603  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
604 
605  // Return the auxiliary BB information if available.
606  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
607 
608  // Dump edges and BB information.
609  void dumpInfo(std::string Str = "") const {
610  MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
611  Twine(FunctionHash) + "\t" + Str);
612  }
613 
// Build the per-function state for \p Func. \p CreateGlobalVar controls whether
// FuncNameVar is created; \p BPI, \p BFI and \p InstrumentFuncEntry are
// forwarded to the MST; \p IsCS selects the CSPGO statistics counters.
614  FuncPGOInstrumentation(
615  Function &Func, TargetLibraryInfo &TLI,
616  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
617  bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
618  BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
619  bool InstrumentFuncEntry = true)
620  : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
621  ValueSites(IPVK_Last + 1), SIVisitor(Func),
622  MST(F, InstrumentFuncEntry, BPI, BFI) {
623  // This should be done before CFG hash computation.
624  SIVisitor.countSelects(Func);
625  ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
// Indirect-call sites are collected only in the non-CS case.
626  if (!IsCS) {
627  NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
628  NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
629  NumOfPGOBB += MST.BBInfos.size();
630  ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
631  } else {
632  NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
633  NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
634  NumOfCSPGOBB += MST.BBInfos.size();
635  }
636 
637  FuncName = getPGOFuncName(F);
638  computeCFGHash();
639  if (!ComdatMembers.empty())
640  renameComdatFunction();
641  LLVM_DEBUG(dumpInfo("after CFGMST"));
642 
// Count the live edges, and those outside the MST (the ones to instrument).
643  for (auto &E : MST.AllEdges) {
644  if (E->Removed)
645  continue;
646  IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
647  if (!E->InMST)
648  IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
649  }
650 
651  if (CreateGlobalVar)
652  FuncNameVar = createPGOFuncNameVar(F, FuncName);
653  }
654 };
655 
656 } // end anonymous namespace
657 
658 // Compute the hash value for the CFG and store it in FunctionHash. JC is the
659 // CRC of the BBInfo index of every successor block; JCH (new hashing only) is
660 // the CRC of the numbers of selects, indirect calls, mem ops and edges.
// New format: FunctionHash = (JCH << 28) + JC. Old format
// (-pgo-instr-old-cfg-hashing): the select, indirect-call and edge counts are
// shifted to bits 56, 48 and 32 and OR'ed with JC. Bits 60-63 are then cleared.
661 template <class Edge, class BBInfo>
662 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
663  std::vector<uint8_t> Indexes;
664  JamCRC JC;
665  for (auto &BB : F) {
666  const Instruction *TI = BB.getTerminator();
667  for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
668  BasicBlock *Succ = TI->getSuccessor(I);
669  auto BI = findBBInfo(Succ);
670  if (BI == nullptr) // Successors without BBInfo do not contribute.
671  continue;
672  uint32_t Index = BI->Index;
673  for (int J = 0; J < 4; J++) // Append the index low byte first.
674  Indexes.push_back((uint8_t)(Index >> (J * 8)));
675  }
676  }
677  JC.update(Indexes);
678 
679  JamCRC JCH;
680  if (PGOOldCFGHashing) {
681  // Hash format for context sensitive profile. Reserve 4 bits for other
682  // information.
683  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
684  (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
685  //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
686  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
687  } else {
688  // JCH supplies the upper part of the hash.
689  auto updateJCH = [&JCH](uint64_t Num) {
690  uint8_t Data[8];
// NOTE(review): the statement that stores Num into Data is not visible in this
// listing (the embedded numbering skips 691).
692  JCH.update(Data);
693  };
694  updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
695  updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
696  updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
697  updateJCH((uint64_t)MST.AllEdges.size());
698 
699  // Hash format for context sensitive profile. Reserve 4 bits for other
700  // information.
701  FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
702  }
703 
704  // Reserve bit 60-63 for other information purpose.
705  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
706  if (IsCS)
// NOTE(review): the statement guarded by `if (IsCS)` is not visible in this
// listing (the embedded numbering skips 707) — confirm against the full source.
708  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
709  << " CRC = " << JC.getCRC()
710  << ", Selects = " << SIVisitor.getNumOfSelectInsts()
711  << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
712  << ValueSites[IPVK_IndirectCallTarget].size());
713  if (!PGOOldCFGHashing) {
714  LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
715  << ", High32 CRC = " << JCH.getCRC());
716  }
717  LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
718 }
719 
720 // Check if we can safely rename this Comdat function. Returns false unless
// -do-comdat-renaming is set and canRenameComdatFunc(F, true) holds, and also
// when any member recorded for F's Comdat in ComdatMembers is not F itself.
721 static bool canRenameComdat(
722  Function &F,
723  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
724  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
725  return false;
726 
727  // FIXME: Currently this only handles Comdat groups that contain a single
728  // function.
729  // (1) For a Comdat group containing multiple functions, we need to have a
730  // unique postfix based on the hashes for each function. There is a
731  // non-trivial code refactoring to do this efficiently.
732  // (2) Variables can not be renamed, so we can not rename Comdat function in a
733  // group including global vars.
734  Comdat *C = F.getComdat();
735  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
736  assert(!isa<GlobalAlias>(CM.second));
737  Function *FM = dyn_cast<Function>(CM.second); // Null for non-function members.
738  if (FM != &F)
739  return false;
740  }
741  return true;
742 }
743 
744 // Append the CFGHash to the Comdat function name. Does nothing unless
// canRenameComdat() allows it. F and FuncName both get ".<FunctionHash>"
// appended, and F is placed in a Comdat named after the new name.
745 template <class Edge, class BBInfo>
746 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
747  if (!canRenameComdat(F, ComdatMembers))
748  return;
749  std::string OrigName = F.getName().str();
750  std::string NewFuncName =
751  Twine(F.getName() + "." + Twine(FunctionHash)).str();
752  F.setName(Twine(NewFuncName));
// NOTE(review): OrigName is not used in the visible lines; the embedded
// numbering skips 753, where it is presumably consumed — confirm.
754  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
755  Comdat *NewComdat;
756  Module *M = F.getParent();
757  // For AvailableExternallyLinkage functions, change the linkage to
758  // LinkOnceODR and put them into comdat. This is because after renaming, there
759  // is no backup external copy available for the function.
// NOTE(review): the linkage checks/changes described above are not visible in
// this listing (the embedded numbering skips 761 and 763).
760  if (!F.hasComdat()) {
762  NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
764  F.setComdat(NewComdat);
765  return;
766  }
767 
768  // This function belongs to a single function Comdat group.
769  Comdat *OrigComdat = F.getComdat();
770  std::string NewComdatName =
771  Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
772  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
773  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
774 
// Move every recorded member of the original Comdat into the new one.
775  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
776  // Must be a function.
777  cast<Function>(CM.second)->setComdat(NewComdat);
778  }
779 }
780 
781 // Collect all the BBs that will be instrumented and return them in
782 // InstrumentBBs, then set up InEdges/OutEdges for UseBBInfo.
783 template <class Edge, class BBInfo>
784 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
785  std::vector<BasicBlock *> &InstrumentBBs) {
786  // Use a worklist as we will update the vector during the iteration
// (getInstrBB() may add edges to the MST when it splits a critical edge).
787  std::vector<Edge *> EdgeList;
788  EdgeList.reserve(MST.AllEdges.size());
789  for (auto &E : MST.AllEdges)
790  EdgeList.push_back(E.get());
791 
792  for (auto &E : EdgeList) {
793  BasicBlock *InstrBB = getInstrBB(E);
794  if (InstrBB) // nullptr means there is nothing to instrument for this edge.
795  InstrumentBBs.push_back(InstrBB);
796  }
797 
798  // Set up InEdges/OutEdges for all BBs, skipping removed edges.
799  for (auto &E : MST.AllEdges) {
800  if (E->Removed)
801  continue;
802  const BasicBlock *SrcBB = E->SrcBB;
803  const BasicBlock *DestBB = E->DestBB;
804  BBInfo &SrcInfo = getBBInfo(SrcBB);
805  BBInfo &DestInfo = getBBInfo(DestBB);
806  SrcInfo.addOutEdge(E.get());
807  DestInfo.addInEdge(E.get());
808  }
809 }
810 
811 // Given a CFG edge E to be instrumented, find which BB to place the instrumented
812 // code. The function will split the critical edge if necessary. Returns
// nullptr when E is in the MST or removed, when the chosen block has no
// insertion point, or when a critical edge cannot be split.
// NOTE(review): the function signature is not visible in this listing (the
// embedded numbering skips 814); the class declares it as
// `BasicBlock *getInstrBB(Edge *E)`.
813 template <class Edge, class BBInfo>
815  if (E->InMST || E->Removed)
816  return nullptr;
817 
818  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
819  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
820  // For a fake edge, instrument the real BB.
821  if (SrcBB == nullptr)
822  return DestBB;
823  if (DestBB == nullptr)
824  return SrcBB;
825 
826  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
827  // There are basic blocks (such as catchswitch) that cannot be instrumented.
828  // If the returned first insertion point is the end of BB, skip this BB.
829  if (BB->getFirstInsertionPt() == BB->end())
830  return nullptr;
831  return BB;
832  };
833 
834  // Instrument the SrcBB if it has a single successor,
835  // otherwise, the DestBB if this is not a critical edge.
836  Instruction *TI = SrcBB->getTerminator();
837  if (TI->getNumSuccessors() <= 1)
838  return canInstrument(SrcBB);
839  if (!E->IsCritical)
840  return canInstrument(DestBB);
841 
842  // Some IndirectBr critical edges cannot be split by the previous
843  // SplitIndirectBrCriticalEdges call. Bail out.
844  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
845  BasicBlock *InstrBB =
846  isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
847  if (!InstrBB) {
848  LLVM_DEBUG(
849  dbgs() << "Fail to split critical edge: not instrument this edge.\n");
850  return nullptr;
851  }
852  // For a critical edge, we have to split. Instrument the newly
853  // created BB.
854  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
855  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
856  << " --> " << getBBInfo(DestBB).Index << "\n");
857  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
858  MST.addEdge(SrcBB, InstrBB, 0);
859  // Second one: Add new edge of InstrBB->DestBB. It is marked as in the MST,
// and the original edge E is marked as removed.
860  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
861  NewEdge1.InMST = true;
862  E->Removed = true;
863 
864  return canInstrument(InstrBB);
865 }
866 
867 // When generating value profiling calls on Windows routines that make use of
868 // handler funclets for exception processing an operand bundle needs to be attached
869 // to the called function. This routine will set \p OpBundles to contain the
870 // funclet information, if any is needed, that should be placed on the generated
871 // value profiling call for the value profile candidate call.
// NOTE(review): the function name and parameter list are not visible in this
// listing (the embedded numbering skips 873-875); the body uses `Cand`,
// `BlockColors` and `OpBundles`.
872 static void
876  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
877  if (OrigCall && !isa<IntrinsicInst>(OrigCall)) {
878  // The instrumentation call should belong to the same funclet as a
879  // non-intrinsic call, so just copy the operand bundle, if any exists.
880  Optional<OperandBundleUse> ParentFunclet =
881  OrigCall->getOperandBundle(LLVMContext::OB_funclet);
882  if (ParentFunclet)
883  OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
884  } else {
885  // Intrinsics or other instructions do not get funclet information from the
886  // front-end. Need to use the BlockColors that was computed by the routine
887  // colorEHFunclets to determine whether a funclet is needed.
// NOTE(review): this branch is also taken when the dyn_cast above returned
// null, in which case OrigCall->getParent() dereferences a null pointer —
// confirm that every candidate's AnnotatedInst is a CallBase. The result of
// BlockColors.find() is likewise used without a check against end().
888  if (!BlockColors.empty()) {
889  const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
890  assert(CV.size() == 1 && "non-unique color for block!");
891  Instruction *EHPad = CV.front()->getFirstNonPHI();
892  if (EHPad->isEHPad())
893  OpBundles.emplace_back("funclet", EHPad);
894  }
895  }
896 }
897 
898 // Visit all edges and instrument the edges not in MST, and do value profiling.
899 // Critical edges will be split.
900 static void instrumentOneFunc(
903  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
904  bool IsCS) {
905  // Split indirectbr critical edges here before computing the MST rather than
906  // later in getInstrBB() to avoid invalidating it.
908 
909  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
910  F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
911  std::vector<BasicBlock *> InstrumentBBs;
912  FuncInfo.getInstrumentBBs(InstrumentBBs);
913  unsigned NumCounters =
914  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
915 
916  uint32_t I = 0;
917  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
918  for (auto *InstrBB : InstrumentBBs) {
919  IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
920  assert(Builder.GetInsertPoint() != InstrBB->end() &&
921  "Cannot get the Instrumentation point");
922  Builder.CreateCall(
923  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
924  {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
925  Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
926  Builder.getInt32(I++)});
927  }
928 
929  // Now instrument select instructions:
930  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
931  FuncInfo.FunctionHash);
932  assert(I == NumCounters);
933 
935  return;
936 
937  NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
938 
939  // Intrinsic function calls do not have funclet operand bundles needed for
940  // Windows exception handling attached to them. However, if value profiling is
941  // inserted for one of these calls, then a funclet value will need to be set
942  // on the instrumentation call based on the funclet coloring.
944  if (F.hasPersonalityFn() &&
945  isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
946  BlockColors = colorEHFunclets(F);
947 
948  // For each VP Kind, walk the VP candidates and instrument each one.
949  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
950  unsigned SiteIndex = 0;
951  if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
952  continue;
953 
954  for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
955  LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
956  << " site: CallSite Index = " << SiteIndex << "\n");
957 
958  IRBuilder<> Builder(Cand.InsertPt);
959  assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
960  "Cannot get the Instrumentation point");
961 
962  Value *ToProfile = nullptr;
963  if (Cand.V->getType()->isIntegerTy())
964  ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
965  else if (Cand.V->getType()->isPointerTy())
966  ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
967  assert(ToProfile && "value profiling Value is of unexpected type");
968 
970  populateEHOperandBundle(Cand, BlockColors, OpBundles);
971  Builder.CreateCall(
972  Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
973  {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
974  Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
975  Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
976  OpBundles);
977  }
978  } // IPVK_First <= Kind <= IPVK_Last
979 }
980 
981 namespace {
982 
983 // This class represents a CFG edge in profile use compilation.
984 struct PGOUseEdge : public PGOEdge {
985  bool CountValid = false;
986  uint64_t CountValue = 0;
987 
988  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
989  : PGOEdge(Src, Dest, W) {}
990 
991  // Set edge count value
992  void setEdgeCount(uint64_t Value) {
993  CountValue = Value;
994  CountValid = true;
995  }
996 
997  // Return the information string for this object.
998  std::string infoString() const {
999  if (!CountValid)
1000  return PGOEdge::infoString();
1001  return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
1002  .str();
1003  }
1004 };
1005 
1006 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1007 
1008 // This class stores the auxiliary information for each BB.
1009 struct UseBBInfo : public BBInfo {
1010  uint64_t CountValue = 0;
1011  bool CountValid;
1012  int32_t UnknownCountInEdge = 0;
1013  int32_t UnknownCountOutEdge = 0;
1014  DirectEdges InEdges;
1015  DirectEdges OutEdges;
1016 
1017  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
1018 
1019  UseBBInfo(unsigned IX, uint64_t C)
1020  : BBInfo(IX), CountValue(C), CountValid(true) {}
1021 
1022  // Set the profile count value for this BB.
1023  void setBBInfoCount(uint64_t Value) {
1024  CountValue = Value;
1025  CountValid = true;
1026  }
1027 
1028  // Return the information string of this object.
1029  std::string infoString() const {
1030  if (!CountValid)
1031  return BBInfo::infoString();
1032  return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
1033  }
1034 
1035  // Add an OutEdge and update the edge count.
1036  void addOutEdge(PGOUseEdge *E) {
1037  OutEdges.push_back(E);
1038  UnknownCountOutEdge++;
1039  }
1040 
1041  // Add an InEdge and update the edge count.
1042  void addInEdge(PGOUseEdge *E) {
1043  InEdges.push_back(E);
1044  UnknownCountInEdge++;
1045  }
1046 };
1047 
1048 } // end anonymous namespace
1049 
1050 // Sum up the count values for all the edges.
// Edges marked Removed are skipped; every other edge contributes its
// CountValue regardless of whether that count has been set.
// NOTE(review): the signature line (original line 1051) is not present in
// this listing; Edges is presumably the only parameter -- confirm.
1052  uint64_t Total = 0;
1053  for (auto &E : Edges) {
// Removed edges do not take part in the sum.
1054  if (E->Removed)
1055  continue;
1056  Total += E->CountValue;
1057  }
1058  return Total;
1059 }
1060 
1061 namespace {
1062 
1063 class PGOUseFunc {
1064 public:
1065  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1066  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1068  ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1069  : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1070  FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1071  InstrumentFuncEntry),
1072  FreqAttr(FFA_Normal), IsCS(IsCS) {}
1073 
1074  // Read counts for the instrumented BB from profile.
1075  bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1076  bool &AllMinusOnes);
1077 
1078  // Populate the counts for all BBs.
1079  void populateCounters();
1080 
1081  // Set the branch weights based on the count values.
1082  void setBranchWeights();
1083 
1084  // Annotate the value profile call sites for all value kind.
1085  void annotateValueSites();
1086 
1087  // Annotate the value profile call sites for one value kind.
1088  void annotateValueSites(uint32_t Kind);
1089 
1090  // Annotate the irreducible loop header weights.
1091  void annotateIrrLoopHeaderWeights();
1092 
1093  // The hotness of the function from the profile count.
1094  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1095 
1096  // Return the function hotness from the profile.
1097  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1098 
1099  // Return the function hash.
1100  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1101 
1102  // Return the profile record for this function;
1103  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1104 
1105  // Return the auxiliary BB information.
1106  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1107  return FuncInfo.getBBInfo(BB);
1108  }
1109 
1110  // Return the auxiliary BB information if available.
1111  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1112  return FuncInfo.findBBInfo(BB);
1113  }
1114 
1115  Function &getFunc() const { return F; }
1116 
1117  void dumpInfo(std::string Str = "") const {
1118  FuncInfo.dumpInfo(Str);
1119  }
1120 
1121  uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1122 private:
1123  Function &F;
1124  Module *M;
1126  ProfileSummaryInfo *PSI;
1127 
1128  // This member stores the shared information with class PGOGenFunc.
1129  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1130 
1131  // The maximum count value in the profile. This is only used in PGO use
1132  // compilation.
1133  uint64_t ProgramMaxCount;
1134 
1135  // Position of counter that remains to be read.
1136  uint32_t CountPosition = 0;
1137 
1138  // Total size of the profile count for this function.
1139  uint32_t ProfileCountSize = 0;
1140 
1141  // ProfileRecord for this function.
1142  InstrProfRecord ProfileRecord;
1143 
1144  // Function hotness info derived from profile.
1145  FuncFreqAttr FreqAttr;
1146 
1147  // Is to use the context sensitive profile.
1148  bool IsCS;
1149 
1150  // Find the Instrumented BB and set the value. Return false on error.
1151  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1152 
1153  // Set the edge counter value for the unknown edge -- there should be only
1154  // one unknown edge.
1155  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1156 
1157  // Return FuncName string;
1158  std::string getFuncName() const { return FuncInfo.FuncName; }
1159 
1160  // Set the hot/cold inline hints based on the count values.
1161  // FIXME: This function should be removed once the functionality in
1162  // the inliner is implemented.
1163  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1164  if (PSI->isHotCount(EntryCount))
1165  FreqAttr = FFA_Hot;
1166  else if (PSI->isColdCount(MaxCount))
1167  FreqAttr = FFA_Cold;
1168  }
1169 };
1170 
1171 } // end anonymous namespace
1172 
1173 // Visit all the edges and assign the count value for the instrumented
1174 // edges and the BB. Return false on error.
1175 bool PGOUseFunc::setInstrumentedCounts(
1176  const std::vector<uint64_t> &CountFromProfile) {
1177 
1178  std::vector<BasicBlock *> InstrumentBBs;
1179  FuncInfo.getInstrumentBBs(InstrumentBBs);
1180  unsigned NumCounters =
1181  InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1182  // The number of counters here should match the number of counters
1183  // in profile. Return if they mismatch.
1184  if (NumCounters != CountFromProfile.size()) {
1185  return false;
1186  }
1187  auto *FuncEntry = &*F.begin();
1188 
1189  // Set the profile count to the Instrumented BBs.
1190  uint32_t I = 0;
1191  for (BasicBlock *InstrBB : InstrumentBBs) {
1192  uint64_t CountValue = CountFromProfile[I++];
1193  UseBBInfo &Info = getBBInfo(InstrBB);
1194  // If we reach here, we know that we have some nonzero count
1195  // values in this function. The entry count should not be 0.
1196  // Fix it if necessary.
1197  if (InstrBB == FuncEntry && CountValue == 0)
1198  CountValue = 1;
1199  Info.setBBInfoCount(CountValue);
1200  }
1201  ProfileCountSize = CountFromProfile.size();
1202  CountPosition = I;
1203 
1204  // Set the edge count and update the count of unknown edges for BBs.
1205  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1206  E->setEdgeCount(Value);
1207  this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1208  this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1209  };
1210 
1211  // Set the profile count the Instrumented edges. There are BBs that not in
1212  // MST but not instrumented. Need to set the edge count value so that we can
1213  // populate the profile counts later.
1214  for (auto &E : FuncInfo.MST.AllEdges) {
1215  if (E->Removed || E->InMST)
1216  continue;
1217  const BasicBlock *SrcBB = E->SrcBB;
1218  UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1219 
1220  // If only one out-edge, the edge profile count should be the same as BB
1221  // profile count.
1222  if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1223  setEdgeCount(E.get(), SrcInfo.CountValue);
1224  else {
1225  const BasicBlock *DestBB = E->DestBB;
1226  UseBBInfo &DestInfo = getBBInfo(DestBB);
1227  // If only one in-edge, the edge profile count should be the same as BB
1228  // profile count.
1229  if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1230  setEdgeCount(E.get(), DestInfo.CountValue);
1231  }
1232  if (E->CountValid)
1233  continue;
1234  // E's count should have been set from profile. If not, this meenas E skips
1235  // the instrumentation. We set the count to 0.
1236  setEdgeCount(E.get(), 0);
1237  }
1238  return true;
1239 }
1240 
1241 // Set the count value for the unknown edge. There should be one and only one
1242 // unknown edge in Edges vector.
1243 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1244  for (auto &E : Edges) {
1245  if (E->CountValid)
1246  continue;
1247  E->setEdgeCount(Value);
1248 
1249  getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1250  getBBInfo(E->DestBB).UnknownCountInEdge--;
1251  return;
1252  }
1253  llvm_unreachable("Cannot find the unknown count edge");
1254 }
1255 
1256 // Emit function metadata indicating PGO profile mismatch.
1258  LLVMContext &ctx) {
1259  const char MetadataName[] = "instr_prof_hash_mismatch";
1261  // If this metadata already exists, ignore.
1262  auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1263  if (Existing) {
1264  MDTuple *Tuple = cast<MDTuple>(Existing);
1265  for (auto &N : Tuple->operands()) {
1266  if (cast<MDString>(N.get())->getString() == MetadataName)
1267  return;
1268  Names.push_back(N.get());
1269  }
1270  }
1271 
1272  MDBuilder MDB(ctx);
1273  Names.push_back(MDB.createString(MetadataName));
1274  MDNode *MD = MDTuple::get(ctx, Names);
1275  F.setMetadata(LLVMContext::MD_annotation, MD);
1276 }
1277 
1278 // Read the profile from ProfileFileName and assign the value to the
1279 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1280 // Return true if the profile is successfully read, and false on errors.
1281 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1282  bool &AllMinusOnes) {
1283  auto &Ctx = M->getContext();
1285  PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1286  if (Error E = Result.takeError()) {
1287  handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1288  auto Err = IPE.get();
1289  bool SkipWarning = false;
1290  LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1291  << FuncInfo.FuncName << ": ");
1292  if (Err == instrprof_error::unknown_function) {
1293  IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1294  SkipWarning = !PGOWarnMissing;
1295  LLVM_DEBUG(dbgs() << "unknown function");
1296  } else if (Err == instrprof_error::hash_mismatch ||
1297  Err == instrprof_error::malformed) {
1298  IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1299  SkipWarning =
1302  (F.hasComdat() ||
1303  F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1304  LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1305  // Emit function metadata indicating PGO profile mismatch.
1306  annotateFunctionWithHashMismatch(F, M->getContext());
1307  }
1308 
1309  LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1310  if (SkipWarning)
1311  return;
1312 
1313  std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1314  std::string(" Hash = ") +
1315  std::to_string(FuncInfo.FunctionHash);
1316 
1317  Ctx.diagnose(
1318  DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1319  });
1320  return false;
1321  }
1322  ProfileRecord = std::move(Result.get());
1323  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1324 
1325  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1326  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1327  AllMinusOnes = (CountFromProfile.size() > 0);
1328  uint64_t ValueSum = 0;
1329  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1330  LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1331  ValueSum += CountFromProfile[I];
1332  if (CountFromProfile[I] != (uint64_t)-1)
1333  AllMinusOnes = false;
1334  }
1335  AllZeros = (ValueSum == 0);
1336 
1337  LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1338 
1339  getBBInfo(nullptr).UnknownCountOutEdge = 2;
1340  getBBInfo(nullptr).UnknownCountInEdge = 2;
1341 
1342  if (!setInstrumentedCounts(CountFromProfile)) {
1343  LLVM_DEBUG(
1344  dbgs() << "Inconsistent number of counts, skipping this function");
1345  Ctx.diagnose(DiagnosticInfoPGOProfile(
1346  M->getName().data(),
1347  Twine("Inconsistent number of counts in ") + F.getName().str()
1348  + Twine(": the profile may be stale or there is a function name collision."),
1349  DS_Warning));
1350  return false;
1351  }
1352  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1353  return true;
1354 }
1355 
1356 // Populate the counters from instrumented BBs to all BBs.
1357 // In the end of this operation, all BBs should have a valid count value.
1358 void PGOUseFunc::populateCounters() {
1359  bool Changes = true;
1360  unsigned NumPasses = 0;
1361  while (Changes) {
1362  NumPasses++;
1363  Changes = false;
1364 
1365  // For efficient traversal, it's better to start from the end as most
1366  // of the instrumented edges are at the end.
1367  for (auto &BB : reverse(F)) {
1368  UseBBInfo *Count = findBBInfo(&BB);
1369  if (Count == nullptr)
1370  continue;
1371  if (!Count->CountValid) {
1372  if (Count->UnknownCountOutEdge == 0) {
1373  Count->CountValue = sumEdgeCount(Count->OutEdges);
1374  Count->CountValid = true;
1375  Changes = true;
1376  } else if (Count->UnknownCountInEdge == 0) {
1377  Count->CountValue = sumEdgeCount(Count->InEdges);
1378  Count->CountValid = true;
1379  Changes = true;
1380  }
1381  }
1382  if (Count->CountValid) {
1383  if (Count->UnknownCountOutEdge == 1) {
1384  uint64_t Total = 0;
1385  uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1386  // If the one of the successor block can early terminate (no-return),
1387  // we can end up with situation where out edge sum count is larger as
1388  // the source BB's count is collected by a post-dominated block.
1389  if (Count->CountValue > OutSum)
1390  Total = Count->CountValue - OutSum;
1391  setEdgeCount(Count->OutEdges, Total);
1392  Changes = true;
1393  }
1394  if (Count->UnknownCountInEdge == 1) {
1395  uint64_t Total = 0;
1396  uint64_t InSum = sumEdgeCount(Count->InEdges);
1397  if (Count->CountValue > InSum)
1398  Total = Count->CountValue - InSum;
1399  setEdgeCount(Count->InEdges, Total);
1400  Changes = true;
1401  }
1402  }
1403  }
1404  }
1405 
1406  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1407 #ifndef NDEBUG
1408  // Assert every BB has a valid counter.
1409  for (auto &BB : F) {
1410  auto BI = findBBInfo(&BB);
1411  if (BI == nullptr)
1412  continue;
1413  assert(BI->CountValid && "BB count is not valid");
1414  }
1415 #endif
1416  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1417  uint64_t FuncMaxCount = FuncEntryCount;
1418  for (auto &BB : F) {
1419  auto BI = findBBInfo(&BB);
1420  if (BI == nullptr)
1421  continue;
1422  FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1423  }
1424 
1425  // Fix the obviously inconsistent entry count.
1426  if (FuncMaxCount > 0 && FuncEntryCount == 0)
1427  FuncEntryCount = 1;
1428  F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1429  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1430 
1431  // Now annotate select instructions
1432  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1433  assert(CountPosition == ProfileCountSize);
1434 
1435  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1436 }
1437 
1438 // Assign the scaled count values to the BB with multiple out edges.
// For each eligible terminator the per-successor edge counts and their
// maximum are collected and handed to setProfMetadata.
// NOTE(review): no scaling happens in the visible code; presumably
// setProfMetadata performs it -- confirm. The signature line (original line
// 1439) is not present in this listing.
1440  // Generate MD_prof metadata for every branch instruction.
1441  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1442  << " IsCS=" << IsCS << "\n");
1443  for (auto &BB : F) {
1444  Instruction *TI = BB.getTerminator();
// Terminators with fewer than two successors are skipped.
1445  if (TI->getNumSuccessors() < 2)
1446  continue;
// Only branch, switch, indirectbr and invoke terminators are annotated.
1447  if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1448  isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
1449  continue;
1450 
// BBs with a zero count are skipped.
1451  if (getBBInfo(&BB).CountValue == 0)
1452  continue;
1453 
1454  // We have a non-zero Branch BB.
1455  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1456  unsigned Size = BBCountInfo.OutEdges.size();
// One slot per out-edge, indexed below by successor number.
// NOTE(review): assumes every SuccNum is smaller than OutEdges.size() -- verify.
1457  SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1458  uint64_t MaxCount = 0;
1459  for (unsigned s = 0; s < Size; s++) {
1460  const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1461  const BasicBlock *SrcBB = E->SrcBB;
1462  const BasicBlock *DestBB = E->DestBB;
// Edges without a destination block have no successor slot; skip them.
1463  if (DestBB == nullptr)
1464  continue;
1465  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1466  uint64_t EdgeCount = E->CountValue;
1467  if (EdgeCount > MaxCount)
1468  MaxCount = EdgeCount;
1469  EdgeCounts[SuccNum] = EdgeCount;
1470  }
1471  setProfMetadata(M, TI, EdgeCounts, MaxCount);
1472  }
1473 }
1474 
// Returns true if any predecessor of BB ends in an indirectbr instruction,
// false otherwise.
// NOTE(review): the signature line (original line 1475) is not present in
// this listing; this is presumably isIndirectBrTarget, which is called from
// annotateIrrLoopHeaderWeights -- confirm.
1476  for (BasicBlock *Pred : predecessors(BB)) {
1477  if (isa<IndirectBrInst>(Pred->getTerminator()))
1478  return true;
1479  }
1480  return false;
1481 }
1482 
1483 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1484  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1485  // Find irr loop headers
1486  for (auto &BB : F) {
1487  // As a heuristic also annotate indrectbr targets as they have a high chance
1488  // to become an irreducible loop header after the indirectbr tail
1489  // duplication.
1490  if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1491  Instruction *TI = BB.getTerminator();
1492  const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1493  setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1494  }
1495  }
1496 }
1497 
1498 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1499  Module *M = F.getParent();
1501  Type *Int64Ty = Builder.getInt64Ty();
1502  Type *I8PtrTy = Builder.getInt8PtrTy();
1503  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1504  Builder.CreateCall(
1505  Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1506  {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1507  Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1508  Builder.getInt32(*CurCtrIdx), Step});
1509  ++(*CurCtrIdx);
1510 }
1511 
1512 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1513  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1514  assert(*CurCtrIdx < CountFromProfile.size() &&
1515  "Out of bound access of counters");
1516  uint64_t SCounts[2];
1517  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1518  ++(*CurCtrIdx);
1519  uint64_t TotalCount = 0;
1520  auto BI = UseFunc->findBBInfo(SI.getParent());
1521  if (BI != nullptr)
1522  TotalCount = BI->CountValue;
1523  // False Count
1524  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1525  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1526  if (MaxCount)
1527  setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1528 }
1529 
1530 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1531  if (!PGOInstrSelect)
1532  return;
1533  // FIXME: do not handle this yet.
1534  if (SI.getCondition()->getType()->isVectorTy())
1535  return;
1536 
1537  switch (Mode) {
1538  case VM_counting:
1539  NSIs++;
1540  return;
1541  case VM_instrument:
1542  instrumentOneSelectInst(SI);
1543  return;
1544  case VM_annotate:
1545  annotateOneSelectInst(SI);
1546  return;
1547  }
1548 
1549  llvm_unreachable("Unknown visiting mode");
1550 }
1551 
1552 // Traverse all valuesites and annotate the instructions for all value kind.
1553 void PGOUseFunc::annotateValueSites() {
1555  return;
1556 
1557  // Create the PGOFuncName meta data.
1558  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1559 
1560  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1561  annotateValueSites(Kind);
1562 }
1563 
1564 // Annotate the instructions for a specific value kind.
1565 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1566  assert(Kind <= IPVK_Last);
1567  unsigned ValueSiteIndex = 0;
1568  auto &ValueSites = FuncInfo.ValueSites[Kind];
1569  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1570  if (NumValueSites != ValueSites.size()) {
1571  auto &Ctx = M->getContext();
1572  Ctx.diagnose(DiagnosticInfoPGOProfile(
1573  M->getName().data(),
1574  Twine("Inconsistent number of value sites for ") +
1576  Twine(" profiling in \"") + F.getName().str() +
1577  Twine("\", possibly due to the use of a stale profile."),
1578  DS_Warning));
1579  return;
1580  }
1581 
1582  for (VPCandidateInfo &I : ValueSites) {
1583  LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1584  << "): Index = " << ValueSiteIndex << " out of "
1585  << NumValueSites << "\n");
1586  annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1587  static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1588  Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1589  : MaxNumAnnotations);
1590  ValueSiteIndex++;
1591  }
1592 }
1593 
1594 // Collect the set of members for each Comdat in module M and store
1595 // in ComdatMembers.
// Functions, global variables and aliases that have a Comdat are each
// inserted as a (Comdat, member) pair. Nothing is collected when
// DoComdatRenaming is off.
// NOTE(review): the first signature line (original line 1596) is not present
// in this listing.
1597  Module &M,
1598  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
// The map is left untouched when comdat renaming is disabled.
1599  if (!DoComdatRenaming)
1600  return;
1601  for (Function &F : M)
1602  if (Comdat *C = F.getComdat())
1603  ComdatMembers.insert(std::make_pair(C, &F));
1604  for (GlobalVariable &GV : M.globals())
1605  if (Comdat *C = GV.getComdat())
1606  ComdatMembers.insert(std::make_pair(C, &GV));
1607  for (GlobalAlias &GA : M.aliases())
1608  if (Comdat *C = GA.getComdat())
1609  ComdatMembers.insert(std::make_pair(C, &GA));
1610 }
1611 
1613  Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1615  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1616  // For the context-sensitive instrumentation, we should have a separate pass
1617  // (before LTO/ThinLTO linking) to create these variables.
1618  if (!IsCS)
1620  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1621  collectComdatMembers(M, ComdatMembers);
1622 
1623  for (auto &F : M) {
1624  if (F.isDeclaration())
1625  continue;
1626  if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1627  continue;
1628  auto &TLI = LookupTLI(F);
1629  auto *BPI = LookupBPI(F);
1630  auto *BFI = LookupBFI(F);
1631  instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1632  }
1633  return true;
1634 }
1635 
1638  createProfileFileNameVar(M, CSInstrName);
1639  // The variable in a comdat may be discarded by LTO. Ensure the declaration
1640  // will be retained.
1643  return PreservedAnalyses::all();
1644 }
1645 
1646 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
1647  if (skipModule(M))
1648  return false;
1649 
1650  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
1651  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1652  };
1653  auto LookupBPI = [this](Function &F) {
1654  return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
1655  };
1656  auto LookupBFI = [this](Function &F) {
1657  return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1658  };
1659  return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
1660 }
1661 
1663  ModuleAnalysisManager &AM) {
1664  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1665  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1667  };
1668  auto LookupBPI = [&FAM](Function &F) {
1670  };
1671  auto LookupBFI = [&FAM](Function &F) {
1673  };
1674 
1675  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1676  return PreservedAnalyses::all();
1677 
1678  return PreservedAnalyses::none();
1679 }
1680 
1681 // Using the ratio b/w sums of profile count values and BFI count values to
1682 // adjust the func entry count.
1683 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1684  BranchProbabilityInfo &NBPI) {
1685  Function &F = Func.getFunc();
1686  BlockFrequencyInfo NBFI(F, NBPI, LI);
1687 #ifndef NDEBUG
1688  auto BFIEntryCount = F.getEntryCount();
1689  assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
1690  "Invalid BFI Entrycount");
1691 #endif
1692  auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1693  auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1694  for (auto &BBI : F) {
1695  uint64_t CountValue = 0;
1696  uint64_t BFICountValue = 0;
1697  if (!Func.findBBInfo(&BBI))
1698  continue;
1699  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1700  CountValue = Func.getBBInfo(&BBI).CountValue;
1701  BFICountValue = BFICount.getValue();
1702  SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1703  SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1704  }
1705  if (SumCount.isZero())
1706  return;
1707 
1708  assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1709  "Incorrect sum of BFI counts");
1710  if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1711  return;
1712  double Scale = (SumCount / SumBFICount).convertToDouble();
1713  if (Scale < 1.001 && Scale > 0.999)
1714  return;
1715 
1716  uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1717  uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1718  if (NewEntryCount == 0)
1719  NewEntryCount = 1;
1720  if (NewEntryCount != FuncEntryCount) {
1721  F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1722  LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1723  << ", entry_count " << FuncEntryCount << " --> "
1724  << NewEntryCount << "\n");
1725  }
1726 }
1727 
1728 // Compare the profile count values with BFI count values, and print out
1729 // the non-matching ones.
//
// For every basic block of Func's function this compares the raw profile
// count stored in Func's per-block info (used only when CountValid is set,
// otherwise treated as 0) against the count reported by a BlockFrequencyInfo
// freshly built here from NBPI and LI. Each mismatching block produces one
// "bfi-verify" remark; if at least one block mismatched, a per-function
// summary remark is emitted as well.
//
// Two comparison modes exist, selected by PGOVerifyHotBFI:
//  * hot-only: report blocks whose raw count is >= HotCountThreshold while the
//    BFI count is not, or whose raw count is <= ColdCountThreshold while the
//    BFI count is >= HotCountThreshold.
//  * otherwise: ignore blocks where both counts are below PGOVerifyBFICutoff,
//    then report blocks whose absolute difference is not below the
//    ratio-derived threshold.
1730 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1731  BranchProbabilityInfo &NBPI,
1732  uint64_t HotCountThreshold,
// NOTE(review): listing line 1733 is missing from this extraction. The body
// reads ColdCountThreshold and the caller passes it as a fifth argument, so
// presumably the final parameter and the opening brace were here -- confirm
// against the real source.
1734  Function &F = Func.getFunc();
1735  BlockFrequencyInfo NBFI(F, NBPI, LI);
1736  // bool PrintFunc = false;
1737  bool HotBBOnly = PGOVerifyHotBFI;
// Msg is only assigned in hot-only mode; in the other mode it stays empty and
// the per-block remark omits the parenthesised suffix.
1738  std::string Msg;
// NOTE(review): listing line 1739 is missing. ORE, used below, is not declared
// in any visible line -- presumably it was constructed here; confirm.
1740 
1741  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1742  for (auto &BBI : F) {
1743  uint64_t CountValue = 0;
1744  uint64_t BFICountValue = 0;
1745 
// Raw profile count; left at 0 when the block's count is not marked valid.
1746  if (Func.getBBInfo(&BBI).CountValid)
1747  CountValue = Func.getBBInfo(&BBI).CountValue;
1748 
1749  BBNum++;
1750  if (CountValue)
1751  NonZeroBBNum++;
// BFI-derived count; left at 0 when BFI has no profile count for the block.
1752  auto BFICount = NBFI.getBlockProfileCount(&BBI);
1753  if (BFICount)
1754  BFICountValue = BFICount.getValue();
1755 
1756  if (HotBBOnly) {
1757  bool rawIsHot = CountValue >= HotCountThreshold;
1758  bool BFIIsHot = BFICountValue >= HotCountThreshold;
1759  bool rawIsCold = CountValue <= ColdCountThreshold;
1760  bool ShowCount = false;
1761  if (rawIsHot && !BFIIsHot) {
1762  Msg = "raw-Hot to BFI-nonHot";
1763  ShowCount = true;
1764  } else if (rawIsCold && BFIIsHot) {
1765  Msg = "raw-Cold to BFI-Hot";
1766  ShowCount = true;
1767  }
// Only hotness transitions are interesting in this mode.
1768  if (!ShowCount)
1769  continue;
1770  } else {
// Both counts too small to matter.
1771  if ((CountValue < PGOVerifyBFICutoff) &&
1772  (BFICountValue < PGOVerifyBFICutoff))
1773  continue;
1774  uint64_t Diff = (BFICountValue >= CountValue)
1775  ? BFICountValue - CountValue
1776  : CountValue - BFICountValue;
// NOTE(review): CountValue / 100 is an integer division, so for
// CountValue < 100 the threshold is 0 and this unsigned comparison can never
// be true; such blocks are then counted as mismatches even when Diff == 0.
// Confirm whether that is intended.
1777  if (Diff < CountValue / 100 * PGOVerifyBFIRatio)
1778  continue;
1779  }
1780  BBMisMatchNum++;
1781 
1782  ORE.emit([&]() {
// NOTE(review): listing line 1783 is missing. Remark is not declared in any
// visible line; the two arguments on the next line are presumably the tail of
// its constructor call -- confirm.
1784  F.getSubprogram(), &BBI);
1785  Remark << "BB " << ore::NV("Block", BBI.getName())
1786  << " Count=" << ore::NV("Count", CountValue)
1787  << " BFI_Count=" << ore::NV("Count", BFICountValue);
1788  if (!Msg.empty())
1789  Remark << " (" << Msg << ")";
1790  return Remark;
1791  });
1792  }
// Per-function summary, emitted only when at least one block mismatched.
1793  if (BBMisMatchNum)
1794  ORE.emit([&]() {
1795  return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1796  F.getSubprogram(), &F.getEntryBlock())
1797  << "In Func " << ore::NV("Function", F.getName())
1798  << ": Num_of_BB=" << ore::NV("Count", BBNum)
1799  << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1800  << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1801  });
1802 }
1803 
// NOTE(review): the listing line holding this function's return type and name
// (1804) is missing from this extraction. The call sites below pass exactly
// this argument list to annotateAllFunctions() and use the result as a bool,
// so this is presumably its definition -- confirm against the real source.
//
// Reads the indexed profile named by ProfileFileName (with optional
// remapping file), installs the profile summary on the module, and annotates
// every function definition in M with the profile data. Hot/cold function
// attributes are collected during the walk and applied only at the end.
//
// Returns false when the reader cannot be created, when the reader is null,
// when IsCS is set but the profile has no CSIR-level data, or when the profile
// is not an IR-level profile. Returns true otherwise.
1805  Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1806  function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
// NOTE(review): listing line 1807 is missing; LookupBPI is used below but not
// declared in any visible line, so presumably its parameter was here.
1808  function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1809  ProfileSummaryInfo *PSI, bool IsCS) {
1810  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1811  auto &Ctx = M.getContext();
1812  // Read the counter array from file.
1813  auto ReaderOrErr =
1814  IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
// Reader creation failed: report each contained error as a PGO profile
// diagnostic and give up.
1815  if (Error E = ReaderOrErr.takeError()) {
1816  handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1817  Ctx.diagnose(
1818  DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1819  });
1820  return false;
1821  }
1822 
1823  std::unique_ptr<IndexedInstrProfReader> PGOReader =
1824  std::move(ReaderOrErr.get());
1825  if (!PGOReader) {
1826  Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1827  StringRef("Cannot get PGOReader")));
1828  return false;
1829  }
// A context-sensitive use pass has nothing to do if the profile carries no
// CSIR-level data; this exit is silent (no diagnostic).
1830  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1831  return false;
1832 
1833  // TODO: might need to change the warning once the clang option is finalized.
1834  if (!PGOReader->isIRLevelProfile()) {
1835  Ctx.diagnose(DiagnosticInfoPGOProfile(
1836  ProfileFileName.data(), "Not an IR level instrumentation profile"));
1837  return false;
1838  }
1839 
1840  // Add the profile summary (read from the header of the indexed summary) here
1841  // so that we can use it below when reading counters (which checks if the
1842  // function should be marked with a cold or inlinehint attribute).
1843  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
// NOTE(review): listing lines 1844-1845 are missing; the remaining
// setProfileSummary argument(s) and the closing of the call are not visible.
1846  PSI->refresh();
1847 
1848  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1849  collectComdatMembers(M, ComdatMembers);
// Functions to be tagged InlineHint / Cold once all functions are annotated.
1850  std::vector<Function *> HotFunctions;
1851  std::vector<Function *> ColdFunctions;
1852 
1853  // If the profile is marked to always instrument the entry BB, do the
1854  // same. Note this can be overridden by the internal option in CFGMST.h
1855  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
// NOTE(review): listing line 1856 is missing; presumably a condition guarding
// the assignment below (as written here it would be unconditional) -- confirm.
1857  InstrumentFuncEntry = PGOInstrumentEntry;
1858  for (auto &F : M) {
1859  if (F.isDeclaration())
1860  continue;
1861  auto &TLI = LookupTLI(F);
1862  auto *BPI = LookupBPI(F);
1863  auto *BFI = LookupBFI(F);
1864  // Split indirectbr critical edges here before computing the MST rather than
1865  // later in getInstrBB() to avoid invalidating it.
// NOTE(review): listing line 1866 is missing; the edge-splitting call that the
// comment above describes is not visible.
1867  PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
1868  InstrumentFuncEntry);
1869  // When AllMinusOnes is true, it means the profile for the function
1870  // is unrepresentative and this function is actually hot. Set the
1871  // entry count of the function to be multiple times of hot threshold
1872  // and drop all its internal counters.
1873  bool AllMinusOnes = false;
1874  bool AllZeros = false;
// readCounters failing means this function is skipped entirely.
1875  if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
1876  continue;
// All counters are zero: record a zero entry count, and queue the function as
// cold only when the program-wide maximum count is non-zero.
1877  if (AllZeros) {
1878  F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1879  if (Func.getProgramMaxCount() != 0)
1880  ColdFunctions.push_back(&F);
1881  continue;
1882  }
1883  const unsigned MultiplyFactor = 3;
1884  if (AllMinusOnes) {
1885  uint64_t HotThreshold = PSI->getHotCountThreshold();
// The entry count is only set when the threshold is non-zero, but the function
// is queued as hot either way (the push_back is not under the `if`).
1886  if (HotThreshold)
1887  F.setEntryCount(
1888  ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
1889  HotFunctions.push_back(&F);
1890  continue;
1891  }
// Regular path: propagate counts and attach the profile-derived annotations.
1892  Func.populateCounters();
1893  Func.setBranchWeights();
1894  Func.annotateValueSites();
1895  Func.annotateIrrLoopHeaderWeights();
1896  PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1897  if (FreqAttr == PGOUseFunc::FFA_Cold)
1898  ColdFunctions.push_back(&F);
1899  else if (FreqAttr == PGOUseFunc::FFA_Hot)
1900  HotFunctions.push_back(&F);
// Optional debugging view of BFI-derived counts, limited to
// ViewBlockFreqFuncName when that option is non-empty. BPI/BFI are recomputed
// here from a freshly built dominator tree and loop info.
1901  if (PGOViewCounts != PGOVCT_None &&
1902  (ViewBlockFreqFuncName.empty() ||
1903  F.getName().equals(ViewBlockFreqFuncName))) {
1904  LoopInfo LI{DominatorTree(F)};
1905  std::unique_ptr<BranchProbabilityInfo> NewBPI =
1906  std::make_unique<BranchProbabilityInfo>(F, LI);
1907  std::unique_ptr<BlockFrequencyInfo> NewBFI =
1908  std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1909  if (PGOViewCounts == PGOVCT_Graph)
1910  NewBFI->view();
1911  else if (PGOViewCounts == PGOVCT_Text) {
1912  dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1913  NewBFI->print(dbgs());
1914  }
1915  }
// Optional debugging view of the raw profile counts.
1916  if (PGOViewRawCounts != PGOVCT_None &&
1917  (ViewBlockFreqFuncName.empty() ||
1918  F.getName().equals(ViewBlockFreqFuncName))) {
// NOTE(review): listing line 1919 is missing; the `else if` on listing line
// 1924 has no visible matching `if`, so presumably an outer condition selecting
// the graph mode was here -- confirm.
1920  if (ViewBlockFreqFuncName.empty())
1921  WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1922  else
1923  ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1924  else if (PGOViewRawCounts == PGOVCT_Text) {
1925  dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1926  Func.dumpInfo();
1927  }
1928  }
1929 
// NOTE(review): listing line 1930 is missing; the extra closing brace on
// listing line 1945 suggests a condition opening a scope was here -- confirm.
1931  LoopInfo LI{DominatorTree(F)};
1932  BranchProbabilityInfo NBPI(F, LI);
1933 
1934  // Fix func entry count.
1935  if (PGOFixEntryCount)
1936  fixFuncEntryCount(Func, LI, NBPI);
1937 
1938  // Verify BlockFrequency information.
1939  uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
1940  if (PGOVerifyHotBFI) {
1941  HotCountThreshold = PSI->getOrCompHotCountThreshold();
// NOTE(review): listing line 1942 is missing; in the visible lines
// ColdCountThreshold is never assigned after its initialisation to 0.
1943  }
1944  verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
1945  }
1946  }
1947 
1948  // Set function hotness attribute from the profile.
1949  // We have to apply these attributes at the end because their presence
1950  // can affect the BranchProbabilityInfo of any callers, resulting in an
1951  // inconsistent MST between prof-gen and prof-use.
1952  for (auto &F : HotFunctions) {
1953  F->addFnAttr(Attribute::InlineHint);
1954  LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1955  << "\n");
1956  }
1957  for (auto &F : ColdFunctions) {
1958  // Only set when there is no Attribute::Hot set by the user. For Hot
1959  // attribute, user's annotation has the precedence over the profile.
1960  if (F->hasFnAttribute(Attribute::Hot)) {
// This Ctx shadows the function-level Ctx declared near the top; both are
// obtained from M.getContext().
1961  auto &Ctx = M.getContext();
1962  std::string Msg = std::string("Function ") + F->getName().str() +
1963  std::string(" is annotated as a hot function but"
1964  " the profile is cold");
1965  Ctx.diagnose(
1966  DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1967  continue;
1968  }
1969  F->addFnAttr(Attribute::Cold);
1970  LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
1971  << "\n");
1972  }
1973  return true;
1974 }
1975 
// NOTE(review): the listing line that opens this definition (1976) is missing
// from this extraction. The member-initializer list and the members used by
// PGOInstrumentationUse::run() below suggest this is the
// PGOInstrumentationUse constructor, with a leading `Filename` parameter --
// confirm against the real source.
//
// Stores the profile file name, the remapping file name and the IsCS flag.
// Either file name is then replaced by the corresponding testing option
// (PGOTestProfileFile / PGOTestProfileRemappingFile) when that option is
// non-empty.
1977  std::string RemappingFilename,
1978  bool IsCS)
1979  : ProfileFileName(std::move(Filename)),
1980  ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
// A non-empty test option takes precedence over the constructor argument.
1981  if (!PGOTestProfileFile.empty())
1982  ProfileFileName = PGOTestProfileFile;
1983  if (!PGOTestProfileRemappingFile.empty())
1984  ProfileRemappingFileName = PGOTestProfileRemappingFile;
1985 }
1986 
// NOTE(review): the listing line with the return type, name and first
// parameter (1987) is missing from this extraction. The body uses `M` and
// returns PreservedAnalyses, which is consistent with
// PGOInstrumentationUse::run(Module &M, ModuleAnalysisManager &AM) -- confirm.
//
// Sets up per-function analysis lookups through the function analysis manager
// proxy, fetches the profile summary analysis result, and runs
// annotateAllFunctions(). Reports all analyses preserved when
// annotateAllFunctions() returns false, and none preserved otherwise.
1988  ModuleAnalysisManager &AM) {
1989 
1990  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
// NOTE(review): the bodies of the three lambdas below (listing lines 1992,
// 1995 and 1998) are missing; only their captures and signatures are visible.
1991  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1993  };
1994  auto LookupBPI = [&FAM](Function &F) {
1996  };
1997  auto LookupBFI = [&FAM](Function &F) {
1999  };
2000 
2001  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
2002 
// A false result means annotateAllFunctions() took one of its early exits.
2003  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
2004  LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2005  return PreservedAnalyses::all();
2006 
2007  return PreservedAnalyses::none();
2008 }
2009 
2010 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
2011  if (skipModule(M))
2012  return false;
2013 
2014  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
2015  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2016  };
2017  auto LookupBPI = [this](Function &F) {
2018  return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
2019  };
2020  auto LookupBFI = [this](Function &F) {
2021  return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2022  };
2023 
2024  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2025  return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
2026  LookupBFI, PSI, IsCS);
2027 }
2028 
2029 static std::string getSimpleNodeName(const BasicBlock *Node) {
2030  if (!Node->getName().empty())
2031  return std::string(Node->getName());
2032 
2033  std::string SimpleNodeName;
2034  raw_string_ostream OS(SimpleNodeName);
2035  Node->printAsOperand(OS, false);
2036  return OS.str();
2037 }
2038 
// NOTE(review): the listing line with the return type, name and leading
// parameters (2039) is missing from this extraction. The body uses `M` and
// `TI`, so those are presumably the missing parameters; the function is
// presumably llvm::setProfMetadata -- confirm against the real source.
//
// Scales each entry of EdgeCounts by a factor derived from MaxCount and
// attaches the resulting weights to TI as MD_prof branch-weight metadata.
// When EmitBranchProbability is set and a branch-condition string is
// available for TI, also emits an optimization remark giving the probability
// of the first weight relative to the sum of all weights, plus the unscaled
// total count.
2040  ArrayRef<uint64_t> EdgeCounts,
2041  uint64_t MaxCount) {
2042  MDBuilder MDB(M->getContext());
2043  assert(MaxCount > 0 && "Bad max count");
2044  uint64_t Scale = calculateCountScale(MaxCount);
2045  SmallVector<unsigned, 4> Weights;
2046  for (const auto &ECI : EdgeCounts)
2047  Weights.push_back(scaleBranchCount(ECI, Scale));
2048 
2049  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2050  : Weights) {
2051  dbgs() << W << " ";
2052  } dbgs() << "\n";);
2053 
2054  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
2055  if (EmitBranchProbability) {
// No remark is produced when no condition string can be built for TI.
2056  std::string BrCondStr = getBranchCondString(TI);
2057  if (BrCondStr.empty())
2058  return;
2059 
2060  uint64_t WSum =
2061  std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2062  [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2063  uint64_t TotalCount =
2064  std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2065  [](uint64_t c1, uint64_t c2) { return c1 + c2; });
// Scale is recomputed from the weight sum so numerator and denominator are
// scaled by the same factor before building the probability.
2066  Scale = calculateCountScale(WSum);
// NOTE(review): Weights[0] is read without a visible emptiness check;
// presumably a non-empty BrCondStr implies TI has successors -- confirm.
2067  BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2068  scaleBranchCount(WSum, Scale));
2069  std::string BranchProbStr;
2070  raw_string_ostream OS(BranchProbStr);
2071  OS << BP;
2072  OS << " (total count : " << TotalCount << ")";
2073  OS.flush();
2074  Function *F = TI->getParent()->getParent();
// NOTE(review): listing line 2075 is missing. ORE is not declared in any
// visible line and F is otherwise unused, so presumably the remark emitter was
// constructed from F here -- confirm.
2076  ORE.emit([&]() {
2077  return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2078  << BrCondStr << " is true with probability : " << BranchProbStr;
2079  });
2080  }
2081 }
2082 
2083 namespace llvm {
2084 
// NOTE(review): the listing line with this function's signature (2085) is
// missing from this extraction. The cross-reference index of this listing
// records `void setIrrLoopHeaderMetadata(Module *M, Instruction *TI,
// uint64_t Count)` at that line, which matches the names used in this body.
//
// Attaches MD_irr_loop metadata carrying Count as the irreducible-loop header
// weight to TI, using a metadata builder created on M's context.
2086  MDBuilder MDB(M->getContext());
2087  TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2088  MDB.createIrrLoopHeaderWeight(Count));
2089 }
2090 
// GraphTraits specialization that exposes the function held by a PGOUseFunc
// as a graph: nodes are the function's basic blocks, the entry node is the
// first block, and a node's children are its CFG successors.
2091 template <> struct GraphTraits<PGOUseFunc *> {
2092  using NodeRef = const BasicBlock *;
// NOTE(review): listing lines 2093-2094 are missing. ChildIteratorType and
// nodes_iterator are used below but not declared in any visible line, so
// presumably their alias declarations were here -- confirm.
2095 
// Entry node: the first basic block of the wrapped function.
2096  static NodeRef getEntryNode(const PGOUseFunc *G) {
2097  return &G->getFunc().front();
2098  }
2099 
// NOTE(review): listing line 2100 is missing; the statement below is
// presumably the body of child_begin(), mirroring child_end() -- confirm.
2101  return succ_begin(N);
2102  }
2103 
2104  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2105 
// Node range: every basic block of the wrapped function, in function order.
2106  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2107  return nodes_iterator(G->getFunc().begin());
2108  }
2109 
2110  static nodes_iterator nodes_end(const PGOUseFunc *G) {
2111  return nodes_iterator(G->getFunc().end());
2112  }
2113 };
2114 
// DOT rendering traits for a PGOUseFunc graph. The graph is named after the
// wrapped function, and each node label shows the block's simple name, its
// profile count (or "Unknown"), and, when PGOInstrSelect is enabled, the
// profile metadata of every select instruction in the block.
2115 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2116  explicit DOTGraphTraits(bool isSimple = false)
// NOTE(review): listing line 2117 is missing; the constructor's initializer
// list and body are not visible in this extraction.
2118 
// Graph title: the name of the wrapped function.
2119  static std::string getGraphName(const PGOUseFunc *G) {
2120  return std::string(G->getFunc().getName());
2121  }
2122 
// Builds the label text for one basic block. "\\l" is emitted as the
// line separator inside the label.
2123  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2124  std::string Result;
2125  raw_string_ostream OS(Result);
2126 
2127  OS << getSimpleNodeName(Node) << ":\\l";
2128  UseBBInfo *BI = Graph->findBBInfo(Node);
2129  OS << "Count : ";
// The count is printed only when block info exists and is marked valid.
2130  if (BI && BI->CountValid)
2131  OS << BI->CountValue << "\\l";
2132  else
2133  OS << "Unknown\\l";
2134 
// NOTE(review): Result is returned here and at the end without an explicit
// flush of OS; presumably raw_string_ostream writes through to the string
// unbuffered in this LLVM version -- confirm.
2135  if (!PGOInstrSelect)
2136  return Result;
2137 
2138  for (const Instruction &I : *Node) {
2139  if (!isa<SelectInst>(&I))
2140  continue;
2141  // Display scaled counts for SELECT instruction:
2142  OS << "SELECT : { T = ";
// TC / FC are only read when extractProfMetadata reports success.
2143  uint64_t TC, FC;
2144  bool HasProf = I.extractProfMetadata(TC, FC);
2145  if (!HasProf)
2146  OS << "Unknown, F = Unknown }\\l";
2147  else
2148  OS << TC << ", F = " << FC << " }\\l";
2149  }
2150  return Result;
2151  }
2152 };
2153 
2154 } // end namespace llvm
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
DoComdatRenaming
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
llvm::SuccIterator
Definition: CFG.h:139
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:495
Instrumentation.h
llvm::createPGOFuncNameMetadata
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1060
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::PGOInstrumentationUse::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1987
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
fixFuncEntryCount
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
Definition: PGOInstrumentation.cpp:1683
llvm::DOTGraphTraits< PGOUseFunc * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: PGOInstrumentation.cpp:2116
llvm::APFloatBase::cmpGreaterThan
@ cmpGreaterThan
Definition: APFloat.h:183
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
use
pgo instr use
Definition: PGOInstrumentation.cpp:500
c2
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps c2(%esp) ... xorps %xmm0
Comdat.h
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::Comdat::getName
StringRef getName() const
Definition: Comdat.cpp:26
llvm::JamCRC::update
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
ProfileCount
Function::ProfileCount ProfileCount
Definition: PGOInstrumentation.cpp:125
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::InstrProfError::message
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:219
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
llvm::CFGMST::getBBInfo
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:83
llvm::Function
Definition: Function.h:62
ProfileSummary.h
StringRef.h
Pass.h
llvm::BlockFrequencyInfoWrapperPass
Legacy analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:138
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::DOTGraphTraits< PGOUseFunc * >::getNodeLabel
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
Definition: PGOInstrumentation.cpp:2123
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:199
llvm::ProfileSummaryInfo::getOrCompHotCountThreshold
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:318
PGOTestProfileRemappingFile
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
llvm::getPGOFuncName
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Return the modified name for function F suitable to be used the key for profile lookup.
Definition: InstrProf.cpp:263
sumEdgeCount
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
Definition: PGOInstrumentation.cpp:1051
ErrorHandling.h
llvm::InstrProfError::get
instrprof_error get() const
Definition: InstrProf.h:323
llvm::IRBuilder<>
PGOVerifyBFI
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
MapVector.h
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2233
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::createPGOInstrumentationUseLegacyPass
ModulePass * createPGOInstrumentationUseLegacyPass(StringRef Filename=StringRef(""), bool IsCS=false)
Definition: PGOInstrumentation.cpp:503
Error.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createPGOFuncNameVar
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:342
PGOVerifyBFIRatio
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-big -- only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
llvm::coverage::accessors::getFuncHash
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
Definition: CoverageMapping.h:774
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
APInt.h
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
PGOInstrSelect
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::SplitCriticalEdge
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition: BreakCriticalEdges.cpp:103
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::GraphTraits< PGOUseFunc * >::nodes_end
static nodes_iterator nodes_end(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2110
EHPersonalities.h
llvm::CFGMST::AllEdges
std::vector< std::unique_ptr< Edge > > AllEdges
Definition: CFGMST.h:45
llvm::TinyPtrVector::front
EltTy front() const
Definition: TinyPtrVector.h:230
llvm::Optional
Definition: APInt.h:33
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::JamCRC
Definition: CRC.h:45
MaxNumMemOPAnnotations
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
llvm::GraphTraits< PGOUseFunc * >::child_end
static ChildIteratorType child_end(const NodeRef N)
Definition: PGOInstrumentation.cpp:2104
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:375
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
STLExtras.h
llvm::createProfileFileNameVar
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1166
llvm::CmpInst::getPredicateName
static StringRef getPredicateName(Predicate P)
Definition: Instructions.cpp:3831
CFGMST.h
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:188
llvm::PGOVCT_Text
@ PGOVCT_Text
Definition: BlockFrequencyInfo.h:33
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::InstrProfRecord
Profiling information for a single function.
Definition: InstrProf.h:691
llvm::OperandBundleDef
OperandBundleDefT< Value * > OperandBundleDef
Definition: InstrTypes.h:1140
F
#define F(x, y, z)
Definition: MD5.cpp:56
InstrProfData.inc
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1135
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
MaxNumAnnotations
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of annotations for a single indirect " "call callsite"))
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
PGOWarnMissing
static cl::opt< bool > PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about missing profile data for " "functions."))
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
Instruction.h
PGOInstrumentEntry
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
CommandLine.h
llvm::APFloat::getZero
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:885
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:765
llvm::PGOVCT_Graph
@ PGOVCT_Graph
Definition: BlockFrequencyInfo.h:33
llvm::BranchProbabilityAnalysis
Analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:414
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::ProfileSummaryInfo::isColdCount
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
Definition: ProfileSummaryInfo.cpp:294
GlobalValue.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::calculateCountScale
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
Definition: Instrumentation.h:180
PGOViewRawCounts
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Constants.h
llvm::ValueProfileCollector::CandidateInfo::AnnotatedInst
Instruction * AnnotatedInst
Definition: ValueProfileCollector.h:62
llvm::BranchProbabilityInfoWrapperPass
Legacy analysis pass which computes BranchProbabilityInfo.
Definition: BranchProbabilityInfo.h:440
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:47
getBranchCondString
static std::string getBranchCondString(Instruction *TI)
Definition: PGOInstrumentation.cpp:302
profile
pgo instr Read PGO instrumentation profile
Definition: PGOInstrumentation.cpp:501
llvm::PGOInstrumentationGen::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1662
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::setIrrLoopHeaderMetadata
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
Definition: PGOInstrumentation.cpp:2085
Twine.h
InstrTypes.h
llvm::Type::print
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Definition: AsmWriter.cpp:4567
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:115
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MDTuple
Tuple of metadata.
Definition: Metadata.h:1174
llvm::canRenameComdatFunc
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1120
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::createPGOInstrumentationGenCreateVarLegacyPass
ModulePass * createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName=StringRef(""))
llvm::InstrProfRecord::getNumValueSites
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:882
llvm::LLVMContext::OB_funclet
@ OB_funclet
Definition: LLVMContext.h:91
CRC.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:253
false
Definition: StackSlotColoring.cpp:142
llvm::Instruction
Definition: Instruction.h:45
ColdCountThreshold
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
InstrProf.h
MDBuilder.h
INITIALIZE_PASS
INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, "pgo-instr-gen-create-var", "Create PGO instrumentation version variable for CSPGO.", false, false) ModulePass *llvm
Definition: PGOInstrumentation.cpp:510
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:110
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
PGOFixEntryCount
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:250
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::CFGMST
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
llvm::Comdat
Definition: Comdat.h:31
llvm::scaleBranchCount
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
Definition: Instrumentation.h:190
llvm::initializePGOInstrumentationGenCreateVarLegacyPassPass
void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry &)
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::GraphTraits< PGOUseFunc * >::getEntryNode
static NodeRef getEntryNode(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2096
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::ErrorInfoBase
Base class for error info classes.
Definition: Error.h:48
llvm::ErrorInfoBase::message
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:56
Type.h
BranchProbability.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:161
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3149
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
instrumentation
pgo instr PGO instrumentation
Definition: PGOInstrumentation.cpp:487
getSimpleNodeName
static std::string getSimpleNodeName(const BasicBlock *Node)
Definition: PGOInstrumentation.cpp:2029
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
EmitBranchProbability
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
instrumentOneFunc
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
Definition: PGOInstrumentation.cpp:900
llvm::MDBuilder::createString
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
PGOVerifyHotBFI
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
llvm::pointer_iterator
Definition: iterator.h:338
llvm::HighlightColor::Remark
@ Remark
llvm::IndexedInstrProfReader::getInstrProfRecord
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Definition: InstrProfReader.cpp:945
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::APFloat
Definition: APFloat.h:701
InstrProfReader.h
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:46
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
BranchProbabilityInfo.h
llvm::PGOViewCounts
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
Definition: PGOInstrumentation.cpp:289
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1203
llvm::ProfileSummaryInfo::isHotCount
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Definition: ProfileSummaryInfo.cpp:290
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
llvm::colorEHFunclets
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
Definition: EHPersonalities.cpp:81
ProfileSummaryInfo.h
PGOOldCFGHashing
static cl::opt< bool > PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, cl::desc("Use the old CFG function hashing"))
setBranchWeights
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
Definition: SimplifyCFG.cpp:822
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::IndexedInstrProfReader
Reader for the indexed binary instrprof format.
Definition: InstrProfReader.h:458
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::GlobalValue::WeakAnyLinkage
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
llvm::InstrProfError
Definition: InstrProf.h:309
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
PGOInstrumentation.h
llvm::DenseMap
Definition: DenseMap.h:714
iterator.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::SplitIndirectBrCriticalEdges
bool SplitIndirectBrCriticalEdges(Function &F, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Definition: BreakCriticalEdges.cpp:351
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass
ArrayRef.h
llvm::createPGOInstrumentationGenLegacyPass
ModulePass * createPGOInstrumentationGenLegacyPass(bool IsCS=false)
Definition: PGOInstrumentation.cpp:489
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::DiagnosticInfoPGOProfile
Diagnostic information for the PGO profiler.
Definition: DiagnosticInfo.h:325
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
llvm::PGOVCT_None
@ PGOVCT_None
Definition: BlockFrequencyInfo.h:33
llvm::instrprof_error::unknown_function
@ unknown_function
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::WriteGraph
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:307
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1738
iterator_range.h
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::MDTuple::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1200
annotateAllFunctions
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
Definition: PGOInstrumentation.cpp:1804
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::InstrProfValueKind
InstrProfValueKind
Definition: InstrProf.h:238
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::IndexedInstrProfReader::create
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Definition: InstrProfReader.cpp:93
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PGOInstrumentation.cpp:128
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
PGOVerifyBFICutoff
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose " "profile count value is below."))
Triple.h
llvm::GetSuccessorNumber
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
CFG.h
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::ProfileSummaryInfo::refresh
void refresh()
If no summary is present, attempt to refresh.
Definition: ProfileSummaryInfo.cpp:58
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::DOTGraphTraits< PGOUseFunc * >::getGraphName
static std::string getGraphName(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2119
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::NamedInstrProfRecord::setCSFlagInHash
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:863
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:173
llvm::TinyPtrVector::size
unsigned size() const
Definition: TinyPtrVector.h:172
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
this
Analysis the ScalarEvolution expression for r is this
Definition: README.txt:8
InstVisitor.h
PGOInstrMemOP
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:194
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::PGOInstrumentationUse::PGOInstrumentationUse
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false)
Definition: PGOInstrumentation.cpp:1976
llvm::ConstantInt::isMinusOne
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:206
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
llvm::initializePGOInstrumentationUseLegacyPassPass
void initializePGOInstrumentationUseLegacyPassPass(PassRegistry &)
BlockFrequencyInfo.h
llvm::ProfileSummary::PSK_CSInstr
@ PSK_CSInstr
Definition: ProfileSummary.h:47
llvm::GlobalValue::AvailableExternallyLinkage
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::instrprof_error::hash_mismatch
@ hash_mismatch
llvm::NoPGOWarnMismatch
cl::opt< bool > NoPGOWarnMismatch
llvm::GraphTraits< PGOUseFunc * >::nodes_begin
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Definition: PGOInstrumentation.cpp:2106
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::PGOInstrumentationGenCreateVar::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PGOInstrumentation.cpp:1637
llvm::annotateValueSite
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:955
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
annotateFunctionWithHashMismatch
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
Definition: PGOInstrumentation.cpp:1257
Attributes.h
llvm::ValueProfileCollector
Utility analysis that determines what values are worth profiling.
Definition: ValueProfileCollector.h:57
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
ValueProfKindDescr
static const char * ValueProfKindDescr[]
Definition: PGOInstrumentation.cpp:333
GraphWriter.h
std
Definition: BitVector.h:838
ValueProfileCollector.h
llvm::JamCRC::getCRC
uint32_t getCRC() const
Definition: CRC.h:52
llvm::GlobalAlias::create
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:467
canRenameComdat
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:721
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::ProfileSummaryInfo::getOrCompColdCountThreshold
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
Definition: ProfileSummaryInfo.cpp:322
GlobalVariable.h
Casting.h
DiagnosticInfo.h
Function.h
DOTGraphTraits.h
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
llvm::ValueProfileCollector::CandidateInfo
Definition: ValueProfileCollector.h:59
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:661
getInstrBB
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
Definition: GCOVProfiling.cpp:756
llvm::cl::value_desc
Definition: CommandLine.h:422
llvm::CFGMST::BBInfos
DenseMap< const BasicBlock *, std::unique_ptr< BBInfo > > BBInfos
Definition: CFGMST.h:48
llvm::CFGMST::addEdge
Edge & addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:260
isSimple
static bool isSimple(Instruction *I)
Definition: SLPVectorizer.cpp:552
GlobalAlias.h
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::Comdat::setSelectionKind
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:45
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
Instructions.h
llvm::support::endian::write64le
void write64le(void *P, uint64_t V)
Definition: Endian.h:417
SmallVector.h
llvm::GraphTraits< PGOUseFunc * >::child_begin
static ChildIteratorType child_begin(const NodeRef N)
Definition: PGOInstrumentation.cpp:2100
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:209
Dominators.h
DisableValueProfiling
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
ModuleUtils.h
llvm::ProfileSummary::PSK_Instr
@ PSK_Instr
Definition: ProfileSummary.h:47
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:796
PGOTestProfileFile
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::setProfMetadata
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
Definition: PGOInstrumentation.cpp:2039
llvm::ProfileSummaryInfo::getHotCountThreshold
uint64_t getHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.h:172
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Definition: DOTGraphTraits.h:28
llvm::instrprof_error::malformed
@ malformed
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
verifyFuncBFI
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
Definition: PGOInstrumentation.cpp:1730
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::ConstantInt::isOne
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
populateEHOperandBundle
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
Definition: PGOInstrumentation.cpp:873
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ViewBlockFreqFuncName
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
Definition: MachineBlockFrequencyInfo.cpp:66
isIndirectBrTarget
static bool isIndirectBrTarget(BasicBlock *BB)
Definition: PGOInstrumentation.cpp:1475
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::GraphTraits
Definition: GraphTraits.h:35
LLVMContext.h
llvm::const_succ_iterator
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:244
llvm::IndexedInstrProfReader::getMaximumFunctionCount
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Definition: InstrProfReader.h:519
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:255
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::Comdat::getSelectionKind
SelectionKind getSelectionKind() const
Definition: Comdat.h:44
raw_ostream.h
llvm::TinyPtrVector
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
llvm::initializePGOInstrumentationGenLegacyPassPass
void initializePGOInstrumentationGenLegacyPassPass(PassRegistry &)
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:182
BasicBlockUtils.h
llvm::GlobalValue::LinkOnceODRLinkage
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1927
collectComdatMembers
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
Definition: PGOInstrumentation.cpp:1596
Value.h
llvm::raw_string_ostream::str
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:643
llvm::CFGMST::dumpEdges
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:241
InitializePasses.h
llvm::handleAllErrors
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:968
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::CFGMST::findBBInfo
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:90
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::InstrProfRecord::Counts
std::vector< uint64_t > Counts
Definition: InstrProf.h:692
gen
pgo instr gen
Definition: PGOInstrumentation.cpp:486
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3147
llvm::ValueProfileCollector::get
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
Definition: ValueProfileCollector.cpp:76
NoPGOWarnMismatchComdat
static cl::opt< bool > NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "functions."))
llvm::isFuncletEHPersonality
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
Definition: EHPersonalities.h:65
llvm::MDBuilder::createIrrLoopHeaderWeight
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:301
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
InstrumentAllFunctions
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
Definition: PGOInstrumentation.cpp:1612
llvm::createIRLevelProfileFlagVar
GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS, bool InstrEntryBBEnabled)
Definition: InstrProf.cpp:1144