clang  3.9.0
CodeGenPGO.cpp
Go to the documentation of this file.
1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Instrumentation-based profile-guided optimization
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenPGO.h"
15 #include "CodeGenFunction.h"
16 #include "CoverageMappingGen.h"
18 #include "clang/AST/StmtVisitor.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/IR/MDBuilder.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/MD5.h"
24 
25 static llvm::cl::opt<bool> EnableValueProfiling(
26  "enable-value-profiling", llvm::cl::ZeroOrMore,
27  llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));
28 
29 using namespace clang;
30 using namespace CodeGen;
31 
32 void CodeGenPGO::setFuncName(StringRef Name,
33  llvm::GlobalValue::LinkageTypes Linkage) {
34  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
35  FuncName = llvm::getPGOFuncName(
36  Name, Linkage, CGM.getCodeGenOpts().MainFileName,
37  PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
38 
39  // If we're generating a profile, create a variable for the name.
41  FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
42 }
43 
44 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
45  setFuncName(Fn->getName(), Fn->getLinkage());
46  // Create PGOFuncName meta data.
47  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
48 }
49 
50 namespace {
51 /// \brief Stable hasher for PGO region counters.
52 ///
53 /// PGOHash produces a stable hash of a given function's control flow.
54 ///
55 /// Changing the output of this hash will invalidate all previously generated
56 /// profiles -- i.e., don't do it.
57 ///
58 /// \note When this hash does eventually change (years?), we still need to
59 /// support old hashes. We'll need to pull in the version number from the
60 /// profile data format and use the matching hash function.
61 class PGOHash {
62  uint64_t Working;
63  unsigned Count;
64  llvm::MD5 MD5;
65 
66  static const int NumBitsPerType = 6;
67  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
68  static const unsigned TooBig = 1u << NumBitsPerType;
69 
70 public:
71  /// \brief Hash values for AST nodes.
72  ///
73  /// Distinct values for AST nodes that have region counters attached.
74  ///
75  /// These values must be stable. All new members must be added at the end,
76  /// and no members should be removed. Changing the enumeration value for an
77  /// AST node will affect the hash of every function that contains that node.
78  enum HashType : unsigned char {
79  None = 0,
80  LabelStmt = 1,
81  WhileStmt,
82  DoStmt,
83  ForStmt,
86  SwitchStmt,
87  CaseStmt,
89  IfStmt,
90  CXXTryStmt,
93  BinaryOperatorLAnd,
94  BinaryOperatorLOr,
96 
97  // Keep this last. It's for the static assert that follows.
98  LastHashType
99  };
100  static_assert(LastHashType <= TooBig, "Too many types in HashType");
101 
102  // TODO: When this format changes, take in a version number here, and use the
103  // old hash calculation for file formats that used the old hash.
104  PGOHash() : Working(0), Count(0) {}
105  void combine(HashType Type);
106  uint64_t finalize();
107 };
108 const int PGOHash::NumBitsPerType;
109 const unsigned PGOHash::NumTypesPerWord;
110 const unsigned PGOHash::TooBig;
111 
112 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
113 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
114  /// The next counter value to assign.
115  unsigned NextCounter;
116  /// The function hash.
117  PGOHash Hash;
118  /// The map of statements to counters.
119  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
120 
121  MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
122  : NextCounter(0), CounterMap(CounterMap) {}
123 
124  // Blocks and lambdas are handled as separate functions, so we need not
125  // traverse them in the parent context.
126  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
127  bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
128  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
129 
130  bool VisitDecl(const Decl *D) {
131  switch (D->getKind()) {
132  default:
133  break;
134  case Decl::Function:
135  case Decl::CXXMethod:
136  case Decl::CXXConstructor:
137  case Decl::CXXDestructor:
138  case Decl::CXXConversion:
139  case Decl::ObjCMethod:
140  case Decl::Block:
141  case Decl::Captured:
142  CounterMap[D->getBody()] = NextCounter++;
143  break;
144  }
145  return true;
146  }
147 
148  bool VisitStmt(const Stmt *S) {
149  auto Type = getHashType(S);
150  if (Type == PGOHash::None)
151  return true;
152 
153  CounterMap[S] = NextCounter++;
154  Hash.combine(Type);
155  return true;
156  }
157  PGOHash::HashType getHashType(const Stmt *S) {
158  switch (S->getStmtClass()) {
159  default:
160  break;
161  case Stmt::LabelStmtClass:
162  return PGOHash::LabelStmt;
163  case Stmt::WhileStmtClass:
164  return PGOHash::WhileStmt;
165  case Stmt::DoStmtClass:
166  return PGOHash::DoStmt;
167  case Stmt::ForStmtClass:
168  return PGOHash::ForStmt;
169  case Stmt::CXXForRangeStmtClass:
170  return PGOHash::CXXForRangeStmt;
171  case Stmt::ObjCForCollectionStmtClass:
172  return PGOHash::ObjCForCollectionStmt;
173  case Stmt::SwitchStmtClass:
174  return PGOHash::SwitchStmt;
175  case Stmt::CaseStmtClass:
176  return PGOHash::CaseStmt;
177  case Stmt::DefaultStmtClass:
178  return PGOHash::DefaultStmt;
179  case Stmt::IfStmtClass:
180  return PGOHash::IfStmt;
181  case Stmt::CXXTryStmtClass:
182  return PGOHash::CXXTryStmt;
183  case Stmt::CXXCatchStmtClass:
184  return PGOHash::CXXCatchStmt;
185  case Stmt::ConditionalOperatorClass:
186  return PGOHash::ConditionalOperator;
187  case Stmt::BinaryConditionalOperatorClass:
188  return PGOHash::BinaryConditionalOperator;
189  case Stmt::BinaryOperatorClass: {
190  const BinaryOperator *BO = cast<BinaryOperator>(S);
191  if (BO->getOpcode() == BO_LAnd)
192  return PGOHash::BinaryOperatorLAnd;
193  if (BO->getOpcode() == BO_LOr)
194  return PGOHash::BinaryOperatorLOr;
195  break;
196  }
197  }
198  return PGOHash::None;
199  }
200 };
201 
202 /// A StmtVisitor that propagates the raw counts through the AST and
203 /// records the count at statements where the value may change.
204 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
205  /// PGO state.
206  CodeGenPGO &PGO;
207 
208  /// A flag that is set when the current count should be recorded on the
209  /// next statement, such as at the exit of a loop.
210  bool RecordNextStmtCount;
211 
212  /// The count at the current location in the traversal.
213  uint64_t CurrentCount;
214 
215  /// The map of statements to count values.
216  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
217 
218  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
219  struct BreakContinue {
220  uint64_t BreakCount;
221  uint64_t ContinueCount;
222  BreakContinue() : BreakCount(0), ContinueCount(0) {}
223  };
224  SmallVector<BreakContinue, 8> BreakContinueStack;
225 
226  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
227  CodeGenPGO &PGO)
228  : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
229 
230  void RecordStmtCount(const Stmt *S) {
231  if (RecordNextStmtCount) {
232  CountMap[S] = CurrentCount;
233  RecordNextStmtCount = false;
234  }
235  }
236 
237  /// Set and return the current count.
238  uint64_t setCount(uint64_t Count) {
239  CurrentCount = Count;
240  return Count;
241  }
242 
243  void VisitStmt(const Stmt *S) {
244  RecordStmtCount(S);
245  for (const Stmt *Child : S->children())
246  if (Child)
247  this->Visit(Child);
248  }
249 
250  void VisitFunctionDecl(const FunctionDecl *D) {
251  // Counter tracks entry to the function body.
252  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
253  CountMap[D->getBody()] = BodyCount;
254  Visit(D->getBody());
255  }
256 
257  // Skip lambda expressions. We visit these as FunctionDecls when we're
258  // generating them and aren't interested in the body when generating a
259  // parent context.
260  void VisitLambdaExpr(const LambdaExpr *LE) {}
261 
262  void VisitCapturedDecl(const CapturedDecl *D) {
263  // Counter tracks entry to the capture body.
264  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
265  CountMap[D->getBody()] = BodyCount;
266  Visit(D->getBody());
267  }
268 
269  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
270  // Counter tracks entry to the method body.
271  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
272  CountMap[D->getBody()] = BodyCount;
273  Visit(D->getBody());
274  }
275 
276  void VisitBlockDecl(const BlockDecl *D) {
277  // Counter tracks entry to the block body.
278  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
279  CountMap[D->getBody()] = BodyCount;
280  Visit(D->getBody());
281  }
282 
283  void VisitReturnStmt(const ReturnStmt *S) {
284  RecordStmtCount(S);
285  if (S->getRetValue())
286  Visit(S->getRetValue());
287  CurrentCount = 0;
288  RecordNextStmtCount = true;
289  }
290 
291  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
292  RecordStmtCount(E);
293  if (E->getSubExpr())
294  Visit(E->getSubExpr());
295  CurrentCount = 0;
296  RecordNextStmtCount = true;
297  }
298 
299  void VisitGotoStmt(const GotoStmt *S) {
300  RecordStmtCount(S);
301  CurrentCount = 0;
302  RecordNextStmtCount = true;
303  }
304 
305  void VisitLabelStmt(const LabelStmt *S) {
306  RecordNextStmtCount = false;
307  // Counter tracks the block following the label.
308  uint64_t BlockCount = setCount(PGO.getRegionCount(S));
309  CountMap[S] = BlockCount;
310  Visit(S->getSubStmt());
311  }
312 
313  void VisitBreakStmt(const BreakStmt *S) {
314  RecordStmtCount(S);
315  assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
316  BreakContinueStack.back().BreakCount += CurrentCount;
317  CurrentCount = 0;
318  RecordNextStmtCount = true;
319  }
320 
321  void VisitContinueStmt(const ContinueStmt *S) {
322  RecordStmtCount(S);
323  assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
324  BreakContinueStack.back().ContinueCount += CurrentCount;
325  CurrentCount = 0;
326  RecordNextStmtCount = true;
327  }
328 
329  void VisitWhileStmt(const WhileStmt *S) {
330  RecordStmtCount(S);
331  uint64_t ParentCount = CurrentCount;
332 
333  BreakContinueStack.push_back(BreakContinue());
334  // Visit the body region first so the break/continue adjustments can be
335  // included when visiting the condition.
336  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
337  CountMap[S->getBody()] = CurrentCount;
338  Visit(S->getBody());
339  uint64_t BackedgeCount = CurrentCount;
340 
341  // ...then go back and propagate counts through the condition. The count
342  // at the start of the condition is the sum of the incoming edges,
343  // the backedge from the end of the loop body, and the edges from
344  // continue statements.
345  BreakContinue BC = BreakContinueStack.pop_back_val();
346  uint64_t CondCount =
347  setCount(ParentCount + BackedgeCount + BC.ContinueCount);
348  CountMap[S->getCond()] = CondCount;
349  Visit(S->getCond());
350  setCount(BC.BreakCount + CondCount - BodyCount);
351  RecordNextStmtCount = true;
352  }
353 
354  void VisitDoStmt(const DoStmt *S) {
355  RecordStmtCount(S);
356  uint64_t LoopCount = PGO.getRegionCount(S);
357 
358  BreakContinueStack.push_back(BreakContinue());
359  // The count doesn't include the fallthrough from the parent scope. Add it.
360  uint64_t BodyCount = setCount(LoopCount + CurrentCount);
361  CountMap[S->getBody()] = BodyCount;
362  Visit(S->getBody());
363  uint64_t BackedgeCount = CurrentCount;
364 
365  BreakContinue BC = BreakContinueStack.pop_back_val();
366  // The count at the start of the condition is equal to the count at the
367  // end of the body, plus any continues.
368  uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
369  CountMap[S->getCond()] = CondCount;
370  Visit(S->getCond());
371  setCount(BC.BreakCount + CondCount - LoopCount);
372  RecordNextStmtCount = true;
373  }
374 
375  void VisitForStmt(const ForStmt *S) {
376  RecordStmtCount(S);
377  if (S->getInit())
378  Visit(S->getInit());
379 
380  uint64_t ParentCount = CurrentCount;
381 
382  BreakContinueStack.push_back(BreakContinue());
383  // Visit the body region first. (This is basically the same as a while
384  // loop; see further comments in VisitWhileStmt.)
385  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
386  CountMap[S->getBody()] = BodyCount;
387  Visit(S->getBody());
388  uint64_t BackedgeCount = CurrentCount;
389  BreakContinue BC = BreakContinueStack.pop_back_val();
390 
391  // The increment is essentially part of the body but it needs to include
392  // the count for all the continue statements.
393  if (S->getInc()) {
394  uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
395  CountMap[S->getInc()] = IncCount;
396  Visit(S->getInc());
397  }
398 
399  // ...then go back and propagate counts through the condition.
400  uint64_t CondCount =
401  setCount(ParentCount + BackedgeCount + BC.ContinueCount);
402  if (S->getCond()) {
403  CountMap[S->getCond()] = CondCount;
404  Visit(S->getCond());
405  }
406  setCount(BC.BreakCount + CondCount - BodyCount);
407  RecordNextStmtCount = true;
408  }
409 
410  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
411  RecordStmtCount(S);
412  Visit(S->getLoopVarStmt());
413  Visit(S->getRangeStmt());
414  Visit(S->getBeginStmt());
415  Visit(S->getEndStmt());
416 
417  uint64_t ParentCount = CurrentCount;
418  BreakContinueStack.push_back(BreakContinue());
419  // Visit the body region first. (This is basically the same as a while
420  // loop; see further comments in VisitWhileStmt.)
421  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
422  CountMap[S->getBody()] = BodyCount;
423  Visit(S->getBody());
424  uint64_t BackedgeCount = CurrentCount;
425  BreakContinue BC = BreakContinueStack.pop_back_val();
426 
427  // The increment is essentially part of the body but it needs to include
428  // the count for all the continue statements.
429  uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
430  CountMap[S->getInc()] = IncCount;
431  Visit(S->getInc());
432 
433  // ...then go back and propagate counts through the condition.
434  uint64_t CondCount =
435  setCount(ParentCount + BackedgeCount + BC.ContinueCount);
436  CountMap[S->getCond()] = CondCount;
437  Visit(S->getCond());
438  setCount(BC.BreakCount + CondCount - BodyCount);
439  RecordNextStmtCount = true;
440  }
441 
442  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
443  RecordStmtCount(S);
444  Visit(S->getElement());
445  uint64_t ParentCount = CurrentCount;
446  BreakContinueStack.push_back(BreakContinue());
447  // Counter tracks the body of the loop.
448  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
449  CountMap[S->getBody()] = BodyCount;
450  Visit(S->getBody());
451  uint64_t BackedgeCount = CurrentCount;
452  BreakContinue BC = BreakContinueStack.pop_back_val();
453 
454  setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
455  BodyCount);
456  RecordNextStmtCount = true;
457  }
458 
459  void VisitSwitchStmt(const SwitchStmt *S) {
460  RecordStmtCount(S);
461  Visit(S->getCond());
462  CurrentCount = 0;
463  BreakContinueStack.push_back(BreakContinue());
464  Visit(S->getBody());
465  // If the switch is inside a loop, add the continue counts.
466  BreakContinue BC = BreakContinueStack.pop_back_val();
467  if (!BreakContinueStack.empty())
468  BreakContinueStack.back().ContinueCount += BC.ContinueCount;
469  // Counter tracks the exit block of the switch.
470  setCount(PGO.getRegionCount(S));
471  RecordNextStmtCount = true;
472  }
473 
474  void VisitSwitchCase(const SwitchCase *S) {
475  RecordNextStmtCount = false;
476  // Counter for this particular case. This counts only jumps from the
477  // switch header and does not include fallthrough from the case before
478  // this one.
479  uint64_t CaseCount = PGO.getRegionCount(S);
480  setCount(CurrentCount + CaseCount);
481  // We need the count without fallthrough in the mapping, so it's more useful
482  // for branch probabilities.
483  CountMap[S] = CaseCount;
484  RecordNextStmtCount = true;
485  Visit(S->getSubStmt());
486  }
487 
488  void VisitIfStmt(const IfStmt *S) {
489  RecordStmtCount(S);
490  uint64_t ParentCount = CurrentCount;
491  Visit(S->getCond());
492 
493  // Counter tracks the "then" part of an if statement. The count for
494  // the "else" part, if it exists, will be calculated from this counter.
495  uint64_t ThenCount = setCount(PGO.getRegionCount(S));
496  CountMap[S->getThen()] = ThenCount;
497  Visit(S->getThen());
498  uint64_t OutCount = CurrentCount;
499 
500  uint64_t ElseCount = ParentCount - ThenCount;
501  if (S->getElse()) {
502  setCount(ElseCount);
503  CountMap[S->getElse()] = ElseCount;
504  Visit(S->getElse());
505  OutCount += CurrentCount;
506  } else
507  OutCount += ElseCount;
508  setCount(OutCount);
509  RecordNextStmtCount = true;
510  }
511 
512  void VisitCXXTryStmt(const CXXTryStmt *S) {
513  RecordStmtCount(S);
514  Visit(S->getTryBlock());
515  for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
516  Visit(S->getHandler(I));
517  // Counter tracks the continuation block of the try statement.
518  setCount(PGO.getRegionCount(S));
519  RecordNextStmtCount = true;
520  }
521 
522  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
523  RecordNextStmtCount = false;
524  // Counter tracks the catch statement's handler block.
525  uint64_t CatchCount = setCount(PGO.getRegionCount(S));
526  CountMap[S] = CatchCount;
527  Visit(S->getHandlerBlock());
528  }
529 
530  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
531  RecordStmtCount(E);
532  uint64_t ParentCount = CurrentCount;
533  Visit(E->getCond());
534 
535  // Counter tracks the "true" part of a conditional operator. The
536  // count in the "false" part will be calculated from this counter.
537  uint64_t TrueCount = setCount(PGO.getRegionCount(E));
538  CountMap[E->getTrueExpr()] = TrueCount;
539  Visit(E->getTrueExpr());
540  uint64_t OutCount = CurrentCount;
541 
542  uint64_t FalseCount = setCount(ParentCount - TrueCount);
543  CountMap[E->getFalseExpr()] = FalseCount;
544  Visit(E->getFalseExpr());
545  OutCount += CurrentCount;
546 
547  setCount(OutCount);
548  RecordNextStmtCount = true;
549  }
550 
551  void VisitBinLAnd(const BinaryOperator *E) {
552  RecordStmtCount(E);
553  uint64_t ParentCount = CurrentCount;
554  Visit(E->getLHS());
555  // Counter tracks the right hand side of a logical and operator.
556  uint64_t RHSCount = setCount(PGO.getRegionCount(E));
557  CountMap[E->getRHS()] = RHSCount;
558  Visit(E->getRHS());
559  setCount(ParentCount + RHSCount - CurrentCount);
560  RecordNextStmtCount = true;
561  }
562 
563  void VisitBinLOr(const BinaryOperator *E) {
564  RecordStmtCount(E);
565  uint64_t ParentCount = CurrentCount;
566  Visit(E->getLHS());
567  // Counter tracks the right hand side of a logical or operator.
568  uint64_t RHSCount = setCount(PGO.getRegionCount(E));
569  CountMap[E->getRHS()] = RHSCount;
570  Visit(E->getRHS());
571  setCount(ParentCount + RHSCount - CurrentCount);
572  RecordNextStmtCount = true;
573  }
574 };
575 } // end anonymous namespace
576 
577 void PGOHash::combine(HashType Type) {
578  // Check that we never combine 0 and only have six bits.
579  assert(Type && "Hash is invalid: unexpected type 0");
580  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
581 
582  // Pass through MD5 if enough work has built up.
583  if (Count && Count % NumTypesPerWord == 0) {
584  using namespace llvm::support;
585  uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
586  MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
587  Working = 0;
588  }
589 
590  // Accumulate the current type.
591  ++Count;
592  Working = Working << NumBitsPerType | Type;
593 }
594 
595 uint64_t PGOHash::finalize() {
596  // Use Working as the hash directly if we never used MD5.
597  if (Count <= NumTypesPerWord)
598  // No need to byte swap here, since none of the math was endian-dependent.
599  // This number will be byte-swapped as required on endianness transitions,
600  // so we will see the same value on the other side.
601  return Working;
602 
603  // Check for remaining work in Working.
604  if (Working)
605  MD5.update(Working);
606 
607  // Finalize the MD5 and return the hash.
608  llvm::MD5::MD5Result Result;
609  MD5.final(Result);
610  using namespace llvm::support;
611  return endian::read<uint64_t, little, unaligned>(Result);
612 }
613 
614 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
615  const Decl *D = GD.getDecl();
616  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
617  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
618  if (!InstrumentRegions && !PGOReader)
619  return;
620  if (D->isImplicit())
621  return;
622  // Constructors and destructors may be represented by several functions in IR.
623  // If so, instrument only base variant, others are implemented by delegation
624  // to the base one, it would be counted twice otherwise.
626  ((isa<CXXConstructorDecl>(GD.getDecl()) &&
627  GD.getCtorType() != Ctor_Base) ||
628  (isa<CXXDestructorDecl>(GD.getDecl()) &&
629  GD.getDtorType() != Dtor_Base))) {
630  return;
631  }
633  setFuncName(Fn);
634 
635  mapRegionCounters(D);
636  if (CGM.getCodeGenOpts().CoverageMapping)
637  emitCounterRegionMapping(D);
638  if (PGOReader) {
640  loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
641  computeRegionCounts(D);
642  applyFunctionAttributes(PGOReader, Fn);
643  }
644 }
645 
646 void CodeGenPGO::mapRegionCounters(const Decl *D) {
647  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
648  MapRegionCounters Walker(*RegionCounterMap);
649  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
650  Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
651  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
652  Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
653  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
654  Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
655  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
656  Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
657  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
658  NumRegionCounters = Walker.NextCounter;
659  FunctionHash = Walker.Hash.finalize();
660 }
661 
662 bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
663  if (SkipCoverageMapping)
664  return true;
665 
666  // Don't map the functions in system headers.
667  const auto &SM = CGM.getContext().getSourceManager();
668  auto Loc = D->getBody()->getLocStart();
669  return SM.isInSystemHeader(Loc);
670 }
671 
672 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
673  if (skipRegionMappingForDecl(D))
674  return;
675 
676  std::string CoverageMapping;
677  llvm::raw_string_ostream OS(CoverageMapping);
678  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
680  CGM.getLangOpts(), RegionCounterMap.get());
681  MappingGen.emitCounterMapping(D, OS);
682  OS.flush();
683 
684  if (CoverageMapping.empty())
685  return;
686 
688  FuncNameVar, FuncName, FunctionHash, CoverageMapping);
689 }
690 
691 void
692 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
693  llvm::GlobalValue::LinkageTypes Linkage) {
694  if (skipRegionMappingForDecl(D))
695  return;
696 
697  std::string CoverageMapping;
698  llvm::raw_string_ostream OS(CoverageMapping);
699  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
701  CGM.getLangOpts());
702  MappingGen.emitEmptyMapping(D, OS);
703  OS.flush();
704 
705  if (CoverageMapping.empty())
706  return;
707 
708  setFuncName(Name, Linkage);
710  FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
711 }
712 
713 void CodeGenPGO::computeRegionCounts(const Decl *D) {
714  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
715  ComputeRegionCounts Walker(*StmtCountMap, *this);
716  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
717  Walker.VisitFunctionDecl(FD);
718  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
719  Walker.VisitObjCMethodDecl(MD);
720  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
721  Walker.VisitBlockDecl(BD);
722  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
723  Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
724 }
725 
726 void
727 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
728  llvm::Function *Fn) {
729  if (!haveRegionCounts())
730  return;
731 
732  uint64_t FunctionCount = getRegionCount(nullptr);
733  Fn->setEntryCount(FunctionCount);
734 }
735 
737  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
738  return;
739  if (!Builder.GetInsertBlock())
740  return;
741 
742  unsigned Counter = (*RegionCounterMap)[S];
743  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
744  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
745  {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
746  Builder.getInt64(FunctionHash),
747  Builder.getInt32(NumRegionCounters),
748  Builder.getInt32(Counter)});
749 }
750 
751 // This method either inserts a call to the profile run-time during
752 // instrumentation or puts profile data into metadata for PGO use.
753 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
754  llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
755 
757  return;
758 
759  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
760  return;
761 
762  if (isa<llvm::Constant>(ValuePtr))
763  return;
764 
765  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
766  if (InstrumentValueSites && RegionCounterMap) {
767  auto BuilderInsertPoint = Builder.saveIP();
768  Builder.SetInsertPoint(ValueSite);
769  llvm::Value *Args[5] = {
770  llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
771  Builder.getInt64(FunctionHash),
772  Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
773  Builder.getInt32(ValueKind),
774  Builder.getInt32(NumValueSites[ValueKind]++)
775  };
776  Builder.CreateCall(
777  CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
778  Builder.restoreIP(BuilderInsertPoint);
779  return;
780  }
781 
782  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
783  if (PGOReader && haveRegionCounts()) {
784  // We record the top most called three functions at each call site.
785  // Profile metadata contains "VP" string identifying this metadata
786  // as value profiling data, then a uint32_t value for the value profiling
787  // kind, a uint64_t value for the total number of times the call is
788  // executed, followed by the function hash and execution count (uint64_t)
789  // pairs for each function.
790  if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
791  return;
792 
793  llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
794  (llvm::InstrProfValueKind)ValueKind,
795  NumValueSites[ValueKind]);
796 
797  NumValueSites[ValueKind]++;
798  }
799 }
800 
801 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
802  bool IsInMainFile) {
803  CGM.getPGOStats().addVisited(IsInMainFile);
804  RegionCounts.clear();
805  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
806  PGOReader->getInstrProfRecord(FuncName, FunctionHash);
807  if (auto E = RecordExpected.takeError()) {
808  auto IPE = llvm::InstrProfError::take(std::move(E));
809  if (IPE == llvm::instrprof_error::unknown_function)
810  CGM.getPGOStats().addMissing(IsInMainFile);
811  else if (IPE == llvm::instrprof_error::hash_mismatch)
812  CGM.getPGOStats().addMismatched(IsInMainFile);
813  else if (IPE == llvm::instrprof_error::malformed)
814  // TODO: Consider a more specific warning for this case.
815  CGM.getPGOStats().addMismatched(IsInMainFile);
816  return;
817  }
818  ProfRecord =
819  llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
820  RegionCounts = ProfRecord->Counts;
821 }
822 
823 /// \brief Calculate what to divide by to scale weights.
824 ///
825 /// Given the maximum weight, calculate a divisor that will scale all the
826 /// weights to strictly less than UINT32_MAX.
827 static uint64_t calculateWeightScale(uint64_t MaxWeight) {
828  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
829 }
830 
831 /// \brief Scale an individual branch weight (and add 1).
832 ///
833 /// Scale a 64-bit weight down to 32-bits using \c Scale.
834 ///
835 /// According to Laplace's Rule of Succession, it is better to compute the
836 /// weight based on the count plus 1, so universally add 1 to the value.
837 ///
838 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
839 /// greater than \c Weight.
840 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
841  assert(Scale && "scale by 0?");
842  uint64_t Scaled = Weight / Scale + 1;
843  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
844  return Scaled;
845 }
846 
847 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
848  uint64_t FalseCount) {
849  // Check for empty weights.
850  if (!TrueCount && !FalseCount)
851  return nullptr;
852 
853  // Calculate how to scale down to 32-bits.
854  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
855 
856  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
857  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
858  scaleBranchWeight(FalseCount, Scale));
859 }
860 
861 llvm::MDNode *
862 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
863  // We need at least two elements to create meaningful weights.
864  if (Weights.size() < 2)
865  return nullptr;
866 
867  // Check for empty weights.
868  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
869  if (MaxWeight == 0)
870  return nullptr;
871 
872  // Calculate how to scale down to 32-bits.
873  uint64_t Scale = calculateWeightScale(MaxWeight);
874 
875  SmallVector<uint32_t, 16> ScaledWeights;
876  ScaledWeights.reserve(Weights.size());
877  for (uint64_t W : Weights)
878  ScaledWeights.push_back(scaleBranchWeight(W, Scale));
879 
880  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
881  return MDHelper.createBranchWeights(ScaledWeights);
882 }
883 
884 llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
885  uint64_t LoopCount) {
886  if (!PGO.haveRegionCounts())
887  return nullptr;
888  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
889  assert(CondCount.hasValue() && "missing expected loop condition count");
890  if (*CondCount == 0)
891  return nullptr;
892  return createProfileWeights(LoopCount,
893  std::max(*CondCount, LoopCount) - LoopCount);
894 }
Expr * getInc()
Definition: Stmt.h:1187
Optional< uint64_t > getStmtCount(const Stmt *S)
Check if an execution count is known for a given statement.
Definition: CodeGenPGO.h:69
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1561
CXXCtorType getCtorType() const
Definition: GlobalDecl.h:64
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:187
Expr * getCond()
Definition: Stmt.h:1075
llvm::Module & getModule() const
static uint64_t calculateWeightScale(uint64_t MaxWeight)
Calculate what to divide by to scale weights.
Definition: CodeGenPGO.cpp:827
llvm::LLVMContext & getLLVMContext()
CXXCatchStmt * getHandler(unsigned i)
Definition: StmtCXX.h:104
IfStmt - This represents an if/then/else.
Definition: Stmt.h:881
void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S)
Definition: CodeGenPGO.cpp:736
Organizes the per-function state that is used while generating code coverage mapping data...
The base class of the type hierarchy.
Definition: Type.h:1281
static llvm::cl::opt< bool > EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore, llvm::cl::desc("Enable value profiling"), llvm::cl::init(false))
const Stmt * getElse() const
Definition: Stmt.h:921
ObjCMethodDecl - Represents an instance or class method declaration.
Definition: DeclObjC.h:113
llvm::ImmutableMap< CountKey, unsigned > CountMap
A C++ throw-expression (C++ [except.throw]).
Definition: ExprCXX.h:913
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have...
Definition: Linkage.h:25
LabelStmt - Represents a label, which has a substatement.
Definition: Stmt.h:789
Stmt * getBody()
Definition: Stmt.h:1123
class LLVM_ALIGNAS(8) DependentTemplateSpecializationType const IdentifierInfo * Name
Represents a template specialization type whose template cannot be resolved, e.g. ...
Definition: Type.h:4549
void addFunctionMappingRecord(llvm::GlobalVariable *FunctionName, StringRef FunctionNameValue, uint64_t FunctionHash, const std::string &CoverageMapping, bool IsUsed=true)
Add a function's coverage mapping record to the collection of the function mapping records...
Stmt * getBody() const override
Definition: Decl.h:3536
const Decl * getDecl() const
Definition: GlobalDecl.h:62
Expr * getLHS() const
Definition: Expr.h:2943
Stmt * getBody() const override
Definition: Decl.cpp:4074
ForStmt - This represents a 'for (init;cond;inc)' stmt.
Definition: Stmt.h:1153
Base object ctor.
Definition: ABI.h:27
Expr * getTrueExpr() const
Definition: Expr.h:3326
Stmt * getHandlerBlock() const
Definition: StmtCXX.h:52
Stmt * getBody()
Definition: Stmt.h:1188
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:2897
Stmt * getInit()
Definition: Stmt.h:1167
CXXForRangeStmt - This represents C++0x [stmt.ranged]'s ranged for statement, represented as 'for (ra...
Definition: StmtCXX.h:128
void addMissing(bool MainFile)
Record that a function we've visited has no profile data.
Expr * getCond()
Definition: Stmt.h:1186
void assignRegionCounters(GlobalDecl GD, llvm::Function *Fn)
Assign counters to regions and configure them for PGO of a given function.
Definition: CodeGenPGO.cpp:614
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
Definition: ExprCXX.h:1503
A class that does preordor or postorder depth-first traversal on the entire Clang AST and visits each...
This represents the body of a CapturedStmt, and serves as its DeclContext.
Definition: Decl.h:3625
detail::InMemoryDirectory::const_iterator I
InstrProfStats & getPGOStats()
ConditionalOperator - The ?: ternary operator.
Definition: Expr.h:3170
const TargetInfo & getTarget() const
bool hasConstructorVariants() const
Does this ABI have different entrypoints for complete-object and base-subobject constructors?
Definition: TargetCXXABI.h:223
void valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, llvm::Instruction *ValueSite, llvm::Value *ValuePtr)
Definition: CodeGenPGO.cpp:753
BlockDecl - This represents a block literal declaration, which is like an unnamed FunctionDecl...
Definition: Decl.h:3456
CXXDtorType getDtorType() const
Definition: GlobalDecl.h:69
DeclStmt * getEndStmt()
Definition: StmtCXX.h:158
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
Definition: Expr.h:4567
bool haveRegionCounts() const
Whether or not we have PGO region data for the current function.
Definition: CodeGenPGO.h:57
Stmt * getBody()
Definition: Stmt.h:1078
ASTContext & getContext() const
CXXTryStmt - A C++ try block, including all handlers.
Definition: StmtCXX.h:65
bool isInSystemHeader(SourceLocation Loc) const
Returns if a SourceLocation is in a system header.
Base object dtor.
Definition: ABI.h:37
void addMismatched(bool MainFile)
Record that a function we've visited has mismatched profile data.
ReturnStmt - This represents a return, optionally of an expression: return; return 4;...
Definition: Stmt.h:1366
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=None)
The result type of a method or function.
const SourceManager & SM
Definition: Format.cpp:1184
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:29
DoStmt - This represents a 'do/while' stmt.
Definition: Stmt.h:1102
void addVisited(bool MainFile)
Record that we've visited a function and whether or not that function was in the main source file...
Stmt * getBody(const FunctionDecl *&Definition) const
getBody - Retrieve the body (definition) of the function.
Definition: Decl.cpp:2491
bool isInMainFile(SourceLocation Loc) const
Returns whether the PresumedLoc for a given SourceLocation is in the main file.
#define false
Definition: stdbool.h:33
This captures a statement into a function.
Definition: Stmt.h:2006
const Expr * getCond() const
Definition: Stmt.h:994
Stmt * getBody() const override
Retrieve the body of this method, if it has one.
Definition: DeclObjC.cpp:784
Per-function PGO state.
Definition: CodeGenPGO.h:31
const CodeGenOptions & getCodeGenOpts() const
const LangOptions & getLangOpts() const
const Expr * getSubExpr() const
Definition: ExprCXX.h:933
const Stmt * getBody() const
Definition: Stmt.h:995
unsigned getNumHandlers() const
Definition: StmtCXX.h:103
detail::InMemoryDirectory::const_iterator E
const Expr * getRetValue() const
Definition: Stmt.cpp:899
const Stmt * getThen() const
Definition: Stmt.h:919
void emitEmptyCounterMapping(const Decl *D, StringRef FuncName, llvm::GlobalValue::LinkageTypes Linkage)
Emit a coverage mapping range with a counter zero for an unused declaration.
Definition: CodeGenPGO.cpp:692
SwitchStmt - This represents a 'switch' stmt.
Definition: Stmt.h:957
Expr * getFalseExpr() const
Definition: Expr.h:3332
Represents Objective-C's collection statement.
Definition: StmtObjC.h:24
AbstractConditionalOperator - An abstract base class for ConditionalOperator and BinaryConditionalOpe...
Definition: Expr.h:3128
bool hasProfileClangInstr() const
Check if Clang profile instrumenation is on.
Stmt * getSubStmt()
Definition: Stmt.cpp:856
char __ovld __cnfn max(char x, char y)
Returns y if x < y, otherwise it returns x.
SourceManager & getSourceManager()
Definition: ASTContext.h:561
DeclStmt * getRangeStmt()
Definition: StmtCXX.h:154
GotoStmt - This represents a direct goto.
Definition: Stmt.h:1224
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale)
Scale an individual branch weight (and add 1).
Definition: CodeGenPGO.cpp:840
Expr * getCond()
Definition: Stmt.h:1120
void emitCounterMapping(const Decl *D, llvm::raw_ostream &OS)
Emit the coverage mapping data which maps the regions of code to counters that will be used to find t...
BoundNodesTreeBuilder *const Builder
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
ContinueStmt - This represents a continue.
Definition: Stmt.h:1302
std::string MainFileName
The user provided name for the "main file", if non-empty.
Opcode getOpcode() const
Definition: Expr.h:2940
BinaryConditionalOperator - The GNU extension to the conditional operator which allows the middle ope...
Definition: Expr.h:3240
CXXCatchStmt - This represents a C++ catch block.
Definition: StmtCXX.h:29
WhileStmt - This represents a 'while' stmt.
Definition: Stmt.h:1047
const Expr * getCond() const
Definition: Stmt.h:917
void ClearUnusedCoverageMapping(const Decl *D)
Remove the deferred empty coverage mapping as this declaration is actually instrumented.
CompoundStmt * getTryBlock()
Definition: StmtCXX.h:96
Expr * getRHS() const
Definition: Expr.h:2945
uint64_t getRegionCount(const Stmt *S)
Return the region count for the counter at the given index.
Definition: CodeGenPGO.h:113
BreakStmt - This represents a break.
Definition: Stmt.h:1328
CoverageMappingModuleGen * getCoverageMapping() const
Stmt * getSubStmt()
Definition: Stmt.h:809
DeclStmt * getLoopVarStmt()
Definition: StmtCXX.h:161
llvm::IndexedInstrProfReader * getPGOReader() const
DeclStmt * getBeginStmt()
Definition: StmtCXX.h:155
void emitEmptyMapping(const Decl *D, llvm::raw_ostream &OS)
Emit the coverage mapping data for an unused function.
This class handles loading and caching of source files into memory.