LLVM  15.0.0git
SampleProfileLoaderBaseImpl.h
Go to the documentation of this file.
1 ////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides the interface for the sampled PGO profile loader base
11 /// implementation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16 #define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Module.h"
43 
44 namespace llvm {
45 using namespace sampleprof;
46 using namespace sampleprofutil;
48 
49 #define DEBUG_TYPE "sample-profile-impl"
50 
51 namespace afdo_detail {
52 
53 template <typename BlockT> struct IRTraits;
54 template <> struct IRTraits<BasicBlock> {
59  using LoopT = Loop;
60  using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
61  using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
63  using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
68  static Function &getFunction(Function &F) { return F; }
69  static const BasicBlock *getEntryBB(const Function *F) {
70  return &F->getEntryBlock();
71  }
74 };
75 
76 } // end namespace afdo_detail
77 
78 extern cl::opt<bool> SampleProfileUseProfi;
79 extern cl::opt<bool> SampleProfileInferEntryCount;
80 
81 template <typename BT> class SampleProfileLoaderBaseImpl {
82 public:
83  SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
84  : Filename(Name), RemappingFilename(RemapName) {}
85  void dump() { Reader->dump(); }
86 
89  using BlockFrequencyInfoT =
94  using DominatorTreePtrT =
96  using PostDominatorTreePtrT =
98  using PostDominatorTreeT =
100  using OptRemarkEmitterT =
102  using OptRemarkAnalysisT =
106 
108  using EquivalenceClassMap =
110  using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
112  using BlockEdgeMap =
114 
115 protected:
116  ~SampleProfileLoaderBaseImpl() = default;
117  friend class SampleCoverageTracker;
118 
121  }
124  }
127  }
130  }
131 
132  unsigned getFunctionLoc(FunctionT &Func);
133  virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
134  ErrorOr<uint64_t> getInstWeightImpl(const InstructionT &Inst);
135  ErrorOr<uint64_t> getBlockWeight(const BasicBlockT *BB);
138  virtual const FunctionSamples *
139  findFunctionSamples(const InstructionT &I) const;
140  void printEdgeWeight(raw_ostream &OS, Edge E);
141  void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const;
142  void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB);
143  bool computeBlockWeights(FunctionT &F);
144  void findEquivalenceClasses(FunctionT &F);
145  void findEquivalencesFor(BasicBlockT *BB1,
146  ArrayRef<BasicBlockT *> Descendants,
147  PostDominatorTreeT *DomTree);
148  void propagateWeights(FunctionT &F);
149  void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
150  BlockWeightMap &SampleBlockWeights,
151  BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
152  uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
153  void buildEdges(FunctionT &F);
154  bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
155  void clearFunctionData(bool ResetDT = true);
156  void computeDominanceAndLoopInfo(FunctionT &F);
157  bool
158  computeAndPropagateWeights(FunctionT &F,
159  const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
160  void initWeightPropagation(FunctionT &F,
161  const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
162  void
163  finalizeWeightPropagation(FunctionT &F,
164  const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
165  void emitCoverageRemarks(FunctionT &F);
166 
167  /// Map basic blocks to their computed weights.
168  ///
169  /// The weight of a basic block is defined to be the maximum
170  /// of all the instruction weights in that block.
172 
173  /// Map edges to their computed weights.
174  ///
175  /// Edge weights are computed by propagating basic block weights in
176  /// SampleProfile::propagateWeights.
178 
179  /// Set of visited blocks during propagation.
181 
182  /// Set of visited edges during propagation.
184 
185  /// Equivalence classes for block weights.
186  ///
187  /// Two blocks BB1 and BB2 are in the same equivalence class if they
188  /// dominate and post-dominate each other, and they are in the same loop
189  /// nest. When this happens, the two blocks are guaranteed to execute
190  /// the same number of times.
192 
193  /// Dominance, post-dominance and loop information.
197 
198  /// Predecessors for each basic block in the CFG.
200 
201  /// Successors for each basic block in the CFG.
203 
204  /// Profile coverage tracker.
206 
207  /// Profile reader object.
208  std::unique_ptr<SampleProfileReader> Reader;
209 
210  /// Samples collected for the body of this function.
211  FunctionSamples *Samples = nullptr;
212 
213  /// Name of the profile file to load.
214  std::string Filename;
215 
216  /// Name of the profile remapping file to load.
217  std::string RemappingFilename;
218 
219  /// Profile Summary Info computed from sample profile.
220  ProfileSummaryInfo *PSI = nullptr;
221 
222  /// Optimization Remark Emitter used to emit diagnostic remarks.
223  OptRemarkEmitterT *ORE = nullptr;
224 };
225 
226 /// Clear all the per-function data used to load samples and propagate weights.
227 template <typename BT>
229  BlockWeights.clear();
230  EdgeWeights.clear();
231  VisitedBlocks.clear();
232  VisitedEdges.clear();
233  EquivalenceClass.clear();
234  if (ResetDT) {
235  DT = nullptr;
236  PDT = nullptr;
237  LI = nullptr;
238  }
239  Predecessors.clear();
240  Successors.clear();
241  CoverageTracker.clear();
242 }
243 
244 #ifndef NDEBUG
245 /// Print the weight of edge \p E on stream \p OS.
246 ///
247 /// \param OS Stream to emit the output to.
248 /// \param E Edge to print.
249 template <typename BT>
251  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
252  << "]: " << EdgeWeights[E] << "\n";
253 }
254 
255 /// Print the equivalence class of block \p BB on stream \p OS.
256 ///
257 /// \param OS Stream to emit the output to.
258 /// \param BB Block to print.
259 template <typename BT>
261  raw_ostream &OS, const BasicBlockT *BB) {
262  const BasicBlockT *Equiv = EquivalenceClass[BB];
263  OS << "equivalence[" << BB->getName()
264  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
265 }
266 
267 /// Print the weight of block \p BB on stream \p OS.
268 ///
269 /// \param OS Stream to emit the output to.
270 /// \param BB Block to print.
271 template <typename BT>
273  raw_ostream &OS, const BasicBlockT *BB) const {
274  const auto &I = BlockWeights.find(BB);
275  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
276  OS << "weight[" << BB->getName() << "]: " << W << "\n";
277 }
278 #endif
279 
280 /// Get the weight for an instruction.
281 ///
282 /// The "weight" of an instruction \p Inst is the number of samples
283 /// collected on that instruction at runtime. To retrieve it, we
284 /// need to compute the line number of \p Inst relative to the start of its
285 /// function. We use HeaderLineno to compute the offset. We then
286 /// look up the samples collected for \p Inst using BodySamples.
287 ///
288 /// \param Inst Instruction to query.
289 ///
290 /// \returns the weight of \p Inst.
291 template <typename BT>
294  return getInstWeightImpl(Inst);
295 }
296 
297 template <typename BT>
300  const FunctionSamples *FS = findFunctionSamples(Inst);
301  if (!FS)
302  return std::error_code();
303 
304  const DebugLoc &DLoc = Inst.getDebugLoc();
305  if (!DLoc)
306  return std::error_code();
307 
308  const DILocation *DIL = DLoc;
309  uint32_t LineOffset = FunctionSamples::getOffset(DIL);
310  uint32_t Discriminator;
312  Discriminator = DIL->getDiscriminator();
313  else
314  Discriminator = DIL->getBaseDiscriminator();
315 
316  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
317  if (R) {
318  bool FirstMark =
319  CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
320  if (FirstMark) {
321  ORE->emit([&]() {
322  OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
323  Remark << "Applied " << ore::NV("NumSamples", *R);
324  Remark << " samples from profile (offset: ";
325  Remark << ore::NV("LineOffset", LineOffset);
326  if (Discriminator) {
327  Remark << ".";
328  Remark << ore::NV("Discriminator", Discriminator);
329  }
330  Remark << ")";
331  return Remark;
332  });
333  }
334  LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." << Discriminator << ":"
335  << Inst << " (line offset: " << LineOffset << "."
336  << Discriminator << " - weight: " << R.get() << ")\n");
337  }
338  return R;
339 }
340 
341 /// Compute the weight of a basic block.
342 ///
343 /// The weight of basic block \p BB is the maximum weight of all the
344 /// instructions in BB.
345 ///
346 /// \param BB The basic block to query.
347 ///
348 /// \returns the weight for \p BB.
349 template <typename BT>
352  uint64_t Max = 0;
353  bool HasWeight = false;
354  for (auto &I : *BB) {
355  const ErrorOr<uint64_t> &R = getInstWeight(I);
356  if (R) {
357  Max = std::max(Max, R.get());
358  HasWeight = true;
359  }
360  }
361  return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
362 }
363 
364 /// Compute and store the weights of every basic block.
365 ///
366 /// This populates the BlockWeights map by computing
367 /// the weights of every basic block in the CFG.
368 ///
369 /// \param F The function to query.
370 template <typename BT>
372  bool Changed = false;
373  LLVM_DEBUG(dbgs() << "Block weights\n");
374  for (const auto &BB : F) {
375  ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
376  if (Weight) {
377  BlockWeights[&BB] = Weight.get();
378  VisitedBlocks.insert(&BB);
379  Changed = true;
380  }
381  LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
382  }
383 
384  return Changed;
385 }
386 
387 /// Get the FunctionSamples for an instruction.
388 ///
389 /// The FunctionSamples of an instruction \p Inst is the inlined instance
390 /// in which that instruction is coming from. We traverse the inline stack
391 /// of that instruction, and match it with the tree nodes in the profile.
392 ///
393 /// \param Inst Instruction to query.
394 ///
395 /// \returns the FunctionSamples pointer to the inlined instance.
396 template <typename BT>
398  const InstructionT &Inst) const {
399  const DILocation *DIL = Inst.getDebugLoc();
400  if (!DIL)
401  return Samples;
402 
403  auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
404  if (it.second) {
405  it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
406  }
407  return it.first->second;
408 }
409 
410 /// Find equivalence classes for the given block.
411 ///
412 /// This finds all the blocks that are guaranteed to execute the same
413 /// number of times as \p BB1. To do this, it traverses all the
414 /// descendants of \p BB1 in the dominator or post-dominator tree.
415 ///
416 /// A block BB2 will be in the same equivalence class as \p BB1 if
417 /// the following holds:
418 ///
419 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
420 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
421 /// dominate BB1 in the post-dominator tree.
422 ///
423 /// 2- Both BB2 and \p BB1 must be in the same loop.
424 ///
425 /// For every block BB2 that meets those two requirements, we set BB2's
426 /// equivalence class to \p BB1.
427 ///
428 /// \param BB1 Block to check.
429 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
430 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
431 /// with blocks from \p BB1's dominator tree, then
432 /// this is the post-dominator tree, and vice versa.
433 template <typename BT>
435  BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
436  PostDominatorTreeT *DomTree) {
437  const BasicBlockT *EC = EquivalenceClass[BB1];
438  uint64_t Weight = BlockWeights[EC];
439  for (const auto *BB2 : Descendants) {
440  bool IsDomParent = DomTree->dominates(BB2, BB1);
441  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
442  if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
443  EquivalenceClass[BB2] = EC;
444  // If BB2 is visited, then the entire EC should be marked as visited.
445  if (VisitedBlocks.count(BB2)) {
446  VisitedBlocks.insert(EC);
447  }
448 
449  // If BB2 is heavier than BB1, make BB2 have the same weight
450  // as BB1.
451  //
452  // Note that we don't worry about the opposite situation here
453  // (when BB2 is lighter than BB1). We will deal with this
454  // during the propagation phase. Right now, we just want to
455  // make sure that BB1 has the largest weight of all the
456  // members of its equivalence set.
457  Weight = std::max(Weight, BlockWeights[BB2]);
458  }
459  }
460  const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
461  if (EC == EntryBB) {
462  BlockWeights[EC] = Samples->getHeadSamples() + 1;
463  } else {
464  BlockWeights[EC] = Weight;
465  }
466 }
467 
468 /// Find equivalence classes.
469 ///
470 /// Since samples may be missing from blocks, we can fill in the gaps by setting
471 /// the weights of all the blocks in the same equivalence class to the same
472 /// weight. To compute the concept of equivalence, we use dominance and loop
473 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
474 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
475 ///
476 /// \param F The function to query.
477 template <typename BT>
479  SmallVector<BasicBlockT *, 8> DominatedBBs;
480  LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
481  // Find equivalence sets based on dominance and post-dominance information.
482  for (auto &BB : F) {
483  BasicBlockT *BB1 = &BB;
484 
485  // Compute BB1's equivalence class once.
486  if (EquivalenceClass.count(BB1)) {
487  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
488  continue;
489  }
490 
491  // By default, blocks are in their own equivalence class.
492  EquivalenceClass[BB1] = BB1;
493 
494  // Traverse all the blocks dominated by BB1. We are looking for
495  // every basic block BB2 such that:
496  //
497  // 1- BB1 dominates BB2.
498  // 2- BB2 post-dominates BB1.
499  // 3- BB1 and BB2 are in the same loop nest.
500  //
501  // If all those conditions hold, it means that BB2 is executed
502  // as many times as BB1, so they are placed in the same equivalence
503  // class by making BB2's equivalence class be BB1.
504  DominatedBBs.clear();
505  DT->getDescendants(BB1, DominatedBBs);
506  findEquivalencesFor(BB1, DominatedBBs, &*PDT);
507 
508  LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
509  }
510 
511  // Assign weights to equivalence classes.
512  //
513  // All the basic blocks in the same equivalence class will execute
514  // the same number of times. Since we know that the head block in
515  // each equivalence class has the largest weight, assign that weight
516  // to all the blocks in that equivalence class.
517  LLVM_DEBUG(
518  dbgs() << "\nAssign the same weight to all blocks in the same class\n");
519  for (auto &BI : F) {
520  const BasicBlockT *BB = &BI;
521  const BasicBlockT *EquivBB = EquivalenceClass[BB];
522  if (BB != EquivBB)
523  BlockWeights[BB] = BlockWeights[EquivBB];
524  LLVM_DEBUG(printBlockWeight(dbgs(), BB));
525  }
526 }
527 
528 /// Visit the given edge to decide if it has a valid weight.
529 ///
530 /// If \p E has not been visited before, we copy to \p UnknownEdge
531 /// and increment the count of unknown edges.
532 ///
533 /// \param E Edge to visit.
534 /// \param NumUnknownEdges Current number of unknown edges.
535 /// \param UnknownEdge Set if E has not been visited before.
536 ///
537 /// \returns E's weight, if known. Otherwise, return 0.
538 template <typename BT>
540  unsigned *NumUnknownEdges,
541  Edge *UnknownEdge) {
542  if (!VisitedEdges.count(E)) {
543  (*NumUnknownEdges)++;
544  *UnknownEdge = E;
545  return 0;
546  }
547 
548  return EdgeWeights[E];
549 }
550 
551 /// Propagate weights through incoming/outgoing edges.
552 ///
553 /// If the weight of a basic block is known, and there is only one edge
554 /// with an unknown weight, we can calculate the weight of that edge.
555 ///
556 /// Similarly, if all the edges have a known count, we can calculate the
557 /// count of the basic block, if needed.
558 ///
559 /// \param F Function to process.
560 /// \param UpdateBlockCount Whether we should update basic block counts that
561 /// has already been annotated.
562 ///
563 /// \returns True if new weights were assigned to edges or blocks.
564 template <typename BT>
566  FunctionT &F, bool UpdateBlockCount) {
567  bool Changed = false;
568  LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
569  for (const auto &BI : F) {
570  const BasicBlockT *BB = &BI;
571  const BasicBlockT *EC = EquivalenceClass[BB];
572 
573  // Visit all the predecessor and successor edges to determine
574  // which ones have a weight assigned already. Note that it doesn't
575  // matter that we only keep track of a single unknown edge. The
576  // only case we are interested in handling is when only a single
577  // edge is unknown (see setEdgeOrBlockWeight).
578  for (unsigned i = 0; i < 2; i++) {
579  uint64_t TotalWeight = 0;
580  unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
581  Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
582 
583  if (i == 0) {
584  // First, visit all predecessor edges.
585  NumTotalEdges = Predecessors[BB].size();
586  for (auto *Pred : Predecessors[BB]) {
587  Edge E = std::make_pair(Pred, BB);
588  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
589  if (E.first == E.second)
590  SelfReferentialEdge = E;
591  }
592  if (NumTotalEdges == 1) {
593  SingleEdge = std::make_pair(Predecessors[BB][0], BB);
594  }
595  } else {
596  // On the second round, visit all successor edges.
597  NumTotalEdges = Successors[BB].size();
598  for (auto *Succ : Successors[BB]) {
599  Edge E = std::make_pair(BB, Succ);
600  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
601  }
602  if (NumTotalEdges == 1) {
603  SingleEdge = std::make_pair(BB, Successors[BB][0]);
604  }
605  }
606 
607  // After visiting all the edges, there are three cases that we
608  // can handle immediately:
609  //
610  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
611  // In this case, we simply check that the sum of all the edges
612  // is the same as BB's weight. If not, we change BB's weight
613  // to match. Additionally, if BB had not been visited before,
614  // we mark it visited.
615  //
616  // - Only one edge is unknown and BB has already been visited.
617  // In this case, we can compute the weight of the edge by
618  // subtracting the total block weight from all the known
619  // edge weights. If the edges weight more than BB, then the
620  // edge of the last remaining edge is set to zero.
621  //
622  // - There exists a self-referential edge and the weight of BB is
623  // known. In this case, this edge can be based on BB's weight.
624  // We add up all the other known edges and set the weight on
625  // the self-referential edge as we did in the previous case.
626  //
627  // In any other case, we must continue iterating. Eventually,
628  // all edges will get a weight, or iteration will stop when
629  // it reaches SampleProfileMaxPropagateIterations.
630  if (NumUnknownEdges <= 1) {
631  uint64_t &BBWeight = BlockWeights[EC];
632  if (NumUnknownEdges == 0) {
633  if (!VisitedBlocks.count(EC)) {
634  // If we already know the weight of all edges, the weight of the
635  // basic block can be computed. It should be no larger than the sum
636  // of all edge weights.
637  if (TotalWeight > BBWeight) {
638  BBWeight = TotalWeight;
639  Changed = true;
640  LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
641  << " known. Set weight for block: ";
642  printBlockWeight(dbgs(), BB););
643  }
644  } else if (NumTotalEdges == 1 &&
645  EdgeWeights[SingleEdge] < BlockWeights[EC]) {
646  // If there is only one edge for the visited basic block, use the
647  // block weight to adjust edge weight if edge weight is smaller.
648  EdgeWeights[SingleEdge] = BlockWeights[EC];
649  Changed = true;
650  }
651  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
652  // If there is a single unknown edge and the block has been
653  // visited, then we can compute E's weight.
654  if (BBWeight >= TotalWeight)
655  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
656  else
657  EdgeWeights[UnknownEdge] = 0;
658  const BasicBlockT *OtherEC;
659  if (i == 0)
660  OtherEC = EquivalenceClass[UnknownEdge.first];
661  else
662  OtherEC = EquivalenceClass[UnknownEdge.second];
663  // Edge weights should never exceed the BB weights it connects.
664  if (VisitedBlocks.count(OtherEC) &&
665  EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
666  EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
667  VisitedEdges.insert(UnknownEdge);
668  Changed = true;
669  LLVM_DEBUG(dbgs() << "Set weight for edge: ";
670  printEdgeWeight(dbgs(), UnknownEdge));
671  }
672  } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
673  // If a block Weights 0, all its in/out edges should weight 0.
674  if (i == 0) {
675  for (auto *Pred : Predecessors[BB]) {
676  Edge E = std::make_pair(Pred, BB);
677  EdgeWeights[E] = 0;
678  VisitedEdges.insert(E);
679  }
680  } else {
681  for (auto *Succ : Successors[BB]) {
682  Edge E = std::make_pair(BB, Succ);
683  EdgeWeights[E] = 0;
684  VisitedEdges.insert(E);
685  }
686  }
687  } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
688  uint64_t &BBWeight = BlockWeights[BB];
689  // We have a self-referential edge and the weight of BB is known.
690  if (BBWeight >= TotalWeight)
691  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
692  else
693  EdgeWeights[SelfReferentialEdge] = 0;
694  VisitedEdges.insert(SelfReferentialEdge);
695  Changed = true;
696  LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
697  printEdgeWeight(dbgs(), SelfReferentialEdge));
698  }
699  if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
700  BlockWeights[EC] = TotalWeight;
701  VisitedBlocks.insert(EC);
702  Changed = true;
703  }
704  }
705  }
706 
707  return Changed;
708 }
709 
710 /// Build in/out edge lists for each basic block in the CFG.
711 ///
712 /// We are interested in unique edges. If a block B1 has multiple
713 /// edges to another block B2, we only add a single B1->B2 edge.
714 template <typename BT>
716  for (auto &BI : F) {
717  BasicBlockT *B1 = &BI;
718 
719  // Add predecessors for B1.
721  if (!Predecessors[B1].empty())
722  llvm_unreachable("Found a stale predecessors list in a basic block.");
723  for (auto *B2 : getPredecessors(B1))
724  if (Visited.insert(B2).second)
725  Predecessors[B1].push_back(B2);
726 
727  // Add successors for B1.
728  Visited.clear();
729  if (!Successors[B1].empty())
730  llvm_unreachable("Found a stale successors list in a basic block.");
731  for (auto *B2 : getSuccessors(B1))
732  if (Visited.insert(B2).second)
733  Successors[B1].push_back(B2);
734  }
735 }
736 
737 /// Propagate weights into edges
738 ///
739 /// The following rules are applied to every block BB in the CFG:
740 ///
741 /// - If BB has a single predecessor/successor, then the weight
742 /// of that edge is the weight of the block.
743 ///
744 /// - If all incoming or outgoing edges are known except one, and the
745 /// weight of the block is already known, the weight of the unknown
746 /// edge will be the weight of the block minus the sum of all the known
747 /// edges. If the sum of all the known edges is larger than BB's weight,
748 /// we set the unknown edge weight to zero.
749 ///
750 /// - If there is a self-referential edge, and the weight of the block is
751 /// known, the weight for that edge is set to the weight of the block
752 /// minus the weight of the other incoming edges to that block (if
753 /// known).
754 template <typename BT>
756  // Flow-based profile inference is only usable with BasicBlock instantiation
757  // of SampleProfileLoaderBaseImpl.
758  if (SampleProfileUseProfi) {
759  // Prepare block sample counts for inference.
760  BlockWeightMap SampleBlockWeights;
761  for (const auto &BI : F) {
762  ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
763  if (Weight)
764  SampleBlockWeights[&BI] = Weight.get();
765  }
766  // Fill in BlockWeights and EdgeWeights using an inference algorithm.
767  applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
768  } else {
769  bool Changed = true;
770  unsigned I = 0;
771 
772  // If BB weight is larger than its corresponding loop's header BB weight,
773  // use the BB weight to replace the loop header BB weight.
774  for (auto &BI : F) {
775  BasicBlockT *BB = &BI;
776  LoopT *L = LI->getLoopFor(BB);
777  if (!L) {
778  continue;
779  }
780  BasicBlockT *Header = L->getHeader();
781  if (Header && BlockWeights[BB] > BlockWeights[Header]) {
782  BlockWeights[Header] = BlockWeights[BB];
783  }
784  }
785 
786  // Propagate until we converge or we go past the iteration limit.
787  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
788  Changed = propagateThroughEdges(F, false);
789  }
790 
791  // The first propagation propagates BB counts from annotated BBs to unknown
792  // BBs. The 2nd propagation pass resets edges weights, and use all BB
793  // weights to propagate edge weights.
794  VisitedEdges.clear();
795  Changed = true;
796  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
797  Changed = propagateThroughEdges(F, false);
798  }
799 
800  // The 3rd propagation pass allows adjust annotated BB weights that are
801  // obviously wrong.
802  Changed = true;
803  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
804  Changed = propagateThroughEdges(F, true);
805  }
806  }
807 }
808 
809 template <typename BT>
811  FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
812  BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
813  auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
814  Infer.apply(BlockWeights, EdgeWeights);
815 }
816 
817 /// Generate branch weight metadata for all branches in \p F.
818 ///
819 /// Branch weights are computed out of instruction samples using a
820 /// propagation heuristic. Propagation proceeds in 3 phases:
821 ///
822 /// 1- Assignment of block weights. All the basic blocks in the function
823 /// are initial assigned the same weight as their most frequently
824 /// executed instruction.
825 ///
826 /// 2- Creation of equivalence classes. Since samples may be missing from
827 /// blocks, we can fill in the gaps by setting the weights of all the
828 /// blocks in the same equivalence class to the same weight. To compute
829 /// the concept of equivalence, we use dominance and loop information.
830 /// Two blocks B1 and B2 are in the same equivalence class if B1
831 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
832 ///
833 /// 3- Propagation of block weights into edges. This uses a simple
834 /// propagation heuristic. The following rules are applied to every
835 /// block BB in the CFG:
836 ///
837 /// - If BB has a single predecessor/successor, then the weight
838 /// of that edge is the weight of the block.
839 ///
840 /// - If all the edges are known except one, and the weight of the
841 /// block is already known, the weight of the unknown edge will
842 /// be the weight of the block minus the sum of all the known
843 /// edges. If the sum of all the known edges is larger than BB's weight,
844 /// we set the unknown edge weight to zero.
845 ///
846 /// - If there is a self-referential edge, and the weight of the block is
847 /// known, the weight for that edge is set to the weight of the block
848 /// minus the weight of the other incoming edges to that block (if
849 /// known).
850 ///
851 /// Since this propagation is not guaranteed to finalize for every CFG, we
852 /// only allow it to proceed for a limited number of iterations (controlled
853 /// by -sample-profile-max-propagate-iterations).
854 ///
855 /// FIXME: Try to replace this propagation heuristic with a scheme
856 /// that is guaranteed to finalize. A work-list approach similar to
857 /// the standard value propagation algorithm used by SSA-CCP might
858 /// work here.
859 ///
860 /// \param F The function to query.
861 ///
862 /// \returns true if \p F was modified. Returns false, otherwise.
863 template <typename BT>
865  FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
866  bool Changed = (InlinedGUIDs.size() != 0);
867 
868  // Compute basic block weights.
869  Changed |= computeBlockWeights(F);
870 
871  if (Changed) {
872  // Initialize propagation.
873  initWeightPropagation(F, InlinedGUIDs);
874 
875  // Propagate weights to all edges.
876  propagateWeights(F);
877 
878  // Post-process propagated weights.
879  finalizeWeightPropagation(F, InlinedGUIDs);
880  }
881 
882  return Changed;
883 }
884 
885 template <typename BT>
887  FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
888  // Add an entry count to the function using the samples gathered at the
889  // function entry.
890  // Sets the GUIDs that are inlined in the profiled binary. This is used
891  // for ThinLink to make correct liveness analysis, and also make the IR
892  // match the profiled binary before annotation.
893  getFunction(F).setEntryCount(
894  ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
895  &InlinedGUIDs);
896 
897  if (!SampleProfileUseProfi) {
898  // Compute dominance and loop info needed for propagation.
899  computeDominanceAndLoopInfo(F);
900 
901  // Find equivalence classes.
902  findEquivalenceClasses(F);
903  }
904 
905  // Before propagation starts, build, for each block, a list of
906  // unique predecessors and successors. This is necessary to handle
907  // identical edges in multiway branches. Since we visit all blocks and all
908  // edges of the CFG, it is cleaner to build these lists once at the start
909  // of the pass.
910  buildEdges(F);
911 }
912 
913 template <typename BT>
915  FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
916  // If we utilize a flow-based count inference, then we trust the computed
917  // counts and set the entry count as computed by the algorithm. This is
918  // primarily done to sync the counts produced by profi and BFI inference,
919  // which uses the entry count for mass propagation.
920  // If profi produces a zero-value for the entry count, we fallback to
921  // Samples->getHeadSamples() + 1 to avoid functions with zero count.
922  if (SampleProfileUseProfi) {
923  const BasicBlockT *EntryBB = getEntryBB(&F);
924  ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
925  if (BlockWeights[EntryBB] > 0 &&
926  (SampleProfileInferEntryCount || !EntryWeight)) {
927  getFunction(F).setEntryCount(
928  ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
929  &InlinedGUIDs);
930  }
931  }
932 }
933 
934 template <typename BT>
936  // If coverage checking was requested, compute it now.
937  const Function &Func = getFunction(F);
939  unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
940  unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
941  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
942  if (Coverage < SampleProfileRecordCoverage) {
943  Func.getContext().diagnose(DiagnosticInfoSampleProfile(
944  Func.getSubprogram()->getFilename(), getFunctionLoc(F),
945  Twine(Used) + " of " + Twine(Total) + " available profile records (" +
946  Twine(Coverage) + "%) were applied",
947  DS_Warning));
948  }
949  }
950 
952  uint64_t Used = CoverageTracker.getTotalUsedSamples();
953  uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
954  unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
955  if (Coverage < SampleProfileSampleCoverage) {
956  Func.getContext().diagnose(DiagnosticInfoSampleProfile(
957  Func.getSubprogram()->getFilename(), getFunctionLoc(F),
958  Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
959  Twine(Coverage) + "%) were applied",
960  DS_Warning));
961  }
962  }
963 }
964 
965 /// Get the line number for the function header.
966 ///
967 /// This looks up function \p F in the current compilation unit and
968 /// retrieves the line number where the function is defined. This is
969 /// line 0 for all the samples read from the profile file. Every line
970 /// number is relative to this line.
971 ///
972 /// \param F Function object to query.
973 ///
974 /// \returns the line number where \p F is defined. If it returns 0,
975 /// it means that there is no debug information available for \p F.
976 template <typename BT>
978  const Function &Func = getFunction(F);
979  if (DISubprogram *S = Func.getSubprogram())
980  return S->getLine();
981 
982  if (NoWarnSampleUnused)
983  return 0;
984 
985  // If the start of \p F is missing, emit a diagnostic to inform the user
986  // about the missed opportunity.
987  Func.getContext().diagnose(DiagnosticInfoSampleProfile(
988  "No debug information found in function " + Func.getName() +
989  ": Function profile not used",
990  DS_Warning));
991  return 0;
992 }
993 
994 template <typename BT>
996  FunctionT &F) {
997  DT.reset(new DominatorTree);
998  DT->recalculate(F);
999 
1000  PDT.reset(new PostDominatorTree(F));
1001 
1002  LI.reset(new LoopInfo);
1003  LI->analyze(*DT);
1004 }
1005 
1006 #undef DEBUG_TYPE
1007 
1008 } // namespace llvm
1009 #endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
llvm::SampleProfileLoaderBaseImpl::getEntryBB
const BasicBlockT * getEntryBB(const FunctionT *F)
Definition: SampleProfileLoaderBaseImpl.h:122
i
i
Definition: README.txt:29
B1
getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:42
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::OptRemarkAnalysisT
typename afdo_detail::IRTraits< MachineBasicBlock >::OptRemarkAnalysisT OptRemarkAnalysisT
Definition: SampleProfileLoaderBaseImpl.h:103
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
llvm::SampleProfileLoaderBaseImpl::getFunction
Function & getFunction(FunctionT &F)
Definition: SampleProfileLoaderBaseImpl.h:119
DebugInfoMetadata.h
llvm::SampleProfileLoaderBaseImpl::visitEdge
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
Definition: SampleProfileLoaderBaseImpl.h:539
llvm::Function
Definition: Function.h:60
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:233
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::SuccRangeT
typename afdo_detail::IRTraits< MachineBasicBlock >::SuccRangeT SuccRangeT
Definition: SampleProfileLoaderBaseImpl.h:105
llvm::afdo_detail::IRTraits< BasicBlock >::PostDominatorTreePtrT
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:63
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::SampleProfileLoaderBaseImpl
Definition: SampleProfileLoaderBaseImpl.h:81
llvm::SampleProfileLoaderBaseImpl::Predecessors
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
Definition: SampleProfileLoaderBaseImpl.h:199
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::SampleProfileLoaderBaseImpl::Successors
BlockEdgeMap Successors
Successors for each basic block in the CFG.
Definition: SampleProfileLoaderBaseImpl.h:202
llvm::EnableFSDiscriminator
cl::opt< bool > EnableFSDiscriminator
Definition: TargetPassConfig.cpp:392
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1551
DenseMap.h
Module.h
llvm::afdo_detail::IRTraits< BasicBlock >::getSuccessors
static succ_range getSuccessors(BasicBlock *BB)
Definition: SampleProfileLoaderBaseImpl.h:73
llvm::SampleProfileLoaderBaseImpl::LI
LoopInfoPtrT LI
Definition: SampleProfileLoaderBaseImpl.h:196
llvm::SmallSet< Edge, 32 >
llvm::SmallPtrSet< const BasicBlockT *, 32 >
llvm::SampleProfileLoaderBaseImpl::DT
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
Definition: SampleProfileLoaderBaseImpl.h:194
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
SampleProfileInference.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::BlockFrequencyInfoT
typename afdo_detail::IRTraits< MachineBasicBlock >::BlockFrequencyInfoT BlockFrequencyInfoT
Definition: SampleProfileLoaderBaseImpl.h:90
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::afdo_detail::IRTraits< BasicBlock >::getFunction
static Function & getFunction(Function &F)
Definition: SampleProfileLoaderBaseImpl.h:68
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::SampleProfileLoaderBaseImpl::initWeightPropagation
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Definition: SampleProfileLoaderBaseImpl.h:886
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::SampleProfileLoaderBaseImpl::getBlockWeight
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
Definition: SampleProfileLoaderBaseImpl.h:351
llvm::SampleProfileUseProfi
cl::opt< bool > SampleProfileUseProfi
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::SampleProfileLoaderBaseImpl::propagateThroughEdges
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
Definition: SampleProfileLoaderBaseImpl.h:565
llvm::SampleProfileLoaderBaseImpl::printBlockEquivalence
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
Definition: SampleProfileLoaderBaseImpl.h:260
PostDominators.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
llvm::afdo_detail::IRTraits< BasicBlock >::getEntryBB
static const BasicBlock * getEntryBB(const Function *F)
Definition: SampleProfileLoaderBaseImpl.h:69
llvm::SampleProfileLoaderBaseImpl::BlockWeights
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
Definition: SampleProfileLoaderBaseImpl.h:171
DenseSet.h
SampleProf.h
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::Instruction
Definition: Instruction.h:42
llvm::SampleProfileLoaderBaseImpl::Filename
std::string Filename
Name of the profile file to load.
Definition: SampleProfileLoaderBaseImpl.h:214
llvm::SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo
void computeDominanceAndLoopInfo(FunctionT &F)
Definition: SampleProfileLoaderBaseImpl.h:995
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::SampleProfileLoaderBaseImpl::printEdgeWeight
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
Definition: SampleProfileLoaderBaseImpl.h:250
DebugLoc.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::LoopInfoPtrT
typename afdo_detail::IRTraits< MachineBasicBlock >::LoopInfoPtrT LoopInfoPtrT
Definition: SampleProfileLoaderBaseImpl.h:93
SmallPtrSet.h
llvm::Function::PCT_Real
@ PCT_Real
Definition: Function.h:248
SampleProfileLoaderBaseUtil.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::PostDominatorTreeT
typename afdo_detail::IRTraits< MachineBasicBlock >::PostDominatorTreeT PostDominatorTreeT
Definition: SampleProfileLoaderBaseImpl.h:99
llvm::SampleProfileLoaderBaseImpl::findFunctionSamples
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
Definition: SampleProfileLoaderBaseImpl.h:397
llvm::SampleProfileLoaderBaseImpl::getInstWeight
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
Definition: SampleProfileLoaderBaseImpl.h:293
llvm::SampleProfileLoaderBaseImpl::Reader
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
Definition: SampleProfileLoaderBaseImpl.h:208
CFG.h
LoopInfo.h
llvm::SampleProfileLoaderBaseImpl::emitCoverageRemarks
void emitCoverageRemarks(FunctionT &F)
Definition: SampleProfileLoaderBaseImpl.h:935
llvm::SampleProfileLoaderBaseImpl::SampleProfileLoaderBaseImpl
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
Definition: SampleProfileLoaderBaseImpl.h:83
llvm::SampleProfileLoaderBaseImpl::clearFunctionData
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
Definition: SampleProfileLoaderBaseImpl.h:228
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::PredRangeT
typename afdo_detail::IRTraits< MachineBasicBlock >::PredRangeT PredRangeT
Definition: SampleProfileLoaderBaseImpl.h:104
llvm::SampleProfileLoaderBaseImpl::getFunctionLoc
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
Definition: SampleProfileLoaderBaseImpl.h:977
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::HighlightColor::Remark
@ Remark
BasicBlock.h
llvm::SampleProfileLoaderBaseImpl::computeAndPropagateWeights
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
Definition: SampleProfileLoaderBaseImpl.h:864
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:47
llvm::SampleProfileLoaderBaseImpl::computeBlockWeights
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
Definition: SampleProfileLoaderBaseImpl.h:371
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::DominatorTreePtrT
typename afdo_detail::IRTraits< MachineBasicBlock >::DominatorTreePtrT DominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:95
llvm::succ_range
iterator_range< succ_iterator > succ_range
Definition: CFG.h:244
llvm::SampleProfileLoaderBaseImpl::PDT
PostDominatorTreePtrT PDT
Definition: SampleProfileLoaderBaseImpl.h:195
uint64_t
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:720
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::InstructionT
typename afdo_detail::IRTraits< MachineBasicBlock >::InstructionT InstructionT
Definition: SampleProfileLoaderBaseImpl.h:87
llvm::SampleProfileLoaderBaseImpl::CoverageTracker
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
Definition: SampleProfileLoaderBaseImpl.h:205
llvm::SampleProfileLoaderBaseImpl::getInstWeightImpl
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
Definition: SampleProfileLoaderBaseImpl.h:299
llvm::afdo_detail::IRTraits< BasicBlock >::getPredecessors
static pred_range getPredecessors(BasicBlock *BB)
Definition: SampleProfileLoaderBaseImpl.h:72
llvm::DenseMap< const BasicBlockT *, uint64_t >
I
#define I(x, y, z)
Definition: MD5.cpp:58
SampleProfReader.h
llvm::SampleProfileLoaderBaseImpl::findEquivalenceClasses
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
Definition: SampleProfileLoaderBaseImpl.h:478
ArrayRef.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SampleProfileLoaderBaseImpl.h:49
llvm::SampleProfileInferEntryCount
cl::opt< bool > SampleProfileInferEntryCount
llvm::X86AS::FS
@ FS
Definition: X86.h:192
llvm::SampleProfileLoaderBaseImpl::EquivalenceClass
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
Definition: SampleProfileLoaderBaseImpl.h:191
llvm::SampleProfileLoaderBaseImpl::VisitedBlocks
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
Definition: SampleProfileLoaderBaseImpl.h:180
llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::PostDominatorTreePtrT
typename afdo_detail::IRTraits< MachineBasicBlock >::PostDominatorTreePtrT PostDominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:97
llvm::SampleProfileLoaderBaseImpl::buildEdges
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
Definition: SampleProfileLoaderBaseImpl.h:715
llvm::sampleprof::FunctionSamples::getOffset
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:218
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:291
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::SampleProfileSampleCoverage
cl::opt< unsigned > SampleProfileSampleCoverage
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::LoopT
typename afdo_detail::IRTraits< MachineBasicBlock >::LoopT LoopT
Definition: SampleProfileLoaderBaseImpl.h:92
llvm::SampleProfileLoaderBaseImpl::RemappingFilename
std::string RemappingFilename
Name of the profile remapping file to load.
Definition: SampleProfileLoaderBaseImpl.h:217
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::BasicBlockT
typename afdo_detail::IRTraits< MachineBasicBlock >::BasicBlockT BasicBlockT
Definition: SampleProfileLoaderBaseImpl.h:88
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::SampleProfileLoaderBaseImpl::applyProfi
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
Definition: SampleProfileLoaderBaseImpl.h:810
llvm::SampleProfileLoaderBaseImpl::propagateWeights
void propagateWeights(FunctionT &F)
Propagate weights into edges.
Definition: SampleProfileLoaderBaseImpl.h:755
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::SampleProfileLoaderBaseImpl::printBlockWeight
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
Definition: SampleProfileLoaderBaseImpl.h:272
llvm::SampleProfileLoaderBaseImpl::findEquivalencesFor
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
Definition: SampleProfileLoaderBaseImpl.h:434
llvm::SampleProfileLoaderBaseImpl::dump
void dump()
Definition: SampleProfileLoaderBaseImpl.h:85
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
GenericDomTree.h
llvm::SampleProfileLoaderBaseImpl::VisitedEdges
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
Definition: SampleProfileLoaderBaseImpl.h:183
Function.h
llvm::afdo_detail::IRTraits< BasicBlock >::DominatorTreePtrT
std::unique_ptr< DominatorTree > DominatorTreePtrT
Definition: SampleProfileLoaderBaseImpl.h:61
llvm::sampleprofutil::SampleCoverageTracker
Definition: SampleProfileLoaderBaseUtil.h:35
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::SampleProfileLoaderBaseImpl::finalizeWeightPropagation
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Definition: SampleProfileLoaderBaseImpl.h:914
llvm::DebugLoc::getLine
unsigned getLine() const
Definition: DebugLoc.cpp:24
llvm::SampleProfileLoaderBaseImpl::getPredecessors
PredRangeT getPredecessors(BasicBlockT *BB)
Definition: SampleProfileLoaderBaseImpl.h:125
llvm::DILocation::getBaseDiscriminator
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
Definition: DebugInfoMetadata.h:2201
Instructions.h
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::Edge
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
Definition: SampleProfileLoaderBaseImpl.h:110
llvm::SampleProfileInference
Sample profile inference pass.
Definition: SampleProfileInference.h:83
SmallVector.h
llvm::pred_range
iterator_range< pred_iterator > pred_range
Definition: CFG.h:107
llvm::SampleProfileLoaderBaseImpl::getSuccessors
SuccRangeT getSuccessors(BasicBlockT *BB)
Definition: SampleProfileLoaderBaseImpl.h:128
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
Dominators.h
llvm::afdo_detail::IRTraits
Definition: SampleProfileLoaderBaseImpl.h:53
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileRecordCoverage
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SampleProfileMaxPropagateIterations
cl::opt< unsigned > SampleProfileMaxPropagateIterations
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::SampleProfileLoaderBaseImpl::EdgeWeights
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
Definition: SampleProfileLoaderBaseImpl.h:177
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::FunctionT
typename afdo_detail::IRTraits< MachineBasicBlock >::FunctionT FunctionT
Definition: SampleProfileLoaderBaseImpl.h:91
llvm::DISubprogram
Subprogram description.
Definition: DebugInfoMetadata.h:1797
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::SampleProfileLoaderBaseImpl< MachineBasicBlock >::OptRemarkEmitterT
typename afdo_detail::IRTraits< MachineBasicBlock >::OptRemarkEmitterT OptRemarkEmitterT
Definition: SampleProfileLoaderBaseImpl.h:101
raw_ostream.h
llvm::afdo_detail::IRTraits< BasicBlock >::LoopInfoPtrT
std::unique_ptr< LoopInfo > LoopInfoPtrT
Definition: SampleProfileLoaderBaseImpl.h:60
llvm::SampleProfileLoaderBaseImpl::DILocation2SampleMap
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
Definition: SampleProfileLoaderBaseImpl.h:137
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::NoWarnSampleUnused
cl::opt< bool > NoWarnSampleUnused