LLVM  14.0.0git
BlockFrequencyInfoImpl.h
Go to the documentation of this file.
1 //==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Shared implementation of BlockFrequency for IR and Machine Instructions.
10 // See the documentation below for BlockFrequencyInfoImpl for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
15 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/GraphTraits.h"
20 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/ValueHandle.h"
32 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/Format.h"
37 #include <algorithm>
38 #include <cassert>
39 #include <cstddef>
40 #include <cstdint>
41 #include <deque>
42 #include <iterator>
43 #include <limits>
44 #include <list>
45 #include <queue>
46 #include <string>
47 #include <unordered_set>
48 #include <utility>
49 #include <vector>
50 
51 #define DEBUG_TYPE "block-freq"
52 
53 namespace llvm {
55 
59 
60 class BranchProbabilityInfo;
61 class Function;
62 class Loop;
63 class LoopInfo;
64 class MachineBasicBlock;
65 class MachineBranchProbabilityInfo;
66 class MachineFunction;
67 class MachineLoop;
68 class MachineLoopInfo;
69 
70 namespace bfi_detail {
71 
72 struct IrreducibleGraph;
73 
74 // This is part of a workaround for a GCC 4.7 crash on lambdas.
75 template <class BT> struct BlockEdgesAdder;
76 
77 /// Mass of a block.
78 ///
79 /// This class implements a sort of fixed-point fraction always between 0.0 and
80 /// 1.0. getMass() == std::numeric_limits<uint64_t>::max() indicates a value of
81 /// 1.0.
82 ///
83 /// Masses can be added and subtracted. Simple saturation arithmetic is used,
84 /// so arithmetic operations never overflow or underflow.
85 ///
86 /// Masses can be multiplied. Multiplication treats full mass as 1.0 and uses
87 /// an inexpensive floating-point algorithm that's off-by-one (almost, but not
88 /// quite, maximum precision).
89 ///
90 /// Masses can be scaled by \a BranchProbability at maximum precision.
91 class BlockMass {
92  uint64_t Mass = 0;
93 
94 public:
95  BlockMass() = default;
96  explicit BlockMass(uint64_t Mass) : Mass(Mass) {}
97 
98  static BlockMass getEmpty() { return BlockMass(); }
99 
100  static BlockMass getFull() {
102  }
103 
104  uint64_t getMass() const { return Mass; }
105 
106  bool isFull() const { return Mass == std::numeric_limits<uint64_t>::max(); }
107  bool isEmpty() const { return !Mass; }
108 
109  bool operator!() const { return isEmpty(); }
110 
111  /// Add another mass.
112  ///
113  /// Adds another mass, saturating at \a isFull() rather than overflowing.
115  uint64_t Sum = Mass + X.Mass;
117  return *this;
118  }
119 
120  /// Subtract another mass.
121  ///
122  /// Subtracts another mass, saturating at \a isEmpty() rather than
123  /// undeflowing.
125  uint64_t Diff = Mass - X.Mass;
126  Mass = Diff > Mass ? 0 : Diff;
127  return *this;
128  }
129 
131  Mass = P.scale(Mass);
132  return *this;
133  }
134 
135  bool operator==(BlockMass X) const { return Mass == X.Mass; }
136  bool operator!=(BlockMass X) const { return Mass != X.Mass; }
137  bool operator<=(BlockMass X) const { return Mass <= X.Mass; }
138  bool operator>=(BlockMass X) const { return Mass >= X.Mass; }
139  bool operator<(BlockMass X) const { return Mass < X.Mass; }
140  bool operator>(BlockMass X) const { return Mass > X.Mass; }
141 
142  /// Convert to scaled number.
143  ///
144  /// Convert to \a ScaledNumber. \a isFull() gives 1.0, while \a isEmpty()
145  /// gives slightly above 0.0.
147 
148  void dump() const;
149  raw_ostream &print(raw_ostream &OS) const;
150 };
151 
153  return BlockMass(L) += R;
154 }
156  return BlockMass(L) -= R;
157 }
159  return BlockMass(L) *= R;
160 }
162  return BlockMass(R) *= L;
163 }
164 
166  return X.print(OS);
167 }
168 
169 } // end namespace bfi_detail
170 
171 /// Base class for BlockFrequencyInfoImpl
172 ///
173 /// BlockFrequencyInfoImplBase has supporting data structures and some
174 /// algorithms for BlockFrequencyInfoImplBase. Only algorithms that depend on
175 /// the block type (or that call such algorithms) are skipped here.
176 ///
177 /// Nevertheless, the majority of the overall algorithm documentation lives with
178 /// BlockFrequencyInfoImpl. See there for details.
180 public:
183 
184  /// Representative of a block.
185  ///
186  /// This is a simple wrapper around an index into the reverse-post-order
187  /// traversal of the blocks.
188  ///
189  /// Unlike a block pointer, its order has meaning (location in the
190  /// topological sort) and it's class is the same regardless of block type.
191  struct BlockNode {
193 
195 
196  BlockNode() : Index(std::numeric_limits<uint32_t>::max()) {}
198 
199  bool operator==(const BlockNode &X) const { return Index == X.Index; }
200  bool operator!=(const BlockNode &X) const { return Index != X.Index; }
201  bool operator<=(const BlockNode &X) const { return Index <= X.Index; }
202  bool operator>=(const BlockNode &X) const { return Index >= X.Index; }
203  bool operator<(const BlockNode &X) const { return Index < X.Index; }
204  bool operator>(const BlockNode &X) const { return Index > X.Index; }
205 
206  bool isValid() const { return Index <= getMaxIndex(); }
207 
208  static size_t getMaxIndex() {
210  }
211  };
212 
213  /// Stats about a block itself.
214  struct FrequencyData {
217  };
218 
219  /// Data about a loop.
220  ///
221  /// Contains the data necessary to represent a loop as a pseudo-node once it's
222  /// packaged.
223  struct LoopData {
227 
228  LoopData *Parent; ///< The parent loop.
229  bool IsPackaged = false; ///< Whether this has been packaged.
230  uint32_t NumHeaders = 1; ///< Number of headers.
231  ExitMap Exits; ///< Successor edges (and weights).
232  NodeList Nodes; ///< Header and the members of the loop.
233  HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
236 
238  : Parent(Parent), Nodes(1, Header), BackedgeMass(1) {}
239 
240  template <class It1, class It2>
241  LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
242  It2 LastOther)
243  : Parent(Parent), Nodes(FirstHeader, LastHeader) {
244  NumHeaders = Nodes.size();
245  Nodes.insert(Nodes.end(), FirstOther, LastOther);
247  }
248 
249  bool isHeader(const BlockNode &Node) const {
250  if (isIrreducible())
251  return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders,
252  Node);
253  return Node == Nodes[0];
254  }
255 
256  BlockNode getHeader() const { return Nodes[0]; }
257  bool isIrreducible() const { return NumHeaders > 1; }
258 
259  HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) {
260  assert(isHeader(B) && "this is only valid on loop header blocks");
261  if (isIrreducible())
262  return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) -
263  Nodes.begin();
264  return 0;
265  }
266 
268  return Nodes.begin() + NumHeaders;
269  }
270 
271  NodeList::const_iterator members_end() const { return Nodes.end(); }
273  return make_range(members_begin(), members_end());
274  }
275  };
276 
277  /// Index of loop information.
278  struct WorkingData {
279  BlockNode Node; ///< This node.
280  LoopData *Loop = nullptr; ///< The loop this block is inside.
281  BlockMass Mass; ///< Mass distribution from the entry block.
282 
284 
285  bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
286 
287  bool isDoubleLoopHeader() const {
288  return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() &&
289  Loop->Parent->isHeader(Node);
290  }
291 
293  if (!isLoopHeader())
294  return Loop;
295  if (!isDoubleLoopHeader())
296  return Loop->Parent;
297  return Loop->Parent->Parent;
298  }
299 
300  /// Resolve a node to its representative.
301  ///
302  /// Get the node currently representing Node, which could be a containing
303  /// loop.
304  ///
305  /// This function should only be called when distributing mass. As long as
306  /// there are no irreducible edges to Node, then it will have complexity
307  /// O(1) in this context.
308  ///
309  /// In general, the complexity is O(L), where L is the number of loop
310  /// headers Node has been packaged into. Since this method is called in
311  /// the context of distributing mass, L will be the number of loop headers
312  /// an early exit edge jumps out of.
314  auto L = getPackagedLoop();
315  return L ? L->getHeader() : Node;
316  }
317 
319  if (!Loop || !Loop->IsPackaged)
320  return nullptr;
321  auto L = Loop;
322  while (L->Parent && L->Parent->IsPackaged)
323  L = L->Parent;
324  return L;
325  }
326 
327  /// Get the appropriate mass for a node.
328  ///
329  /// Get appropriate mass for Node. If Node is a loop-header (whose loop
330  /// has been packaged), returns the mass of its pseudo-node. If it's a
331  /// node inside a packaged loop, it returns the loop's mass.
333  if (!isAPackage())
334  return Mass;
335  if (!isADoublePackage())
336  return Loop->Mass;
337  return Loop->Parent->Mass;
338  }
339 
340  /// Has ContainingLoop been packaged up?
341  bool isPackaged() const { return getResolvedNode() != Node; }
342 
343  /// Has Loop been packaged up?
344  bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
345 
346  /// Has Loop been packaged up twice?
347  bool isADoublePackage() const {
348  return isDoubleLoopHeader() && Loop->Parent->IsPackaged;
349  }
350  };
351 
352  /// Unscaled probability weight.
353  ///
354  /// Probability weight for an edge in the graph (including the
355  /// successor/target node).
356  ///
357  /// All edges in the original function are 32-bit. However, exit edges from
358  /// loop packages are taken from 64-bit exit masses, so we need 64-bits of
359  /// space in general.
360  ///
361  /// In addition to the raw weight amount, Weight stores the type of the edge
362  /// in the current context (i.e., the context of the loop being processed).
363  /// Is this a local edge within the loop, an exit from the loop, or a
364  /// backedge to the loop header?
365  struct Weight {
370 
371  Weight() = default;
374  };
375 
376  /// Distribution of unscaled probability weight.
377  ///
378  /// Distribution of unscaled probability weight to a set of successors.
379  ///
380  /// This class collates the successor edge weights for later processing.
381  ///
382  /// \a DidOverflow indicates whether \a Total did overflow while adding to
383  /// the distribution. It should never overflow twice.
384  struct Distribution {
386 
387  WeightList Weights; ///< Individual successor weights.
388  uint64_t Total = 0; ///< Sum of all weights.
389  bool DidOverflow = false; ///< Whether \a Total did overflow.
390 
391  Distribution() = default;
392 
393  void addLocal(const BlockNode &Node, uint64_t Amount) {
394  add(Node, Amount, Weight::Local);
395  }
396 
397  void addExit(const BlockNode &Node, uint64_t Amount) {
398  add(Node, Amount, Weight::Exit);
399  }
400 
401  void addBackedge(const BlockNode &Node, uint64_t Amount) {
402  add(Node, Amount, Weight::Backedge);
403  }
404 
405  /// Normalize the distribution.
406  ///
407  /// Combines multiple edges to the same \a Weight::TargetNode and scales
408  /// down so that \a Total fits into 32-bits.
409  ///
410  /// This is linear in the size of \a Weights. For the vast majority of
411  /// cases, adjacent edge weights are combined by sorting WeightList and
412  /// combining adjacent weights. However, for very large edge lists an
413  /// auxiliary hash table is used.
414  void normalize();
415 
416  private:
417  void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type);
418  };
419 
420  /// Data about each block. This is used downstream.
421  std::vector<FrequencyData> Freqs;
422 
423  /// Whether each block is an irreducible loop header.
424  /// This is used downstream.
426 
427  /// Loop data: see initializeLoops().
428  std::vector<WorkingData> Working;
429 
430  /// Indexed information about loops.
431  std::list<LoopData> Loops;
432 
433  /// Virtual destructor.
434  ///
435  /// Need a virtual destructor to mask the compiler warning about
436  /// getBlockName().
437  virtual ~BlockFrequencyInfoImplBase() = default;
438 
439  /// Add all edges out of a packaged loop to the distribution.
440  ///
441  /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each
442  /// successor edge.
443  ///
444  /// \return \c true unless there's an irreducible backedge.
445  bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop,
446  Distribution &Dist);
447 
448  /// Add an edge to the distribution.
449  ///
450  /// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the
451  /// edge is local/exit/backedge is in the context of LoopHead. Otherwise,
452  /// every edge should be a local edge (since all the loops are packaged up).
453  ///
454  /// \return \c true unless aborted due to an irreducible backedge.
455  bool addToDist(Distribution &Dist, const LoopData *OuterLoop,
456  const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
457 
458  /// Analyze irreducible SCCs.
459  ///
460  /// Separate irreducible SCCs from \c G, which is an explicit graph of \c
461  /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr).
462  /// Insert them into \a Loops before \c Insert.
463  ///
464  /// \return the \c LoopData nodes representing the irreducible SCCs.
467  std::list<LoopData>::iterator Insert);
468 
469  /// Update a loop after packaging irreducible SCCs inside of it.
470  ///
471  /// Update \c OuterLoop. Before finding irreducible control flow, it was
472  /// partway through \a computeMassInLoop(), so \a LoopData::Exits and \a
473  /// LoopData::BackedgeMass need to be reset. Also, nodes that were packaged
474  /// up need to be removed from \a OuterLoop::Nodes.
475  void updateLoopWithIrreducible(LoopData &OuterLoop);
476 
477  /// Distribute mass according to a distribution.
478  ///
479  /// Distributes the mass in Source according to Dist. If LoopHead.isValid(),
480  /// backedges and exits are stored in its entry in Loops.
481  ///
482  /// Mass is distributed in parallel from two copies of the source mass.
483  void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
484  Distribution &Dist);
485 
486  /// Compute the loop scale for a loop.
488 
489  /// Adjust the mass of all headers in an irreducible loop.
490  ///
491  /// Initially, irreducible loops are assumed to distribute their mass
492  /// equally among its headers. This can lead to wrong frequency estimates
493  /// since some headers may be executed more frequently than others.
494  ///
495  /// This adjusts header mass distribution so it matches the weights of
496  /// the backedges going into each of the loop headers.
498 
500 
501  /// Package up a loop.
502  void packageLoop(LoopData &Loop);
503 
504  /// Unwrap loops.
505  void unwrapLoops();
506 
507  /// Finalize frequency metrics.
508  ///
509  /// Calculates final frequencies and cleans up no-longer-needed data
510  /// structures.
511  void finalizeMetrics();
512 
513  /// Clear all memory.
514  void clear();
515 
516  virtual std::string getBlockName(const BlockNode &Node) const;
517  std::string getLoopName(const LoopData &Loop) const;
518 
519  virtual raw_ostream &print(raw_ostream &OS) const { return OS; }
520  void dump() const { print(dbgs()); }
521 
522  Scaled64 getFloatingBlockFreq(const BlockNode &Node) const;
523 
524  BlockFrequency getBlockFreq(const BlockNode &Node) const;
526  const BlockNode &Node,
527  bool AllowSynthetic = false) const;
529  uint64_t Freq,
530  bool AllowSynthetic = false) const;
531  bool isIrrLoopHeader(const BlockNode &Node);
532 
533  void setBlockFreq(const BlockNode &Node, uint64_t Freq);
534 
535  raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const;
537  const BlockFrequency &Freq) const;
538 
540  assert(!Freqs.empty());
541  return Freqs[0].Integer;
542  }
543 };
544 
545 namespace bfi_detail {
546 
547 template <class BlockT> struct TypeMap {};
548 template <> struct TypeMap<BasicBlock> {
553  using LoopT = Loop;
555 };
556 template <> struct TypeMap<MachineBasicBlock> {
558  using BlockKeyT = const MachineBasicBlock *;
563 };
564 
565 template <class BlockT, class BFIImplT>
567 
568 /// Get the name of a MachineBasicBlock.
569 ///
570 /// Get the name of a MachineBasicBlock. It's templated so that including from
571 /// CodeGen is unnecessary (that would be a layering issue).
572 ///
573 /// This is used mainly for debug output. The name is similar to
574 /// MachineBasicBlock::getFullName(), but skips the name of the function.
575 template <class BlockT> std::string getBlockName(const BlockT *BB) {
576  assert(BB && "Unexpected nullptr");
577  auto MachineName = "BB" + Twine(BB->getNumber());
578  if (BB->getBasicBlock())
579  return (MachineName + "[" + BB->getName() + "]").str();
580  return MachineName.str();
581 }
582 /// Get the name of a BasicBlock.
583 template <> inline std::string getBlockName(const BasicBlock *BB) {
584  assert(BB && "Unexpected nullptr");
585  return BB->getName().str();
586 }
587 
588 /// Graph of irreducible control flow.
589 ///
590 /// This graph is used for determining the SCCs in a loop (or top-level
591 /// function) that has irreducible control flow.
592 ///
593 /// During the block frequency algorithm, the local graphs are defined in a
594 /// light-weight way, deferring to the \a BasicBlock or \a MachineBasicBlock
595 /// graphs for most edges, but getting others from \a LoopData::ExitMap. The
596 /// latter only has successor information.
597 ///
598 /// \a IrreducibleGraph makes this graph explicit. It's in a form that can use
599 /// \a GraphTraits (so that \a analyzeIrreducible() can use \a scc_iterator),
600 /// and it explicitly lists predecessors and successors. The initialization
601 /// that relies on \c MachineBasicBlock is defined in the header.
604 
606 
608  struct IrrNode {
610  unsigned NumIn = 0;
611  std::deque<const IrrNode *> Edges;
612 
614 
615  using iterator = std::deque<const IrrNode *>::const_iterator;
616 
617  iterator pred_begin() const { return Edges.begin(); }
618  iterator succ_begin() const { return Edges.begin() + NumIn; }
619  iterator pred_end() const { return succ_begin(); }
620  iterator succ_end() const { return Edges.end(); }
621  };
623  const IrrNode *StartIrr = nullptr;
624  std::vector<IrrNode> Nodes;
626 
627  /// Construct an explicit graph containing irreducible control flow.
628  ///
629  /// Construct an explicit graph of the control flow in \c OuterLoop (or the
630  /// top-level function, if \c OuterLoop is \c nullptr). Uses \c
631  /// addBlockEdges to add block successors that have not been packaged into
632  /// loops.
633  ///
634  /// \a BlockFrequencyInfoImpl::computeIrreducibleMass() is the only expected
635  /// user of this.
636  template <class BlockEdgesAdder>
638  BlockEdgesAdder addBlockEdges) : BFI(BFI) {
639  initialize(OuterLoop, addBlockEdges);
640  }
641 
642  template <class BlockEdgesAdder>
643  void initialize(const BFIBase::LoopData *OuterLoop,
644  BlockEdgesAdder addBlockEdges);
645  void addNodesInLoop(const BFIBase::LoopData &OuterLoop);
646  void addNodesInFunction();
647 
648  void addNode(const BlockNode &Node) {
649  Nodes.emplace_back(Node);
650  BFI.Working[Node.Index].getMass() = BlockMass::getEmpty();
651  }
652 
653  void indexNodes();
654  template <class BlockEdgesAdder>
655  void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop,
656  BlockEdgesAdder addBlockEdges);
657  void addEdge(IrrNode &Irr, const BlockNode &Succ,
658  const BFIBase::LoopData *OuterLoop);
659 };
660 
661 template <class BlockEdgesAdder>
663  BlockEdgesAdder addBlockEdges) {
664  if (OuterLoop) {
665  addNodesInLoop(*OuterLoop);
666  for (auto N : OuterLoop->Nodes)
667  addEdges(N, OuterLoop, addBlockEdges);
668  } else {
670  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
671  addEdges(Index, OuterLoop, addBlockEdges);
672  }
674 }
675 
676 template <class BlockEdgesAdder>
678  const BFIBase::LoopData *OuterLoop,
679  BlockEdgesAdder addBlockEdges) {
680  auto L = Lookup.find(Node.Index);
681  if (L == Lookup.end())
682  return;
683  IrrNode &Irr = *L->second;
684  const auto &Working = BFI.Working[Node.Index];
685 
686  if (Working.isAPackage())
687  for (const auto &I : Working.Loop->Exits)
688  addEdge(Irr, I.first, OuterLoop);
689  else
690  addBlockEdges(*this, Irr, OuterLoop);
691 }
692 
693 } // end namespace bfi_detail
694 
695 /// Shared implementation for block frequency analysis.
696 ///
697 /// This is a shared implementation of BlockFrequencyInfo and
698 /// MachineBlockFrequencyInfo, and calculates the relative frequencies of
699 /// blocks.
700 ///
701 /// LoopInfo defines a loop as a "non-trivial" SCC dominated by a single block,
702 /// which is called the header. A given loop, L, can have sub-loops, which are
703 /// loops within the subgraph of L that exclude its header. (A "trivial" SCC
704 /// consists of a single block that does not have a self-edge.)
705 ///
706 /// In addition to loops, this algorithm has limited support for irreducible
707 /// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are
708 /// discovered on the fly, and modelled as loops with multiple headers.
709 ///
710 /// The headers of irreducible sub-SCCs consist of its entry blocks and all
711 /// nodes that are targets of a backedge within it (excluding backedges within
712 /// true sub-loops). Block frequency calculations act as if a block is
713 /// inserted that intercepts all the edges to the headers. All backedges and
714 /// entries point to this block. Its successors are the headers, which split
715 /// the frequency evenly.
716 ///
717 /// This algorithm leverages BlockMass and ScaledNumber to maintain precision,
718 /// separates mass distribution from loop scaling, and dithers to eliminate
719 /// probability mass loss.
720 ///
721 /// The implementation is split between BlockFrequencyInfoImpl, which knows the
722 /// type of graph being modelled (BasicBlock vs. MachineBasicBlock), and
723 /// BlockFrequencyInfoImplBase, which doesn't. The base class uses \a
724 /// BlockNode, a wrapper around a uint32_t. BlockNode is numbered from 0 in
725 /// reverse-post order. This gives two advantages: it's easy to compare the
726 /// relative ordering of two nodes, and maps keyed on BlockT can be represented
727 /// by vectors.
728 ///
729 /// This algorithm is O(V+E), unless there is irreducible control flow, in
730 /// which case it's O(V*E) in the worst case.
731 ///
732 /// These are the main stages:
733 ///
734 /// 0. Reverse post-order traversal (\a initializeRPOT()).
735 ///
736 /// Run a single post-order traversal and save it (in reverse) in RPOT.
737 /// All other stages make use of this ordering. Save a lookup from BlockT
738 /// to BlockNode (the index into RPOT) in Nodes.
739 ///
740 /// 1. Loop initialization (\a initializeLoops()).
741 ///
742 /// Translate LoopInfo/MachineLoopInfo into a form suitable for the rest of
743 /// the algorithm. In particular, store the immediate members of each loop
744 /// in reverse post-order.
745 ///
746 /// 2. Calculate mass and scale in loops (\a computeMassInLoops()).
747 ///
748 /// For each loop (bottom-up), distribute mass through the DAG resulting
749 /// from ignoring backedges and treating sub-loops as a single pseudo-node.
750 /// Track the backedge mass distributed to the loop header, and use it to
751 /// calculate the loop scale (number of loop iterations). Immediate
752 /// members that represent sub-loops will already have been visited and
753 /// packaged into a pseudo-node.
754 ///
755 /// Distributing mass in a loop is a reverse-post-order traversal through
756 /// the loop. Start by assigning full mass to the Loop header. For each
757 /// node in the loop:
758 ///
759 /// - Fetch and categorize the weight distribution for its successors.
760 /// If this is a packaged-subloop, the weight distribution is stored
761 /// in \a LoopData::Exits. Otherwise, fetch it from
762 /// BranchProbabilityInfo.
763 ///
764 /// - Each successor is categorized as \a Weight::Local, a local edge
765 /// within the current loop, \a Weight::Backedge, a backedge to the
766 /// loop header, or \a Weight::Exit, any successor outside the loop.
767 /// The weight, the successor, and its category are stored in \a
768 /// Distribution. There can be multiple edges to each successor.
769 ///
770 /// - If there's a backedge to a non-header, there's an irreducible SCC.
771 /// The usual flow is temporarily aborted. \a
772 /// computeIrreducibleMass() finds the irreducible SCCs within the
773 /// loop, packages them up, and restarts the flow.
774 ///
775 /// - Normalize the distribution: scale weights down so that their sum
776 /// is 32-bits, and coalesce multiple edges to the same node.
777 ///
778 /// - Distribute the mass accordingly, dithering to minimize mass loss,
779 /// as described in \a distributeMass().
780 ///
781 /// In the case of irreducible loops, instead of a single loop header,
782 /// there will be several. The computation of backedge masses is similar
783 /// but instead of having a single backedge mass, there will be one
784 /// backedge per loop header. In these cases, each backedge will carry
785 /// a mass proportional to the edge weights along the corresponding
786 /// path.
787 ///
788 /// At the end of propagation, the full mass assigned to the loop will be
789 /// distributed among the loop headers proportionally according to the
790 /// mass flowing through their backedges.
791 ///
792 /// Finally, calculate the loop scale from the accumulated backedge mass.
793 ///
794 /// 3. Distribute mass in the function (\a computeMassInFunction()).
795 ///
796 /// Finally, distribute mass through the DAG resulting from packaging all
797 /// loops in the function. This uses the same algorithm as distributing
798 /// mass in a loop, except that there are no exit or backedge edges.
799 ///
800 /// 4. Unpackage loops (\a unwrapLoops()).
801 ///
802 /// Initialize each block's frequency to a floating point representation of
803 /// its mass.
804 ///
805 /// Visit loops top-down, scaling the frequencies of its immediate members
806 /// by the loop's pseudo-node's frequency.
807 ///
808 /// 5. Convert frequencies to a 64-bit range (\a finalizeMetrics()).
809 ///
810 /// Using the min and max frequencies as a guide, translate floating point
811 /// frequencies to an appropriate range in uint64_t.
812 ///
813 /// It has some known flaws.
814 ///
815 /// - The model of irreducible control flow is a rough approximation.
816 ///
817 /// Modelling irreducible control flow exactly involves setting up and
818 /// solving a group of infinite geometric series. Such precision is
819 /// unlikely to be worthwhile, since most of our algorithms give up on
820 /// irreducible control flow anyway.
821 ///
822 /// Nevertheless, we might find that we need to get closer. Here's a sort
823 /// of TODO list for the model with diminishing returns, to be completed as
824 /// necessary.
825 ///
826 /// - The headers for the \a LoopData representing an irreducible SCC
827 /// include non-entry blocks. When these extra blocks exist, they
828 /// indicate a self-contained irreducible sub-SCC. We could treat them
829 /// as sub-loops, rather than arbitrarily shoving the problematic
830 /// blocks into the headers of the main irreducible SCC.
831 ///
832 /// - Entry frequencies are assumed to be evenly split between the
833 /// headers of a given irreducible SCC, which is the only option if we
834 /// need to compute mass in the SCC before its parent loop. Instead,
835 /// we could partially compute mass in the parent loop, and stop when
836 /// we get to the SCC. Here, we have the correct ratio of entry
837 /// masses, which we can use to adjust their relative frequencies.
838 /// Compute mass in the SCC, and then continue propagation in the
839 /// parent.
840 ///
841 /// - We can propagate mass iteratively through the SCC, for some fixed
842 /// number of iterations. Each iteration starts by assigning the entry
843 /// blocks their backedge mass from the prior iteration. The final
844 /// mass for each block (and each exit, and the total backedge mass
845 /// used for computing loop scale) is the sum of all iterations.
846 /// (Running this until fixed point would "solve" the geometric
847 /// series by simulation.)
848 template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
849  // This is part of a workaround for a GCC 4.7 crash on lambdas.
851 
852  using BlockT = typename bfi_detail::TypeMap<BT>::BlockT;
853  using BlockKeyT = typename bfi_detail::TypeMap<BT>::BlockKeyT;
854  using FunctionT = typename bfi_detail::TypeMap<BT>::FunctionT;
855  using BranchProbabilityInfoT =
857  using LoopT = typename bfi_detail::TypeMap<BT>::LoopT;
858  using LoopInfoT = typename bfi_detail::TypeMap<BT>::LoopInfoT;
861  using BFICallbackVH =
863 
864  const BranchProbabilityInfoT *BPI = nullptr;
865  const LoopInfoT *LI = nullptr;
866  const FunctionT *F = nullptr;
867 
868  // All blocks in reverse postorder.
869  std::vector<const BlockT *> RPOT;
871 
872  using rpot_iterator = typename std::vector<const BlockT *>::const_iterator;
873 
874  rpot_iterator rpot_begin() const { return RPOT.begin(); }
875  rpot_iterator rpot_end() const { return RPOT.end(); }
876 
877  size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); }
878 
879  BlockNode getNode(const rpot_iterator &I) const {
880  return BlockNode(getIndex(I));
881  }
882 
883  BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB).first; }
884 
885  const BlockT *getBlock(const BlockNode &Node) const {
886  assert(Node.Index < RPOT.size());
887  return RPOT[Node.Index];
888  }
889 
890  /// Run (and save) a post-order traversal.
891  ///
892  /// Saves a reverse post-order traversal of all the nodes in \a F.
893  void initializeRPOT();
894 
895  /// Initialize loop data.
896  ///
897  /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from
898  /// each block to the deepest loop it's in, but we need the inverse. For each
899  /// loop, we store in reverse post-order its "immediate" members, defined as
900  /// the header, the headers of immediate sub-loops, and all other blocks in
901  /// the loop that are not in sub-loops.
902  void initializeLoops();
903 
904  /// Propagate to a block's successors.
905  ///
906  /// In the context of distributing mass through \c OuterLoop, divide the mass
907  /// currently assigned to \c Node between its successors.
908  ///
909  /// \return \c true unless there's an irreducible backedge.
910  bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node);
911 
912  /// Compute mass in a particular loop.
913  ///
914  /// Assign mass to \c Loop's header, and then for each block in \c Loop in
915  /// reverse post-order, distribute mass to its successors. Only visits nodes
916  /// that have not been packaged into sub-loops.
917  ///
918  /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop.
919  /// \return \c true unless there's an irreducible backedge.
920  bool computeMassInLoop(LoopData &Loop);
921 
922  /// Try to compute mass in the top-level function.
923  ///
924  /// Assign mass to the entry block, and then for each block in reverse
925  /// post-order, distribute mass to its successors. Skips nodes that have
926  /// been packaged into loops.
927  ///
928  /// \pre \a computeMassInLoops() has been called.
929  /// \return \c true unless there's an irreducible backedge.
930  bool tryToComputeMassInFunction();
931 
932  /// Compute mass in (and package up) irreducible SCCs.
933  ///
934  /// Find the irreducible SCCs in \c OuterLoop, add them to \a Loops (in front
935  /// of \c Insert), and call \a computeMassInLoop() on each of them.
936  ///
937  /// If \c OuterLoop is \c nullptr, it refers to the top-level function.
938  ///
939  /// \pre \a computeMassInLoop() has been called for each subloop of \c
940  /// OuterLoop.
941  /// \pre \c Insert points at the last loop successfully processed by \a
942  /// computeMassInLoop().
943  /// \pre \c OuterLoop has irreducible SCCs.
944  void computeIrreducibleMass(LoopData *OuterLoop,
945  std::list<LoopData>::iterator Insert);
946 
947  /// Compute mass in all loops.
948  ///
949  /// For each loop bottom-up, call \a computeMassInLoop().
950  ///
951  /// \a computeMassInLoop() aborts (and returns \c false) on loops that
952  /// contain a irreducible sub-SCCs. Use \a computeIrreducibleMass() and then
953  /// re-enter \a computeMassInLoop().
954  ///
955  /// \post \a computeMassInLoop() has returned \c true for every loop.
956  void computeMassInLoops();
957 
958  /// Compute mass in the top-level function.
959  ///
960  /// Uses \a tryToComputeMassInFunction() and \a computeIrreducibleMass() to
961  /// compute mass in the top-level function.
962  ///
963  /// \post \a tryToComputeMassInFunction() has returned \c true.
964  void computeMassInFunction();
965 
966  std::string getBlockName(const BlockNode &Node) const override {
967  return bfi_detail::getBlockName(getBlock(Node));
968  }
969 
970  /// The current implementation for computing relative block frequencies does
971  /// not handle correctly control-flow graphs containing irreducible loops. To
972  /// resolve the problem, we apply a post-processing step, which iteratively
973  /// updates block frequencies based on the frequencies of their predesessors.
974  /// This corresponds to finding the stationary point of the Markov chain by
975  /// an iterative method aka "PageRank computation".
976  /// The algorithm takes at most O(|E| * IterativeBFIMaxIterations) steps but
977  /// typically converges faster.
978  ///
979  /// Decide whether we want to apply iterative inference for a given function.
980  bool needIterativeInference() const;
981 
982  /// Apply an iterative post-processing to infer correct counts for irr loops.
983  void applyIterativeInference();
984 
985  using ProbMatrixType = std::vector<std::vector<std::pair<size_t, Scaled64>>>;
986 
987  /// Run iterative inference for a probability matrix and initial frequencies.
988  void iterativeInference(const ProbMatrixType &ProbMatrix,
989  std::vector<Scaled64> &Freq) const;
990 
991  /// Find all blocks to apply inference on, that is, reachable from the entry
992  /// and backward reachable from exists along edges with positive probability.
993  void findReachableBlocks(std::vector<const BlockT *> &Blocks) const;
994 
995  /// Build a matrix of probabilities with transitions (edges) between the
996  /// blocks: ProbMatrix[I] holds pairs (J, P), where Pr[J -> I | J] = P
997  void initTransitionProbabilities(
998  const std::vector<const BlockT *> &Blocks,
999  const DenseMap<const BlockT *, size_t> &BlockIndex,
1000  ProbMatrixType &ProbMatrix) const;
1001 
1002 #ifndef NDEBUG
1003  /// Compute the discrepancy between current block frequencies and the
1004  /// probability matrix.
1005  Scaled64 discrepancy(const ProbMatrixType &ProbMatrix,
1006  const std::vector<Scaled64> &Freq) const;
1007 #endif
1008 
1009 public:
1010  BlockFrequencyInfoImpl() = default;
1011 
1012  const FunctionT *getFunction() const { return F; }
1013 
1014  void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
1015  const LoopInfoT &LI);
1016 
1018 
1019  BlockFrequency getBlockFreq(const BlockT *BB) const {
1021  }
1022 
1024  const BlockT *BB,
1025  bool AllowSynthetic = false) const {
1027  AllowSynthetic);
1028  }
1029 
1031  uint64_t Freq,
1032  bool AllowSynthetic = false) const {
1034  AllowSynthetic);
1035  }
1036 
1037  bool isIrrLoopHeader(const BlockT *BB) {
1039  }
1040 
1041  void setBlockFreq(const BlockT *BB, uint64_t Freq);
1042 
1043  void forgetBlock(const BlockT *BB) {
1044  // We don't erase corresponding items from `Freqs`, `RPOT` and other to
1045  // avoid invalidating indices. Doing so would have saved some memory, but
1046  // it's not worth it.
1047  Nodes.erase(BB);
1048  }
1049 
1050  Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
1052  }
1053 
1054  const BranchProbabilityInfoT &getBPI() const { return *BPI; }
1055 
1056  /// Print the frequencies for the current function.
1057  ///
1058  /// Prints the frequencies for the blocks in the current function.
1059  ///
1060  /// Blocks are printed in the natural iteration order of the function, rather
1061  /// than reverse post-order. This provides two advantages: writing -analyze
1062  /// tests is easier (since blocks come out in source order), and even
1063  /// unreachable blocks are printed.
1064  ///
1065  /// \a BlockFrequencyInfoImplBase::print() only knows reverse post-order, so
1066  /// we need to override it here.
1067  raw_ostream &print(raw_ostream &OS) const override;
1068 
1071 
1072  raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const {
1073  return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB));
1074  }
1075 
1077 };
1078 
1079 namespace bfi_detail {
1080 
1081 template <class BFIImplT>
1082 class BFICallbackVH<BasicBlock, BFIImplT> : public CallbackVH {
1083  BFIImplT *BFIImpl;
1084 
1085 public:
1086  BFICallbackVH() = default;
1087 
1088  BFICallbackVH(const BasicBlock *BB, BFIImplT *BFIImpl)
1089  : CallbackVH(BB), BFIImpl(BFIImpl) {}
1090 
1091  virtual ~BFICallbackVH() = default;
1092 
1093  void deleted() override {
1094  BFIImpl->forgetBlock(cast<BasicBlock>(getValPtr()));
1095  }
1096 };
1097 
1098 /// Dummy implementation since MachineBasicBlocks aren't Values, so ValueHandles
1099 /// don't apply to them.
1100 template <class BFIImplT>
1102 public:
1103  BFICallbackVH() = default;
1104  BFICallbackVH(const MachineBasicBlock *, BFIImplT *) {}
1105 };
1106 
1107 } // end namespace bfi_detail
1108 
1109 template <class BT>
1111  const BranchProbabilityInfoT &BPI,
1112  const LoopInfoT &LI) {
1113  // Save the parameters.
1114  this->BPI = &BPI;
1115  this->LI = &LI;
1116  this->F = &F;
1117 
1118  // Clean up left-over data structures.
1120  RPOT.clear();
1121  Nodes.clear();
1122 
1123  // Initialize.
1124  LLVM_DEBUG(dbgs() << "\nblock-frequency: " << F.getName()
1125  << "\n================="
1126  << std::string(F.getName().size(), '=') << "\n");
1127  initializeRPOT();
1128  initializeLoops();
1129 
1130  // Visit loops in post-order to find the local mass distribution, and then do
1131  // the full function.
1132  computeMassInLoops();
1133  computeMassInFunction();
1134  unwrapLoops();
1135  // Apply a post-processing step improving computed frequencies for functions
1136  // with irreducible loops.
1137  if (needIterativeInference())
1138  applyIterativeInference();
1139  finalizeMetrics();
1140 
1142  // To detect BFI queries for unknown blocks, add entries for unreachable
1143  // blocks, if any. This is to distinguish between known/existing unreachable
1144  // blocks and unknown blocks.
1145  for (const BlockT &BB : F)
1146  if (!Nodes.count(&BB))
1147  setBlockFreq(&BB, 0);
1148  }
1149 }
1150 
1151 template <class BT>
1153  if (Nodes.count(BB))
1155  else {
1156  // If BB is a newly added block after BFI is done, we need to create a new
1157  // BlockNode for it assigned with a new index. The index can be determined
1158  // by the size of Freqs.
1159  BlockNode NewNode(Freqs.size());
1160  Nodes[BB] = {NewNode, BFICallbackVH(BB, this)};
1161  Freqs.emplace_back();
1163  }
1164 }
1165 
1166 template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() {
1167  const BlockT *Entry = &F->front();
1168  RPOT.reserve(F->size());
1169  std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT));
1170  std::reverse(RPOT.begin(), RPOT.end());
1171 
1172  assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() &&
1173  "More nodes in function than Block Frequency Info supports");
1174 
1175  LLVM_DEBUG(dbgs() << "reverse-post-order-traversal\n");
1176  for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
1177  BlockNode Node = getNode(I);
1178  LLVM_DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node)
1179  << "\n");
1180  Nodes[*I] = {Node, BFICallbackVH(*I, this)};
1181  }
1182 
1183  Working.reserve(RPOT.size());
1184  for (size_t Index = 0; Index < RPOT.size(); ++Index)
1185  Working.emplace_back(Index);
1186  Freqs.resize(RPOT.size());
1187 }
1188 
1189 template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() {
1190  LLVM_DEBUG(dbgs() << "loop-detection\n");
1191  if (LI->empty())
1192  return;
1193 
1194  // Visit loops top down and assign them an index.
1195  std::deque<std::pair<const LoopT *, LoopData *>> Q;
1196  for (const LoopT *L : *LI)
1197  Q.emplace_back(L, nullptr);
1198  while (!Q.empty()) {
1199  const LoopT *Loop = Q.front().first;
1200  LoopData *Parent = Q.front().second;
1201  Q.pop_front();
1202 
1203  BlockNode Header = getNode(Loop->getHeader());
1204  assert(Header.isValid());
1205 
1206  Loops.emplace_back(Parent, Header);
1207  Working[Header.Index].Loop = &Loops.back();
1208  LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n");
1209 
1210  for (const LoopT *L : *Loop)
1211  Q.emplace_back(L, &Loops.back());
1212  }
1213 
1214  // Visit nodes in reverse post-order and add them to their deepest containing
1215  // loop.
1216  for (size_t Index = 0; Index < RPOT.size(); ++Index) {
1217  // Loop headers have already been mostly mapped.
1218  if (Working[Index].isLoopHeader()) {
1219  LoopData *ContainingLoop = Working[Index].getContainingLoop();
1220  if (ContainingLoop)
1221  ContainingLoop->Nodes.push_back(Index);
1222  continue;
1223  }
1224 
1225  const LoopT *Loop = LI->getLoopFor(RPOT[Index]);
1226  if (!Loop)
1227  continue;
1228 
1229  // Add this node to its containing loop's member list.
1230  BlockNode Header = getNode(Loop->getHeader());
1231  assert(Header.isValid());
1232  const auto &HeaderData = Working[Header.Index];
1233  assert(HeaderData.isLoopHeader());
1234 
1235  Working[Index].Loop = HeaderData.Loop;
1236  HeaderData.Loop->Nodes.push_back(Index);
1237  LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header)
1238  << ": member = " << getBlockName(Index) << "\n");
1239  }
1240 }
1241 
1242 template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() {
1243  // Visit loops with the deepest first, and the top-level loops last.
1244  for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
1245  if (computeMassInLoop(*L))
1246  continue;
1247  auto Next = std::next(L);
1248  computeIrreducibleMass(&*L, L.base());
1249  L = std::prev(Next);
1250  if (computeMassInLoop(*L))
1251  continue;
1252  llvm_unreachable("unhandled irreducible control flow");
1253  }
1254 }
1255 
1256 template <class BT>
1257 bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
1258  // Compute mass in loop.
1259  LLVM_DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");
1260 
1261  if (Loop.isIrreducible()) {
1262  LLVM_DEBUG(dbgs() << "isIrreducible = true\n");
1263  Distribution Dist;
1264  unsigned NumHeadersWithWeight = 0;
1265  Optional<uint64_t> MinHeaderWeight;
1266  DenseSet<uint32_t> HeadersWithoutWeight;
1267  HeadersWithoutWeight.reserve(Loop.NumHeaders);
1268  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1269  auto &HeaderNode = Loop.Nodes[H];
1270  const BlockT *Block = getBlock(HeaderNode);
1271  IsIrrLoopHeader.set(Loop.Nodes[H].Index);
1272  Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
1273  if (!HeaderWeight) {
1274  LLVM_DEBUG(dbgs() << "Missing irr loop header metadata on "
1275  << getBlockName(HeaderNode) << "\n");
1276  HeadersWithoutWeight.insert(H);
1277  continue;
1278  }
1279  LLVM_DEBUG(dbgs() << getBlockName(HeaderNode)
1280  << " has irr loop header weight "
1281  << HeaderWeight.getValue() << "\n");
1282  NumHeadersWithWeight++;
1283  uint64_t HeaderWeightValue = HeaderWeight.getValue();
1284  if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
1285  MinHeaderWeight = HeaderWeightValue;
1286  if (HeaderWeightValue) {
1287  Dist.addLocal(HeaderNode, HeaderWeightValue);
1288  }
1289  }
1290  // As a heuristic, if some headers don't have a weight, give them the
1291  // minimum weight seen (not to disrupt the existing trends too much by
1292  // using a weight that's in the general range of the other headers' weights,
1293  // and the minimum seems to perform better than the average.)
1294  // FIXME: better update in the passes that drop the header weight.
1295  // If no headers have a weight, give them even weight (use weight 1).
1296  if (!MinHeaderWeight)
1297  MinHeaderWeight = 1;
1298  for (uint32_t H : HeadersWithoutWeight) {
1299  auto &HeaderNode = Loop.Nodes[H];
1300  assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() &&
1301  "Shouldn't have a weight metadata");
1302  uint64_t MinWeight = MinHeaderWeight.getValue();
1303  LLVM_DEBUG(dbgs() << "Giving weight " << MinWeight << " to "
1304  << getBlockName(HeaderNode) << "\n");
1305  if (MinWeight)
1306  Dist.addLocal(HeaderNode, MinWeight);
1307  }
1308  distributeIrrLoopHeaderMass(Dist);
1309  for (const BlockNode &M : Loop.Nodes)
1310  if (!propagateMassToSuccessors(&Loop, M))
1311  llvm_unreachable("unhandled irreducible control flow");
1312  if (NumHeadersWithWeight == 0)
1313  // No headers have a metadata. Adjust header mass.
1314  adjustLoopHeaderMass(Loop);
1315  } else {
1316  Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
1317  if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
1318  llvm_unreachable("irreducible control flow to loop header!?");
1319  for (const BlockNode &M : Loop.members())
1320  if (!propagateMassToSuccessors(&Loop, M))
1321  // Irreducible backedge.
1322  return false;
1323  }
1324 
1325  computeLoopScale(Loop);
1326  packageLoop(Loop);
1327  return true;
1328 }
1329 
1330 template <class BT>
1331 bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() {
1332  // Compute mass in function.
1333  LLVM_DEBUG(dbgs() << "compute-mass-in-function\n");
1334  assert(!Working.empty() && "no blocks in function");
1335  assert(!Working[0].isLoopHeader() && "entry block is a loop header");
1336 
1337  Working[0].getMass() = BlockMass::getFull();
1338  for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) {
1339  // Check for nodes that have been packaged.
1340  BlockNode Node = getNode(I);
1341  if (Working[Node.Index].isPackaged())
1342  continue;
1343 
1344  if (!propagateMassToSuccessors(nullptr, Node))
1345  return false;
1346  }
1347  return true;
1348 }
1349 
1350 template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {
1351  if (tryToComputeMassInFunction())
1352  return;
1353  computeIrreducibleMass(nullptr, Loops.begin());
1354  if (tryToComputeMassInFunction())
1355  return;
1356  llvm_unreachable("unhandled irreducible control flow");
1357 }
1358 
1359 template <class BT>
1360 bool BlockFrequencyInfoImpl<BT>::needIterativeInference() const {
1362  return false;
1363  if (!F->getFunction().hasProfileData())
1364  return false;
1365  // Apply iterative inference only if the function contains irreducible loops;
1366  // otherwise, computed block frequencies are reasonably correct.
1367  for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) {
1368  if (L->isIrreducible())
1369  return true;
1370  }
1371  return false;
1372 }
1373 
1374 template <class BT> void BlockFrequencyInfoImpl<BT>::applyIterativeInference() {
1375  // Extract blocks for processing: a block is considered for inference iff it
1376  // can be reached from the entry by edges with a positive probability.
1377  // Non-processed blocks are assigned with the zero frequency and are ignored
1378  // in the computation
1379  std::vector<const BlockT *> ReachableBlocks;
1380  findReachableBlocks(ReachableBlocks);
1381  if (ReachableBlocks.empty())
1382  return;
1383 
1384  // The map is used to to index successors/predecessors of reachable blocks in
1385  // the ReachableBlocks vector
1386  DenseMap<const BlockT *, size_t> BlockIndex;
1387  // Extract initial frequencies for the reachable blocks
1388  auto Freq = std::vector<Scaled64>(ReachableBlocks.size());
1389  Scaled64 SumFreq;
1390  for (size_t I = 0; I < ReachableBlocks.size(); I++) {
1391  const BlockT *BB = ReachableBlocks[I];
1392  BlockIndex[BB] = I;
1393  Freq[I] = getFloatingBlockFreq(BB);
1394  SumFreq += Freq[I];
1395  }
1396  assert(!SumFreq.isZero() && "empty initial block frequencies");
1397 
1398  LLVM_DEBUG(dbgs() << "Applying iterative inference for " << F->getName()
1399  << " with " << ReachableBlocks.size() << " blocks\n");
1400 
1401  // Normalizing frequencies so they sum up to 1.0
1402  for (auto &Value : Freq) {
1403  Value /= SumFreq;
1404  }
1405 
1406  // Setting up edge probabilities using sparse matrix representation:
1407  // ProbMatrix[I] holds a vector of pairs (J, P) where Pr[J -> I | J] = P
1408  ProbMatrixType ProbMatrix;
1409  initTransitionProbabilities(ReachableBlocks, BlockIndex, ProbMatrix);
1410 
1411  // Run the propagation
1412  iterativeInference(ProbMatrix, Freq);
1413 
1414  // Assign computed frequency values
1415  for (const BlockT &BB : *F) {
1416  auto Node = getNode(&BB);
1417  if (!Node.isValid())
1418  continue;
1419  if (BlockIndex.count(&BB)) {
1420  Freqs[Node.Index].Scaled = Freq[BlockIndex[&BB]];
1421  } else {
1422  Freqs[Node.Index].Scaled = Scaled64::getZero();
1423  }
1424  }
1425 }
1426 
1427 template <class BT>
1428 void BlockFrequencyInfoImpl<BT>::iterativeInference(
1429  const ProbMatrixType &ProbMatrix, std::vector<Scaled64> &Freq) const {
1431  "incorrectly specified precision");
1432  // Convert double precision to Scaled64
1433  const auto Precision =
1434  Scaled64::getInverse(static_cast<uint64_t>(1.0 / IterativeBFIPrecision));
1435  const size_t MaxIterations = IterativeBFIMaxIterationsPerBlock * Freq.size();
1436 
1437 #ifndef NDEBUG
1438  LLVM_DEBUG(dbgs() << " Initial discrepancy = "
1439  << discrepancy(ProbMatrix, Freq).toString() << "\n");
1440 #endif
1441 
1442  // Successors[I] holds unique sucessors of the I-th block
1443  auto Successors = std::vector<std::vector<size_t>>(Freq.size());
1444  for (size_t I = 0; I < Freq.size(); I++) {
1445  for (auto &Jump : ProbMatrix[I]) {
1446  Successors[Jump.first].push_back(I);
1447  }
1448  }
1449 
1450  // To speedup computation, we maintain a set of "active" blocks whose
1451  // frequencies need to be updated based on the incoming edges.
1452  // The set is dynamic and changes after every update. Initially all blocks
1453  // with a positive frequency are active
1454  auto IsActive = std::vector<bool>(Freq.size(), false);
1455  std::queue<size_t> ActiveSet;
1456  for (size_t I = 0; I < Freq.size(); I++) {
1457  if (Freq[I] > 0) {
1458  ActiveSet.push(I);
1459  IsActive[I] = true;
1460  }
1461  }
1462 
1463  // Iterate over the blocks propagating frequencies
1464  size_t It = 0;
1465  while (It++ < MaxIterations && !ActiveSet.empty()) {
1466  size_t I = ActiveSet.front();
1467  ActiveSet.pop();
1468  IsActive[I] = false;
1469 
1470  // Compute a new frequency for the block: NewFreq := Freq \times ProbMatrix.
1471  // A special care is taken for self-edges that needs to be scaled by
1472  // (1.0 - SelfProb), where SelfProb is the sum of probabilities on the edges
1473  Scaled64 NewFreq;
1474  Scaled64 OneMinusSelfProb = Scaled64::getOne();
1475  for (auto &Jump : ProbMatrix[I]) {
1476  if (Jump.first == I) {
1477  OneMinusSelfProb -= Jump.second;
1478  } else {
1479  NewFreq += Freq[Jump.first] * Jump.second;
1480  }
1481  }
1482  if (OneMinusSelfProb != Scaled64::getOne())
1483  NewFreq /= OneMinusSelfProb;
1484 
1485  // If the block's frequency has changed enough, then
1486  // make sure the block and its successors are in the active set
1487  auto Change = Freq[I] >= NewFreq ? Freq[I] - NewFreq : NewFreq - Freq[I];
1488  if (Change > Precision) {
1489  ActiveSet.push(I);
1490  IsActive[I] = true;
1491  for (size_t Succ : Successors[I]) {
1492  if (!IsActive[Succ]) {
1493  ActiveSet.push(Succ);
1494  IsActive[Succ] = true;
1495  }
1496  }
1497  }
1498 
1499  // Update the frequency for the block
1500  Freq[I] = NewFreq;
1501  }
1502 
1503  LLVM_DEBUG(dbgs() << " Completed " << It << " inference iterations"
1504  << format(" (%0.0f per block)", double(It) / Freq.size())
1505  << "\n");
1506 #ifndef NDEBUG
1507  LLVM_DEBUG(dbgs() << " Final discrepancy = "
1508  << discrepancy(ProbMatrix, Freq).toString() << "\n");
1509 #endif
1510 }
1511 
1512 template <class BT>
1513 void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
1514  std::vector<const BlockT *> &Blocks) const {
1515  // Find all blocks to apply inference on, that is, reachable from the entry
1516  // along edges with non-zero probablities
1517  std::queue<const BlockT *> Queue;
1518  std::unordered_set<const BlockT *> Reachable;
1519  const BlockT *Entry = &F->front();
1520  Queue.push(Entry);
1521  Reachable.insert(Entry);
1522  while (!Queue.empty()) {
1523  const BlockT *SrcBB = Queue.front();
1524  Queue.pop();
1525  for (const BlockT *DstBB : children<const BlockT *>(SrcBB)) {
1526  auto EP = BPI->getEdgeProbability(SrcBB, DstBB);
1527  if (EP.isZero())
1528  continue;
1529  if (Reachable.find(DstBB) == Reachable.end()) {
1530  Queue.push(DstBB);
1531  Reachable.insert(DstBB);
1532  }
1533  }
1534  }
1535 
1536  // Find all blocks to apply inference on, that is, backward reachable from
1537  // the entry along (backward) edges with non-zero probablities
1538  std::unordered_set<const BlockT *> InverseReachable;
1539  for (const BlockT &BB : *F) {
1540  // An exit block is a block without any successors
1541  bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) !=
1542  GraphTraits<const BlockT *>::child_end(&BB);
1543  if (!HasSucc && Reachable.count(&BB)) {
1544  Queue.push(&BB);
1545  InverseReachable.insert(&BB);
1546  }
1547  }
1548  while (!Queue.empty()) {
1549  const BlockT *SrcBB = Queue.front();
1550  Queue.pop();
1551  for (const BlockT *DstBB : children<Inverse<const BlockT *>>(SrcBB)) {
1552  auto EP = BPI->getEdgeProbability(DstBB, SrcBB);
1553  if (EP.isZero())
1554  continue;
1555  if (InverseReachable.find(DstBB) == InverseReachable.end()) {
1556  Queue.push(DstBB);
1557  InverseReachable.insert(DstBB);
1558  }
1559  }
1560  }
1561 
1562  // Collect the result
1563  Blocks.reserve(F->size());
1564  for (const BlockT &BB : *F) {
1565  if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
1566  Blocks.push_back(&BB);
1567  }
1568  }
1569 }
1570 
1571 template <class BT>
1572 void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities(
1573  const std::vector<const BlockT *> &Blocks,
1574  const DenseMap<const BlockT *, size_t> &BlockIndex,
1575  ProbMatrixType &ProbMatrix) const {
1576  const size_t NumBlocks = Blocks.size();
1577  auto Succs = std::vector<std::vector<std::pair<size_t, Scaled64>>>(NumBlocks);
1578  auto SumProb = std::vector<Scaled64>(NumBlocks);
1579 
1580  // Find unique successors and corresponding probabilities for every block
1581  for (size_t Src = 0; Src < NumBlocks; Src++) {
1582  const BlockT *BB = Blocks[Src];
1583  std::unordered_set<const BlockT *> UniqueSuccs;
1584  for (const auto SI : children<const BlockT *>(BB)) {
1585  // Ignore cold blocks
1586  if (BlockIndex.find(SI) == BlockIndex.end())
1587  continue;
1588  // Ignore parallel edges between BB and SI blocks
1589  if (UniqueSuccs.find(SI) != UniqueSuccs.end())
1590  continue;
1591  UniqueSuccs.insert(SI);
1592  // Ignore jumps with zero probability
1593  auto EP = BPI->getEdgeProbability(BB, SI);
1594  if (EP.isZero())
1595  continue;
1596 
1597  auto EdgeProb =
1598  Scaled64::getFraction(EP.getNumerator(), EP.getDenominator());
1599  size_t Dst = BlockIndex.find(SI)->second;
1600  Succs[Src].push_back(std::make_pair(Dst, EdgeProb));
1601  SumProb[Src] += EdgeProb;
1602  }
1603  }
1604 
1605  // Add transitions for every jump with positive branch probability
1606  ProbMatrix = ProbMatrixType(NumBlocks);
1607  for (size_t Src = 0; Src < NumBlocks; Src++) {
1608  // Ignore blocks w/o successors
1609  if (Succs[Src].empty())
1610  continue;
1611 
1612  assert(!SumProb[Src].isZero() && "Zero sum probability of non-exit block");
1613  for (auto &Jump : Succs[Src]) {
1614  size_t Dst = Jump.first;
1615  Scaled64 Prob = Jump.second;
1616  ProbMatrix[Dst].push_back(std::make_pair(Src, Prob / SumProb[Src]));
1617  }
1618  }
1619 
1620  // Add transitions from sinks to the source
1621  size_t EntryIdx = BlockIndex.find(&F->front())->second;
1622  for (size_t Src = 0; Src < NumBlocks; Src++) {
1623  if (Succs[Src].empty()) {
1624  ProbMatrix[EntryIdx].push_back(std::make_pair(Src, Scaled64::getOne()));
1625  }
1626  }
1627 }
1628 
1629 #ifndef NDEBUG
1630 template <class BT>
1631 BlockFrequencyInfoImplBase::Scaled64 BlockFrequencyInfoImpl<BT>::discrepancy(
1632  const ProbMatrixType &ProbMatrix, const std::vector<Scaled64> &Freq) const {
1633  assert(Freq[0] > 0 && "Incorrectly computed frequency of the entry block");
1634  Scaled64 Discrepancy;
1635  for (size_t I = 0; I < ProbMatrix.size(); I++) {
1636  Scaled64 Sum;
1637  for (const auto &Jump : ProbMatrix[I]) {
1638  Sum += Freq[Jump.first] * Jump.second;
1639  }
1640  Discrepancy += Freq[I] >= Sum ? Freq[I] - Sum : Sum - Freq[I];
1641  }
1642  // Normalizing by the frequency of the entry block
1643  return Discrepancy / Freq[0];
1644 }
1645 #endif
1646 
1647 /// \note This should be a lambda, but that crashes GCC 4.7.
1648 namespace bfi_detail {
1649 
1650 template <class BT> struct BlockEdgesAdder {
1651  using BlockT = BT;
1654 
1656 
1658  : BFI(BFI) {}
1659 
1661  const LoopData *OuterLoop) {
1662  const BlockT *BB = BFI.RPOT[Irr.Node.Index];
1663  for (const auto Succ : children<const BlockT *>(BB))
1664  G.addEdge(Irr, BFI.getNode(Succ), OuterLoop);
1665  }
1666 };
1667 
1668 } // end namespace bfi_detail
1669 
1670 template <class BT>
1671 void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
1672  LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
1673  LLVM_DEBUG(dbgs() << "analyze-irreducible-in-";
1674  if (OuterLoop) dbgs()
1675  << "loop: " << getLoopName(*OuterLoop) << "\n";
1676  else dbgs() << "function\n");
1677 
1678  using namespace bfi_detail;
1679 
1680  // Ideally, addBlockEdges() would be declared here as a lambda, but that
1681  // crashes GCC 4.7.
1682  BlockEdgesAdder<BT> addBlockEdges(*this);
1683  IrreducibleGraph G(*this, OuterLoop, addBlockEdges);
1684 
1685  for (auto &L : analyzeIrreducible(G, OuterLoop, Insert))
1686  computeMassInLoop(L);
1687 
1688  if (!OuterLoop)
1689  return;
1690  updateLoopWithIrreducible(*OuterLoop);
1691 }
1692 
1693 // A helper function that converts a branch probability into weight.
1695  return Prob.getNumerator();
1696 }
1697 
1698 template <class BT>
1699 bool
1700 BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
1701  const BlockNode &Node) {
1702  LLVM_DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n");
1703  // Calculate probability for successors.
1704  Distribution Dist;
1705  if (auto *Loop = Working[Node.Index].getPackagedLoop()) {
1706  assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop");
1707  if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist))
1708  // Irreducible backedge.
1709  return false;
1710  } else {
1711  const BlockT *BB = getBlock(Node);
1712  for (auto SI = GraphTraits<const BlockT *>::child_begin(BB),
1713  SE = GraphTraits<const BlockT *>::child_end(BB);
1714  SI != SE; ++SI)
1715  if (!addToDist(
1716  Dist, OuterLoop, Node, getNode(*SI),
1717  getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI))))
1718  // Irreducible backedge.
1719  return false;
1720  }
1721 
1722  // Distribute mass to successors, saving exit and backedge data in the
1723  // loop header.
1724  distributeMass(Node, OuterLoop, Dist);
1725  return true;
1726 }
1727 
1728 template <class BT>
1730  if (!F)
1731  return OS;
1732  OS << "block-frequency-info: " << F->getName() << "\n";
1733  for (const BlockT &BB : *F) {
1734  OS << " - " << bfi_detail::getBlockName(&BB) << ": float = ";
1735  getFloatingBlockFreq(&BB).print(OS, 5)
1736  << ", int = " << getBlockFreq(&BB).getFrequency();
1739  F->getFunction(), getNode(&BB)))
1740  OS << ", count = " << ProfileCount.getValue();
1741  if (Optional<uint64_t> IrrLoopHeaderWeight =
1742  BB.getIrrLoopHeaderWeight())
1743  OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue();
1744  OS << "\n";
1745  }
1746 
1747  // Add an extra newline for readability.
1748  OS << "\n";
1749  return OS;
1750 }
1751 
1752 template <class BT>
1754  BlockFrequencyInfoImpl<BT> &Other) const {
1755  bool Match = true;
1757  DenseMap<const BlockT *, BlockNode> OtherValidNodes;
1758  for (auto &Entry : Nodes) {
1759  const BlockT *BB = Entry.first;
1760  if (BB) {
1761  ValidNodes[BB] = Entry.second.first;
1762  }
1763  }
1764  for (auto &Entry : Other.Nodes) {
1765  const BlockT *BB = Entry.first;
1766  if (BB) {
1767  OtherValidNodes[BB] = Entry.second.first;
1768  }
1769  }
1770  unsigned NumValidNodes = ValidNodes.size();
1771  unsigned NumOtherValidNodes = OtherValidNodes.size();
1772  if (NumValidNodes != NumOtherValidNodes) {
1773  Match = false;
1774  dbgs() << "Number of blocks mismatch: " << NumValidNodes << " vs "
1775  << NumOtherValidNodes << "\n";
1776  } else {
1777  for (auto &Entry : ValidNodes) {
1778  const BlockT *BB = Entry.first;
1779  BlockNode Node = Entry.second;
1780  if (OtherValidNodes.count(BB)) {
1781  BlockNode OtherNode = OtherValidNodes[BB];
1782  const auto &Freq = Freqs[Node.Index];
1783  const auto &OtherFreq = Other.Freqs[OtherNode.Index];
1784  if (Freq.Integer != OtherFreq.Integer) {
1785  Match = false;
1786  dbgs() << "Freq mismatch: " << bfi_detail::getBlockName(BB) << " "
1787  << Freq.Integer << " vs " << OtherFreq.Integer << "\n";
1788  }
1789  } else {
1790  Match = false;
1791  dbgs() << "Block " << bfi_detail::getBlockName(BB) << " index "
1792  << Node.Index << " does not exist in Other.\n";
1793  }
1794  }
1795  // If there's a valid node in OtherValidNodes that's not in ValidNodes,
1796  // either the above num check or the check on OtherValidNodes will fail.
1797  }
1798  if (!Match) {
1799  dbgs() << "This\n";
1800  print(dbgs());
1801  dbgs() << "Other\n";
1802  Other.print(dbgs());
1803  }
1804  assert(Match && "BFI mismatch");
1805 }
1806 
1807 // Graph trait base class for block frequency information graph
1808 // viewer.
1809 
1811 
1812 template <class BlockFrequencyInfoT, class BranchProbabilityInfoT>
1815  using NodeRef = typename GTraits::NodeRef;
1816  using EdgeIter = typename GTraits::ChildIteratorType;
1817  using NodeIter = typename GTraits::nodes_iterator;
1818 
1820 
1821  explicit BFIDOTGraphTraitsBase(bool isSimple = false)
1823 
1824  static StringRef getGraphName(const BlockFrequencyInfoT *G) {
1825  return G->getFunction()->getName();
1826  }
1827 
1828  std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph,
1829  unsigned HotPercentThreshold = 0) {
1830  std::string Result;
1831  if (!HotPercentThreshold)
1832  return Result;
1833 
1834  // Compute MaxFrequency on the fly:
1835  if (!MaxFrequency) {
1836  for (NodeIter I = GTraits::nodes_begin(Graph),
1837  E = GTraits::nodes_end(Graph);
1838  I != E; ++I) {
1839  NodeRef N = *I;
1840  MaxFrequency =
1841  std::max(MaxFrequency, Graph->getBlockFreq(N).getFrequency());
1842  }
1843  }
1844  BlockFrequency Freq = Graph->getBlockFreq(Node);
1845  BlockFrequency HotFreq =
1847  BranchProbability::getBranchProbability(HotPercentThreshold, 100));
1848 
1849  if (Freq < HotFreq)
1850  return Result;
1851 
1852  raw_string_ostream OS(Result);
1853  OS << "color=\"red\"";
1854  OS.flush();
1855  return Result;
1856  }
1857 
1858  std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph,
1859  GVDAGType GType, int layout_order = -1) {
1860  std::string Result;
1861  raw_string_ostream OS(Result);
1862 
1863  if (layout_order != -1)
1864  OS << Node->getName() << "[" << layout_order << "] : ";
1865  else
1866  OS << Node->getName() << " : ";
1867  switch (GType) {
1868  case GVDT_Fraction:
1869  Graph->printBlockFreq(OS, Node);
1870  break;
1871  case GVDT_Integer:
1872  OS << Graph->getBlockFreq(Node).getFrequency();
1873  break;
1874  case GVDT_Count: {
1875  auto Count = Graph->getBlockProfileCount(Node);
1876  if (Count)
1877  OS << Count.getValue();
1878  else
1879  OS << "Unknown";
1880  break;
1881  }
1882  case GVDT_None:
1883  llvm_unreachable("If we are not supposed to render a graph we should "
1884  "never reach this point.");
1885  }
1886  return Result;
1887  }
1888 
1890  const BlockFrequencyInfoT *BFI,
1891  const BranchProbabilityInfoT *BPI,
1892  unsigned HotPercentThreshold = 0) {
1893  std::string Str;
1894  if (!BPI)
1895  return Str;
1896 
1897  BranchProbability BP = BPI->getEdgeProbability(Node, EI);
1898  uint32_t N = BP.getNumerator();
1899  uint32_t D = BP.getDenominator();
1900  double Percent = 100.0 * N / D;
1901  raw_string_ostream OS(Str);
1902  OS << format("label=\"%.1f%%\"", Percent);
1903 
1904  if (HotPercentThreshold) {
1905  BlockFrequency EFreq = BFI->getBlockFreq(Node) * BP;
1907  BranchProbability(HotPercentThreshold, 100);
1908 
1909  if (EFreq >= HotFreq) {
1910  OS << ",color=\"red\"";
1911  }
1912  }
1913 
1914  OS.flush();
1915  return Str;
1916  }
1917 };
1918 
1919 } // end namespace llvm
1920 
1921 #undef DEBUG_TYPE
1922 
1923 #endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
llvm::BlockFrequencyInfoImplBase::Weight::Amount
uint64_t Amount
Definition: BlockFrequencyInfoImpl.h:369
llvm::BlockFrequencyInfoImplBase::getLoopName
std::string getLoopName(const LoopData &Loop) const
Definition: BlockFrequencyInfoImpl.cpp:642
llvm::BlockFrequencyInfoImplBase::BlockNode::operator!=
bool operator!=(const BlockNode &X) const
Definition: BlockFrequencyInfoImpl.h:200
llvm::bfi_detail::BlockMass
Mass of a block.
Definition: BlockFrequencyInfoImpl.h:91
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::BlockFrequencyInfoImpl::calculate
void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, const LoopInfoT &LI)
Definition: BlockFrequencyInfoImpl.h:1110
llvm::bfi_detail::BFICallbackVH
Definition: BlockFrequencyInfoImpl.h:566
llvm::BlockFrequencyInfoImplBase::distributeIrrLoopHeaderMass
void distributeIrrLoopHeaderMass(Distribution &Dist)
Definition: BlockFrequencyInfoImpl.cpp:873
llvm::BlockFrequencyInfoImplBase::LoopData::LoopData
LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, It2 LastOther)
Definition: BlockFrequencyInfoImpl.h:241
llvm::BlockFrequencyInfoImplBase::computeLoopScale
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
Definition: BlockFrequencyInfoImpl.cpp:387
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
Optional.h
llvm::bfi_detail::operator+
BlockMass operator+(BlockMass L, BlockMass R)
Definition: BlockFrequencyInfoImpl.h:152
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::BlockFrequencyInfoImplBase::LoopData::members_begin
NodeList::const_iterator members_begin() const
Definition: BlockFrequencyInfoImpl.h:267
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::BlockFrequencyInfoImplBase::WorkingData::Loop
LoopData * Loop
The loop this block is inside.
Definition: BlockFrequencyInfoImpl.h:280
llvm::bfi_detail::IrreducibleGraph::addNodesInLoop
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
Definition: BlockFrequencyInfoImpl.cpp:661
llvm::bfi_detail::IrreducibleGraph
Graph of irreducible control flow.
Definition: BlockFrequencyInfoImpl.h:602
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::BlockFrequencyInfoImplBase::WorkingData::WorkingData
WorkingData(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.h:283
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1661
llvm::BranchProbability::getNumerator
uint32_t getNumerator() const
Definition: BranchProbability.h:65
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::bfi_detail::BlockMass::operator!=
bool operator!=(BlockMass X) const
Definition: BlockFrequencyInfoImpl.h:136
llvm::BlockFrequencyInfoImplBase::dump
void dump() const
Definition: BlockFrequencyInfoImpl.h:520
llvm::po_end
po_iterator< T > po_end(const T &G)
Definition: PostOrderIterator.h:186
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::toString
std::string toString(Error E)
Write all error messages (if any) in E to a string.
Definition: Error.h:1030
llvm::bfi_detail::BlockMass::operator==
bool operator==(BlockMass X) const
Definition: BlockFrequencyInfoImpl.h:135
llvm::SmallVector< std::pair< BlockNode, BlockMass >, 4 >
llvm::BlockFrequencyInfoImpl::getFloatingBlockFreq
Scaled64 getFloatingBlockFreq(const BlockT *BB) const
Definition: BlockFrequencyInfoImpl.h:1050
llvm::BlockFrequencyInfoImplBase::LoopData::Parent
LoopData * Parent
The parent loop.
Definition: BlockFrequencyInfoImpl.h:228
llvm::bfi_detail::BlockMass::operator<=
bool operator<=(BlockMass X) const
Definition: BlockFrequencyInfoImpl.h:137
llvm::bfi_detail::BlockEdgesAdder::BFI
const BlockFrequencyInfoImpl< BT > & BFI
Definition: BlockFrequencyInfoImpl.h:1655
llvm::BlockFrequencyInfoImpl::forgetBlock
void forgetBlock(const BlockT *BB)
Definition: BlockFrequencyInfoImpl.h:1043
llvm::bfi_detail::IrreducibleGraph::IrreducibleGraph
IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop, BlockEdgesAdder addBlockEdges)
Construct an explicit graph containing irreducible control flow.
Definition: BlockFrequencyInfoImpl.h:637
llvm::BlockFrequencyInfoImplBase::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.cpp:590
llvm::BlockFrequencyInfoImplBase::FrequencyData
Stats about a block itself.
Definition: BlockFrequencyInfoImpl.h:214
ErrorHandling.h
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::BlockFrequencyInfoImplBase::LoopData::NumHeaders
uint32_t NumHeaders
Number of headers.
Definition: BlockFrequencyInfoImpl.h:230
llvm::FloatStyle::Percent
@ Percent
llvm::BlockFrequencyInfoImpl::isIrrLoopHeader
bool isIrrLoopHeader(const BlockT *BB)
Definition: BlockFrequencyInfoImpl.h:1037
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::BlockFrequencyInfoImplBase::Loops
std::list< LoopData > Loops
Indexed information about loops.
Definition: BlockFrequencyInfoImpl.h:431
llvm::DenseMapBase::begin
iterator begin()
Definition: DenseMap.h:74
BlockFrequency.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::BFIDOTGraphTraitsBase::BFIDOTGraphTraitsBase
BFIDOTGraphTraitsBase(bool isSimple=false)
Definition: BlockFrequencyInfoImpl.h:1821
llvm::bfi_detail::IrreducibleGraph::IrrNode::pred_begin
iterator pred_begin() const
Definition: BlockFrequencyInfoImpl.h:617
llvm::bfi_detail::IrreducibleGraph::addNode
void addNode(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.h:648
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::BlockFrequencyInfoImplBase::LoopData::members_end
NodeList::const_iterator members_end() const
Definition: BlockFrequencyInfoImpl.h:271
llvm::BlockFrequencyInfoImplBase::WorkingData::getContainingLoop
LoopData * getContainingLoop() const
Definition: BlockFrequencyInfoImpl.h:292
llvm::BlockFrequencyInfoImplBase::WorkingData::isPackaged
bool isPackaged() const
Has ContainingLoop been packaged up?
Definition: BlockFrequencyInfoImpl.h:341
llvm::BlockFrequencyInfoImplBase::WorkingData::Node
BlockNode Node
This node.
Definition: BlockFrequencyInfoImpl.h:279
llvm::Optional< uint64_t >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition: BranchProbability.cpp:53
llvm::bfi_detail::getBlockName
std::string getBlockName(const BasicBlock *BB)
Get the name of a BasicBlock.
Definition: BlockFrequencyInfoImpl.h:583
llvm::bfi_detail::BlockMass::isFull
bool isFull() const
Definition: BlockFrequencyInfoImpl.h:106
llvm::BlockFrequencyInfoImplBase::Distribution::addLocal
void addLocal(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:393
llvm::BlockFrequencyInfoImplBase::getEntryFreq
uint64_t getEntryFreq() const
Definition: BlockFrequencyInfoImpl.h:539
llvm::BlockFrequencyInfoImplBase::clear
void clear()
Clear all memory.
Definition: BlockFrequencyInfoImpl.cpp:292
llvm::DiagnosticPredicateTy::Match
@ Match
llvm::bfi_detail::TypeMap
Definition: BlockFrequencyInfoImpl.h:547
llvm::BlockFrequencyInfoImplBase::Weight::Local
@ Local
Definition: BlockFrequencyInfoImpl.h:366
Format.h
llvm::BlockFrequencyInfoImpl::setBlockFreq
void setBlockFreq(const BlockT *BB, uint64_t Freq)
Definition: BlockFrequencyInfoImpl.h:1152
llvm::BlockFrequencyInfoImplBase::LoopData::getHeader
BlockNode getHeader() const
Definition: BlockFrequencyInfoImpl.h:256
llvm::BFIDOTGraphTraitsBase::NodeIter
typename GTraits::nodes_iterator NodeIter
Definition: BlockFrequencyInfoImpl.h:1817
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
llvm::BlockFrequencyInfoImplBase::packageLoop
void packageLoop(LoopData &Loop)
Package up a loop.
Definition: BlockFrequencyInfoImpl.cpp:422
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::bfi_detail::IrreducibleGraph::IrrNode::succ_end
iterator succ_end() const
Definition: BlockFrequencyInfoImpl.h:620
llvm::BlockFrequencyInfoImplBase::LoopData::Mass
BlockMass Mass
Definition: BlockFrequencyInfoImpl.h:234
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
GraphTraits.h
SparseBitVector.h
llvm::BFIDOTGraphTraitsBase::getNodeLabel
std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph, GVDAGType GType, int layout_order=-1)
Definition: BlockFrequencyInfoImpl.h:1858
CommandLine.h
llvm::BlockFrequencyInfoImplBase::updateLoopWithIrreducible
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
Definition: BlockFrequencyInfoImpl.cpp:825
llvm::SparseBitVector
Definition: SparseBitVector.h:255
llvm::GVDT_Fraction
@ GVDT_Fraction
Definition: BlockFrequencyInfoImpl.h:1810
llvm::bfi_detail::BlockMass::operator>=
bool operator>=(BlockMass X) const
Definition: BlockFrequencyInfoImpl.h:138
llvm::BlockFrequencyInfoImplBase::WorkingData::isAPackage
bool isAPackage() const
Has Loop been packaged up?
Definition: BlockFrequencyInfoImpl.h:344
llvm::BlockFrequencyInfoImpl::print
raw_ostream & print(raw_ostream &OS) const override
Print the frequencies for the current function.
Definition: BlockFrequencyInfoImpl.h:1729
llvm::po_begin
po_iterator< T > po_begin(const T &G)
Definition: PostOrderIterator.h:184
llvm::MachineBranchProbabilityInfo
Definition: MachineBranchProbabilityInfo.h:24
llvm::BlockFrequencyInfoImplBase::finalizeMetrics
void finalizeMetrics()
Finalize frequency metrics.
Definition: BlockFrequencyInfoImpl.cpp:552
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::bfi_detail::BlockMass::dump
void dump() const
Definition: BlockFrequencyInfoImpl.cpp:72
llvm::BranchProbability::getDenominator
static uint32_t getDenominator()
Definition: BranchProbability.h:66
llvm::BlockFrequencyInfoImplBase::WorkingData::getResolvedNode
BlockNode getResolvedNode() const
Resolve a node to its representative.
Definition: BlockFrequencyInfoImpl.h:313
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::BlockFrequencyInfoImplBase::WorkingData::getMass
BlockMass & getMass()
Get the appropriate mass for a node.
Definition: BlockFrequencyInfoImpl.h:332
llvm::bfi_detail::IrreducibleGraph::addEdge
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
Definition: BlockFrequencyInfoImpl.cpp:682
llvm::BlockFrequencyInfoImplBase::getProfileCountFromFreq
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.cpp:598
llvm::bfi_detail::IrreducibleGraph::StartIrr
const IrrNode * StartIrr
Definition: BlockFrequencyInfoImpl.h:623
Twine.h
llvm::bfi_detail::IrreducibleGraph::Start
BlockNode Start
Definition: BlockFrequencyInfoImpl.h:622
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:115
llvm::bfi_detail::IrreducibleGraph::initialize
void initialize(const BFIBase::LoopData *OuterLoop, BlockEdgesAdder addBlockEdges)
Definition: BlockFrequencyInfoImpl.h:662
llvm::BlockFrequencyInfoImplBase::~BlockFrequencyInfoImplBase
virtual ~BlockFrequencyInfoImplBase()=default
Virtual destructor.
llvm::bfi_detail::BlockMass::toScaled
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
Definition: BlockFrequencyInfoImpl.cpp:65
llvm::BlockFrequencyInfoImplBase::getBlockFreq
BlockFrequency getBlockFreq(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:574
llvm::BitTracker
Definition: BitTracker.h:35
llvm::BlockFrequencyInfoImplBase::LoopData::isHeader
bool isHeader(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.h:249
DenseSet.h
llvm::BlockFrequencyInfoImplBase::BlockNode::BlockNode
BlockNode()
Definition: BlockFrequencyInfoImpl.h:196
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::bfi_detail::IrreducibleGraph::BFI
BFIBase & BFI
Definition: BlockFrequencyInfoImpl.h:605
llvm::BlockFrequencyInfoImpl::printBlockFreq
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockT *BB) const
Definition: BlockFrequencyInfoImpl.h:1072
llvm::BlockFrequencyInfoImplBase::distributeMass
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
Definition: BlockFrequencyInfoImpl.cpp:447
llvm::bfi_detail::IrreducibleGraph::IrrNode::iterator
std::deque< const IrrNode * >::const_iterator iterator
Definition: BlockFrequencyInfoImpl.h:615
llvm::bfi_detail::BlockMass::getFull
static BlockMass getFull()
Definition: BlockFrequencyInfoImpl.h:100
llvm::bfi_detail::operator*
BlockMass operator*(BlockMass L, BranchProbability R)
Definition: BlockFrequencyInfoImpl.h:158
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::BFIDOTGraphTraitsBase::getEdgeAttributes
std::string getEdgeAttributes(NodeRef Node, EdgeIter EI, const BlockFrequencyInfoT *BFI, const BranchProbabilityInfoT *BPI, unsigned HotPercentThreshold=0)
Definition: BlockFrequencyInfoImpl.h:1889
llvm::BlockFrequencyInfoImplBase::Weight::TargetNode
BlockNode TargetNode
Definition: BlockFrequencyInfoImpl.h:368
llvm::bfi_detail::BFICallbackVH< BasicBlock, BFIImplT >::deleted
void deleted() override
Callback for Value destruction.
Definition: BlockFrequencyInfoImpl.h:1093
llvm::BlockFrequencyInfoImplBase::Distribution
Distribution of unscaled probability weight.
Definition: BlockFrequencyInfoImpl.h:384
llvm::BlockFrequencyInfoImplBase::WorkingData::isLoopHeader
bool isLoopHeader() const
Definition: BlockFrequencyInfoImpl.h:285
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
BranchProbability.h
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::BlockFrequencyInfoImplBase::WorkingData::isDoubleLoopHeader
bool isDoubleLoopHeader() const
Definition: BlockFrequencyInfoImpl.h:287
llvm::BlockFrequencyInfoImpl::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockT *BB, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.h:1023
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::BlockFrequencyInfoImplBase::Distribution::normalize
void normalize()
Normalize the distribution.
Definition: BlockFrequencyInfoImpl.cpp:236
llvm::bfi_detail::IrreducibleGraph::IrrNode::pred_end
iterator pred_end() const
Definition: BlockFrequencyInfoImpl.h:619
llvm::BlockFrequencyInfoImpl::getProfileCountFromFreq
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq, bool AllowSynthetic=false) const
Definition: BlockFrequencyInfoImpl.h:1030
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
BasicBlock.h
llvm::cl::opt< bool >
llvm::BlockFrequencyInfoImplBase::printBlockFreq
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:647
llvm::BlockFrequencyInfoImplBase::LoopData::isIrreducible
bool isIrreducible() const
Definition: BlockFrequencyInfoImpl.h:257
llvm::BlockFrequencyInfoImplBase::BlockNode::isValid
bool isValid() const
Definition: BlockFrequencyInfoImpl.h:206
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::BlockFrequency
Definition: BlockFrequency.h:24
llvm::bfi_detail::operator-
BlockMass operator-(BlockMass L, BlockMass R)
Definition: BlockFrequencyInfoImpl.h:155
llvm::BlockFrequencyInfoImplBase::Distribution::addBackedge
void addBackedge(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:401
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::bfi_detail::BlockEdgesAdder::operator()
void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, const LoopData *OuterLoop)
Definition: BlockFrequencyInfoImpl.h:1660
llvm::bfi_detail::BlockMass::operator!
bool operator!() const
Definition: BlockFrequencyInfoImpl.h:109
llvm::BlockFrequencyInfoImplBase::Weight::Exit
@ Exit
Definition: BlockFrequencyInfoImpl.h:366
llvm::DenseMap
Definition: DenseMap.h:714
llvm::bfi_detail::BlockEdgesAdder
Definition: BlockFrequencyInfoImpl.h:75
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::bfi_detail::BlockMass::operator-=
BlockMass & operator-=(BlockMass X)
Subtract another mass.
Definition: BlockFrequencyInfoImpl.h:124
llvm::BlockFrequencyInfoImplBase::FrequencyData::Scaled
Scaled64 Scaled
Definition: BlockFrequencyInfoImpl.h:215
llvm::BlockFrequencyInfoImplBase::WorkingData
Index of loop information.
Definition: BlockFrequencyInfoImpl.h:278
llvm::bfi_detail::IrreducibleGraph::IrrNode
Definition: BlockFrequencyInfoImpl.h:608
llvm::BlockFrequencyInfoImplBase::Weight::DistType
DistType
Definition: BlockFrequencyInfoImpl.h:366
llvm::IterativeBFIPrecision
llvm::cl::opt< double > IterativeBFIPrecision
llvm::BlockFrequencyInfoImplBase::Working
std::vector< WorkingData > Working
Loop data: see initializeLoops().
Definition: BlockFrequencyInfoImpl.h:428
llvm::SmallVectorImpl< BlockNode >::const_iterator
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:563
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::BlockFrequencyInfoImplBase::BlockNode::operator<
bool operator<(const BlockNode &X) const
Definition: BlockFrequencyInfoImpl.h:203
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::BlockFrequencyInfoImplBase::addLoopSuccessorsToDist
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
Definition: BlockFrequencyInfoImpl.cpp:374
llvm::children
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
Definition: GraphTraits.h:121
llvm::bfi_detail::IrreducibleGraph::addNodesInFunction
void addNodesInFunction()
Definition: BlockFrequencyInfoImpl.cpp:669
llvm::BFIDOTGraphTraitsBase::EdgeIter
typename GTraits::ChildIteratorType EdgeIter
Definition: BlockFrequencyInfoImpl.h:1816
llvm::BlockFrequencyInfoImplBase::Weight::Weight
Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:372
iterator_range.h
llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
llvm::bfi_detail::BlockMass::operator+=
BlockMass & operator+=(BlockMass X)
Add another mass.
Definition: BlockFrequencyInfoImpl.h:114
llvm::BlockFrequencyInfoImplBase::unwrapLoops
void unwrapLoops()
Unwrap loops.
Definition: BlockFrequencyInfoImpl.cpp:543
llvm::bfi_detail::BlockMass::operator>
bool operator>(BlockMass X) const
Definition: BlockFrequencyInfoImpl.h:140
llvm::BlockFrequencyInfoImplBase::WorkingData::isADoublePackage
bool isADoublePackage() const
Has Loop been packaged up twice?
Definition: BlockFrequencyInfoImpl.h:347
llvm::bfi_detail::BlockMass::getEmpty
static BlockMass getEmpty()
Definition: BlockFrequencyInfoImpl.h:98
llvm::bfi_detail::BFICallbackVH< MachineBasicBlock, BFIImplT >::BFICallbackVH
BFICallbackVH(const MachineBasicBlock *, BFIImplT *)
Definition: BlockFrequencyInfoImpl.h:1104
llvm::GVDT_Integer
@ GVDT_Integer
Definition: BlockFrequencyInfoImpl.h:1810
llvm::BlockFrequencyInfoImplBase::BlockNode::operator>
bool operator>(const BlockNode &X) const
Definition: BlockFrequencyInfoImpl.h:204
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::BFIDOTGraphTraitsBase::getNodeAttributes
std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph, unsigned HotPercentThreshold=0)
Definition: BlockFrequencyInfoImpl.h:1828
llvm::bfi_detail::BlockMass::operator*=
BlockMass & operator*=(BranchProbability P)
Definition: BlockFrequencyInfoImpl.h:130
llvm::BlockFrequencyInfoImplBase::BlockNode::getMaxIndex
static size_t getMaxIndex()
Definition: BlockFrequencyInfoImpl.h:208
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::BlockFrequencyInfoImplBase::FrequencyData::Integer
uint64_t Integer
Definition: BlockFrequencyInfoImpl.h:216
llvm::BlockFrequencyInfoImpl::BlockFrequencyInfoImpl
BlockFrequencyInfoImpl()=default
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::CheckBFIUnknownBlockQueries
llvm::cl::opt< bool > CheckBFIUnknownBlockQueries
llvm::BlockFrequencyInfoImplBase::Weight::Backedge
@ Backedge
Definition: BlockFrequencyInfoImpl.h:366
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::BlockFrequencyInfoImplBase::WorkingData::Mass
BlockMass Mass
Mass distribution from the entry block.
Definition: BlockFrequencyInfoImpl.h:281
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::bfi_detail::IrreducibleGraph::addEdges
void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop, BlockEdgesAdder addBlockEdges)
Definition: BlockFrequencyInfoImpl.h:677
llvm::DefaultDOTGraphTraits::isSimple
bool isSimple()
Definition: DOTGraphTraits.h:33
llvm::BlockFrequencyInfoImplBase::BlockNode::Index
IndexType Index
Definition: BlockFrequencyInfoImpl.h:194
uint32_t
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::bfi_detail::BlockMass::isEmpty
bool isEmpty() const
Definition: BlockFrequencyInfoImpl.h:107
llvm::BlockFrequencyInfoImplBase::Distribution::DidOverflow
bool DidOverflow
Whether Total did overflow.
Definition: BlockFrequencyInfoImpl.h:389
llvm::BlockFrequencyInfoImplBase::print
virtual raw_ostream & print(raw_ostream &OS) const
Definition: BlockFrequencyInfoImpl.h:519
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::BlockFrequencyInfoImplBase
Base class for BlockFrequencyInfoImpl.
Definition: BlockFrequencyInfoImpl.h:179
Node
Definition: ItaniumDemangle.h:235
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::BlockFrequencyInfoImplBase::LoopData::Exits
ExitMap Exits
Successor edges (and weights).
Definition: BlockFrequencyInfoImpl.h:231
ValueHandle.h
ScaledNumber.h
llvm::bfi_detail::BlockMass::operator<
bool operator<(BlockMass X) const
Definition: BlockFrequencyInfoImpl.h:139
llvm::BlockFrequencyInfoImplBase::Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: BlockFrequencyInfoImpl.h:181
llvm::BlockFrequencyInfoImpl::getFunction
const FunctionT * getFunction() const
Definition: BlockFrequencyInfoImpl.h:1012
llvm::ScaledNumber< uint64_t >
llvm::BlockFrequencyInfoImplBase::Weight::Weight
Weight()=default
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::bfi_detail::IrreducibleGraph::IrrNode::NumIn
unsigned NumIn
Definition: BlockFrequencyInfoImpl.h:610
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
std
Definition: BitVector.h:838
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::bfi_detail::operator<<
raw_ostream & operator<<(raw_ostream &OS, BlockMass X)
Definition: BlockFrequencyInfoImpl.h:165
llvm::bfi_detail::IrreducibleGraph::IrrNode::IrrNode
IrrNode(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.h:613
llvm::bfi_detail::IrreducibleGraph::BlockNode
BFIBase::BlockNode BlockNode
Definition: BlockFrequencyInfoImpl.h:607
llvm::bfi_detail::BFICallbackVH< BasicBlock, BFIImplT >::BFICallbackVH
BFICallbackVH(const BasicBlock *BB, BFIImplT *BFIImpl)
Definition: BlockFrequencyInfoImpl.h:1088
llvm::BlockFrequencyInfoImplBase::adjustLoopHeaderMass
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
Definition: BlockFrequencyInfoImpl.cpp:836
llvm::BlockFrequencyInfoImplBase::LoopData::LoopData
LoopData(LoopData *Parent, const BlockNode &Header)
Definition: BlockFrequencyInfoImpl.h:237
llvm::BFIDOTGraphTraitsBase::NodeRef
typename GTraits::NodeRef NodeRef
Definition: BlockFrequencyInfoImpl.h:1815
llvm::BlockFrequencyInfoImplBase::getFloatingBlockFreq
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:623
llvm::BlockFrequencyInfoImplBase::Distribution::Weights
WeightList Weights
Individual successor weights.
Definition: BlockFrequencyInfoImpl.h:387
DOTGraphTraits.h
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size
unsigned size() const
Definition: DenseMap.h:100
llvm::BFIDOTGraphTraitsBase
Definition: BlockFrequencyInfoImpl.h:1813
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::BlockFrequencyInfoImpl::getBPI
const BranchProbabilityInfoT & getBPI() const
Definition: BlockFrequencyInfoImpl.h:1054
llvm::BlockFrequencyInfoImplBase::BlockNode::operator<=
bool operator<=(const BlockNode &X) const
Definition: BlockFrequencyInfoImpl.h:201
llvm::BlockFrequencyInfoImpl::getBlockFreq
BlockFrequency getBlockFreq(const BlockT *BB) const
Definition: BlockFrequencyInfoImpl.h:1019
llvm::BlockFrequencyInfoImplBase::setBlockFreq
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
Definition: BlockFrequencyInfoImpl.cpp:629
llvm::bfi_detail::BlockMass::BlockMass
BlockMass()=default
llvm::BlockFrequencyInfoImpl::verifyMatch
void verifyMatch(BlockFrequencyInfoImpl< BT > &Other) const
Definition: BlockFrequencyInfoImpl.h:1753
Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: SyntheticCountsPropagation.cpp:42
llvm::BlockFrequencyInfoImplBase::LoopData::members
iterator_range< NodeList::const_iterator > members() const
Definition: BlockFrequencyInfoImpl.h:272
llvm::BlockFrequencyInfoImplBase::Weight
Unscaled probability weight.
Definition: BlockFrequencyInfoImpl.h:365
llvm::BlockFrequencyInfoImplBase::LoopData::Scale
Scaled64 Scale
Definition: BlockFrequencyInfoImpl.h:235
llvm::BlockFrequencyInfoImplBase::LoopData::Nodes
NodeList Nodes
Header and the members of the loop.
Definition: BlockFrequencyInfoImpl.h:232
llvm::CallbackVH
Value handle with callbacks on RAUW and destruction.
Definition: ValueHandle.h:383
llvm::bfi_detail::BlockMass::print
raw_ostream & print(raw_ostream &OS) const
Definition: BlockFrequencyInfoImpl.cpp:82
llvm::GVDT_Count
@ GVDT_Count
Definition: BlockFrequencyInfoImpl.h:1810
llvm::BlockFrequencyInfoImplBase::Freqs
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
Definition: BlockFrequencyInfoImpl.h:421
llvm::bfi_detail::BlockMass::getMass
uint64_t getMass() const
Definition: BlockFrequencyInfoImpl.h:104
llvm::BlockFrequencyInfoImplBase::IsIrrLoopHeader
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
Definition: BlockFrequencyInfoImpl.h:425
llvm::BlockFrequencyInfoImplBase::WorkingData::getPackagedLoop
LoopData * getPackagedLoop() const
Definition: BlockFrequencyInfoImpl.h:318
PostOrderIterator.h
llvm::BlockFrequencyInfoImplBase::LoopData::BackedgeMass
HeaderMassList BackedgeMass
Mass returned to each loop header.
Definition: BlockFrequencyInfoImpl.h:233
SmallVector.h
llvm::bfi_detail::BlockEdgesAdder::BlockEdgesAdder
BlockEdgesAdder(const BlockFrequencyInfoImpl< BT > &BFI)
Definition: BlockFrequencyInfoImpl.h:1657
llvm::GVDT_None
@ GVDT_None
Definition: BlockFrequencyInfoImpl.h:1810
BT
BitTracker BT
Definition: BitTracker.cpp:73
llvm::IterativeBFIMaxIterationsPerBlock
llvm::cl::opt< unsigned > IterativeBFIMaxIterationsPerBlock
llvm::BlockFrequencyInfoImplBase::Distribution::Distribution
Distribution()=default
N
#define N
llvm::AssertingVH
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:260
llvm::bfi_detail::getBlockName
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Definition: BlockFrequencyInfoImpl.h:575
llvm::BFIDOTGraphTraitsBase::getGraphName
static StringRef getGraphName(const BlockFrequencyInfoT *G)
Definition: BlockFrequencyInfoImpl.h:1824
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::iterator_range
A range adaptor for a pair of iterators.
Definition: iterator_range.h:30
llvm::BlockFrequencyInfoImplBase::BlockNode::operator>=
bool operator>=(const BlockNode &X) const
Definition: BlockFrequencyInfoImpl.h:202
llvm::bfi_detail::IrreducibleGraph::IrrNode::Node
BlockNode Node
Definition: BlockFrequencyInfoImpl.h:609
llvm::BlockFrequencyInfoImpl
Shared implementation for block frequency analysis.
Definition: BlockFrequencyInfo.h:31
llvm::DefaultDOTGraphTraits
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Definition: DOTGraphTraits.h:28
llvm::BlockFrequencyInfoImplBase::getBlockName
virtual std::string getBlockName(const BlockNode &Node) const
Definition: BlockFrequencyInfoImpl.cpp:637
llvm::BlockFrequencyInfoImplBase::LoopData::getHeaderIndex
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
Definition: BlockFrequencyInfoImpl.h:259
llvm::BlockFrequencyInfoImplBase::BlockNode::operator==
bool operator==(const BlockNode &X) const
Definition: BlockFrequencyInfoImpl.h:199
llvm::getWeightFromBranchProb
uint32_t getWeightFromBranchProb(const BranchProbability Prob)
Definition: BlockFrequencyInfoImpl.h:1694
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::BlockFrequencyInfoImplBase::isIrrLoopHeader
bool isIrrLoopHeader(const BlockNode &Node)
Definition: BlockFrequencyInfoImpl.cpp:616
llvm::GraphTraits::NodeRef
typename GraphType::UnknownGraphTypeError NodeRef
Definition: GraphTraits.h:78
llvm::GraphTraits
Definition: GraphTraits.h:35
llvm::BlockFrequencyInfoImplBase::LoopData
Data about a loop.
Definition: BlockFrequencyInfoImpl.h:223
llvm::bfi_detail::IrreducibleGraph::indexNodes
void indexNodes()
Definition: BlockFrequencyInfoImpl.cpp:677
llvm::bfi_detail::IrreducibleGraph::Nodes
std::vector< IrrNode > Nodes
Definition: BlockFrequencyInfoImpl.h:624
llvm::UseIterativeBFIInference
llvm::cl::opt< bool > UseIterativeBFIInference
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:255
raw_ostream.h
llvm::bfi_detail::IrreducibleGraph::Lookup
SmallDenseMap< uint32_t, IrrNode *, 4 > Lookup
Definition: BlockFrequencyInfoImpl.h:625
copy
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same we currently get code like const It could be done with a smaller encoding like local tee $pop5 local copy
Definition: README.txt:101
llvm::pdb::PDB_SymType::Block
@ Block
llvm::BlockFrequencyInfoImplBase::Distribution::addExit
void addExit(const BlockNode &Node, uint64_t Amount)
Definition: BlockFrequencyInfoImpl.h:397
llvm::BlockFrequencyInfoImplBase::BlockNode
Representative of a block.
Definition: BlockFrequencyInfoImpl.h:191
llvm::BlockFrequencyInfoImplBase::Distribution::Total
uint64_t Total
Sum of all weights.
Definition: BlockFrequencyInfoImpl.h:388
Debug.h
llvm::bfi_detail::IrreducibleGraph::IrrNode::Edges
std::deque< const IrrNode * > Edges
Definition: BlockFrequencyInfoImpl.h:611
llvm::bfi_detail::IrreducibleGraph::IrrNode::succ_begin
iterator succ_begin() const
Definition: BlockFrequencyInfoImpl.h:618
llvm::BlockFrequencyInfoImplBase::addToDist
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
Definition: BlockFrequencyInfoImpl.cpp:312
llvm::BlockFrequencyInfoImplBase::analyzeIrreducible
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
Definition: BlockFrequencyInfoImpl.cpp:805
llvm::BlockFrequencyInfoImplBase::BlockNode::BlockNode
BlockNode(IndexType Index)
Definition: BlockFrequencyInfoImpl.h:197
llvm::bfi_detail::BlockMass::BlockMass
BlockMass(uint64_t Mass)
Definition: BlockFrequencyInfoImpl.h:96
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1184
llvm::BFIDOTGraphTraitsBase::MaxFrequency
uint64_t MaxFrequency
Definition: BlockFrequencyInfoImpl.h:1819
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773
llvm::BlockFrequencyInfoImplBase::LoopData::IsPackaged
bool IsPackaged
Whether this has been packaged.
Definition: BlockFrequencyInfoImpl.h:229
llvm::GVDAGType
GVDAGType
Definition: BlockFrequencyInfoImpl.h:1810