//===- CodeLayout.cpp - Implementation of code layout algorithms ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// ExtTSP - layout of basic blocks with i-cache optimization.
//
// The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
// optimizing jump locality and thus processor I-cache utilization. This is
// achieved via increasing the number of fall-through jumps and co-locating
// frequently executed nodes together. The name follows the underlying
// optimization problem, Extended-TSP, which is a generalization of the
// classical (maximum) Traveling Salesman Problem.
//
// The algorithm is a greedy heuristic that works with chains (ordered lists)
// of basic blocks. Initially all chains are isolated basic blocks. On every
// iteration, we pick a pair of chains whose merging yields the biggest increase
// in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
// A pair of chains giving the maximum gain is merged into a new chain. The
// procedure stops when there is only one chain left, or when merging does not
// increase ExtTSP. In the latter case, the remaining chains are sorted by
// density in decreasing order.
//
// An important aspect is the way two chains are merged. Unlike earlier
// algorithms (e.g., based on the approach of Pettis-Hansen), two
// chains, X and Y, are first split into three, X1, X2, and Y. Then we
// consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y,
// X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score.
// This improves the quality of the final result (the search space is larger)
// while keeping the implementation sufficiently fast.
//
// Reference:
// * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
//   IEEE Transactions on Computers, 2020
//   https://arxiv.org/abs/1809.04676
//
//===----------------------------------------------------------------------===//
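
// In rough terms, the objective maximized by the algorithm below is the
// following (a sketch of the model, not the precise implementation; the exact
// weights and distance caps are the tunable cl::opt parameters declared in
// this file):
//
//   score = sum over jumps (s, t) of
//             count(s, t) * weight(jump kind) * max(0, 1 - dist(s, t) / maxDist)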

#include "llvm/Transforms/Utils/CodeLayout.h"
#include "llvm/Support/CommandLine.h"

#include <cmath>

using namespace llvm;
#define DEBUG_TYPE "code-layout"

cl::opt<bool> EnableExtTspBlockPlacement(
    "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
    cl::desc("Enable machine block placement based on the ext-tsp model, "
             "optimizing I-cache utilization."));

cl::opt<bool> ApplyExtTspWithoutProfile(
    "ext-tsp-apply-without-profile",
    cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
    cl::init(true), cl::Hidden);

// Algorithm-specific params. The values are tuned for the best performance
// of large-scale front-end bound binaries.
static cl::opt<double> ForwardWeightCond(
    "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
    cl::desc("The weight of conditional forward jumps for ExtTSP value"));

static cl::opt<double> ForwardWeightUncond(
    "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
    cl::desc("The weight of unconditional forward jumps for ExtTSP value"));

static cl::opt<double> BackwardWeightCond(
    "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
    cl::desc("The weight of conditional backward jumps for ExtTSP value"));

static cl::opt<double> BackwardWeightUncond(
    "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
    cl::desc("The weight of unconditional backward jumps for ExtTSP value"));

static cl::opt<double> FallthroughWeightCond(
    "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
    cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));

static cl::opt<double> FallthroughWeightUncond(
    "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05),
    cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));

static cl::opt<unsigned> ForwardDistance(
    "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024),
    cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));

static cl::opt<unsigned> BackwardDistance(
    "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640),
    cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));

// The maximum size of a chain created by the algorithm. The size is bounded
// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
    MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
                 cl::desc("The maximum size of a chain to create."));

// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worse run-time.
static cl::opt<unsigned> ChainSplitThreshold(
    "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
    cl::desc("The maximum size of a chain to apply splitting"));

// The option enables splitting (large) chains along in-coming and out-going
// jumps. This typically results in a better quality.
static cl::opt<bool> EnableChainSplitAlongJumps(
    "ext-tsp-enable-chain-split-along-jumps", cl::ReallyHidden, cl::init(true),
    cl::desc("Whether to enable splitting chains along in-coming and "
             "out-going jumps"));

namespace {

// Epsilon for comparison of doubles.
constexpr double EPS = 1e-8;

// Compute the Ext-TSP score for a given jump.
double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count,
                       double Weight) {
  if (JumpDist > JumpMaxDist)
    return 0;
  double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist;
  return Weight * Prob * Count;
}
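
// For instance (illustrative numbers, assuming the default parameters above):
// a conditional forward jump of 512 bytes executed 100 times contributes
//   jumpExtTSPScore(512, 1024, 100, 0.1) = 0.1 * (1 - 512.0/1024) * 100 = 5.0,
// whereas the same jump laid out as a fallthrough (distance 0) contributes
//   jumpExtTSPScore(0, 1, 100, 1.0) = 1.0 * 1.0 * 100 = 100.0.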

// Compute the Ext-TSP score for a jump between a given pair of blocks,
// using their sizes, (estimated) addresses and the jump execution count.
double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
                   uint64_t Count, bool IsConditional) {
  // Fallthrough
  if (SrcAddr + SrcSize == DstAddr) {
    return jumpExtTSPScore(0, 1, Count,
                           IsConditional ? FallthroughWeightCond
                                         : FallthroughWeightUncond);
  }
  // Forward
  if (SrcAddr + SrcSize < DstAddr) {
    const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);
    return jumpExtTSPScore(Dist, ForwardDistance, Count,
                           IsConditional ? ForwardWeightCond
                                         : ForwardWeightUncond);
  }
  // Backward
  const uint64_t Dist = SrcAddr + SrcSize - DstAddr;
  return jumpExtTSPScore(Dist, BackwardDistance, Count,
                         IsConditional ? BackwardWeightCond
                                       : BackwardWeightUncond);
}

/// A type of merging two chains, X and Y. The former chain is split into
/// X1 and X2 and then concatenated with Y in the order specified by the type.
enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
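
// For example, splitting X = (x1, x2, x3, x4) at offset 2 gives X1 = (x1, x2)
// and X2 = (x3, x4); merging with Y = (y1, y2) yields the orders:
//   X_Y     : x1 x2 x3 x4 y1 y2   (plain concatenation, no split)
//   X1_Y_X2 : x1 x2 y1 y2 x3 x4
//   Y_X2_X1 : y1 y2 x3 x4 x1 x2
//   X2_X1_Y : x3 x4 x1 x2 y1 y2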

/// The gain of merging two chains, that is, the Ext-TSP score of the merge
/// together with the corresponding merge 'type' and 'offset'.
class MergeGainTy {
public:
  explicit MergeGainTy() = default;
  explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType)
      : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}

  double score() const { return Score; }

  size_t mergeOffset() const { return MergeOffset; }

  MergeTypeTy mergeType() const { return MergeType; }

  // Returns 'true' iff Other is preferred over this.
  bool operator<(const MergeGainTy &Other) const {
    return (Other.Score > EPS && Other.Score > Score + EPS);
  }

  // Update the current gain if Other is preferred over this.
  void updateIfLessThan(const MergeGainTy &Other) {
    if (*this < Other)
      *this = Other;
  }

private:
  double Score{-1.0};
  size_t MergeOffset{0};
  MergeTypeTy MergeType{MergeTypeTy::X_Y};
};

class Jump;
class Chain;
class ChainEdge;

/// A node in the graph, typically corresponding to a basic block in CFG.
class Block {
public:
  Block(const Block &) = delete;
  Block(Block &&) = default;
  Block &operator=(const Block &) = delete;
  Block &operator=(Block &&) = default;

  // The original index of the block in CFG.
  size_t Index{0};
  // The index of the block in the current chain.
  size_t CurIndex{0};
  // Size of the block in the binary.
  uint64_t Size{0};
  // Execution count of the block in the profile data.
  uint64_t ExecutionCount{0};
  // Current chain of the node.
  Chain *CurChain{nullptr};
  // An offset of the block in the current chain.
  mutable uint64_t EstimatedAddr{0};
  // Forced successor of the block in CFG.
  Block *ForcedSucc{nullptr};
  // Forced predecessor of the block in CFG.
  Block *ForcedPred{nullptr};
  // Outgoing jumps from the block.
  std::vector<Jump *> OutJumps;
  // Incoming jumps to the block.
  std::vector<Jump *> InJumps;

public:
  explicit Block(size_t Index, uint64_t Size, uint64_t EC)
      : Index(Index), Size(Size), ExecutionCount(EC) {}
  bool isEntry() const { return Index == 0; }
};

/// An arc in the graph, typically corresponding to a jump between two blocks.
class Jump {
public:
  Jump(const Jump &) = delete;
  Jump(Jump &&) = default;
  Jump &operator=(const Jump &) = delete;
  Jump &operator=(Jump &&) = default;

  // Source block of the jump.
  Block *Source;
  // Target block of the jump.
  Block *Target;
  // Execution count of the arc in the profile data.
  uint64_t ExecutionCount{0};
  // Whether the jump corresponds to a conditional branch.
  bool IsConditional{false};

public:
  explicit Jump(Block *Source, Block *Target, uint64_t ExecutionCount)
      : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
};

/// A chain (ordered sequence) of blocks.
class Chain {
public:
  Chain(const Chain &) = delete;
  Chain(Chain &&) = default;
  Chain &operator=(const Chain &) = delete;
  Chain &operator=(Chain &&) = default;

  explicit Chain(uint64_t Id, Block *Block)
      : Id(Id), Score(0), Blocks(1, Block) {}

  uint64_t id() const { return Id; }

  bool isEntry() const { return Blocks[0]->Index == 0; }

  bool isCold() const {
    for (auto *Block : Blocks) {
      if (Block->ExecutionCount > 0)
        return false;
    }
    return true;
  }

  double score() const { return Score; }

  void setScore(double NewScore) { Score = NewScore; }

  const std::vector<Block *> &blocks() const { return Blocks; }

  size_t numBlocks() const { return Blocks.size(); }

  const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
    return Edges;
  }

  ChainEdge *getEdge(Chain *Other) const {
    for (auto It : Edges) {
      if (It.first == Other)
        return It.second;
    }
    return nullptr;
  }

  void removeEdge(Chain *Other) {
    auto It = Edges.begin();
    while (It != Edges.end()) {
      if (It->first == Other) {
        Edges.erase(It);
        return;
      }
      It++;
    }
  }

  void addEdge(Chain *Other, ChainEdge *Edge) {
    Edges.push_back(std::make_pair(Other, Edge));
  }

  void merge(Chain *Other, const std::vector<Block *> &MergedBlocks) {
    Blocks = MergedBlocks;
    // Update the block's chains
    for (size_t Idx = 0; Idx < Blocks.size(); Idx++) {
      Blocks[Idx]->CurChain = this;
      Blocks[Idx]->CurIndex = Idx;
    }
  }

  void mergeEdges(Chain *Other);

  void clear() {
    Blocks.clear();
    Blocks.shrink_to_fit();
    Edges.clear();
    Edges.shrink_to_fit();
  }

private:
  // Unique chain identifier.
  uint64_t Id;
  // Cached ext-tsp score for the chain.
  double Score;
  // Blocks of the chain.
  std::vector<Block *> Blocks;
  // Adjacent chains and corresponding edges (lists of jumps).
  std::vector<std::pair<Chain *, ChainEdge *>> Edges;
};

/// An edge in CFG representing jumps between two chains.
/// When blocks are merged into chains, the edges are combined too so that
/// there is always at most one edge between a pair of chains.
class ChainEdge {
public:
  ChainEdge(const ChainEdge &) = delete;
  ChainEdge(ChainEdge &&) = default;
  ChainEdge &operator=(const ChainEdge &) = delete;
  ChainEdge &operator=(ChainEdge &&) = default;

  explicit ChainEdge(Jump *Jump)
      : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
        Jumps(1, Jump) {}

  const std::vector<Jump *> &jumps() const { return Jumps; }

  void changeEndpoint(Chain *From, Chain *To) {
    if (From == SrcChain)
      SrcChain = To;
    if (From == DstChain)
      DstChain = To;
  }

  void appendJump(Jump *Jump) { Jumps.push_back(Jump); }

  void moveJumps(ChainEdge *Other) {
    Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
    Other->Jumps.clear();
    Other->Jumps.shrink_to_fit();
  }

  bool hasCachedMergeGain(Chain *Src, Chain *Dst) const {
    return Src == SrcChain ? CacheValidForward : CacheValidBackward;
  }

  MergeGainTy getCachedMergeGain(Chain *Src, Chain *Dst) const {
    return Src == SrcChain ? CachedGainForward : CachedGainBackward;
  }

  void setCachedMergeGain(Chain *Src, Chain *Dst, MergeGainTy MergeGain) {
    if (Src == SrcChain) {
      CachedGainForward = MergeGain;
      CacheValidForward = true;
    } else {
      CachedGainBackward = MergeGain;
      CacheValidBackward = true;
    }
  }

  void invalidateCache() {
    CacheValidForward = false;
    CacheValidBackward = false;
  }

private:
  // Source chain.
  Chain *SrcChain{nullptr};
  // Destination chain.
  Chain *DstChain{nullptr};
  // Original jumps in the binary with corresponding execution counts.
  std::vector<Jump *> Jumps;
  // Cached ext-tsp value for merging the pair of chains.
  // Since the gain of merging (Src, Dst) and (Dst, Src) might be different,
  // we store both values here.
  MergeGainTy CachedGainForward;
  MergeGainTy CachedGainBackward;
  // Whether the cached value must be recomputed.
  bool CacheValidForward{false};
  bool CacheValidBackward{false};
};

void Chain::mergeEdges(Chain *Other) {
  assert(this != Other && "cannot merge a chain with itself");

  // Update edges adjacent to chain Other
  for (auto EdgeIt : Other->Edges) {
    Chain *DstChain = EdgeIt.first;
    ChainEdge *DstEdge = EdgeIt.second;
    Chain *TargetChain = DstChain == Other ? this : DstChain;
    ChainEdge *CurEdge = getEdge(TargetChain);
    if (CurEdge == nullptr) {
      DstEdge->changeEndpoint(Other, this);
      this->addEdge(TargetChain, DstEdge);
      if (DstChain != this && DstChain != Other) {
        DstChain->addEdge(this, DstEdge);
      }
    } else {
      CurEdge->moveJumps(DstEdge);
    }
    // Cleanup leftover edge
    if (DstChain != Other) {
      DstChain->removeEdge(Other);
    }
  }
}

using BlockIter = std::vector<Block *>::const_iterator;

/// A wrapper around three chains of blocks; it is used to avoid extra
/// instantiation of the vectors.
class MergedChain {
public:
  MergedChain(BlockIter Begin1, BlockIter End1, BlockIter Begin2 = BlockIter(),
              BlockIter End2 = BlockIter(), BlockIter Begin3 = BlockIter(),
              BlockIter End3 = BlockIter())
      : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
        End3(End3) {}

  template <typename F> void forEach(const F &Func) const {
    for (auto It = Begin1; It != End1; It++)
      Func(*It);
    for (auto It = Begin2; It != End2; It++)
      Func(*It);
    for (auto It = Begin3; It != End3; It++)
      Func(*It);
  }

  std::vector<Block *> getBlocks() const {
    std::vector<Block *> Result;
    Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
                   std::distance(Begin3, End3));
    Result.insert(Result.end(), Begin1, End1);
    Result.insert(Result.end(), Begin2, End2);
    Result.insert(Result.end(), Begin3, End3);
    return Result;
  }

  const Block *getFirstBlock() const { return *Begin1; }

private:
  BlockIter Begin1;
  BlockIter End1;
  BlockIter Begin2;
  BlockIter End2;
  BlockIter Begin3;
  BlockIter End3;
};
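
// For example (an illustrative use, mirroring mergeBlocks below): merging
// X = {x1, x2, x3} with Y = {y1} at offset 1 using X1_Y_X2 can be expressed
// without materializing a new vector:
//
//   MergedChain(X.begin(), X.begin() + 1, Y.begin(), Y.end(),
//               X.begin() + 1, X.end());
//
// forEach(Func) then visits x1, y1, x2, x3 in that order.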

/// The implementation of the ExtTSP algorithm.
class ExtTSPImpl {
  using EdgeT = std::pair<uint64_t, uint64_t>;
  using EdgeCountMap = std::vector<std::pair<EdgeT, uint64_t>>;

public:
  ExtTSPImpl(size_t NumNodes, const std::vector<uint64_t> &NodeSizes,
             const std::vector<uint64_t> &NodeCounts,
             const EdgeCountMap &EdgeCounts)
      : NumNodes(NumNodes) {
    initialize(NodeSizes, NodeCounts, EdgeCounts);
  }

  /// Run the algorithm and return an optimized ordering of blocks.
  void run(std::vector<uint64_t> &Result) {
    // Pass 1: Merge blocks with their mutually forced successors
    mergeForcedPairs();

    // Pass 2: Merge pairs of chains while improving the ExtTSP objective
    mergeChainPairs();

    // Pass 3: Merge cold blocks to reduce code size
    mergeColdChains();

    // Collect blocks from all chains
    concatChains(Result);
  }

private:
  /// Initialize the algorithm's data structures.
  void initialize(const std::vector<uint64_t> &NodeSizes,
                  const std::vector<uint64_t> &NodeCounts,
                  const EdgeCountMap &EdgeCounts) {
    // Initialize blocks
    AllBlocks.reserve(NumNodes);
    for (uint64_t Node = 0; Node < NumNodes; Node++) {
      uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
      uint64_t ExecutionCount = NodeCounts[Node];
      // The execution count of the entry block is set to at least 1
      if (Node == 0 && ExecutionCount == 0)
        ExecutionCount = 1;
      AllBlocks.emplace_back(Node, Size, ExecutionCount);
    }

    // Initialize jumps between blocks
    SuccNodes.resize(NumNodes);
    PredNodes.resize(NumNodes);
    std::vector<uint64_t> OutDegree(NumNodes, 0);
    AllJumps.reserve(EdgeCounts.size());
    for (auto It : EdgeCounts) {
      auto Pred = It.first.first;
      auto Succ = It.first.second;
      OutDegree[Pred]++;
      // Ignore self-edges
      if (Pred == Succ)
        continue;

      SuccNodes[Pred].push_back(Succ);
      PredNodes[Succ].push_back(Pred);
      auto ExecutionCount = It.second;
      if (ExecutionCount > 0) {
        auto &Block = AllBlocks[Pred];
        auto &SuccBlock = AllBlocks[Succ];
        AllJumps.emplace_back(&Block, &SuccBlock, ExecutionCount);
        SuccBlock.InJumps.push_back(&AllJumps.back());
        Block.OutJumps.push_back(&AllJumps.back());
      }
    }
    for (auto &Jump : AllJumps) {
      assert(OutDegree[Jump.Source->Index] > 0);
      Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
    }

    // Initialize chains
    AllChains.reserve(NumNodes);
    HotChains.reserve(NumNodes);
    for (Block &Block : AllBlocks) {
      AllChains.emplace_back(Block.Index, &Block);
      Block.CurChain = &AllChains.back();
      if (Block.ExecutionCount > 0) {
        HotChains.push_back(&AllChains.back());
      }
    }

    // Initialize chain edges
    AllEdges.reserve(AllJumps.size());
    for (Block &Block : AllBlocks) {
      for (auto &Jump : Block.OutJumps) {
        auto SuccBlock = Jump->Target;
        ChainEdge *CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
        // This edge is already present in the graph
        if (CurEdge != nullptr) {
          assert(SuccBlock->CurChain->getEdge(Block.CurChain) != nullptr);
          CurEdge->appendJump(Jump);
          continue;
        }
        // This is a new edge
        AllEdges.emplace_back(Jump);
        Block.CurChain->addEdge(SuccBlock->CurChain, &AllEdges.back());
        SuccBlock->CurChain->addEdge(Block.CurChain, &AllEdges.back());
      }
    }
  }

  /// For a pair of blocks, A and B, block B is the forced successor of A,
  /// if (i) all jumps (based on profile) from A go to B and (ii) all jumps
  /// to B are from A. Such blocks should be adjacent in the optimal ordering;
  /// the method finds and merges such pairs of blocks.
  void mergeForcedPairs() {
    // Find fallthroughs based on edge weights
    for (auto &Block : AllBlocks) {
      if (SuccNodes[Block.Index].size() == 1 &&
          PredNodes[SuccNodes[Block.Index][0]].size() == 1 &&
          SuccNodes[Block.Index][0] != 0) {
        size_t SuccIndex = SuccNodes[Block.Index][0];
        Block.ForcedSucc = &AllBlocks[SuccIndex];
        AllBlocks[SuccIndex].ForcedPred = &Block;
      }
    }

    // There might be 'cycles' in the forced dependencies, since profile
    // data isn't 100% accurate. Typically this is observed in loops, when the
    // loop edges are the hottest successors for the basic blocks of the loop.
    // Break the cycles by choosing the block with the smallest index as the
    // head. This helps to keep the original order of the loops, which likely
    // have already been rotated in the optimized manner.
    for (auto &Block : AllBlocks) {
      if (Block.ForcedSucc == nullptr || Block.ForcedPred == nullptr)
        continue;

      auto SuccBlock = Block.ForcedSucc;
      while (SuccBlock != nullptr && SuccBlock != &Block) {
        SuccBlock = SuccBlock->ForcedSucc;
      }
      if (SuccBlock == nullptr)
        continue;
      // Break the cycle
      AllBlocks[Block.ForcedPred->Index].ForcedSucc = nullptr;
      Block.ForcedPred = nullptr;
    }

    // Merge blocks with their fallthrough successors
    for (auto &Block : AllBlocks) {
      if (Block.ForcedPred == nullptr && Block.ForcedSucc != nullptr) {
        auto CurBlock = &Block;
        while (CurBlock->ForcedSucc != nullptr) {
          const auto NextBlock = CurBlock->ForcedSucc;
          mergeChains(Block.CurChain, NextBlock->CurChain, 0, MergeTypeTy::X_Y);
          CurBlock = NextBlock;
        }
      }
    }
  }
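
  // For instance (an illustrative profile): if block 1 jumps only to block 2
  // and block 2 jumps only back to block 1, the two forced dependencies form
  // a cycle; it is broken at the smallest-index block, so the pair is laid
  // out as (1, 2), preserving the original loop order.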

  /// Merge pairs of chains while improving the ExtTSP objective.
  void mergeChainPairs() {
    /// Deterministically compare pairs of chains
    auto compareChainPairs = [](const Chain *A1, const Chain *B1,
                                const Chain *A2, const Chain *B2) {
      if (A1 != A2)
        return A1->id() < A2->id();
      return B1->id() < B2->id();
    };

    while (HotChains.size() > 1) {
      Chain *BestChainPred = nullptr;
      Chain *BestChainSucc = nullptr;
      auto BestGain = MergeGainTy();
      // Iterate over all pairs of chains
      for (Chain *ChainPred : HotChains) {
        // Get candidates for merging with the current chain
        for (auto EdgeIter : ChainPred->edges()) {
          Chain *ChainSucc = EdgeIter.first;
          class ChainEdge *ChainEdge = EdgeIter.second;
          // Ignore loop edges
          if (ChainPred == ChainSucc)
            continue;

          // Skip the pair if the combined chain violates the maximum allowed
          // size
          if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
            continue;

          // Compute the gain of merging the two chains
          MergeGainTy CurGain =
              getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
          if (CurGain.score() <= EPS)
            continue;

          if (BestGain < CurGain ||
              (std::abs(CurGain.score() - BestGain.score()) < EPS &&
               compareChainPairs(ChainPred, ChainSucc, BestChainPred,
                                 BestChainSucc))) {
            BestGain = CurGain;
            BestChainPred = ChainPred;
            BestChainSucc = ChainSucc;
          }
        }
      }

      // Stop merging when there is no improvement
      if (BestGain.score() <= EPS)
        break;

      // Merge the best pair of chains
      mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
                  BestGain.mergeType());
    }
  }

  /// Merge the remaining blocks into chains w/o taking jump counts into
  /// consideration. This allows maintaining the original block order in the
  /// absence of profile data.
  void mergeColdChains() {
    for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
      // Iterating in reverse order to make sure original fallthrough jumps are
      // merged first; this might be beneficial for code size.
      size_t NumSuccs = SuccNodes[SrcBB].size();
      for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
        auto DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
        auto SrcChain = AllBlocks[SrcBB].CurChain;
        auto DstChain = AllBlocks[DstBB].CurChain;
        if (SrcChain != DstChain && !DstChain->isEntry() &&
            SrcChain->blocks().back()->Index == SrcBB &&
            DstChain->blocks().front()->Index == DstBB &&
            SrcChain->isCold() == DstChain->isCold()) {
          mergeChains(SrcChain, DstChain, 0, MergeTypeTy::X_Y);
        }
      }
    }
  }
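
  // For example (an illustrative scenario): if the original code contains a
  // never-executed fallthrough SrcBB -> DstBB, where SrcBB ends one cold
  // chain and DstBB starts another, the two chains are concatenated, keeping
  // the original order and the fallthrough.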

  /// Compute the Ext-TSP score for a given block order and a list of jumps.
  double extTSPScore(const MergedChain &MergedBlocks,
                     const std::vector<Jump *> &Jumps) const {
    if (Jumps.empty())
      return 0.0;
    uint64_t CurAddr = 0;
    MergedBlocks.forEach([&](const Block *BB) {
      BB->EstimatedAddr = CurAddr;
      CurAddr += BB->Size;
    });

    double Score = 0;
    for (auto &Jump : Jumps) {
      const Block *SrcBlock = Jump->Source;
      const Block *DstBlock = Jump->Target;
      Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
                             DstBlock->EstimatedAddr, Jump->ExecutionCount,
                             Jump->IsConditional);
    }
    return Score;
  }

  /// Compute the gain of merging two chains.
  ///
  /// The function considers all possible ways of merging two chains and
  /// computes the one having the largest increase in ExtTSP objective. The
  /// result is the gain together with the corresponding merge 'type' and
  /// 'offset'.
  MergeGainTy getBestMergeGain(Chain *ChainPred, Chain *ChainSucc,
                               ChainEdge *Edge) const {
    if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
      return Edge->getCachedMergeGain(ChainPred, ChainSucc);
    }

    // Precompute jumps between ChainPred and ChainSucc
    auto Jumps = Edge->jumps();
    ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
    if (EdgePP != nullptr) {
      Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
    }
    assert(!Jumps.empty() && "trying to merge chains w/o jumps");

    // The object holds the best currently chosen gain of merging the two
    // chains
    MergeGainTy Gain = MergeGainTy();

    /// Given a merge offset and a list of merge types, try to merge two chains
    /// and update Gain with a better alternative
    auto tryChainMerging = [&](size_t Offset,
                               const std::vector<MergeTypeTy> &MergeTypes) {
      // Skip merging corresponding to concatenation w/o splitting
      if (Offset == 0 || Offset == ChainPred->blocks().size())
        return;
      // Skip merging if it breaks Forced successors
      auto BB = ChainPred->blocks()[Offset - 1];
      if (BB->ForcedSucc != nullptr)
        return;
      // Apply the merge, compute the corresponding gain, and update the best
      // value, if the merge is beneficial
      for (const auto &MergeType : MergeTypes) {
        Gain.updateIfLessThan(
            computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
      }
    };

    // Try to concatenate two chains w/o splitting
    Gain.updateIfLessThan(
        computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeTy::X_Y));

    if (EnableChainSplitAlongJumps) {
      // Attach (a part of) ChainPred before the first block of ChainSucc
      for (auto &Jump : ChainSucc->blocks().front()->InJumps) {
        const auto SrcBlock = Jump->Source;
        if (SrcBlock->CurChain != ChainPred)
          continue;
        size_t Offset = SrcBlock->CurIndex + 1;
        tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::X2_X1_Y});
      }

      // Attach (a part of) ChainPred after the last block of ChainSucc
      for (auto &Jump : ChainSucc->blocks().back()->OutJumps) {
        const auto DstBlock = Jump->Target;
        if (DstBlock->CurChain != ChainPred)
          continue;
        size_t Offset = DstBlock->CurIndex;
        tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1});
      }
    }

    // Try to break ChainPred in various ways and concatenate with ChainSucc
    if (ChainPred->blocks().size() <= ChainSplitThreshold) {
      for (size_t Offset = 1; Offset < ChainPred->blocks().size(); Offset++) {
        // Try to split the chain in different ways. In practice, applying
        // X2_Y_X1 merging almost never provides benefits; thus, we exclude
        // it from consideration to reduce the search space
        tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1,
                                 MergeTypeTy::X2_X1_Y});
      }
    }
    Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
    return Gain;
  }

  /// Compute the score gain of merging two chains, respecting a given
  /// merge 'type' and 'offset'.
  ///
  /// The two chains are not modified in the method.
  MergeGainTy computeMergeGain(const Chain *ChainPred, const Chain *ChainSucc,
                               const std::vector<Jump *> &Jumps,
                               size_t MergeOffset,
                               MergeTypeTy MergeType) const {
    auto MergedBlocks = mergeBlocks(ChainPred->blocks(), ChainSucc->blocks(),
                                    MergeOffset, MergeType);

    // Do not allow a merge that does not preserve the original entry block
    if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
        !MergedBlocks.getFirstBlock()->isEntry())
      return MergeGainTy();

    // The gain for the new chain
    auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->score();
    return MergeGainTy(NewGainScore, MergeOffset, MergeType);
  }

  /// Merge two chains of blocks respecting a given merge 'type' and 'offset'.
  ///
  /// If MergeType is X_Y, then the result is a concatenation of the two
  /// chains. Otherwise, the first chain is cut into two sub-chains at the
  /// offset, and merged using all possible ways of concatenating three chains.
  MergedChain mergeBlocks(const std::vector<Block *> &X,
                          const std::vector<Block *> &Y, size_t MergeOffset,
                          MergeTypeTy MergeType) const {
    // Split the first chain, X, into X1 and X2
    BlockIter BeginX1 = X.begin();
    BlockIter EndX1 = X.begin() + MergeOffset;
    BlockIter BeginX2 = X.begin() + MergeOffset;
    BlockIter EndX2 = X.end();
    BlockIter BeginY = Y.begin();
    BlockIter EndY = Y.end();

    // Construct a new chain from the three existing ones
    switch (MergeType) {
    case MergeTypeTy::X_Y:
      return MergedChain(BeginX1, EndX2, BeginY, EndY);
    case MergeTypeTy::X1_Y_X2:
      return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
    case MergeTypeTy::Y_X2_X1:
      return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
    case MergeTypeTy::X2_X1_Y:
      return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
    }
    llvm_unreachable("unexpected chain merge type");
  }

  /// Merge chain From into chain Into, update the list of active chains,
  /// adjacency information, and the corresponding cached values.
  void mergeChains(Chain *Into, Chain *From, size_t MergeOffset,
                   MergeTypeTy MergeType) {
    assert(Into != From && "a chain cannot be merged with itself");

    // Merge the blocks
    MergedChain MergedBlocks =
        mergeBlocks(Into->blocks(), From->blocks(), MergeOffset, MergeType);
    Into->merge(From, MergedBlocks.getBlocks());
    Into->mergeEdges(From);
    From->clear();

    // Update cached ext-tsp score for the new chain
    ChainEdge *SelfEdge = Into->getEdge(Into);
    if (SelfEdge != nullptr) {
      MergedBlocks = MergedChain(Into->blocks().begin(), Into->blocks().end());
      Into->setScore(extTSPScore(MergedBlocks, SelfEdge->jumps()));
    }

    // Remove chain From from the list of active chains
    llvm::erase_value(HotChains, From);

    // Invalidate caches
    for (auto EdgeIter : Into->edges()) {
      EdgeIter.second->invalidateCache();
    }
  }

  /// Concatenate all chains into the final order of blocks.
  void concatChains(std::vector<uint64_t> &Order) {
    // Collect chains and calculate some stats for their sorting
    std::vector<Chain *> SortedChains;
    DenseMap<const Chain *, double> ChainDensity;
    for (auto &Chain : AllChains) {
      if (!Chain.blocks().empty()) {
        SortedChains.push_back(&Chain);
        // Using doubles to avoid overflow of ExecutionCount
        double Size = 0;
        double ExecutionCount = 0;
        for (auto *Block : Chain.blocks()) {
          Size += static_cast<double>(Block->Size);
          ExecutionCount += static_cast<double>(Block->ExecutionCount);
        }
        assert(Size > 0 && "a chain of zero size");
        ChainDensity[&Chain] = ExecutionCount / Size;
      }
    }

    // Sort chains by density in decreasing order
    std::stable_sort(SortedChains.begin(), SortedChains.end(),
                     [&](const Chain *C1, const Chain *C2) {
                       // Make sure the original entry block is at the
                       // beginning of the order
                       if (C1->isEntry() != C2->isEntry()) {
                         return C1->isEntry();
                       }

                       const double D1 = ChainDensity[C1];
                       const double D2 = ChainDensity[C2];
                       // Compare by density and break ties by chain identifiers
                       return (D1 != D2) ? (D1 > D2) : (C1->id() < C2->id());
                     });

    // Collect the blocks in the order specified by their chains
    Order.reserve(NumNodes);
    for (Chain *Chain : SortedChains) {
      for (Block *Block : Chain->blocks()) {
        Order.push_back(Block->Index);
      }
    }
  }
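
  // For example (illustrative numbers): a chain of total size 64 bytes with
  // total execution count 320 has density 5.0 and is placed before a chain
  // with density 2.5; chain ids only break exact ties, which keeps the order
  // deterministic.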

private:
  /// The number of nodes in the graph.
  const size_t NumNodes;

  /// Successors of each node.
  std::vector<std::vector<uint64_t>> SuccNodes;

  /// Predecessors of each node.
  std::vector<std::vector<uint64_t>> PredNodes;

  /// All basic blocks.
  std::vector<Block> AllBlocks;

  /// All jumps between blocks.
  std::vector<Jump> AllJumps;

  /// All chains of basic blocks.
  std::vector<Chain> AllChains;

  /// All edges between chains.
  std::vector<ChainEdge> AllEdges;

  /// Active chains. The vector gets updated at runtime when chains are merged.
  std::vector<Chain *> HotChains;
};

} // end of anonymous namespace

std::vector<uint64_t> llvm::applyExtTspLayout(
    const std::vector<uint64_t> &NodeSizes,
    const std::vector<uint64_t> &NodeCounts,
    const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
  size_t NumNodes = NodeSizes.size();

  // Verify correctness of the input data.
  assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
  assert(NumNodes > 2 && "Incorrect input");

  // Apply the reordering algorithm.
  auto Alg = ExtTSPImpl(NumNodes, NodeSizes, NodeCounts, EdgeCounts);
  std::vector<uint64_t> Result;
  Alg.run(Result);

  // Verify correctness of the output.
  assert(Result.front() == 0 && "Original entry point is not preserved");
  assert(Result.size() == NumNodes && "Incorrect size of reordered layout");
  return Result;
}
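
// A minimal usage sketch (hypothetical caller code, not part of this file):
// lay out a three-block CFG in which block 0 branches to a hot block 1 and a
// cold block 2.
//
//   std::vector<uint64_t> Sizes = {16, 32, 32};
//   std::vector<uint64_t> Counts = {100, 90, 10};
//   std::vector<std::pair<EdgeT, uint64_t>> Edges = {{{0, 1}, 90},
//                                                    {{0, 2}, 10}};
//   std::vector<uint64_t> Order = applyExtTspLayout(Sizes, Counts, Edges);
//   // Order.front() == 0 is guaranteed; the hot successor 1 is expected to
//   // follow block 0 immediately.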

double llvm::calcExtTspScore(
    const std::vector<uint64_t> &Order, const std::vector<uint64_t> &NodeSizes,
    const std::vector<uint64_t> &NodeCounts,
    const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
  // Estimate addresses of the blocks in memory
  std::vector<uint64_t> Addr(NodeSizes.size(), 0);
  for (size_t Idx = 1; Idx < Order.size(); Idx++) {
    Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
  }
  std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
  for (auto It : EdgeCounts) {
    auto Pred = It.first.first;
    OutDegree[Pred]++;
  }

  // Increase the score for each jump
  double Score = 0;
  for (auto It : EdgeCounts) {
    auto Pred = It.first.first;
    auto Succ = It.first.second;
    uint64_t Count = It.second;
    bool IsConditional = OutDegree[Pred] > 1;
    Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count,
                           IsConditional);
  }
  return Score;
}

double llvm::calcExtTspScore(
    const std::vector<uint64_t> &NodeSizes,
    const std::vector<uint64_t> &NodeCounts,
    const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
  std::vector<uint64_t> Order(NodeSizes.size());
  for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
    Order[Idx] = Idx;
  }
  return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
}
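
// The two overloads can be combined to measure how much a computed layout
// improves on the original order (a sketch, reusing the hypothetical
// Sizes/Counts/Edges/Order variables from the example above):
//
//   double Before = calcExtTspScore(Sizes, Counts, Edges);
//   double After = calcExtTspScore(Order, Sizes, Counts, Edges);
//   // After >= Before is expected whenever the profile is meaningful.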