LLVM  14.0.0git
DFAJumpThreading.cpp
Go to the documentation of this file.
1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Transform each threading path to effectively jump thread the DFA. For
10 // example, the CFG below could be transformed as follows, where the cloned
11 // blocks unconditionally branch to the next correct case based on what is
12 // identified in the analysis.
13 //
14 // sw.bb sw.bb
15 // / | \ / | \
16 // case1 case2 case3 case1 case2 case3
17 // \ | / | | |
18 // determinator det.2 det.3 det.1
19 // br sw.bb / | \
20 // sw.bb.2 sw.bb.3 sw.bb.1
21 // br case2 br case3 br case1ยง
22 //
23 // Definitions and Terminology:
24 //
25 // * Threading path:
26 // a list of basic blocks, the exit state, and the block that determines
27 // the next state, for which the following notation will be used:
28 // < path of BBs that form a cycle > [ state, determinator ]
29 //
30 // * Predictable switch:
31 // The switch variable is always a known constant so that all conditional
32 // jumps based on switch variable can be converted to unconditional jump.
33 //
34 // * Determinator:
35 // The basic block that determines the next state of the DFA.
36 //
37 // Representing the optimization in C-like pseudocode: the code pattern on the
38 // left could functionally be transformed to the right pattern if the switch
39 // condition is predictable.
40 //
41 // X = A goto A
42 // for (...) A:
43 // switch (X) ...
44 // case A goto B
45 // X = B B:
46 // case B ...
47 // X = C goto C
48 //
49 // The pass first checks that switch variable X is decided by the control flow
50 // path taken in the loop; for example, in case B, the next value of X is
51 // decided to be C. It then enumerates through all paths in the loop and labels
52 // the basic blocks where the next state is decided.
53 //
54 // Using this information it creates new paths that unconditionally branch to
55 // the next case. This involves cloning code, so it only gets triggered if the
56 // amount of code duplicated is below a threshold.
57 //
58 //===----------------------------------------------------------------------===//
59 
61 #include "llvm/ADT/APInt.h"
62 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/SmallSet.h"
65 #include "llvm/ADT/Statistic.h"
71 #include "llvm/IR/CFG.h"
72 #include "llvm/IR/Constants.h"
73 #include "llvm/IR/IntrinsicInst.h"
74 #include "llvm/IR/Verifier.h"
75 #include "llvm/InitializePasses.h"
76 #include "llvm/Pass.h"
78 #include "llvm/Support/Debug.h"
79 #include "llvm/Transforms/Scalar.h"
84 #include <algorithm>
85 #include <deque>
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "dfa-jump-threading"
90 
91 STATISTIC(NumTransforms, "Number of transformations done");
92 STATISTIC(NumCloned, "Number of blocks cloned");
93 STATISTIC(NumPaths, "Number of individual paths threaded");
94 
95 static cl::opt<bool>
96  ClViewCfgBefore("dfa-jump-view-cfg-before",
97  cl::desc("View the CFG before DFA Jump Threading"),
98  cl::Hidden, cl::init(false));
99 
101  "dfa-max-path-length",
102  cl::desc("Max number of blocks searched to find a threading path"),
103  cl::Hidden, cl::init(20));
104 
105 static cl::opt<unsigned>
106  CostThreshold("dfa-cost-threshold",
107  cl::desc("Maximum cost accepted for the transformation"),
108  cl::Hidden, cl::init(50));
109 
110 namespace {
111 
112 class SelectInstToUnfold {
113  SelectInst *SI;
114  PHINode *SIUse;
115 
116 public:
117  SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
118 
119  SelectInst *getInst() { return SI; }
120  PHINode *getUse() { return SIUse; }
121 
122  explicit operator bool() const { return SI && SIUse; }
123 };
124 
125 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
126  std::vector<SelectInstToUnfold> *NewSIsToUnfold,
127  std::vector<BasicBlock *> *NewBBs);
128 
129 class DFAJumpThreading {
130 public:
131  DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
133  : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
134 
135  bool run(Function &F);
136 
137 private:
138  void
139  unfoldSelectInstrs(DominatorTree *DT,
140  const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
143  for (SelectInstToUnfold SIToUnfold : SelectInsts)
144  Stack.push_back(SIToUnfold);
145 
146  while (!Stack.empty()) {
147  SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
148 
149  std::vector<SelectInstToUnfold> NewSIsToUnfold;
150  std::vector<BasicBlock *> NewBBs;
151  unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
152 
153  // Put newly discovered select instructions into the work list.
154  for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
155  Stack.push_back(NewSIToUnfold);
156  }
157  }
158 
159  AssumptionCache *AC;
160  DominatorTree *DT;
163 };
164 
165 class DFAJumpThreadingLegacyPass : public FunctionPass {
166 public:
167  static char ID; // Pass identification
168  DFAJumpThreadingLegacyPass() : FunctionPass(ID) {}
169 
170  void getAnalysisUsage(AnalysisUsage &AU) const override {
176  }
177 
178  bool runOnFunction(Function &F) override {
179  if (skipFunction(F))
180  return false;
181 
182  AssumptionCache *AC =
183  &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
184  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
186  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
188  &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
189 
190  return DFAJumpThreading(AC, DT, TTI, ORE).run(F);
191  }
192 };
193 } // end anonymous namespace
194 
196 INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
197  "DFA Jump Threading", false, false)
202 INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
203  "DFA Jump Threading", false, false)
204 
205 // Public interface to the DFA Jump Threading pass
207  return new DFAJumpThreadingLegacyPass();
208 }
209 
210 namespace {
211 
212 /// Create a new basic block and sink \p SIToSink into it.
213 void createBasicBlockAndSinkSelectInst(
214  DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink,
215  BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock,
216  BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold,
217  std::vector<BasicBlock *> *NewBBs) {
218  assert(SIToSink->hasOneUse());
219  assert(NewBlock);
220  assert(NewBranch);
221  *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName,
222  EndBlock->getParent(), EndBlock);
223  NewBBs->push_back(*NewBlock);
224  *NewBranch = BranchInst::Create(EndBlock, *NewBlock);
225  SIToSink->moveBefore(*NewBranch);
226  NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse));
227  DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}});
228 }
229 
230 /// Unfold the select instruction held in \p SIToUnfold by replacing it with
231 /// control flow.
232 ///
233 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
234 /// created basic blocks into \p NewBBs.
235 ///
236 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
237 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
238  std::vector<SelectInstToUnfold> *NewSIsToUnfold,
239  std::vector<BasicBlock *> *NewBBs) {
240  SelectInst *SI = SIToUnfold.getInst();
241  PHINode *SIUse = SIToUnfold.getUse();
242  BasicBlock *StartBlock = SI->getParent();
243  BasicBlock *EndBlock = SIUse->getParent();
244  BranchInst *StartBlockTerm =
245  dyn_cast<BranchInst>(StartBlock->getTerminator());
246 
247  assert(StartBlockTerm && StartBlockTerm->isUnconditional());
248  assert(SI->hasOneUse());
249 
250  // These are the new basic blocks for the conditional branch.
251  // At least one will become an actual new basic block.
252  BasicBlock *TrueBlock = nullptr;
253  BasicBlock *FalseBlock = nullptr;
254  BranchInst *TrueBranch = nullptr;
255  BranchInst *FalseBranch = nullptr;
256 
257  // Sink select instructions to be able to unfold them later.
258  if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) {
259  createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
260  "si.unfold.true", &TrueBlock, &TrueBranch,
261  NewSIsToUnfold, NewBBs);
262  }
263  if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) {
264  createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
265  "si.unfold.false", &FalseBlock,
266  &FalseBranch, NewSIsToUnfold, NewBBs);
267  }
268 
269  // If there was nothing to sink, then arbitrarily choose the 'false' side
270  // for a new input value to the PHI.
271  if (!TrueBlock && !FalseBlock) {
272  FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false",
273  EndBlock->getParent(), EndBlock);
274  NewBBs->push_back(FalseBlock);
275  BranchInst::Create(EndBlock, FalseBlock);
276  DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}});
277  }
278 
279  // Insert the real conditional branch based on the original condition.
280  // If we did not create a new block for one of the 'true' or 'false' paths
281  // of the condition, it means that side of the branch goes to the end block
282  // directly and the path originates from the start block from the point of
283  // view of the new PHI.
284  BasicBlock *TT = EndBlock;
285  BasicBlock *FT = EndBlock;
286  if (TrueBlock && FalseBlock) {
287  // A diamond.
288  TT = TrueBlock;
289  FT = FalseBlock;
290 
291  // Update the phi node of SI.
292  SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
293  SIUse->addIncoming(SI->getTrueValue(), TrueBlock);
294  SIUse->addIncoming(SI->getFalseValue(), FalseBlock);
295 
296  // Update any other PHI nodes in EndBlock.
297  for (PHINode &Phi : EndBlock->phis()) {
298  if (&Phi != SIUse) {
299  Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock);
300  Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock);
301  }
302  }
303  } else {
304  BasicBlock *NewBlock = nullptr;
305  Value *SIOp1 = SI->getTrueValue();
306  Value *SIOp2 = SI->getFalseValue();
307 
308  // A triangle pointing right.
309  if (!TrueBlock) {
310  NewBlock = FalseBlock;
311  FT = FalseBlock;
312  }
313  // A triangle pointing left.
314  else {
315  NewBlock = TrueBlock;
316  TT = TrueBlock;
317  std::swap(SIOp1, SIOp2);
318  }
319 
320  // Update the phi node of SI.
321  for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {
322  if (SIUse->getIncomingBlock(Idx) == StartBlock)
323  SIUse->setIncomingValue(Idx, SIOp1);
324  }
325  SIUse->addIncoming(SIOp2, NewBlock);
326 
327  // Update any other PHI nodes in EndBlock.
328  for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
329  ++II) {
330  if (Phi != SIUse)
331  Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock);
332  }
333  }
334  StartBlockTerm->eraseFromParent();
335  BranchInst::Create(TT, FT, SI->getCondition(), StartBlock);
336  DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
337  {DominatorTree::Insert, StartBlock, FT}});
338 
339  // The select is now dead.
340  SI->eraseFromParent();
341 }
342 
343 struct ClonedBlock {
344  BasicBlock *BB;
345  uint64_t State; ///< \p State corresponds to the next value of a switch stmnt.
346 };
347 
348 typedef std::deque<BasicBlock *> PathType;
349 typedef std::vector<PathType> PathsType;
350 typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
351 typedef std::vector<ClonedBlock> CloneList;
352 
353 // This data structure keeps track of all blocks that have been cloned. If two
354 // different ThreadingPaths clone the same block for a certain state it should
355 // be reused, and it can be looked up in this map.
356 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
357 
358 // This map keeps track of all the new definitions for an instruction. This
359 // information is needed when restoring SSA form after cloning blocks.
361 
362 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
363  OS << "< ";
364  for (const BasicBlock *BB : Path) {
365  std::string BBName;
366  if (BB->hasName())
367  raw_string_ostream(BBName) << BB->getName();
368  else
369  raw_string_ostream(BBName) << BB;
370  OS << BBName << " ";
371  }
372  OS << ">";
373  return OS;
374 }
375 
376 /// ThreadingPath is a path in the control flow of a loop that can be threaded
377 /// by cloning necessary basic blocks and replacing conditional branches with
378 /// unconditional ones. A threading path includes a list of basic blocks, the
379 /// exit state, and the block that determines the next state.
380 struct ThreadingPath {
381  /// Exit value is DFA's exit state for the given path.
382  uint64_t getExitValue() const { return ExitVal; }
383  void setExitValue(const ConstantInt *V) {
384  ExitVal = V->getZExtValue();
385  IsExitValSet = true;
386  }
387  bool isExitValueSet() const { return IsExitValSet; }
388 
389  /// Determinator is the basic block that determines the next state of the DFA.
390  const BasicBlock *getDeterminatorBB() const { return DBB; }
391  void setDeterminator(const BasicBlock *BB) { DBB = BB; }
392 
393  /// Path is a list of basic blocks.
394  const PathType &getPath() const { return Path; }
395  void setPath(const PathType &NewPath) { Path = NewPath; }
396 
397  void print(raw_ostream &OS) const {
398  OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
399  }
400 
401 private:
402  PathType Path;
403  uint64_t ExitVal;
404  const BasicBlock *DBB = nullptr;
405  bool IsExitValSet = false;
406 };
407 
408 #ifndef NDEBUG
409 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
410  TPath.print(OS);
411  return OS;
412 }
413 #endif
414 
415 struct MainSwitch {
416  MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
417  if (isPredictable(SI)) {
418  Instr = SI;
419  } else {
420  ORE->emit([&]() {
421  return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
422  << "Switch instruction is not predictable.";
423  });
424  }
425  }
426 
427  virtual ~MainSwitch() = default;
428 
429  SwitchInst *getInstr() const { return Instr; }
430  const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
431  return SelectInsts;
432  }
433 
434 private:
435  /// Do a use-def chain traversal. Make sure the value of the switch variable
436  /// is always a known constant. This means that all conditional jumps based on
437  /// switch variable can be converted to unconditional jumps.
438  bool isPredictable(const SwitchInst *SI) {
439  std::deque<Instruction *> Q;
440  SmallSet<Value *, 16> SeenValues;
441  SelectInsts.clear();
442 
443  Value *FirstDef = SI->getOperand(0);
444  auto *Inst = dyn_cast<Instruction>(FirstDef);
445 
446  // If this is a function argument or another non-instruction, then give up.
447  // We are interested in loop local variables.
448  if (!Inst)
449  return false;
450 
451  // Require the first definition to be a PHINode
452  if (!isa<PHINode>(Inst))
453  return false;
454 
455  LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n");
456 
457  Q.push_back(Inst);
458  SeenValues.insert(FirstDef);
459 
460  while (!Q.empty()) {
461  Instruction *Current = Q.front();
462  Q.pop_front();
463 
464  if (auto *Phi = dyn_cast<PHINode>(Current)) {
465  for (Value *Incoming : Phi->incoming_values()) {
466  if (!isPredictableValue(Incoming, SeenValues))
467  return false;
468  addInstToQueue(Incoming, Q, SeenValues);
469  }
470  LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n");
471  } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
472  if (!isValidSelectInst(SelI))
473  return false;
474  if (!isPredictableValue(SelI->getTrueValue(), SeenValues) ||
475  !isPredictableValue(SelI->getFalseValue(), SeenValues)) {
476  return false;
477  }
478  addInstToQueue(SelI->getTrueValue(), Q, SeenValues);
479  addInstToQueue(SelI->getFalseValue(), Q, SeenValues);
480  LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n");
481  if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
482  SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
483  } else {
484  // If it is neither a phi nor a select, then we give up.
485  return false;
486  }
487  }
488 
489  return true;
490  }
491 
492  bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
493  if (SeenValues.find(InpVal) != SeenValues.end())
494  return true;
495 
496  if (isa<ConstantInt>(InpVal))
497  return true;
498 
499  // If this is a function argument or another non-instruction, then give up.
500  if (!isa<Instruction>(InpVal))
501  return false;
502 
503  return true;
504  }
505 
506  void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
507  SmallSet<Value *, 16> &SeenValues) {
508  if (SeenValues.find(Val) != SeenValues.end())
509  return;
510  if (Instruction *I = dyn_cast<Instruction>(Val))
511  Q.push_back(I);
512  SeenValues.insert(Val);
513  }
514 
515  bool isValidSelectInst(SelectInst *SI) {
516  if (!SI->hasOneUse())
517  return false;
518 
519  Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
520  // The use of the select inst should be either a phi or another select.
521  if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
522  return false;
523 
524  BasicBlock *SIBB = SI->getParent();
525 
526  // Currently, we can only expand select instructions in basic blocks with
527  // one successor.
528  BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
529  if (!SITerm || !SITerm->isUnconditional())
530  return false;
531 
532  if (isa<PHINode>(SIUse) &&
533  SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
534  return false;
535 
536  // If select will not be sunk during unfolding, and it is in the same basic
537  // block as another state defining select, then cannot unfold both.
538  for (SelectInstToUnfold SIToUnfold : SelectInsts) {
539  SelectInst *PrevSI = SIToUnfold.getInst();
540  if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
541  PrevSI->getParent() == SI->getParent())
542  return false;
543  }
544 
545  return true;
546  }
547 
548  SwitchInst *Instr = nullptr;
550 };
551 
552 struct AllSwitchPaths {
553  AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
554  : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
555  ORE(ORE) {}
556 
557  std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
558  unsigned getNumThreadingPaths() { return TPaths.size(); }
559  SwitchInst *getSwitchInst() { return Switch; }
560  BasicBlock *getSwitchBlock() { return SwitchBlock; }
561 
562  void run() {
563  VisitedBlocks Visited;
564  PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
565  StateDefMap StateDef = getStateDefMap();
566 
567  for (PathType Path : LoopPaths) {
568  ThreadingPath TPath;
569 
570  const BasicBlock *PrevBB = Path.back();
571  for (const BasicBlock *BB : Path) {
572  if (StateDef.count(BB) != 0) {
573  const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);
574  assert(Phi && "Expected a state-defining instr to be a phi node.");
575 
576  const Value *V = Phi->getIncomingValueForBlock(PrevBB);
577  if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) {
578  TPath.setExitValue(C);
579  TPath.setDeterminator(BB);
580  TPath.setPath(Path);
581  }
582  }
583 
584  // Switch block is the determinator, this is the final exit value.
585  if (TPath.isExitValueSet() && BB == Path.front())
586  break;
587 
588  PrevBB = BB;
589  }
590 
591  if (TPath.isExitValueSet())
592  TPaths.push_back(TPath);
593  }
594  }
595 
596 private:
597  // Value: an instruction that defines a switch state;
598  // Key: the parent basic block of that instruction.
600 
601  PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
602  unsigned PathDepth) const {
603  PathsType Res;
604 
605  // Stop exploring paths after visiting MaxPathLength blocks
606  if (PathDepth > MaxPathLength) {
607  ORE->emit([&]() {
608  return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
609  Switch)
610  << "Exploration stopped after visiting MaxPathLength="
611  << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
612  });
613  return Res;
614  }
615 
616  Visited.insert(BB);
617 
618  // Some blocks have multiple edges to the same successor, and this set
619  // is used to prevent a duplicate path from being generated
620  SmallSet<BasicBlock *, 4> Successors;
621  for (BasicBlock *Succ : successors(BB)) {
622  if (!Successors.insert(Succ).second)
623  continue;
624 
625  // Found a cycle through the SwitchBlock
626  if (Succ == SwitchBlock) {
627  Res.push_back({BB});
628  continue;
629  }
630 
631  // We have encountered a cycle, do not get caught in it
632  if (Visited.contains(Succ))
633  continue;
634 
635  PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
636  for (PathType Path : SuccPaths) {
637  PathType NewPath(Path);
638  NewPath.push_front(BB);
639  Res.push_back(NewPath);
640  }
641  }
642  // This block could now be visited again from a different predecessor. Note
643  // that this will result in exponential runtime. Subpaths could possibly be
644  // cached but it takes a lot of memory to store them.
645  Visited.erase(BB);
646  return Res;
647  }
648 
649  /// Walk the use-def chain and collect all the state-defining instructions.
650  StateDefMap getStateDefMap() const {
651  StateDefMap Res;
652 
653  Value *FirstDef = Switch->getOperand(0);
654 
655  assert(isa<PHINode>(FirstDef) && "After select unfolding, all state "
656  "definitions are expected to be phi "
657  "nodes.");
658 
660  Stack.push_back(dyn_cast<PHINode>(FirstDef));
661  SmallSet<Value *, 16> SeenValues;
662 
663  while (!Stack.empty()) {
664  PHINode *CurPhi = Stack.pop_back_val();
665 
666  Res[CurPhi->getParent()] = CurPhi;
667  SeenValues.insert(CurPhi);
668 
669  for (Value *Incoming : CurPhi->incoming_values()) {
670  if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
671  SeenValues.find(Incoming) != SeenValues.end()) {
672  continue;
673  }
674 
675  assert(isa<PHINode>(Incoming) && "After select unfolding, all state "
676  "definitions are expected to be phi "
677  "nodes.");
678 
679  Stack.push_back(cast<PHINode>(Incoming));
680  }
681  }
682 
683  return Res;
684  }
685 
687  BasicBlock *SwitchBlock;
689  std::vector<ThreadingPath> TPaths;
690 };
691 
692 struct TransformDFA {
693  TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
697  : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
698  EphValues(EphValues) {}
699 
700  void run() {
701  if (isLegalAndProfitableToTransform()) {
702  createAllExitPaths();
703  NumTransforms++;
704  }
705  }
706 
707 private:
708  /// This function performs both a legality check and profitability check at
709  /// the same time since it is convenient to do so. It iterates through all
710  /// blocks that will be cloned, and keeps track of the duplication cost. It
711  /// also returns false if it is illegal to clone some required block.
712  bool isLegalAndProfitableToTransform() {
714  SwitchInst *Switch = SwitchPaths->getSwitchInst();
715 
716  // Note that DuplicateBlockMap is not being used as intended here. It is
717  // just being used to ensure (BB, State) pairs are only counted once.
718  DuplicateBlockMap DuplicateMap;
719 
720  for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
721  PathType PathBBs = TPath.getPath();
722  uint64_t NextState = TPath.getExitValue();
723  const BasicBlock *Determinator = TPath.getDeterminatorBB();
724 
725  // Update Metrics for the Switch block, this is always cloned
726  BasicBlock *BB = SwitchPaths->getSwitchBlock();
727  BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
728  if (!VisitedBB) {
729  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
730  DuplicateMap[BB].push_back({BB, NextState});
731  }
732 
733  // If the Switch block is the Determinator, then we can continue since
734  // this is the only block that is cloned and we already counted for it.
735  if (PathBBs.front() == Determinator)
736  continue;
737 
738  // Otherwise update Metrics for all blocks that will be cloned. If any
739  // block is already cloned and would be reused, don't double count it.
740  auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator);
741  for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
742  BB = *BBIt;
743  VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
744  if (VisitedBB)
745  continue;
746  Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
747  DuplicateMap[BB].push_back({BB, NextState});
748  }
749 
750  if (Metrics.notDuplicatable) {
751  LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
752  << "non-duplicatable instructions.\n");
753  ORE->emit([&]() {
754  return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
755  Switch)
756  << "Contains non-duplicatable instructions.";
757  });
758  return false;
759  }
760 
761  if (Metrics.convergent) {
762  LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
763  << "convergent instructions.\n");
764  ORE->emit([&]() {
765  return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
766  << "Contains convergent instructions.";
767  });
768  return false;
769  }
770  }
771 
772  unsigned DuplicationCost = 0;
773 
774  unsigned JumpTableSize = 0;
775  TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
776  nullptr);
777  if (JumpTableSize == 0) {
778  // Factor in the number of conditional branches reduced from jump
779  // threading. Assume that lowering the switch block is implemented by
780  // using binary search, hence the LogBase2().
781  unsigned CondBranches =
782  APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
783  DuplicationCost = Metrics.NumInsts / CondBranches;
784  } else {
785  // Compared with jump tables, the DFA optimizer removes an indirect branch
786  // on each loop iteration, thus making branch prediction more precise. The
787  // more branch targets there are, the more likely it is for the branch
788  // predictor to make a mistake, and the more benefit there is in the DFA
789  // optimizer. Thus, the more branch targets there are, the lower is the
790  // cost of the DFA opt.
791  DuplicationCost = Metrics.NumInsts / JumpTableSize;
792  }
793 
794  LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
795  << SwitchPaths->getSwitchBlock()->getName()
796  << " is: " << DuplicationCost << "\n\n");
797 
798  if (DuplicationCost > CostThreshold) {
799  LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
800  << "cost threshold.\n");
801  ORE->emit([&]() {
802  return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
803  << "Duplication cost exceeds the cost threshold (cost="
804  << ore::NV("Cost", DuplicationCost)
805  << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
806  });
807  return false;
808  }
809 
810  ORE->emit([&]() {
811  return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
812  << "Switch statement jump-threaded.";
813  });
814 
815  return true;
816  }
817 
818  /// Transform each threading path to effectively jump thread the DFA.
819  void createAllExitPaths() {
821 
822  // Move the switch block to the end of the path, since it will be duplicated
823  BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
824  for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
825  LLVM_DEBUG(dbgs() << TPath << "\n");
826  PathType NewPath(TPath.getPath());
827  NewPath.push_back(SwitchBlock);
828  TPath.setPath(NewPath);
829  }
830 
831  // Transform the ThreadingPaths and keep track of the cloned values
832  DuplicateBlockMap DuplicateMap;
833  DefMap NewDefs;
834 
835  SmallSet<BasicBlock *, 16> BlocksToClean;
836  for (BasicBlock *BB : successors(SwitchBlock))
837  BlocksToClean.insert(BB);
838 
839  for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
840  createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
841  NumPaths++;
842  }
843 
844  // After all paths are cloned, now update the last successor of the cloned
845  // path so it skips over the switch statement
846  for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
847  updateLastSuccessor(TPath, DuplicateMap, &DTU);
848 
849  // For each instruction that was cloned and used outside, update its uses
850  updateSSA(NewDefs);
851 
852  // Clean PHI Nodes for the newly created blocks
853  for (BasicBlock *BB : BlocksToClean)
854  cleanPhiNodes(BB);
855  }
856 
857  /// For a specific ThreadingPath \p Path, create an exit path starting from
858  /// the determinator block.
859  ///
860  /// To remember the correct destination, we have to duplicate blocks
861  /// corresponding to each state. Also update the terminating instruction of
862  /// the predecessors, and phis in the successor blocks.
863  void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
864  DuplicateBlockMap &DuplicateMap,
865  SmallSet<BasicBlock *, 16> &BlocksToClean,
866  DomTreeUpdater *DTU) {
867  uint64_t NextState = Path.getExitValue();
868  const BasicBlock *Determinator = Path.getDeterminatorBB();
869  PathType PathBBs = Path.getPath();
870 
871  // Don't select the placeholder block in front
872  if (PathBBs.front() == Determinator)
873  PathBBs.pop_front();
874 
875  auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator);
876  auto Prev = std::prev(DetIt);
877  BasicBlock *PrevBB = *Prev;
878  for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
879  BasicBlock *BB = *BBIt;
880  BlocksToClean.insert(BB);
881 
882  // We already cloned BB for this NextState, now just update the branch
883  // and continue.
884  BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
885  if (NextBB) {
886  updatePredecessor(PrevBB, BB, NextBB, DTU);
887  PrevBB = NextBB;
888  continue;
889  }
890 
891  // Clone the BB and update the successor of Prev to jump to the new block
892  BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
893  BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
894  DuplicateMap[BB].push_back({NewBB, NextState});
895  BlocksToClean.insert(NewBB);
896  PrevBB = NewBB;
897  }
898  }
899 
900  /// Restore SSA form after cloning blocks.
901  ///
902  /// Each cloned block creates new defs for a variable, and the uses need to be
903  /// updated to reflect this. The uses may be replaced with a cloned value, or
904  /// some derived phi instruction. Note that all uses of a value defined in the
905  /// same block were already remapped when cloning the block.
906  void updateSSA(DefMap &NewDefs) {
907  SSAUpdaterBulk SSAUpdate;
908  SmallVector<Use *, 16> UsesToRename;
909 
910  for (auto KV : NewDefs) {
911  Instruction *I = KV.first;
912  BasicBlock *BB = I->getParent();
913  std::vector<Instruction *> Cloned = KV.second;
914 
915  // Scan all uses of this instruction to see if it is used outside of its
916  // block, and if so, record them in UsesToRename.
917  for (Use &U : I->uses()) {
918  Instruction *User = cast<Instruction>(U.getUser());
919  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
920  if (UserPN->getIncomingBlock(U) == BB)
921  continue;
922  } else if (User->getParent() == BB) {
923  continue;
924  }
925 
926  UsesToRename.push_back(&U);
927  }
928 
929  // If there are no uses outside the block, we're done with this
930  // instruction.
931  if (UsesToRename.empty())
932  continue;
933  LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
934  << "\n");
935 
936  // We found a use of I outside of BB. Rename all uses of I that are
937  // outside its block to be uses of the appropriate PHI node etc. See
938  // ValuesInBlocks with the values we know.
939  unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
940  SSAUpdate.AddAvailableValue(VarNum, BB, I);
941  for (Instruction *New : Cloned)
942  SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
943 
944  while (!UsesToRename.empty())
945  SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
946 
947  LLVM_DEBUG(dbgs() << "\n");
948  }
949  // SSAUpdater handles phi placement and renaming uses with the appropriate
950  // value.
951  SSAUpdate.RewriteAllUses(DT);
952  }
953 
954  /// Clones a basic block, and adds it to the CFG.
955  ///
956  /// This function also includes updating phi nodes in the successors of the
957  /// BB, and remapping uses that were defined locally in the cloned BB.
958  BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
959  uint64_t NextState,
960  DuplicateBlockMap &DuplicateMap,
961  DefMap &NewDefs,
962  DomTreeUpdater *DTU) {
963  ValueToValueMapTy VMap;
964  BasicBlock *NewBB = CloneBasicBlock(
965  BB, VMap, ".jt" + std::to_string(NextState), BB->getParent());
966  NewBB->moveAfter(BB);
967  NumCloned++;
968 
969  for (Instruction &I : *NewBB) {
970  // Do not remap operands of PHINode in case a definition in BB is an
971  // incoming value to a phi in the same block. This incoming value will
972  // be renamed later while restoring SSA.
973  if (isa<PHINode>(&I))
974  continue;
975  RemapInstruction(&I, VMap,
977  if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
978  AC->registerAssumption(II);
979  }
980 
981  updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
982  updatePredecessor(PrevBB, BB, NewBB, DTU);
983  updateDefMap(NewDefs, VMap);
984 
985  // Add all successors to the DominatorTree
987  for (auto *SuccBB : successors(NewBB)) {
988  if (SuccSet.insert(SuccBB).second)
989  DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
990  }
991  SuccSet.clear();
992  return NewBB;
993  }
994 
995  /// Update the phi nodes in BB's successors.
996  ///
997  /// This means creating a new incoming value from NewBB with the new
998  /// instruction wherever there is an incoming value from BB.
999  void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1000  uint64_t NextState, ValueToValueMapTy &VMap,
1001  DuplicateBlockMap &DuplicateMap) {
1002  std::vector<BasicBlock *> BlocksToUpdate;
1003 
1004  // If BB is the last block in the path, we can simply update the one case
1005  // successor that will be reached.
1006  if (BB == SwitchPaths->getSwitchBlock()) {
1007  SwitchInst *Switch = SwitchPaths->getSwitchInst();
1008  BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1009  BlocksToUpdate.push_back(NextCase);
1010  BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
1011  if (ClonedSucc)
1012  BlocksToUpdate.push_back(ClonedSucc);
1013  }
1014  // Otherwise update phis in all successors.
1015  else {
1016  for (BasicBlock *Succ : successors(BB)) {
1017  BlocksToUpdate.push_back(Succ);
1018 
1019  // Check if a successor has already been cloned for the particular exit
1020  // value. In this case if a successor was already cloned, the phi nodes
1021  // in the cloned block should be updated directly.
1022  BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
1023  if (ClonedSucc)
1024  BlocksToUpdate.push_back(ClonedSucc);
1025  }
1026  }
1027 
1028  // If there is a phi with an incoming value from BB, create a new incoming
1029  // value for the new predecessor ClonedBB. The value will either be the same
1030  // value from BB or a cloned value.
1031  for (BasicBlock *Succ : BlocksToUpdate) {
1032  for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
1033  ++II) {
1034  Value *Incoming = Phi->getIncomingValueForBlock(BB);
1035  if (Incoming) {
1036  if (isa<Constant>(Incoming)) {
1037  Phi->addIncoming(Incoming, ClonedBB);
1038  continue;
1039  }
1040  Value *ClonedVal = VMap[Incoming];
1041  if (ClonedVal)
1042  Phi->addIncoming(ClonedVal, ClonedBB);
1043  else
1044  Phi->addIncoming(Incoming, ClonedBB);
1045  }
1046  }
1047  }
1048  }
1049 
1050  /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1051  /// other successors are kept as well.
1052  void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
1053  BasicBlock *NewBB, DomTreeUpdater *DTU) {
1054  // When a path is reused, there is a chance that predecessors were already
1055  // updated before. Check if the predecessor needs to be updated first.
1056  if (!isPredecessor(OldBB, PrevBB))
1057  return;
1058 
1059  Instruction *PrevTerm = PrevBB->getTerminator();
1060  for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1061  if (PrevTerm->getSuccessor(Idx) == OldBB) {
1062  OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
1063  PrevTerm->setSuccessor(Idx, NewBB);
1064  }
1065  }
1066  DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
1067  {DominatorTree::Insert, PrevBB, NewBB}});
1068  }
1069 
1070  /// Add new value mappings to the DefMap to keep track of all new definitions
1071  /// for a particular instruction. These will be used while updating SSA form.
1072  void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1073  for (auto Entry : VMap) {
1074  Instruction *Inst =
1075  dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
1076  if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
1077  isa<SwitchInst>(Inst)) {
1078  continue;
1079  }
1080 
1081  Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
1082  if (!Cloned)
1083  continue;
1084 
1085  if (NewDefs.find(Inst) == NewDefs.end())
1086  NewDefs[Inst] = {Cloned};
1087  else
1088  NewDefs[Inst].push_back(Cloned);
1089  }
1090  }
1091 
1092  /// Update the last branch of a particular cloned path to point to the correct
1093  /// case successor.
1094  ///
1095  /// Note that this is an optional step and would have been done in later
1096  /// optimizations, but it makes the CFG significantly easier to work with.
1097  void updateLastSuccessor(ThreadingPath &TPath,
1098  DuplicateBlockMap &DuplicateMap,
1099  DomTreeUpdater *DTU) {
1100  uint64_t NextState = TPath.getExitValue();
1101  BasicBlock *BB = TPath.getPath().back();
1102  BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1103 
1104  // Note multiple paths can end at the same block so check that it is not
1105  // updated yet
1106  if (!isa<SwitchInst>(LastBlock->getTerminator()))
1107  return;
1108  SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
1109  BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1110 
1111  std::vector<DominatorTree::UpdateType> DTUpdates;
1113  for (BasicBlock *Succ : successors(LastBlock)) {
1114  if (Succ != NextCase && SuccSet.insert(Succ).second)
1115  DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
1116  }
1117 
1118  Switch->eraseFromParent();
1119  BranchInst::Create(NextCase, LastBlock);
1120 
1121  DTU->applyUpdates(DTUpdates);
1122  }
1123 
1124  /// After cloning blocks, some of the phi nodes have extra incoming values
1125  /// that are no longer used. This function removes them.
1126  void cleanPhiNodes(BasicBlock *BB) {
1127  // If BB is no longer reachable, remove any remaining phi nodes
1128  if (pred_empty(BB)) {
1129  std::vector<PHINode *> PhiToRemove;
1130  for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1131  PhiToRemove.push_back(Phi);
1132  }
1133  for (PHINode *PN : PhiToRemove) {
1134  PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
1135  PN->eraseFromParent();
1136  }
1137  return;
1138  }
1139 
1140  // Remove any incoming values that come from an invalid predecessor
1141  for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1142  std::vector<BasicBlock *> BlocksToRemove;
1143  for (BasicBlock *IncomingBB : Phi->blocks()) {
1144  if (!isPredecessor(BB, IncomingBB))
1145  BlocksToRemove.push_back(IncomingBB);
1146  }
1147  for (BasicBlock *BB : BlocksToRemove)
1148  Phi->removeIncomingValue(BB);
1149  }
1150  }
1151 
1152  /// Checks if BB was already cloned for a particular next state value. If it
1153  /// was then it returns this cloned block, and otherwise null.
1154  BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState,
1155  DuplicateBlockMap &DuplicateMap) {
1156  CloneList ClonedBBs = DuplicateMap[BB];
1157 
1158  // Find an entry in the CloneList with this NextState. If it exists then
1159  // return the corresponding BB
1160  auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
1161  return C.State == NextState;
1162  });
1163  return It != ClonedBBs.end() ? (*It).BB : nullptr;
1164  }
1165 
1166  /// Helper to get the successor corresponding to a particular case value for
1167  /// a switch statement.
1168  BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) {
1169  BasicBlock *NextCase = nullptr;
1170  for (auto Case : Switch->cases()) {
1171  if (Case.getCaseValue()->getZExtValue() == NextState) {
1172  NextCase = Case.getCaseSuccessor();
1173  break;
1174  }
1175  }
1176  if (!NextCase)
1177  NextCase = Switch->getDefaultDest();
1178  return NextCase;
1179  }
1180 
1181  /// Returns true if IncomingBB is a predecessor of BB.
1182  bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1183  return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB);
1184  }
1185 
1186  AllSwitchPaths *SwitchPaths;
1187  DominatorTree *DT;
1188  AssumptionCache *AC;
1192  std::vector<ThreadingPath> TPaths;
1193 };
1194 
1195 bool DFAJumpThreading::run(Function &F) {
1196  LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1197 
1198  if (F.hasOptSize()) {
1199  LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1200  return false;
1201  }
1202 
1203  if (ClViewCfgBefore)
1204  F.viewCFG();
1205 
1206  SmallVector<AllSwitchPaths, 2> ThreadableLoops;
1207  bool MadeChanges = false;
1208 
1209  for (BasicBlock &BB : F) {
1210  auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
1211  if (!SI)
1212  continue;
1213 
1214  LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1215  << " is predictable\n");
1216  MainSwitch Switch(SI, ORE);
1217 
1218  if (!Switch.getInstr())
1219  continue;
1220 
1221  LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1222  << "candidate for jump threading\n");
1223  LLVM_DEBUG(SI->dump());
1224 
1225  unfoldSelectInstrs(DT, Switch.getSelectInsts());
1226  if (!Switch.getSelectInsts().empty())
1227  MadeChanges = true;
1228 
1229  AllSwitchPaths SwitchPaths(&Switch, ORE);
1230  SwitchPaths.run();
1231 
1232  if (SwitchPaths.getNumThreadingPaths() > 0) {
1233  ThreadableLoops.push_back(SwitchPaths);
1234 
1235  // For the time being limit this optimization to occurring once in a
1236  // function since it can change the CFG significantly. This is not a
1237  // strict requirement but it can cause buggy behavior if there is an
1238  // overlap of blocks in different opportunities. There is a lot of room to
1239  // experiment with catching more opportunities here.
1240  break;
1241  }
1242  }
1243 
1245  if (ThreadableLoops.size() > 0)
1246  CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
1247 
1248  for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1249  TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1250  Transform.run();
1251  MadeChanges = true;
1252  }
1253 
1254 #ifdef EXPENSIVE_CHECKS
1256  verifyFunction(F, &dbgs());
1257 #endif
1258 
1259  return MadeChanges;
1260 }
1261 
1262 } // end anonymous namespace
1263 
1264 /// Integrate with the new Pass Manager
1271 
1272  if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
1273  return PreservedAnalyses::all();
1274 
1275  PreservedAnalyses PA;
1277  return PA;
1278 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:217
ValueMapper.h
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2719
llvm::ARM::PredBlockMask::TT
@ TT
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
Scalar.h
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
llvm::Function
Definition: Function.h:62
llvm::SSAUpdaterBulk::AddUse
void AddUse(unsigned Var, Use *U)
Record a use of the symbolic value.
Definition: SSAUpdaterBulk.cpp:61
Pass.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::CodeMetrics
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:30
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:113
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::verifyFunction
bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)
Check a function for errors, useful for use when debugging a pass.
Definition: Verifier.cpp:5817
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::AssumeInst
This represents the llvm.assume intrinsic.
Definition: IntrinsicInst.h:1319
APInt.h
llvm::coro::ABI::Switch
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
DenseMap.h
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:298
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::SmallPtrSet< const BasicBlock *, 8 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SelectInst::getFalseValue
const Value * getFalseValue() const
Definition: Instructions.h:1787
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::JumpTable::Full
@ Full
Definition: TargetOptions.h:50
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2732
ClViewCfgBefore
static cl::opt< bool > ClViewCfgBefore("dfa-jump-view-cfg-before", cl::desc("View the CFG before DFA Jump Threading"), cl::Hidden, cl::init(false))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DepthFirstIterator.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
CodeMetrics.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:765
llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:253
llvm::createDFAJumpThreadingPass
FunctionPass * createDFAJumpThreadingPass()
Definition: DFAJumpThreading.cpp:206
SSAUpdaterBulk.h
Constants.h
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
paths
Reduce control height in the hot paths
Definition: ControlHeightReduction.cpp:138
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::SSAUpdaterBulk
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdaterBulk.h:39
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2818
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1796
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2725
llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:72
LoopIterator.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
jump
The object format emitted by the WebAssembly backed is documented see the home and packaging for producing WebAssembly applications that can run in browsers and other environments wasi sdk provides a more minimal C C SDK based on llvm and a libc based on for producing WebAssemmbly applictions that use the WASI ABI Rust provides WebAssembly support integrated into Cargo There are two main which provides a relatively minimal environment that has an emphasis on being native wasm32 unknown which uses Emscripten internally and provides standard C C filesystem GL and SDL bindings For more and br_table instructions can support having a value on the value stack across the jump(sometimes). We should(a) model this
llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition: CloneFunction.cpp:43
llvm::cl::opt< bool >
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
CostThreshold
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1571
llvm::DFAJumpThreadingPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Integrate with the new Pass Manager.
Definition: DFAJumpThreading.cpp:1265
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2783
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3124
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::SelectInst::getTrueValue
const Value * getTrueValue() const
Definition: Instructions.h:1786
Metrics
Machine Trace Metrics
Definition: MachineTraceMetrics.cpp:53
llvm::pdb::PDB_MemoryType::Stack
@ Stack
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1738
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3146
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::BasicBlock::moveAfter
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:142
llvm::PHINode::blocks
iterator_range< block_iterator > blocks()
Definition: Instructions.h:2711
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:789
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:867
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
DEBUG_TYPE
#define DEBUG_TYPE
Definition: DFAJumpThreading.cpp:89
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
Threading
dfa jump DFA Jump Threading
Definition: DFAJumpThreading.cpp:203
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1578
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:90
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
Verifier.h
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::pred_end
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
llvm::SSAUpdaterBulk::AddVariable
unsigned AddVariable(StringRef Name, Type *Ty)
Add a new variable to the SSA rewriter.
Definition: SSAUpdaterBulk.cpp:40
MaxPathLength
static cl::opt< unsigned > MaxPathLength("dfa-max-path-length", cl::desc("Max number of blocks searched to find a threading path"), cl::Hidden, cl::init(20))
llvm::APInt::ceilLogBase2
unsigned ceilLogBase2() const
Definition: APInt.h:1646
llvm::SSAUpdaterBulk::AddAvailableValue
void AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdaterBulk.cpp:51
llvm::SmallSet::end
const_iterator end() const
Definition: SmallSet.h:229
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:212
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2749
llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:802
TargetTransformInfo.h
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
llvm::PHINode
Definition: Instructions.h:2633
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:325
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
threading
dfa jump threading
Definition: DFAJumpThreading.cpp:202
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::cl::desc
Definition: CommandLine.h:412
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
BasicBlockUtils.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::SSAUpdaterBulk::RewriteAllUses
void RewriteAllUses(DominatorTree *DT, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Perform all the necessary updates, including new PHI-nodes insertion and the requested uses update.
Definition: SSAUpdaterBulk.cpp:122
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading", "DFA Jump Threading", false, false) INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
DFAJumpThreading.h