LLVM  6.0.0svn
JumpThreading.cpp
Go to the documentation of this file.
1 //===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Jump Threading pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/CFG.h"
30 #include "llvm/Analysis/Loads.h"
31 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/CFG.h"
36 #include "llvm/IR/Constant.h"
37 #include "llvm/IR/ConstantRange.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/Function.h"
42 #include "llvm/IR/InstrTypes.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/LLVMContext.h"
48 #include "llvm/IR/MDBuilder.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/PassManager.h"
52 #include "llvm/IR/PatternMatch.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Use.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/Pass.h"
60 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/Scalar.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstddef>
73 #include <cstdint>
74 #include <iterator>
75 #include <memory>
76 #include <utility>
77 
78 using namespace llvm;
79 using namespace jumpthreading;
80 
81 #define DEBUG_TYPE "jump-threading"
82 
83 STATISTIC(NumThreads, "Number of jumps threaded");
84 STATISTIC(NumFolds, "Number of terminators folded");
85 STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
86 
87 static cl::opt<unsigned>
88 BBDuplicateThreshold("jump-threading-threshold",
89  cl::desc("Max block size to duplicate for jump threading"),
90  cl::init(6), cl::Hidden);
91 
92 static cl::opt<unsigned>
94  "jump-threading-implication-search-threshold",
95  cl::desc("The number of predecessors to search for a stronger "
96  "condition to use to thread over a weaker condition"),
97  cl::init(3), cl::Hidden);
98 
100  "print-lvi-after-jump-threading",
101  cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
102  cl::Hidden);
103 
104 namespace {
105 
106  /// This pass performs 'jump threading', which looks at blocks that have
107  /// multiple predecessors and multiple successors. If one or more of the
108  /// predecessors of the block can be proven to always jump to one of the
109  /// successors, we forward the edge from the predecessor to the successor by
110  /// duplicating the contents of this block.
111  ///
112  /// An example of when this can occur is code like this:
113  ///
114  /// if () { ...
115  /// X = 4;
116  /// }
117  /// if (X < 3) {
118  ///
119  /// In this case, the unconditional branch at the end of the first if can be
120  /// revectored to the false side of the second if.
121  class JumpThreading : public FunctionPass {
122  JumpThreadingPass Impl;
123 
124  public:
125  static char ID; // Pass identification
126 
127  JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
129  }
130 
131  bool runOnFunction(Function &F) override;
132 
133  void getAnalysisUsage(AnalysisUsage &AU) const override {
140  }
141 
142  void releaseMemory() override { Impl.releaseMemory(); }
143  };
144 
145 } // end anonymous namespace
146 
147 char JumpThreading::ID = 0;
148 
149 INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
150  "Jump Threading", false, false)
154 INITIALIZE_PASS_END(JumpThreading, "jump-threading",
155  "Jump Threading", false, false)
156 
157 // Public interface to the Jump Threading pass
159  return new JumpThreading(Threshold);
160 }
161 
163  BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
164 }
165 
166 // Update branch probability information according to conditional
167 // branch probablity. This is usually made possible for cloned branches
168 // in inline instances by the context specific profile in the caller.
169 // For instance,
170 //
171 // [Block PredBB]
172 // [Branch PredBr]
173 // if (t) {
174 // Block A;
175 // } else {
176 // Block B;
177 // }
178 //
179 // [Block BB]
180 // cond = PN([true, %A], [..., %B]); // PHI node
181 // [Branch CondBr]
182 // if (cond) {
183 // ... // P(cond == true) = 1%
184 // }
185 //
186 // Here we know that when block A is taken, cond must be true, which means
187 // P(cond == true | A) = 1
188 //
189 // Given that P(cond == true) = P(cond == true | A) * P(A) +
190 // P(cond == true | B) * P(B)
191 // we get:
192 // P(cond == true ) = P(A) + P(cond == true | B) * P(B)
193 //
194 // which gives us:
195 // P(A) is less than P(cond == true), i.e.
196 // P(t == true) <= P(cond == true)
197 //
198 // In other words, if we know P(cond == true) is unlikely, we know
199 // that P(t == true) is also unlikely.
200 //
202  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
203  if (!CondBr)
204  return;
205 
207  uint64_t TrueWeight, FalseWeight;
208  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
209  return;
210 
211  // Returns the outgoing edge of the dominating predecessor block
212  // that leads to the PhiNode's incoming block:
213  auto GetPredOutEdge =
214  [](BasicBlock *IncomingBB,
215  BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
216  auto *PredBB = IncomingBB;
217  auto *SuccBB = PhiBB;
218  while (true) {
219  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
220  if (PredBr && PredBr->isConditional())
221  return {PredBB, SuccBB};
222  auto *SinglePredBB = PredBB->getSinglePredecessor();
223  if (!SinglePredBB)
224  return {nullptr, nullptr};
225  SuccBB = PredBB;
226  PredBB = SinglePredBB;
227  }
228  };
229 
230  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
231  Value *PhiOpnd = PN->getIncomingValue(i);
232  ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
233 
234  if (!CI || !CI->getType()->isIntegerTy(1))
235  continue;
236 
238  TrueWeight, TrueWeight + FalseWeight)
240  FalseWeight, TrueWeight + FalseWeight));
241 
242  auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
243  if (!PredOutEdge.first)
244  return;
245 
246  BasicBlock *PredBB = PredOutEdge.first;
247  BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
248 
249  uint64_t PredTrueWeight, PredFalseWeight;
250  // FIXME: We currently only set the profile data when it is missing.
251  // With PGO, this can be used to refine even existing profile data with
252  // context information. This needs to be done after more performance
253  // testing.
254  if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
255  continue;
256 
257  // We can not infer anything useful when BP >= 50%, because BP is the
258  // upper bound probability value.
259  if (BP >= BranchProbability(50, 100))
260  continue;
261 
262  SmallVector<uint32_t, 2> Weights;
263  if (PredBr->getSuccessor(0) == PredOutEdge.second) {
264  Weights.push_back(BP.getNumerator());
265  Weights.push_back(BP.getCompl().getNumerator());
266  } else {
267  Weights.push_back(BP.getCompl().getNumerator());
268  Weights.push_back(BP.getNumerator());
269  }
271  MDBuilder(PredBr->getParent()->getContext())
272  .createBranchWeights(Weights));
273  }
274 }
275 
276 /// runOnFunction - Toplevel algorithm.
278  if (skipFunction(F))
279  return false;
280  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
281  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
282  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
283  std::unique_ptr<BlockFrequencyInfo> BFI;
284  std::unique_ptr<BranchProbabilityInfo> BPI;
285  bool HasProfileData = F.getEntryCount().hasValue();
286  if (HasProfileData) {
287  LoopInfo LI{DominatorTree(F)};
288  BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
289  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
290  }
291 
292  bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
293  std::move(BPI));
295  dbgs() << "LVI for function '" << F.getName() << "':\n";
296  LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
297  dbgs());
298  }
299  return Changed;
300 }
301 
304  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
305  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
306  auto &AA = AM.getResult<AAManager>(F);
307 
308  std::unique_ptr<BlockFrequencyInfo> BFI;
309  std::unique_ptr<BranchProbabilityInfo> BPI;
310  bool HasProfileData = F.getEntryCount().hasValue();
311  if (HasProfileData) {
312  LoopInfo LI{DominatorTree(F)};
313  BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
314  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
315  }
316 
317  bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI),
318  std::move(BPI));
319 
320  if (!Changed)
321  return PreservedAnalyses::all();
323  PA.preserve<GlobalsAA>();
324  return PA;
325 }
326 
328  LazyValueInfo *LVI_, AliasAnalysis *AA_,
329  bool HasProfileData_,
330  std::unique_ptr<BlockFrequencyInfo> BFI_,
331  std::unique_ptr<BranchProbabilityInfo> BPI_) {
332  DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
333  TLI = TLI_;
334  LVI = LVI_;
335  AA = AA_;
336  BFI.reset();
337  BPI.reset();
338  // When profile data is available, we need to update edge weights after
339  // successful jump threading, which requires both BPI and BFI being available.
340  HasProfileData = HasProfileData_;
341  auto *GuardDecl = F.getParent()->getFunction(
342  Intrinsic::getName(Intrinsic::experimental_guard));
343  HasGuards = GuardDecl && !GuardDecl->use_empty();
344  if (HasProfileData) {
345  BPI = std::move(BPI_);
346  BFI = std::move(BFI_);
347  }
348 
349  // Remove unreachable blocks from function as they may result in infinite
350  // loop. We do threading if we found something profitable. Jump threading a
351  // branch can create other opportunities. If these opportunities form a cycle
352  // i.e. if any jump threading is undoing previous threading in the path, then
353  // we will loop forever. We take care of this issue by not jump threading for
354  // back edges. This works for normal cases but not for unreachable blocks as
355  // they may have cycle with no back edge.
356  bool EverChanged = false;
357  EverChanged |= removeUnreachableBlocks(F, LVI);
358 
359  FindLoopHeaders(F);
360 
361  bool Changed;
362  do {
363  Changed = false;
364  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
365  BasicBlock *BB = &*I;
366  // Thread all of the branches we can over this block.
367  while (ProcessBlock(BB))
368  Changed = true;
369 
370  ++I;
371 
372  // If the block is trivially dead, zap it. This eliminates the successor
373  // edges which simplifies the CFG.
374  if (pred_empty(BB) &&
375  BB != &BB->getParent()->getEntryBlock()) {
376  DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
377  << "' with terminator: " << *BB->getTerminator() << '\n');
378  LoopHeaders.erase(BB);
379  LVI->eraseBlock(BB);
380  DeleteDeadBlock(BB);
381  Changed = true;
382  continue;
383  }
384 
386 
387  // Can't thread an unconditional jump, but if the block is "almost
388  // empty", we can replace uses of it with uses of the successor and make
389  // this dead.
390  // We should not eliminate the loop header or latch either, because
391  // eliminating a loop header or latch might later prevent LoopSimplify
392  // from transforming nested loops into simplified form. We will rely on
393  // later passes in backend to clean up empty blocks.
394  if (BI && BI->isUnconditional() &&
395  BB != &BB->getParent()->getEntryBlock() &&
396  // If the terminator is the only non-phi instruction, try to nuke it.
397  BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
398  !LoopHeaders.count(BI->getSuccessor(0))) {
399  // FIXME: It is always conservatively correct to drop the info
400  // for a block even if it doesn't get erased. This isn't totally
401  // awesome, but it allows us to use AssertingVH to prevent nasty
402  // dangling pointer issues within LazyValueInfo.
403  LVI->eraseBlock(BB);
405  Changed = true;
406  }
407  }
408  EverChanged |= Changed;
409  } while (Changed);
410 
411  LoopHeaders.clear();
412  return EverChanged;
413 }
414 
415 // Replace uses of Cond with ToVal when safe to do so. If all uses are
416 // replaced, we can remove Cond. We cannot blindly replace all uses of Cond
417 // because we may incorrectly replace uses when guards/assumes are uses of
418 // of `Cond` and we used the guards/assume to reason about the `Cond` value
419 // at the end of block. RAUW unconditionally replaces all uses
420 // including the guards/assumes themselves and the uses before the
421 // guard/assume.
422 static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
423  assert(Cond->getType() == ToVal->getType());
424  auto *BB = Cond->getParent();
425  // We can unconditionally replace all uses in non-local blocks (i.e. uses
426  // strictly dominated by BB), since LVI information is true from the
427  // terminator of BB.
428  replaceNonLocalUsesWith(Cond, ToVal);
429  for (Instruction &I : reverse(*BB)) {
430  // Reached the Cond whose uses we are trying to replace, so there are no
431  // more uses.
432  if (&I == Cond)
433  break;
434  // We only replace uses in instructions that are guaranteed to reach the end
435  // of BB, where we know Cond is ToVal.
437  break;
438  I.replaceUsesOfWith(Cond, ToVal);
439  }
440  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
441  Cond->eraseFromParent();
442 }
443 
444 /// Return the cost of duplicating a piece of this block from first non-phi
445 /// and before StopAt instruction to thread across it. Stop scanning the block
446 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
448  Instruction *StopAt,
449  unsigned Threshold) {
450  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
451  /// Ignore PHI nodes, these will be flattened when duplication happens.
453 
454  // FIXME: THREADING will delete values that are just used to compute the
455  // branch, so they shouldn't count against the duplication cost.
456 
457  unsigned Bonus = 0;
458  if (BB->getTerminator() == StopAt) {
459  // Threading through a switch statement is particularly profitable. If this
460  // block ends in a switch, decrease its cost to make it more likely to
461  // happen.
462  if (isa<SwitchInst>(StopAt))
463  Bonus = 6;
464 
465  // The same holds for indirect branches, but slightly more so.
466  if (isa<IndirectBrInst>(StopAt))
467  Bonus = 8;
468  }
469 
470  // Bump the threshold up so the early exit from the loop doesn't skip the
471  // terminator-based Size adjustment at the end.
472  Threshold += Bonus;
473 
474  // Sum up the cost of each instruction until we get to the terminator. Don't
475  // include the terminator because the copy won't include it.
476  unsigned Size = 0;
477  for (; &*I != StopAt; ++I) {
478 
479  // Stop scanning the block if we've reached the threshold.
480  if (Size > Threshold)
481  return Size;
482 
483  // Debugger intrinsics don't incur code size.
484  if (isa<DbgInfoIntrinsic>(I)) continue;
485 
486  // If this is a pointer->pointer bitcast, it is free.
487  if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
488  continue;
489 
490  // Bail out if this instruction gives back a token type, it is not possible
491  // to duplicate it if it is used outside this BB.
492  if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
493  return ~0U;
494 
495  // All other instructions count for at least one unit.
496  ++Size;
497 
498  // Calls are more expensive. If they are non-intrinsic calls, we model them
499  // as having cost of 4. If they are a non-vector intrinsic, we model them
500  // as having cost of 2 total, and if they are a vector intrinsic, we model
501  // them as having cost 1.
502  if (const CallInst *CI = dyn_cast<CallInst>(I)) {
503  if (CI->cannotDuplicate() || CI->isConvergent())
504  // Blocks with NoDuplicate are modelled as having infinite cost, so they
505  // are never duplicated.
506  return ~0U;
507  else if (!isa<IntrinsicInst>(CI))
508  Size += 3;
509  else if (!CI->getType()->isVectorTy())
510  Size += 1;
511  }
512  }
513 
514  return Size > Bonus ? Size - Bonus : 0;
515 }
516 
517 /// FindLoopHeaders - We do not want jump threading to turn proper loop
518 /// structures into irreducible loops. Doing this breaks up the loop nesting
519 /// hierarchy and pessimizes later transformations. To prevent this from
520 /// happening, we first have to find the loop headers. Here we approximate this
521 /// by finding targets of backedges in the CFG.
522 ///
523 /// Note that there definitely are cases when we want to allow threading of
524 /// edges across a loop header. For example, threading a jump from outside the
525 /// loop (the preheader) to an exit block of the loop is definitely profitable.
526 /// It is also almost always profitable to thread backedges from within the loop
527 /// to exit blocks, and is often profitable to thread backedges to other blocks
528 /// within the loop (forming a nested loop). This simple analysis is not rich
529 /// enough to track all of these properties and keep it up-to-date as the CFG
530 /// mutates, so we don't allow any of these transformations.
533  FindFunctionBackedges(F, Edges);
534 
535  for (const auto &Edge : Edges)
536  LoopHeaders.insert(Edge.second);
537 }
538 
539 /// getKnownConstant - Helper method to determine if we can thread over a
540 /// terminator with the given value as its condition, and if so what value to
541 /// use for that. What kind of value this is depends on whether we want an
542 /// integer or a block address, but an undef is always accepted.
543 /// Returns null if Val is null or not an appropriate constant.
545  if (!Val)
546  return nullptr;
547 
548  // Undef is "known" enough.
549  if (UndefValue *U = dyn_cast<UndefValue>(Val))
550  return U;
551 
552  if (Preference == WantBlockAddress)
553  return dyn_cast<BlockAddress>(Val->stripPointerCasts());
554 
555  return dyn_cast<ConstantInt>(Val);
556 }
557 
558 /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
559 /// if we can infer that the value is a known ConstantInt/BlockAddress or undef
560 /// in any of our predecessors. If so, return the known list of value and pred
561 /// BB in the result vector.
562 ///
563 /// This returns true if there were any known values.
565  Value *V, BasicBlock *BB, PredValueInfo &Result,
567  // This method walks up use-def chains recursively. Because of this, we could
568  // get into an infinite loop going around loops in the use-def chain. To
569  // prevent this, keep track of what (value, block) pairs we've already visited
570  // and terminate the search if we loop back to them
571  if (!RecursionSet.insert(std::make_pair(V, BB)).second)
572  return false;
573 
574  // An RAII help to remove this pair from the recursion set once the recursion
575  // stack pops back out again.
576  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
577 
578  // If V is a constant, then it is known in all predecessors.
579  if (Constant *KC = getKnownConstant(V, Preference)) {
580  for (BasicBlock *Pred : predecessors(BB))
581  Result.push_back(std::make_pair(KC, Pred));
582 
583  return !Result.empty();
584  }
585 
586  // If V is a non-instruction value, or an instruction in a different block,
587  // then it can't be derived from a PHI.
589  if (!I || I->getParent() != BB) {
590 
591  // Okay, if this is a live-in value, see if it has a known value at the end
592  // of any of our predecessors.
593  //
594  // FIXME: This should be an edge property, not a block end property.
595  /// TODO: Per PR2563, we could infer value range information about a
596  /// predecessor based on its terminator.
597  //
598  // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
599  // "I" is a non-local compare-with-a-constant instruction. This would be
600  // able to handle value inequalities better, for example if the compare is
601  // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
602  // Perhaps getConstantOnEdge should be smart enough to do this?
603 
604  for (BasicBlock *P : predecessors(BB)) {
605  // If the value is known by LazyValueInfo to be a constant in a
606  // predecessor, use that information to try to thread this block.
607  Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
608  if (Constant *KC = getKnownConstant(PredCst, Preference))
609  Result.push_back(std::make_pair(KC, P));
610  }
611 
612  return !Result.empty();
613  }
614 
615  /// If I is a PHI node, then we know the incoming values for any constants.
616  if (PHINode *PN = dyn_cast<PHINode>(I)) {
617  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
618  Value *InVal = PN->getIncomingValue(i);
619  if (Constant *KC = getKnownConstant(InVal, Preference)) {
620  Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
621  } else {
622  Constant *CI = LVI->getConstantOnEdge(InVal,
623  PN->getIncomingBlock(i),
624  BB, CxtI);
625  if (Constant *KC = getKnownConstant(CI, Preference))
626  Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
627  }
628  }
629 
630  return !Result.empty();
631  }
632 
633  // Handle Cast instructions. Only see through Cast when the source operand is
634  // PHI or Cmp and the source type is i1 to save the compilation time.
635  if (CastInst *CI = dyn_cast<CastInst>(I)) {
636  Value *Source = CI->getOperand(0);
637  if (!Source->getType()->isIntegerTy(1))
638  return false;
639  if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
640  return false;
641  ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
642  if (Result.empty())
643  return false;
644 
645  // Convert the known values.
646  for (auto &R : Result)
647  R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
648 
649  return true;
650  }
651 
652  // Handle some boolean conditions.
653  if (I->getType()->getPrimitiveSizeInBits() == 1) {
654  assert(Preference == WantInteger && "One-bit non-integer type?");
655  // X | true -> true
656  // X & false -> false
657  if (I->getOpcode() == Instruction::Or ||
658  I->getOpcode() == Instruction::And) {
659  PredValueInfoTy LHSVals, RHSVals;
660 
661  ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
662  WantInteger, CxtI);
663  ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
664  WantInteger, CxtI);
665 
666  if (LHSVals.empty() && RHSVals.empty())
667  return false;
668 
669  ConstantInt *InterestingVal;
670  if (I->getOpcode() == Instruction::Or)
671  InterestingVal = ConstantInt::getTrue(I->getContext());
672  else
673  InterestingVal = ConstantInt::getFalse(I->getContext());
674 
675  SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
676 
677  // Scan for the sentinel. If we find an undef, force it to the
678  // interesting value: x|undef -> true and x&undef -> false.
679  for (const auto &LHSVal : LHSVals)
680  if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
681  Result.emplace_back(InterestingVal, LHSVal.second);
682  LHSKnownBBs.insert(LHSVal.second);
683  }
684  for (const auto &RHSVal : RHSVals)
685  if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
686  // If we already inferred a value for this block on the LHS, don't
687  // re-add it.
688  if (!LHSKnownBBs.count(RHSVal.second))
689  Result.emplace_back(InterestingVal, RHSVal.second);
690  }
691 
692  return !Result.empty();
693  }
694 
695  // Handle the NOT form of XOR.
696  if (I->getOpcode() == Instruction::Xor &&
697  isa<ConstantInt>(I->getOperand(1)) &&
698  cast<ConstantInt>(I->getOperand(1))->isOne()) {
699  ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
700  WantInteger, CxtI);
701  if (Result.empty())
702  return false;
703 
704  // Invert the known values.
705  for (auto &R : Result)
706  R.first = ConstantExpr::getNot(R.first);
707 
708  return true;
709  }
710 
711  // Try to simplify some other binary operator values.
712  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
713  assert(Preference != WantBlockAddress
714  && "A binary operator creating a block address?");
715  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
716  PredValueInfoTy LHSVals;
717  ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
718  WantInteger, CxtI);
719 
720  // Try to use constant folding to simplify the binary operator.
721  for (const auto &LHSVal : LHSVals) {
722  Constant *V = LHSVal.first;
723  Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
724 
725  if (Constant *KC = getKnownConstant(Folded, WantInteger))
726  Result.push_back(std::make_pair(KC, LHSVal.second));
727  }
728  }
729 
730  return !Result.empty();
731  }
732 
733  // Handle compare with phi operand, where the PHI is defined in this block.
734  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
735  assert(Preference == WantInteger && "Compares only produce integers");
736  Type *CmpType = Cmp->getType();
737  Value *CmpLHS = Cmp->getOperand(0);
738  Value *CmpRHS = Cmp->getOperand(1);
739  CmpInst::Predicate Pred = Cmp->getPredicate();
740 
741  PHINode *PN = dyn_cast<PHINode>(CmpLHS);
742  if (PN && PN->getParent() == BB) {
743  const DataLayout &DL = PN->getModule()->getDataLayout();
744  // We can do this simplification if any comparisons fold to true or false.
745  // See if any do.
746  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
747  BasicBlock *PredBB = PN->getIncomingBlock(i);
748  Value *LHS = PN->getIncomingValue(i);
749  Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB);
750 
751  Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
752  if (!Res) {
753  if (!isa<Constant>(RHS))
754  continue;
755 
757  ResT = LVI->getPredicateOnEdge(Pred, LHS,
758  cast<Constant>(RHS), PredBB, BB,
759  CxtI ? CxtI : Cmp);
760  if (ResT == LazyValueInfo::Unknown)
761  continue;
762  Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
763  }
764 
765  if (Constant *KC = getKnownConstant(Res, WantInteger))
766  Result.push_back(std::make_pair(KC, PredBB));
767  }
768 
769  return !Result.empty();
770  }
771 
772  // If comparing a live-in value against a constant, see if we know the
773  // live-in value on any predecessors.
774  if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
775  Constant *CmpConst = cast<Constant>(CmpRHS);
776 
777  if (!isa<Instruction>(CmpLHS) ||
778  cast<Instruction>(CmpLHS)->getParent() != BB) {
779  for (BasicBlock *P : predecessors(BB)) {
780  // If the value is known by LazyValueInfo to be a constant in a
781  // predecessor, use that information to try to thread this block.
783  LVI->getPredicateOnEdge(Pred, CmpLHS,
784  CmpConst, P, BB, CxtI ? CxtI : Cmp);
785  if (Res == LazyValueInfo::Unknown)
786  continue;
787 
788  Constant *ResC = ConstantInt::get(CmpType, Res);
789  Result.push_back(std::make_pair(ResC, P));
790  }
791 
792  return !Result.empty();
793  }
794 
795  // InstCombine can fold some forms of constant range checks into
796  // (icmp (add (x, C1)), C2). See if we have we have such a thing with
797  // x as a live-in.
798  {
799  using namespace PatternMatch;
800 
801  Value *AddLHS;
802  ConstantInt *AddConst;
803  if (isa<ConstantInt>(CmpConst) &&
804  match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
805  if (!isa<Instruction>(AddLHS) ||
806  cast<Instruction>(AddLHS)->getParent() != BB) {
807  for (BasicBlock *P : predecessors(BB)) {
808  // If the value is known by LazyValueInfo to be a ConstantRange in
809  // a predecessor, use that information to try to thread this
810  // block.
811  ConstantRange CR = LVI->getConstantRangeOnEdge(
812  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
813  // Propagate the range through the addition.
814  CR = CR.add(AddConst->getValue());
815 
816  // Get the range where the compare returns true.
818  Pred, cast<ConstantInt>(CmpConst)->getValue());
819 
820  Constant *ResC;
821  if (CmpRange.contains(CR))
822  ResC = ConstantInt::getTrue(CmpType);
823  else if (CmpRange.inverse().contains(CR))
824  ResC = ConstantInt::getFalse(CmpType);
825  else
826  continue;
827 
828  Result.push_back(std::make_pair(ResC, P));
829  }
830 
831  return !Result.empty();
832  }
833  }
834  }
835 
836  // Try to find a constant value for the LHS of a comparison,
837  // and evaluate it statically if we can.
838  PredValueInfoTy LHSVals;
839  ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
840  WantInteger, CxtI);
841 
842  for (const auto &LHSVal : LHSVals) {
843  Constant *V = LHSVal.first;
844  Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
845  if (Constant *KC = getKnownConstant(Folded, WantInteger))
846  Result.push_back(std::make_pair(KC, LHSVal.second));
847  }
848 
849  return !Result.empty();
850  }
851  }
852 
853  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
854  // Handle select instructions where at least one operand is a known constant
855  // and we can figure out the condition value for any predecessor block.
856  Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
857  Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
858  PredValueInfoTy Conds;
859  if ((TrueVal || FalseVal) &&
860  ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
861  WantInteger, CxtI)) {
862  for (auto &C : Conds) {
863  Constant *Cond = C.first;
864 
865  // Figure out what value to use for the condition.
866  bool KnownCond;
867  if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
868  // A known boolean.
869  KnownCond = CI->isOne();
870  } else {
871  assert(isa<UndefValue>(Cond) && "Unexpected condition value");
872  // Either operand will do, so be sure to pick the one that's a known
873  // constant.
874  // FIXME: Do this more cleverly if both values are known constants?
875  KnownCond = (TrueVal != nullptr);
876  }
877 
878  // See if the select has a known constant value for this predecessor.
879  if (Constant *Val = KnownCond ? TrueVal : FalseVal)
880  Result.push_back(std::make_pair(Val, C.second));
881  }
882 
883  return !Result.empty();
884  }
885  }
886 
887  // If all else fails, see if LVI can figure out a constant value for us.
888  Constant *CI = LVI->getConstant(V, BB, CxtI);
889  if (Constant *KC = getKnownConstant(CI, Preference)) {
890  for (BasicBlock *Pred : predecessors(BB))
891  Result.push_back(std::make_pair(KC, Pred));
892  }
893 
894  return !Result.empty();
895 }
896 
897 /// GetBestDestForBranchOnUndef - If we determine that the specified block ends
898 /// in an undefined jump, decide which block is best to revector to.
899 ///
900 /// Since we can pick an arbitrary destination, we pick the successor with the
901 /// fewest predecessors. This should reduce the in-degree of the others.
902 static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
903  TerminatorInst *BBTerm = BB->getTerminator();
904  unsigned MinSucc = 0;
905  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
906  // Compute the successor with the minimum number of predecessors.
907  unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
908  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
909  TestBB = BBTerm->getSuccessor(i);
910  unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
911  if (NumPreds < MinNumPreds) {
912  MinSucc = i;
913  MinNumPreds = NumPreds;
914  }
915  }
916 
917  return MinSucc;
918 }
919 
921  if (!BB->hasAddressTaken()) return false;
922 
923  // If the block has its address taken, it may be a tree of dead constants
924  // hanging off of it. These shouldn't keep the block alive.
927  return !BA->use_empty();
928 }
929 
930 /// ProcessBlock - If there are any predecessors whose control can be threaded
931 /// through to a successor, transform them now.
933  // If the block is trivially dead, just return and let the caller nuke it.
934  // This simplifies other transformations.
935  if (pred_empty(BB) &&
936  BB != &BB->getParent()->getEntryBlock())
937  return false;
938 
939  // If this block has a single predecessor, and if that pred has a single
940  // successor, merge the blocks. This encourages recursive jump threading
941  // because now the condition in this block can be threaded through
942  // predecessors of our predecessor block.
943  if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
944  const TerminatorInst *TI = SinglePred->getTerminator();
945  if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
946  SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
947  // If SinglePred was a loop header, BB becomes one.
948  if (LoopHeaders.erase(SinglePred))
949  LoopHeaders.insert(BB);
950 
951  LVI->eraseBlock(SinglePred);
953 
954  // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
955  // BB code within one basic block `BB`), we need to invalidate the LVI
956  // information associated with BB, because the LVI information need not be
957  // true for all of BB after the merge. For example,
958  // Before the merge, LVI info and code is as follows:
959  // SinglePred: <LVI info1 for %p val>
960  // %y = use of %p
961  // call @exit() // need not transfer execution to successor.
962  // assume(%p) // from this point on %p is true
963  // br label %BB
964  // BB: <LVI info2 for %p val, i.e. %p is true>
965  // %x = use of %p
966  // br label exit
967  //
968  // Note that this LVI info for blocks BB and SinglPred is correct for %p
969  // (info2 and info1 respectively). After the merge and the deletion of the
970  // LVI info1 for SinglePred. We have the following code:
971  // BB: <LVI info2 for %p val>
972  // %y = use of %p
973  // call @exit()
974  // assume(%p)
975  // %x = use of %p <-- LVI info2 is correct from here onwards.
976  // br label exit
977  // LVI info2 for BB is incorrect at the beginning of BB.
978 
979  // Invalidate LVI information for BB if the LVI is not provably true for
980  // all of BB.
981  if (any_of(*BB, [](Instruction &I) {
983  }))
984  LVI->eraseBlock(BB);
985  return true;
986  }
987  }
988 
989  if (TryToUnfoldSelectInCurrBB(BB))
990  return true;
991 
992  // Look if we can propagate guards to predecessors.
993  if (HasGuards && ProcessGuards(BB))
994  return true;
995 
996  // What kind of constant we're looking for.
998 
999  // Look to see if the terminator is a conditional branch, switch or indirect
1000  // branch, if not we can't thread it.
1001  Value *Condition;
1003  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
1004  // Can't thread an unconditional jump.
1005  if (BI->isUnconditional()) return false;
1006  Condition = BI->getCondition();
1007  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
1008  Condition = SI->getCondition();
1009  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
1010  // Can't thread indirect branch with no successors.
1011  if (IB->getNumSuccessors() == 0) return false;
1012  Condition = IB->getAddress()->stripPointerCasts();
1013  Preference = WantBlockAddress;
1014  } else {
1015  return false; // Must be an invoke.
1016  }
1017 
1018  // Run constant folding to see if we can reduce the condition to a simple
1019  // constant.
1020  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1021  Value *SimpleVal =
1023  if (SimpleVal) {
1024  I->replaceAllUsesWith(SimpleVal);
1025  if (isInstructionTriviallyDead(I, TLI))
1026  I->eraseFromParent();
1027  Condition = SimpleVal;
1028  }
1029  }
1030 
1031  // If the terminator is branching on an undef, we can pick any of the
1032  // successors to branch to. Let GetBestDestForJumpOnUndef decide.
1033  if (isa<UndefValue>(Condition)) {
1034  unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
1035 
1036  // Fold the branch/switch.
1037  TerminatorInst *BBTerm = BB->getTerminator();
1038  for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1039  if (i == BestSucc) continue;
1040  BBTerm->getSuccessor(i)->removePredecessor(BB, true);
1041  }
1042 
1043  DEBUG(dbgs() << " In block '" << BB->getName()
1044  << "' folding undef terminator: " << *BBTerm << '\n');
1045  BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1046  BBTerm->eraseFromParent();
1047  return true;
1048  }
1049 
1050  // If the terminator of this block is branching on a constant, simplify the
1051  // terminator to an unconditional branch. This can occur due to threading in
1052  // other blocks.
1053  if (getKnownConstant(Condition, Preference)) {
1054  DEBUG(dbgs() << " In block '" << BB->getName()
1055  << "' folding terminator: " << *BB->getTerminator() << '\n');
1056  ++NumFolds;
1057  ConstantFoldTerminator(BB, true);
1058  return true;
1059  }
1060 
1061  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1062 
1063  // All the rest of our checks depend on the condition being an instruction.
1064  if (!CondInst) {
1065  // FIXME: Unify this with code below.
1066  if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
1067  return true;
1068  return false;
1069  }
1070 
1071  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
1072  // If we're branching on a conditional, LVI might be able to determine
1073  // it's value at the branch instruction. We only handle comparisons
1074  // against a constant at this time.
1075  // TODO: This should be extended to handle switches as well.
1076  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
1077  Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
1078  if (CondBr && CondConst) {
1079  // We should have returned as soon as we turn a conditional branch to
1080  // unconditional. Because its no longer interesting as far as jump
1081  // threading is concerned.
1082  assert(CondBr->isConditional() && "Threading on unconditional terminator");
1083 
1085  LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1086  CondConst, CondBr);
1087  if (Ret != LazyValueInfo::Unknown) {
1088  unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
1089  unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
1090  CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
1091  BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
1092  CondBr->eraseFromParent();
1093  if (CondCmp->use_empty())
1094  CondCmp->eraseFromParent();
1095  // We can safely replace *some* uses of the CondInst if it has
1096  // exactly one value as returned by LVI. RAUW is incorrect in the
1097  // presence of guards and assumes, that have the `Cond` as the use. This
1098  // is because we use the guards/assume to reason about the `Cond` value
1099  // at the end of block, but RAUW unconditionally replaces all uses
1100  // including the guards/assumes themselves and the uses before the
1101  // guard/assume.
1102  else if (CondCmp->getParent() == BB) {
1103  auto *CI = Ret == LazyValueInfo::True ?
1104  ConstantInt::getTrue(CondCmp->getType()) :
1105  ConstantInt::getFalse(CondCmp->getType());
1106  ReplaceFoldableUses(CondCmp, CI);
1107  }
1108  return true;
1109  }
1110 
1111  // We did not manage to simplify this branch, try to see whether
1112  // CondCmp depends on a known phi-select pattern.
1113  if (TryToUnfoldSelect(CondCmp, BB))
1114  return true;
1115  }
1116  }
1117 
1118  // Check for some cases that are worth simplifying. Right now we want to look
1119  // for loads that are used by a switch or by the condition for the branch. If
1120  // we see one, check to see if it's partially redundant. If so, insert a PHI
1121  // which can then be used to thread the values.
1122  Value *SimplifyValue = CondInst;
1123  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1124  if (isa<Constant>(CondCmp->getOperand(1)))
1125  SimplifyValue = CondCmp->getOperand(0);
1126 
1127  // TODO: There are other places where load PRE would be profitable, such as
1128  // more complex comparisons.
1129  if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
1130  if (SimplifyPartiallyRedundantLoad(LI))
1131  return true;
1132 
1133  // Before threading, try to propagate profile data backwards:
1134  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1135  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1137 
1138  // Handle a variety of cases where we are branching on something derived from
1139  // a PHI node in the current block. If we can prove that any predecessors
1140  // compute a predictable value based on a PHI node, thread those predecessors.
1141  if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
1142  return true;
1143 
1144  // If this is an otherwise-unfoldable branch on a phi node in the current
1145  // block, see if we can simplify.
1146  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1147  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1148  return ProcessBranchOnPHI(PN);
1149 
1150  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1151  if (CondInst->getOpcode() == Instruction::Xor &&
1152  CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1153  return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
1154 
1155  // Search for a stronger dominating condition that can be used to simplify a
1156  // conditional branch leaving BB.
1157  if (ProcessImpliedCondition(BB))
1158  return true;
1159 
1160  return false;
1161 }
1162 
1164  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1165  if (!BI || !BI->isConditional())
1166  return false;
1167 
1168  Value *Cond = BI->getCondition();
1169  BasicBlock *CurrentBB = BB;
1170  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1171  unsigned Iter = 0;
1172 
1173  auto &DL = BB->getModule()->getDataLayout();
1174 
1175  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1176  auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1177  if (!PBI || !PBI->isConditional())
1178  return false;
1179  if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1180  return false;
1181 
1182  bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1183  Optional<bool> Implication =
1184  isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1185  if (Implication) {
1186  BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB);
1187  BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI);
1188  BI->eraseFromParent();
1189  return true;
1190  }
1191  CurrentBB = CurrentPred;
1192  CurrentPred = CurrentBB->getSinglePredecessor();
1193  }
1194 
1195  return false;
1196 }
1197 
1198 /// Return true if Op is an instruction defined in the given block.
1200  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1201  if (OpInst->getParent() == BB)
1202  return true;
1203  return false;
1204 }
1205 
1206 /// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
1207 /// load instruction, eliminate it by replacing it with a PHI node. This is an
1208 /// important optimization that encourages jump threading, and needs to be run
1209 /// interlaced with other jump threading tasks.
1211  // Don't hack volatile and ordered loads.
1212  if (!LI->isUnordered()) return false;
1213 
1214  // If the load is defined in a block with exactly one predecessor, it can't be
1215  // partially redundant.
1216  BasicBlock *LoadBB = LI->getParent();
1217  if (LoadBB->getSinglePredecessor())
1218  return false;
1219 
1220  // If the load is defined in an EH pad, it can't be partially redundant,
1221  // because the edges between the invoke and the EH pad cannot have other
1222  // instructions between them.
1223  if (LoadBB->isEHPad())
1224  return false;
1225 
1226  Value *LoadedPtr = LI->getOperand(0);
1227 
1228  // If the loaded operand is defined in the LoadBB and its not a phi,
1229  // it can't be available in predecessors.
1230  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1231  return false;
1232 
1233  // Scan a few instructions up from the load, to see if it is obviously live at
1234  // the entry to its block.
1235  BasicBlock::iterator BBIt(LI);
1236  bool IsLoadCSE;
1237  if (Value *AvailableVal = FindAvailableLoadedValue(
1238  LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1239  // If the value of the load is locally available within the block, just use
1240  // it. This frequently occurs for reg2mem'd allocas.
1241 
1242  if (IsLoadCSE) {
1243  LoadInst *NLI = cast<LoadInst>(AvailableVal);
1244  combineMetadataForCSE(NLI, LI);
1245  };
1246 
1247  // If the returned value is the load itself, replace with an undef. This can
1248  // only happen in dead loops.
1249  if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
1250  if (AvailableVal->getType() != LI->getType())
1251  AvailableVal =
1252  CastInst::CreateBitOrPointerCast(AvailableVal, LI->getType(), "", LI);
1253  LI->replaceAllUsesWith(AvailableVal);
1254  LI->eraseFromParent();
1255  return true;
1256  }
1257 
1258  // Otherwise, if we scanned the whole block and got to the top of the block,
1259  // we know the block is locally transparent to the load. If not, something
1260  // might clobber its value.
1261  if (BBIt != LoadBB->begin())
1262  return false;
1263 
1264  // If all of the loads and stores that feed the value have the same AA tags,
1265  // then we can propagate them onto any newly inserted loads.
1266  AAMDNodes AATags;
1267  LI->getAAMetadata(AATags);
1268 
1269  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1270 
1271  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1272 
1273  AvailablePredsTy AvailablePreds;
1274  BasicBlock *OneUnavailablePred = nullptr;
1275  SmallVector<LoadInst*, 8> CSELoads;
1276 
1277  // If we got here, the loaded value is transparent through to the start of the
1278  // block. Check to see if it is available in any of the predecessor blocks.
1279  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1280  // If we already scanned this predecessor, skip it.
1281  if (!PredsScanned.insert(PredBB).second)
1282  continue;
1283 
1284  BBIt = PredBB->end();
1285  unsigned NumScanedInst = 0;
1286  Value *PredAvailable = nullptr;
1287  // NOTE: We don't CSE load that is volatile or anything stronger than
1288  // unordered, that should have been checked when we entered the function.
1289  assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
1290  // If this is a load on a phi pointer, phi-translate it and search
1291  // for available load/store to the pointer in predecessors.
1292  Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
1293  PredAvailable = FindAvailablePtrLoadStore(
1294  Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1295  AA, &IsLoadCSE, &NumScanedInst);
1296 
1297  // If PredBB has a single predecessor, continue scanning through the
1298  // single precessor.
1299  BasicBlock *SinglePredBB = PredBB;
1300  while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1301  NumScanedInst < DefMaxInstsToScan) {
1302  SinglePredBB = SinglePredBB->getSinglePredecessor();
1303  if (SinglePredBB) {
1304  BBIt = SinglePredBB->end();
1305  PredAvailable = FindAvailablePtrLoadStore(
1306  Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
1307  (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1308  &NumScanedInst);
1309  }
1310  }
1311 
1312  if (!PredAvailable) {
1313  OneUnavailablePred = PredBB;
1314  continue;
1315  }
1316 
1317  if (IsLoadCSE)
1318  CSELoads.push_back(cast<LoadInst>(PredAvailable));
1319 
1320  // If so, this load is partially redundant. Remember this info so that we
1321  // can create a PHI node.
1322  AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
1323  }
1324 
1325  // If the loaded value isn't available in any predecessor, it isn't partially
1326  // redundant.
1327  if (AvailablePreds.empty()) return false;
1328 
1329  // Okay, the loaded value is available in at least one (and maybe all!)
1330  // predecessors. If the value is unavailable in more than one unique
1331  // predecessor, we want to insert a merge block for those common predecessors.
1332  // This ensures that we only have to insert one reload, thus not increasing
1333  // code size.
1334  BasicBlock *UnavailablePred = nullptr;
1335 
1336  // If there is exactly one predecessor where the value is unavailable, the
1337  // already computed 'OneUnavailablePred' block is it. If it ends in an
1338  // unconditional branch, we know that it isn't a critical edge.
1339  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1340  OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1341  UnavailablePred = OneUnavailablePred;
1342  } else if (PredsScanned.size() != AvailablePreds.size()) {
1343  // Otherwise, we had multiple unavailable predecessors or we had a critical
1344  // edge from the one.
1345  SmallVector<BasicBlock*, 8> PredsToSplit;
1346  SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1347 
1348  for (const auto &AvailablePred : AvailablePreds)
1349  AvailablePredSet.insert(AvailablePred.first);
1350 
1351  // Add all the unavailable predecessors to the PredsToSplit list.
1352  for (BasicBlock *P : predecessors(LoadBB)) {
1353  // If the predecessor is an indirect goto, we can't split the edge.
1354  if (isa<IndirectBrInst>(P->getTerminator()))
1355  return false;
1356 
1357  if (!AvailablePredSet.count(P))
1358  PredsToSplit.push_back(P);
1359  }
1360 
1361  // Split them out to their own block.
1362  UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1363  }
1364 
1365  // If the value isn't available in all predecessors, then there will be
1366  // exactly one where it isn't available. Insert a load on that edge and add
1367  // it to the AvailablePreds list.
1368  if (UnavailablePred) {
1369  assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1370  "Can't handle critical edge here!");
1371  LoadInst *NewVal = new LoadInst(
1372  LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1373  LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
1374  LI->getSyncScopeID(), UnavailablePred->getTerminator());
1375  NewVal->setDebugLoc(LI->getDebugLoc());
1376  if (AATags)
1377  NewVal->setAAMetadata(AATags);
1378 
1379  AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
1380  }
1381 
1382  // Now we know that each predecessor of this block has a value in
1383  // AvailablePreds, sort them for efficient access as we're walking the preds.
1384  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1385 
1386  // Create a PHI node at the start of the block for the PRE'd load value.
1387  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1388  PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
1389  &LoadBB->front());
1390  PN->takeName(LI);
1391  PN->setDebugLoc(LI->getDebugLoc());
1392 
1393  // Insert new entries into the PHI for each predecessor. A single block may
1394  // have multiple entries here.
1395  for (pred_iterator PI = PB; PI != PE; ++PI) {
1396  BasicBlock *P = *PI;
1397  AvailablePredsTy::iterator I =
1398  std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
1399  std::make_pair(P, (Value*)nullptr));
1400 
1401  assert(I != AvailablePreds.end() && I->first == P &&
1402  "Didn't find entry for predecessor!");
1403 
1404  // If we have an available predecessor but it requires casting, insert the
1405  // cast in the predecessor and use the cast. Note that we have to update the
1406  // AvailablePreds vector as we go so that all of the PHI entries for this
1407  // predecessor use the same bitcast.
1408  Value *&PredV = I->second;
1409  if (PredV->getType() != LI->getType())
1410  PredV = CastInst::CreateBitOrPointerCast(PredV, LI->getType(), "",
1411  P->getTerminator());
1412 
1413  PN->addIncoming(PredV, I->first);
1414  }
1415 
1416  for (LoadInst *PredLI : CSELoads) {
1417  combineMetadataForCSE(PredLI, LI);
1418  }
1419 
1420  LI->replaceAllUsesWith(PN);
1421  LI->eraseFromParent();
1422 
1423  return true;
1424 }
1425 
1426 /// FindMostPopularDest - The specified list contains multiple possible
1427 /// threadable destinations. Pick the one that occurs the most frequently in
1428 /// the list.
1429 static BasicBlock *
1431  const SmallVectorImpl<std::pair<BasicBlock *,
1432  BasicBlock *>> &PredToDestList) {
1433  assert(!PredToDestList.empty());
1434 
1435  // Determine popularity. If there are multiple possible destinations, we
1436  // explicitly choose to ignore 'undef' destinations. We prefer to thread
1437  // blocks with known and real destinations to threading undef. We'll handle
1438  // them later if interesting.
1439  DenseMap<BasicBlock*, unsigned> DestPopularity;
1440  for (const auto &PredToDest : PredToDestList)
1441  if (PredToDest.second)
1442  DestPopularity[PredToDest.second]++;
1443 
1444  // Find the most popular dest.
1445  DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
1446  BasicBlock *MostPopularDest = DPI->first;
1447  unsigned Popularity = DPI->second;
1448  SmallVector<BasicBlock*, 4> SamePopularity;
1449 
1450  for (++DPI; DPI != DestPopularity.end(); ++DPI) {
1451  // If the popularity of this entry isn't higher than the popularity we've
1452  // seen so far, ignore it.
1453  if (DPI->second < Popularity)
1454  ; // ignore.
1455  else if (DPI->second == Popularity) {
1456  // If it is the same as what we've seen so far, keep track of it.
1457  SamePopularity.push_back(DPI->first);
1458  } else {
1459  // If it is more popular, remember it.
1460  SamePopularity.clear();
1461  MostPopularDest = DPI->first;
1462  Popularity = DPI->second;
1463  }
1464  }
1465 
1466  // Okay, now we know the most popular destination. If there is more than one
1467  // destination, we need to determine one. This is arbitrary, but we need
1468  // to make a deterministic decision. Pick the first one that appears in the
1469  // successor list.
1470  if (!SamePopularity.empty()) {
1471  SamePopularity.push_back(MostPopularDest);
1472  TerminatorInst *TI = BB->getTerminator();
1473  for (unsigned i = 0; ; ++i) {
1474  assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
1475 
1476  if (!is_contained(SamePopularity, TI->getSuccessor(i)))
1477  continue;
1478 
1479  MostPopularDest = TI->getSuccessor(i);
1480  break;
1481  }
1482  }
1483 
1484  // Okay, we have finally picked the most popular destination.
1485  return MostPopularDest;
1486 }
1487 
1490  Instruction *CxtI) {
1491  // If threading this would thread across a loop header, don't even try to
1492  // thread the edge.
1493  if (LoopHeaders.count(BB))
1494  return false;
1495 
1496  PredValueInfoTy PredValues;
1497  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
1498  return false;
1499 
1500  assert(!PredValues.empty() &&
1501  "ComputeValueKnownInPredecessors returned true with no values");
1502 
1503  DEBUG(dbgs() << "IN BB: " << *BB;
1504  for (const auto &PredValue : PredValues) {
1505  dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
1506  << *PredValue.first
1507  << " for pred '" << PredValue.second->getName() << "'.\n";
1508  });
1509 
1510  // Decide what we want to thread through. Convert our list of known values to
1511  // a list of known destinations for each pred. This also discards duplicate
1512  // predecessors and keeps track of the undefined inputs (which are represented
1513  // as a null dest in the PredToDestList).
1514  SmallPtrSet<BasicBlock*, 16> SeenPreds;
1516 
1517  BasicBlock *OnlyDest = nullptr;
1518  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1519  Constant *OnlyVal = nullptr;
1520  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1521 
1522  unsigned PredWithKnownDest = 0;
1523  for (const auto &PredValue : PredValues) {
1524  BasicBlock *Pred = PredValue.second;
1525  if (!SeenPreds.insert(Pred).second)
1526  continue; // Duplicate predecessor entry.
1527 
1528  Constant *Val = PredValue.first;
1529 
1530  BasicBlock *DestBB;
1531  if (isa<UndefValue>(Val))
1532  DestBB = nullptr;
1533  else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1534  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1535  DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1536  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1537  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1538  DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1539  } else {
1540  assert(isa<IndirectBrInst>(BB->getTerminator())
1541  && "Unexpected terminator");
1542  assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1543  DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1544  }
1545 
1546  // If we have exactly one destination, remember it for efficiency below.
1547  if (PredToDestList.empty()) {
1548  OnlyDest = DestBB;
1549  OnlyVal = Val;
1550  } else {
1551  if (OnlyDest != DestBB)
1552  OnlyDest = MultipleDestSentinel;
1553  // It possible we have same destination, but different value, e.g. default
1554  // case in switchinst.
1555  if (Val != OnlyVal)
1556  OnlyVal = MultipleVal;
1557  }
1558 
1559  // We know where this predecessor is going.
1560  ++PredWithKnownDest;
1561 
1562  // If the predecessor ends with an indirect goto, we can't change its
1563  // destination.
1564  if (isa<IndirectBrInst>(Pred->getTerminator()))
1565  continue;
1566 
1567  PredToDestList.push_back(std::make_pair(Pred, DestBB));
1568  }
1569 
1570  // If all edges were unthreadable, we fail.
1571  if (PredToDestList.empty())
1572  return false;
1573 
1574  // If all the predecessors go to a single known successor, we want to fold,
1575  // not thread. By doing so, we do not need to duplicate the current block and
1576  // also miss potential opportunities in case we dont/cant duplicate.
1577  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1578  if (PredWithKnownDest ==
1579  (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
1580  bool SeenFirstBranchToOnlyDest = false;
1581  for (BasicBlock *SuccBB : successors(BB)) {
1582  if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest)
1583  SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1584  else
1585  SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1586  }
1587 
1588  // Finally update the terminator.
1589  TerminatorInst *Term = BB->getTerminator();
1590  BranchInst::Create(OnlyDest, Term);
1591  Term->eraseFromParent();
1592 
1593  // If the condition is now dead due to the removal of the old terminator,
1594  // erase it.
1595  if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1596  if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1597  CondInst->eraseFromParent();
1598  // We can safely replace *some* uses of the CondInst if it has
1599  // exactly one value as returned by LVI. RAUW is incorrect in the
1600  // presence of guards and assumes, that have the `Cond` as the use. This
1601  // is because we use the guards/assume to reason about the `Cond` value
1602  // at the end of block, but RAUW unconditionally replaces all uses
1603  // including the guards/assumes themselves and the uses before the
1604  // guard/assume.
1605  else if (OnlyVal && OnlyVal != MultipleVal &&
1606  CondInst->getParent() == BB)
1607  ReplaceFoldableUses(CondInst, OnlyVal);
1608  }
1609  return true;
1610  }
1611  }
1612 
1613  // Determine which is the most common successor. If we have many inputs and
1614  // this block is a switch, we want to start by threading the batch that goes
1615  // to the most popular destination first. If we only know about one
1616  // threadable destination (the common case) we can avoid this.
1617  BasicBlock *MostPopularDest = OnlyDest;
1618 
1619  if (MostPopularDest == MultipleDestSentinel)
1620  MostPopularDest = FindMostPopularDest(BB, PredToDestList);
1621 
1622  // Now that we know what the most popular destination is, factor all
1623  // predecessors that will jump to it into a single predecessor.
1624  SmallVector<BasicBlock*, 16> PredsToFactor;
1625  for (const auto &PredToDest : PredToDestList)
1626  if (PredToDest.second == MostPopularDest) {
1627  BasicBlock *Pred = PredToDest.first;
1628 
1629  // This predecessor may be a switch or something else that has multiple
1630  // edges to the block. Factor each of these edges by listing them
1631  // according to # occurrences in PredsToFactor.
1632  for (BasicBlock *Succ : successors(Pred))
1633  if (Succ == BB)
1634  PredsToFactor.push_back(Pred);
1635  }
1636 
1637  // If the threadable edges are branching on an undefined value, we get to pick
1638  // the destination that these predecessors should get to.
1639  if (!MostPopularDest)
1640  MostPopularDest = BB->getTerminator()->
1641  getSuccessor(GetBestDestForJumpOnUndef(BB));
1642 
1643  // Ok, try to thread it!
1644  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
1645 }
1646 
1647 /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
1648 /// a PHI node in the current block. See if there are any simplifications we
1649 /// can do based on inputs to the phi node.
1651  BasicBlock *BB = PN->getParent();
1652 
1653  // TODO: We could make use of this to do it once for blocks with common PHI
1654  // values.
1656  PredBBs.resize(1);
1657 
1658  // If any of the predecessor blocks end in an unconditional branch, we can
1659  // *duplicate* the conditional branch into that block in order to further
1660  // encourage jump threading and to eliminate cases where we have branch on a
1661  // phi of an icmp (branch on icmp is much better).
1662  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1663  BasicBlock *PredBB = PN->getIncomingBlock(i);
1664  if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1665  if (PredBr->isUnconditional()) {
1666  PredBBs[0] = PredBB;
1667  // Try to duplicate BB into PredBB.
1668  if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1669  return true;
1670  }
1671  }
1672 
1673  return false;
1674 }
1675 
1676 /// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
1677 /// a xor instruction in the current block. See if there are any
1678 /// simplifications we can do based on inputs to the xor.
1680  BasicBlock *BB = BO->getParent();
1681 
1682  // If either the LHS or RHS of the xor is a constant, don't do this
1683  // optimization.
1684  if (isa<ConstantInt>(BO->getOperand(0)) ||
1685  isa<ConstantInt>(BO->getOperand(1)))
1686  return false;
1687 
1688  // If the first instruction in BB isn't a phi, we won't be able to infer
1689  // anything special about any particular predecessor.
1690  if (!isa<PHINode>(BB->front()))
1691  return false;
1692 
1693  // If this BB is a landing pad, we won't be able to split the edge into it.
1694  if (BB->isEHPad())
1695  return false;
1696 
1697  // If we have a xor as the branch input to this block, and we know that the
1698  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1699  // the condition into the predecessor and fix that value to true, saving some
1700  // logical ops on that path and encouraging other paths to simplify.
1701  //
1702  // This copies something like this:
1703  //
1704  // BB:
1705  // %X = phi i1 [1], [%X']
1706  // %Y = icmp eq i32 %A, %B
1707  // %Z = xor i1 %X, %Y
1708  // br i1 %Z, ...
1709  //
1710  // Into:
1711  // BB':
1712  // %Y = icmp ne i32 %A, %B
1713  // br i1 %Y, ...
1714 
1715  PredValueInfoTy XorOpValues;
1716  bool isLHS = true;
1717  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1718  WantInteger, BO)) {
1719  assert(XorOpValues.empty());
1720  if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1721  WantInteger, BO))
1722  return false;
1723  isLHS = false;
1724  }
1725 
1726  assert(!XorOpValues.empty() &&
1727  "ComputeValueKnownInPredecessors returned true with no values");
1728 
1729  // Scan the information to see which is most popular: true or false. The
1730  // predecessors can be of the set true, false, or undef.
1731  unsigned NumTrue = 0, NumFalse = 0;
1732  for (const auto &XorOpValue : XorOpValues) {
1733  if (isa<UndefValue>(XorOpValue.first))
1734  // Ignore undefs for the count.
1735  continue;
1736  if (cast<ConstantInt>(XorOpValue.first)->isZero())
1737  ++NumFalse;
1738  else
1739  ++NumTrue;
1740  }
1741 
1742  // Determine which value to split on, true, false, or undef if neither.
1743  ConstantInt *SplitVal = nullptr;
1744  if (NumTrue > NumFalse)
1745  SplitVal = ConstantInt::getTrue(BB->getContext());
1746  else if (NumTrue != 0 || NumFalse != 0)
1747  SplitVal = ConstantInt::getFalse(BB->getContext());
1748 
1749  // Collect all of the blocks that this can be folded into so that we can
1750  // factor this once and clone it once.
1751  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1752  for (const auto &XorOpValue : XorOpValues) {
1753  if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1754  continue;
1755 
1756  BlocksToFoldInto.push_back(XorOpValue.second);
1757  }
1758 
1759  // If we inferred a value for all of the predecessors, then duplication won't
1760  // help us. However, we can just replace the LHS or RHS with the constant.
1761  if (BlocksToFoldInto.size() ==
1762  cast<PHINode>(BB->front()).getNumIncomingValues()) {
1763  if (!SplitVal) {
1764  // If all preds provide undef, just nuke the xor, because it is undef too.
1766  BO->eraseFromParent();
1767  } else if (SplitVal->isZero()) {
1768  // If all preds provide 0, replace the xor with the other input.
1769  BO->replaceAllUsesWith(BO->getOperand(isLHS));
1770  BO->eraseFromParent();
1771  } else {
1772  // If all preds provide 1, set the computed value to 1.
1773  BO->setOperand(!isLHS, SplitVal);
1774  }
1775 
1776  return true;
1777  }
1778 
1779  // Try to duplicate BB into PredBB.
1780  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1781 }
1782 
1783 /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1784 /// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1785 /// NewPred using the entries from OldPred (suitably mapped).
1787  BasicBlock *OldPred,
1788  BasicBlock *NewPred,
1790  for (BasicBlock::iterator PNI = PHIBB->begin();
1791  PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
1792  // Ok, we have a PHI node. Figure out what the incoming value was for the
1793  // DestBlock.
1794  Value *IV = PN->getIncomingValueForBlock(OldPred);
1795 
1796  // Remap the value if necessary.
1797  if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1799  if (I != ValueMap.end())
1800  IV = I->second;
1801  }
1802 
1803  PN->addIncoming(IV, NewPred);
1804  }
1805 }
1806 
1807 /// ThreadEdge - We have decided that it is safe and profitable to factor the
1808 /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
1809 /// across BB. Transform the IR to reflect this change.
1811  const SmallVectorImpl<BasicBlock *> &PredBBs,
1812  BasicBlock *SuccBB) {
1813  // If threading to the same block as we come from, we would infinite loop.
1814  if (SuccBB == BB) {
1815  DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
1816  << "' - would thread to self!\n");
1817  return false;
1818  }
1819 
1820  // If threading this would thread across a loop header, don't thread the edge.
1821  // See the comments above FindLoopHeaders for justifications and caveats.
1822  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
1823  DEBUG({
1824  bool BBIsHeader = LoopHeaders.count(BB);
1825  bool SuccIsHeader = LoopHeaders.count(SuccBB);
1826  dbgs() << " Not threading across "
1827  << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
1828  << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
1829  << SuccBB->getName() << "' - it might create an irreducible loop!\n";
1830  });
1831  return false;
1832  }
1833 
1834  unsigned JumpThreadCost =
1835  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
1836  if (JumpThreadCost > BBDupThreshold) {
1837  DEBUG(dbgs() << " Not threading BB '" << BB->getName()
1838  << "' - Cost is too high: " << JumpThreadCost << "\n");
1839  return false;
1840  }
1841 
1842  // And finally, do it! Start by factoring the predecessors if needed.
1843  BasicBlock *PredBB;
1844  if (PredBBs.size() == 1)
1845  PredBB = PredBBs[0];
1846  else {
1847  DEBUG(dbgs() << " Factoring out " << PredBBs.size()
1848  << " common predecessors.\n");
1849  PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
1850  }
1851 
1852  // And finally, do it!
1853  DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
1854  << SuccBB->getName() << "' with cost: " << JumpThreadCost
1855  << ", across block:\n "
1856  << *BB << "\n");
1857 
1858  LVI->threadEdge(PredBB, BB, SuccBB);
1859 
1860  // We are going to have to map operands from the original BB block to the new
1861  // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
1862  // account for entry from PredBB.
1864 
1865  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
1866  BB->getName()+".thread",
1867  BB->getParent(), BB);
1868  NewBB->moveAfter(PredBB);
1869 
1870  // Set the block frequency of NewBB.
1871  if (HasProfileData) {
1872  auto NewBBFreq =
1873  BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
1874  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
1875  }
1876 
1877  BasicBlock::iterator BI = BB->begin();
1878  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
1879  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
1880 
1881  // Clone the non-phi instructions of BB into NewBB, keeping track of the
1882  // mapping and using it to remap operands in the cloned instructions.
1883  for (; !isa<TerminatorInst>(BI); ++BI) {
1884  Instruction *New = BI->clone();
1885  New->setName(BI->getName());
1886  NewBB->getInstList().push_back(New);
1887  ValueMapping[&*BI] = New;
1888 
1889  // Remap operands to patch up intra-block references.
1890  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
1891  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
1892  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
1893  if (I != ValueMapping.end())
1894  New->setOperand(i, I->second);
1895  }
1896  }
1897 
1898  // We didn't copy the terminator from BB over to NewBB, because there is now
1899  // an unconditional jump to SuccBB. Insert the unconditional jump.
1900  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
1901  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
1902 
1903  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
1904  // PHI nodes for NewBB now.
1905  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
1906 
1907  // If there were values defined in BB that are used outside the block, then we
1908  // now have to update all uses of the value to use either the original value,
1909  // the cloned value, or some PHI derived value. This can require arbitrary
1910  // PHI insertion, of which we are prepared to do, clean these up now.
1911  SSAUpdater SSAUpdate;
1912  SmallVector<Use*, 16> UsesToRename;
1913  for (Instruction &I : *BB) {
1914  // Scan all uses of this instruction to see if it is used outside of its
1915  // block, and if so, record them in UsesToRename.
1916  for (Use &U : I.uses()) {
1917  Instruction *User = cast<Instruction>(U.getUser());
1918  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1919  if (UserPN->getIncomingBlock(U) == BB)
1920  continue;
1921  } else if (User->getParent() == BB)
1922  continue;
1923 
1924  UsesToRename.push_back(&U);
1925  }
1926 
1927  // If there are no uses outside the block, we're done with this instruction.
1928  if (UsesToRename.empty())
1929  continue;
1930 
1931  DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1932 
1933  // We found a use of I outside of BB. Rename all uses of I that are outside
1934  // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
1935  // with the two values we know.
1936  SSAUpdate.Initialize(I.getType(), I.getName());
1937  SSAUpdate.AddAvailableValue(BB, &I);
1938  SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1939 
1940  while (!UsesToRename.empty())
1941  SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1942  DEBUG(dbgs() << "\n");
1943  }
1944 
1945  // Ok, NewBB is good to go. Update the terminator of PredBB to jump to
1946  // NewBB instead of BB. This eliminates predecessors from BB, which requires
1947  // us to simplify any PHI nodes in BB.
1948  TerminatorInst *PredTerm = PredBB->getTerminator();
1949  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
1950  if (PredTerm->getSuccessor(i) == BB) {
1951  BB->removePredecessor(PredBB, true);
1952  PredTerm->setSuccessor(i, NewBB);
1953  }
1954 
1955  // At this point, the IR is fully up to date and consistent. Do a quick scan
1956  // over the new instructions and zap any that are constants or dead. This
1957  // frequently happens because of phi translation.
1958  SimplifyInstructionsInBlock(NewBB, TLI);
1959 
1960  // Update the edge weight from BB to SuccBB, which should be less than before.
1961  UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
1962 
1963  // Threaded an edge!
1964  ++NumThreads;
1965  return true;
1966 }
1967 
1968 /// Create a new basic block that will be the predecessor of BB and successor of
1969 /// all blocks in Preds. When profile data is available, update the frequency of
1970 /// this new block.
1971 BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
1972  ArrayRef<BasicBlock *> Preds,
1973  const char *Suffix) {
1974  // Collect the frequencies of all predecessors of BB, which will be used to
1975  // update the edge weight on BB->SuccBB.
1976  BlockFrequency PredBBFreq(0);
1977  if (HasProfileData)
1978  for (auto Pred : Preds)
1979  PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
1980 
1981  BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
1982 
1983  // Set the block frequency of the newly created PredBB, which is the sum of
1984  // frequencies of Preds.
1985  if (HasProfileData)
1986  BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
1987  return PredBB;
1988 }
1989 
1990 bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
1991  const TerminatorInst *TI = BB->getTerminator();
1992  assert(TI->getNumSuccessors() > 1 && "not a split");
1993 
1994  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
1995  if (!WeightsNode)
1996  return false;
1997 
1998  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
1999  if (MDName->getString() != "branch_weights")
2000  return false;
2001 
2002  // Ensure there are weights for all of the successors. Note that the first
2003  // operand to the metadata node is a name, not a weight.
2004  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
2005 }
2006 
2007 /// Update the block frequency of BB and branch weight and the metadata on the
2008 /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2009 /// Freq(PredBB->BB) / Freq(BB->SuccBB).
2010 void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2011  BasicBlock *BB,
2012  BasicBlock *NewBB,
2013  BasicBlock *SuccBB) {
2014  if (!HasProfileData)
2015  return;
2016 
2017  assert(BFI && BPI && "BFI & BPI should have been created here");
2018 
2019  // As the edge from PredBB to BB is deleted, we have to update the block
2020  // frequency of BB.
2021  auto BBOrigFreq = BFI->getBlockFreq(BB);
2022  auto NewBBFreq = BFI->getBlockFreq(NewBB);
2023  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2024  auto BBNewFreq = BBOrigFreq - NewBBFreq;
2025  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2026 
2027  // Collect updated outgoing edges' frequencies from BB and use them to update
2028  // edge probabilities.
2029  SmallVector<uint64_t, 4> BBSuccFreq;
2030  for (BasicBlock *Succ : successors(BB)) {
2031  auto SuccFreq = (Succ == SuccBB)
2032  ? BB2SuccBBFreq - NewBBFreq
2033  : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2034  BBSuccFreq.push_back(SuccFreq.getFrequency());
2035  }
2036 
2037  uint64_t MaxBBSuccFreq =
2038  *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2039 
2041  if (MaxBBSuccFreq == 0)
2042  BBSuccProbs.assign(BBSuccFreq.size(),
2043  {1, static_cast<unsigned>(BBSuccFreq.size())});
2044  else {
2045  for (uint64_t Freq : BBSuccFreq)
2046  BBSuccProbs.push_back(
2047  BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2048  // Normalize edge probabilities so that they sum up to one.
2049  BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
2050  BBSuccProbs.end());
2051  }
2052 
2053  // Update edge probabilities in BPI.
2054  for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
2055  BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
2056 
2057  // Update the profile metadata as well.
2058  //
2059  // Don't do this if the profile of the transformed blocks was statically
2060  // estimated. (This could occur despite the function having an entry
2061  // frequency in completely cold parts of the CFG.)
2062  //
2063  // In this case we don't want to suggest to subsequent passes that the
2064  // calculated weights are fully consistent. Consider this graph:
2065  //
2066  // check_1
2067  // 50% / |
2068  // eq_1 | 50%
2069  // \ |
2070  // check_2
2071  // 50% / |
2072  // eq_2 | 50%
2073  // \ |
2074  // check_3
2075  // 50% / |
2076  // eq_3 | 50%
2077  // \ |
2078  //
2079  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2080  // the overall probabilities are inconsistent; the total probability that the
2081  // value is either 1, 2 or 3 is 150%.
2082  //
2083  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2084  // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2085  // the loop exit edge. Then based solely on static estimation we would assume
2086  // the loop was extremely hot.
2087  //
2088  // FIXME this locally as well so that BPI and BFI are consistent as well. We
2089  // shouldn't make edges extremely likely or unlikely based solely on static
2090  // estimation.
2091  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
2092  SmallVector<uint32_t, 4> Weights;
2093  for (auto Prob : BBSuccProbs)
2094  Weights.push_back(Prob.getNumerator());
2095 
2096  auto TI = BB->getTerminator();
2097  TI->setMetadata(
2099  MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
2100  }
2101 }
2102 
2103 /// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2104 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2105 /// If we can duplicate the contents of BB up into PredBB do so now, this
2106 /// improves the odds that the branch will be on an analyzable instruction like
2107 /// a compare.
2109  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2110  assert(!PredBBs.empty() && "Can't handle an empty set");
2111 
2112  // If BB is a loop header, then duplicating this block outside the loop would
2113  // cause us to transform this into an irreducible loop, don't do this.
2114  // See the comments above FindLoopHeaders for justifications and caveats.
2115  if (LoopHeaders.count(BB)) {
2116  DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2117  << "' into predecessor block '" << PredBBs[0]->getName()
2118  << "' - it might create an irreducible loop!\n");
2119  return false;
2120  }
2121 
2122  unsigned DuplicationCost =
2123  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2124  if (DuplicationCost > BBDupThreshold) {
2125  DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2126  << "' - Cost is too high: " << DuplicationCost << "\n");
2127  return false;
2128  }
2129 
2130  // And finally, do it! Start by factoring the predecessors if needed.
2131  BasicBlock *PredBB;
2132  if (PredBBs.size() == 1)
2133  PredBB = PredBBs[0];
2134  else {
2135  DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2136  << " common predecessors.\n");
2137  PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
2138  }
2139 
2140  // Okay, we decided to do this! Clone all the instructions in BB onto the end
2141  // of PredBB.
2142  DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
2143  << PredBB->getName() << "' to eliminate branch on phi. Cost: "
2144  << DuplicationCost << " block is:" << *BB << "\n");
2145 
2146  // Unless PredBB ends with an unconditional branch, split the edge so that we
2147  // can just clone the bits from BB into the end of the new PredBB.
2148  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2149 
2150  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2151  PredBB = SplitEdge(PredBB, BB);
2152  OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2153  }
2154 
2155  // We are going to have to map operands from the original BB block into the
2156  // PredBB block. Evaluate PHI nodes in BB.
2158 
2159  BasicBlock::iterator BI = BB->begin();
2160  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2161  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2162  // Clone the non-phi instructions of BB into PredBB, keeping track of the
2163  // mapping and using it to remap operands in the cloned instructions.
2164  for (; BI != BB->end(); ++BI) {
2165  Instruction *New = BI->clone();
2166 
2167  // Remap operands to patch up intra-block references.
2168  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2169  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2170  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2171  if (I != ValueMapping.end())
2172  New->setOperand(i, I->second);
2173  }
2174 
2175  // If this instruction can be simplified after the operands are updated,
2176  // just use the simplified value instead. This frequently happens due to
2177  // phi translation.
2178  if (Value *IV = SimplifyInstruction(
2179  New,
2180  {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2181  ValueMapping[&*BI] = IV;
2182  if (!New->mayHaveSideEffects()) {
2183  New->deleteValue();
2184  New = nullptr;
2185  }
2186  } else {
2187  ValueMapping[&*BI] = New;
2188  }
2189  if (New) {
2190  // Otherwise, insert the new instruction into the block.
2191  New->setName(BI->getName());
2192  PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
2193  }
2194  }
2195 
2196  // Check to see if the targets of the branch had PHI nodes. If so, we need to
2197  // add entries to the PHI nodes for branch from PredBB now.
2198  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2199  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2200  ValueMapping);
2201  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2202  ValueMapping);
2203 
2204  // If there were values defined in BB that are used outside the block, then we
2205  // now have to update all uses of the value to use either the original value,
2206  // the cloned value, or some PHI derived value. This can require arbitrary
2207  // PHI insertion, of which we are prepared to do, clean these up now.
2208  SSAUpdater SSAUpdate;
2209  SmallVector<Use*, 16> UsesToRename;
2210  for (Instruction &I : *BB) {
2211  // Scan all uses of this instruction to see if it is used outside of its
2212  // block, and if so, record them in UsesToRename.
2213  for (Use &U : I.uses()) {
2214  Instruction *User = cast<Instruction>(U.getUser());
2215  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
2216  if (UserPN->getIncomingBlock(U) == BB)
2217  continue;
2218  } else if (User->getParent() == BB)
2219  continue;
2220 
2221  UsesToRename.push_back(&U);
2222  }
2223 
2224  // If there are no uses outside the block, we're done with this instruction.
2225  if (UsesToRename.empty())
2226  continue;
2227 
2228  DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2229 
2230  // We found a use of I outside of BB. Rename all uses of I that are outside
2231  // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
2232  // with the two values we know.
2233  SSAUpdate.Initialize(I.getType(), I.getName());
2234  SSAUpdate.AddAvailableValue(BB, &I);
2235  SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
2236 
2237  while (!UsesToRename.empty())
2238  SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
2239  DEBUG(dbgs() << "\n");
2240  }
2241 
2242  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2243  // that we nuked.
2244  BB->removePredecessor(PredBB, true);
2245 
2246  // Remove the unconditional branch at the end of the PredBB block.
2247  OldPredBranch->eraseFromParent();
2248 
2249  ++NumDupes;
2250  return true;
2251 }
2252 
2253 /// TryToUnfoldSelect - Look for blocks of the form
2254 /// bb1:
2255 /// %a = select
2256 /// br bb2
2257 ///
2258 /// bb2:
2259 /// %p = phi [%a, %bb1] ...
2260 /// %c = icmp %p
2261 /// br i1 %c
2262 ///
2263 /// And expand the select into a branch structure if one of its arms allows %c
2264 /// to be folded. This later enables threading from bb1 over bb2.
2266  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2267  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2268  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2269 
2270  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2271  CondLHS->getParent() != BB)
2272  return false;
2273 
2274  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2275  BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2276  SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2277 
2278  // Look if one of the incoming values is a select in the corresponding
2279  // predecessor.
2280  if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2281  continue;
2282 
2283  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2284  if (!PredTerm || !PredTerm->isUnconditional())
2285  continue;
2286 
2287  // Now check if one of the select values would allow us to constant fold the
2288  // terminator in BB. We don't do the transform if both sides fold, those
2289  // cases will be threaded in any case.
2290  LazyValueInfo::Tristate LHSFolds =
2291  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2292  CondRHS, Pred, BB, CondCmp);
2293  LazyValueInfo::Tristate RHSFolds =
2294  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2295  CondRHS, Pred, BB, CondCmp);
2296  if ((LHSFolds != LazyValueInfo::Unknown ||
2297  RHSFolds != LazyValueInfo::Unknown) &&
2298  LHSFolds != RHSFolds) {
2299  // Expand the select.
2300  //
2301  // Pred --
2302  // | v
2303  // | NewBB
2304  // | |
2305  // |-----
2306  // v
2307  // BB
2308  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2309  BB->getParent(), BB);
2310  // Move the unconditional branch to NewBB.
2311  PredTerm->removeFromParent();
2312  NewBB->getInstList().insert(NewBB->end(), PredTerm);
2313  // Create a conditional branch and update PHI nodes.
2314  BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2315  CondLHS->setIncomingValue(I, SI->getFalseValue());
2316  CondLHS->addIncoming(SI->getTrueValue(), NewBB);
2317  // The select is now dead.
2318  SI->eraseFromParent();
2319 
2320  // Update any other PHI nodes in BB.
2321  for (BasicBlock::iterator BI = BB->begin();
2322  PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2323  if (Phi != CondLHS)
2324  Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2325  return true;
2326  }
2327  }
2328  return false;
2329 }
2330 
2331 /// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2332 /// same BB in the form
2333 /// bb:
2334 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2335 /// %s = select %p, trueval, falseval
2336 ///
2337 /// or
2338 ///
2339 /// bb:
2340 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2341 /// %c = cmp %p, 0
2342 /// %s = select %c, trueval, falseval
2343 ///
2344 /// And expand the select into a branch structure. This later enables
2345 /// jump-threading over bb in this pass.
2346 ///
2347 /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2348 /// select if the associated PHI has at least one constant. If the unfolded
2349 /// select is not jump-threaded, it will be folded again in the later
2350 /// optimizations.
2352  // If threading this would thread across a loop header, don't thread the edge.
2353  // See the comments above FindLoopHeaders for justifications and caveats.
2354  if (LoopHeaders.count(BB))
2355  return false;
2356 
2357  for (BasicBlock::iterator BI = BB->begin();
2358  PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2359  // Look for a Phi having at least one constant incoming value.
2360  if (llvm::all_of(PN->incoming_values(),
2361  [](Value *V) { return !isa<ConstantInt>(V); }))
2362  continue;
2363 
2364  auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2365  // Check if SI is in BB and use V as condition.
2366  if (SI->getParent() != BB)
2367  return false;
2368  Value *Cond = SI->getCondition();
2369  return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
2370  };
2371 
2372  SelectInst *SI = nullptr;
2373  for (Use &U : PN->uses()) {
2374  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2375  // Look for a ICmp in BB that compares PN with a constant and is the
2376  // condition of a Select.
2377  if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2378  isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2379  if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2380  if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2381  SI = SelectI;
2382  break;
2383  }
2384  } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2385  // Look for a Select in BB that uses PN as condtion.
2386  if (isUnfoldCandidate(SelectI, U.get())) {
2387  SI = SelectI;
2388  break;
2389  }
2390  }
2391  }
2392 
2393  if (!SI)
2394  continue;
2395  // Expand the select.
2396  TerminatorInst *Term =
2397  SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
2398  PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2399  NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2400  NewPN->addIncoming(SI->getFalseValue(), BB);
2401  SI->replaceAllUsesWith(NewPN);
2402  SI->eraseFromParent();
2403  return true;
2404  }
2405  return false;
2406 }
2407 
2408 /// Try to propagate a guard from the current BB into one of its predecessors
2409 /// in case if another branch of execution implies that the condition of this
2410 /// guard is always true. Currently we only process the simplest case that
2411 /// looks like:
2412 ///
2413 /// Start:
2414 /// %cond = ...
2415 /// br i1 %cond, label %T1, label %F1
2416 /// T1:
2417 /// br label %Merge
2418 /// F1:
2419 /// br label %Merge
2420 /// Merge:
2421 /// %condGuard = ...
2422 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2423 ///
2424 /// And cond either implies condGuard or !condGuard. In this case all the
2425 /// instructions before the guard can be duplicated in both branches, and the
2426 /// guard is then threaded to one of them.
2428  using namespace PatternMatch;
2429 
2430  // We only want to deal with two predecessors.
2431  BasicBlock *Pred1, *Pred2;
2432  auto PI = pred_begin(BB), PE = pred_end(BB);
2433  if (PI == PE)
2434  return false;
2435  Pred1 = *PI++;
2436  if (PI == PE)
2437  return false;
2438  Pred2 = *PI++;
2439  if (PI != PE)
2440  return false;
2441  if (Pred1 == Pred2)
2442  return false;
2443 
2444  // Try to thread one of the guards of the block.
2445  // TODO: Look up deeper than to immediate predecessor?
2446  auto *Parent = Pred1->getSinglePredecessor();
2447  if (!Parent || Parent != Pred2->getSinglePredecessor())
2448  return false;
2449 
2450  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
2451  for (auto &I : *BB)
2452  if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
2453  if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
2454  return true;
2455 
2456  return false;
2457 }
2458 
2459 /// Try to propagate the guard from BB which is the lower block of a diamond
2460 /// to one of its branches, in case if diamond's condition implies guard's
2461 /// condition.
2463  BranchInst *BI) {
2464  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
2465  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
2466  Value *GuardCond = Guard->getArgOperand(0);
2467  Value *BranchCond = BI->getCondition();
2468  BasicBlock *TrueDest = BI->getSuccessor(0);
2469  BasicBlock *FalseDest = BI->getSuccessor(1);
2470 
2471  auto &DL = BB->getModule()->getDataLayout();
2472  bool TrueDestIsSafe = false;
2473  bool FalseDestIsSafe = false;
2474 
2475  // True dest is safe if BranchCond => GuardCond.
2476  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
2477  if (Impl && *Impl)
2478  TrueDestIsSafe = true;
2479  else {
2480  // False dest is safe if !BranchCond => GuardCond.
2481  Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
2482  if (Impl && *Impl)
2483  FalseDestIsSafe = true;
2484  }
2485 
2486  if (!TrueDestIsSafe && !FalseDestIsSafe)
2487  return false;
2488 
2489  BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
2490  BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
2491 
2492  ValueToValueMapTy UnguardedMapping, GuardedMapping;
2493  Instruction *AfterGuard = Guard->getNextNode();
2494  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
2495  if (Cost > BBDupThreshold)
2496  return false;
2497  // Duplicate all instructions before the guard and the guard itself to the
2498  // branch where implication is not proved.
2499  GuardedBlock = DuplicateInstructionsInSplitBetween(
2500  BB, GuardedBlock, AfterGuard, GuardedMapping);
2501  assert(GuardedBlock && "Could not create the guarded block?");
2502  // Duplicate all instructions before the guard in the unguarded branch.
2503  // Since we have successfully duplicated the guarded block and this block
2504  // has fewer instructions, we expect it to succeed.
2505  UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock,
2506  Guard, UnguardedMapping);
2507  assert(UnguardedBlock && "Could not create the unguarded block?");
2508  DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
2509  << GuardedBlock->getName() << "\n");
2510 
2511  // Some instructions before the guard may still have uses. For them, we need
2512  // to create Phi nodes merging their copies in both guarded and unguarded
2513  // branches. Those instructions that have no uses can be just removed.
2515  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
2516  if (!isa<PHINode>(&*BI))
2517  ToRemove.push_back(&*BI);
2518 
2519  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
2520  assert(InsertionPoint && "Empty block?");
2521  // Substitute with Phis & remove.
2522  for (auto *Inst : reverse(ToRemove)) {
2523  if (!Inst->use_empty()) {
2524  PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
2525  NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
2526  NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
2527  NewPN->insertBefore(InsertionPoint);
2528  Inst->replaceAllUsesWith(NewPN);
2529  }
2530  Inst->eraseFromParent();
2531  }
2532  return true;
2533 }
Legacy wrapper pass to provide the GlobalsAAResult object.
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock *> &PredBBs, BasicBlock *SuccBB)
ThreadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:522
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:843
iterator_range< use_iterator > uses()
Definition: Value.h:360
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs=false)
Notify the BasicBlock that the predecessor Pred is no longer able to reach it.
Definition: BasicBlock.cpp:276
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:39
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BranchProbability getCompl() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Wrapper around LazyValueInfo.
This is the interface for a simple mod/ref and alias analysis over globals.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type &#39;Ty&#39;.
Definition: SSAUpdater.cpp:54
BasicBlock * getSuccessor(unsigned idx) const
Return the specified successor.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
iterator end()
Definition: Function.h:590
void initializeJumpThreadingPass(PassRegistry &)
void DeleteDeadBlock(BasicBlock *BB)
Delete the specified block, which must have no predecessors.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:67
This class represents a function call, abstracting a target machine&#39;s calling convention.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
This file contains the declarations for metadata subclasses.
const Value * getTrueValue() const
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!)...
Definition: Local.cpp:615
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:233
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:728
bool isTerminator() const
Definition: Instruction.h:129
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:95
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:813
Value * FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AliasAnalysis *AA, bool *IsLoad, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:336
static BasicBlock * FindMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock *>> &PredToDestList)
FindMostPopularDest - The specified list contains multiple possible threadable destinations.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:535
Metadata node.
Definition: Metadata.h:862
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1831
Value * getCondition() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
This defines the Use class.
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:720
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
FunctionPass * createJumpThreadingPass(int Threshold=-1)
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
The address of a basic block.
Definition: Constants.h:813
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:590
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1247
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AliasAnalysis *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:321
unsigned getNumSuccessors() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:102
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
jump threading
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:286
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
bool TryToUnfoldSelectInCurrBB(BasicBlock *BB)
TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:502
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:427
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:232
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:201
static Constant * get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a binary or shift operator constant expression, folding if possible. ...
Definition: Constants.cpp:1710
Value * SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:195
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:83
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:904
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI, AAResults &AA)
Definition: Sink.cpp:201
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:292
iterator begin()
Definition: Function.h:588
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
Value * getOperand(unsigned i) const
Definition: User.h:154
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:474
StringRef getString() const
Definition: Metadata.cpp:456
const BasicBlock & getEntryBlock() const
Definition: Function.h:572
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:759
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:171
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:282
void setSuccessor(unsigned idx, BasicBlock *B)
Update the specified successor to point at the provided block.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:73
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes...
Definition: Local.cpp:868
Conditional or Unconditional Branch instruction.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1338
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:264
Indirect Branch Instruction.
A manager for alias analyses.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:536
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, bool HasProfileData_, std::unique_ptr< BlockFrequencyInfo > BFI_, std::unique_ptr< BranchProbabilityInfo > BPI_)
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
bool isUnordered() const
Definition: Instructions.h:264
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:820
This instruction compares its operands according to the predicate given to the constructor.
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
jump Jump Threading
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:101
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:107
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2108
void FindLoopHeaders(Function &F)
FindLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
const Value * getCondition() const
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1329
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1319
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:558
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1214
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump...
Tristate
This is used to return true/false/dunno results.
Definition: LazyValueInfo.h:63
bool ProcessBranchOnPHI(PHINode *PN)
ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node in the curren...
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
size_type size() const
Definition: SmallPtrSet.h:93
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:376
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:317
static cl::opt< bool > PrintLVIAfterJumpThreading("print-lvi-after-jump-threading", cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden)
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:176
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:110
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
See the file comment.
Definition: ValueMap.h:86
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
iterator end()
Definition: BasicBlock.h:254
bool isExceptional() const
Definition: InstrTypes.h:84
bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI=nullptr)
Remove all blocks that can not be reached from the function&#39;s entry.
Definition: Local.cpp:1732
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
This pass performs &#39;jump threading&#39;, which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:76
This class represents a range of values.
Definition: ConstantRange.h:47
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
TerminatorInst * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:642
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:559
bool SimplifyPartiallyRedundantLoad(LoadInst *LI)
SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant load instruction, eliminate it by replacing it with a PHI node.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:515
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:172
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
void push_back(pointer val)
Definition: ilist.h:326
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1434
const Value * getFalseValue() const
void removeFromParent()
This method unlinks &#39;this&#39; from the containing basic block, but does not delete it.
Definition: Instruction.cpp:63
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:927
bool hasValue() const
Definition: Optional.h:137
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:538
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
Analysis providing branch probability information.
iterator insert(iterator where, pointer New)
Definition: ilist.h:241
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:285
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:226
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:656
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
iterator begin()
Definition: DenseMap.h:70
bool ProcessBranchOnXOR(BinaryOperator *BO)
ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:245
bool ProcessImpliedCondition(BasicBlock *BB)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal)
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
void combineMetadataForCSE(Instruction *K, const Instruction *J)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:1831
bool isUnconditional() const
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap< Instruction *, Value *> &ValueMap)
AddPHINodeEntriesForMappedBlock - We&#39;re adding &#39;NewPred&#39; as a new predecessor to the PHIBB block...
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
Optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock *> > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them...
Definition: CFG.cpp:27
Analysis pass providing the TargetLibraryInfo.
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
Multiway switch.
Helper struct that represents how a value is mapped through different register banks.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
See the comments on JumpThreadingPass.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:383
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches...
bool ProcessGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:324
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
bool ProcessBlock(BasicBlock *BB)
ProcessBlock - If there are any predecessors whose control can be threaded through to a successor...
static const Function * getParent(const Value *V)
#define DEBUG(X)
Definition: Debug.h:118
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Split the edge connecting specified block.
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:418
A single uniqued string.
Definition: Metadata.h:602
A container for analyses that lazily runs them and caches their results.
const Instruction * getFirstNonPHIOrDbg() const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic...
Definition: BasicBlock.cpp:178
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
This header defines various interfaces for pass management in LLVM.
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_END(JumpThreading
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1303
op_range incoming_values()
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:187
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock *> &PredBBs)
DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
static bool hasAddressTakenAndUsed(BasicBlock *BB)
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
uint32_t getNumerator() const
bool use_empty() const
Definition: Value.h:328
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:1862
Analysis to compute lazy value information.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
TryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2.
void resize(size_type N)
Definition: SmallVector.h:355
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:867