1 //===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Jump Threading pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Scalar/JumpThreading.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/BranchProbabilityInfo.h"
25 #include "llvm/Analysis/CFG.h"
26 #include "llvm/Analysis/ConstantFolding.h"
27 #include "llvm/Analysis/DomTreeUpdater.h"
28 #include "llvm/Analysis/GlobalsModRef.h"
29 #include "llvm/Analysis/GuardUtils.h"
30 #include "llvm/Analysis/InstructionSimplify.h"
31 #include "llvm/Analysis/LazyValueInfo.h"
32 #include "llvm/Analysis/Loads.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/MemoryLocation.h"
35 #include "llvm/Analysis/TargetLibraryInfo.h"
36 #include "llvm/Analysis/TargetTransformInfo.h"
37 #include "llvm/Analysis/ValueTracking.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/CFG.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Intrinsics.h"
51 #include "llvm/IR/LLVMContext.h"
52 #include "llvm/IR/MDBuilder.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/IR/Module.h"
55 #include "llvm/IR/PassManager.h"
56 #include "llvm/IR/PatternMatch.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/Use.h"
59 #include "llvm/IR/User.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
65 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/CommandLine.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
70 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
71 #include "llvm/Transforms/Utils/Cloning.h"
72 #include "llvm/Transforms/Utils/Local.h"
73 #include "llvm/Transforms/Utils/SSAUpdater.h"
74 #include "llvm/Transforms/Utils/ValueMapper.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <memory>
81 #include <utility>
82 
83 using namespace llvm;
84 using namespace jumpthreading;
85 
86 #define DEBUG_TYPE "jump-threading"
87 
88 STATISTIC(NumThreads, "Number of jumps threaded");
89 STATISTIC(NumFolds, "Number of terminators folded");
90 STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
91 
92 static cl::opt<unsigned>
93 BBDuplicateThreshold("jump-threading-threshold",
94  cl::desc("Max block size to duplicate for jump threading"),
95  cl::init(6), cl::Hidden);
96 
97 static cl::opt<unsigned>
98  ImplicationSearchThreshold(
99  "jump-threading-implication-search-threshold",
100  cl::desc("The number of predecessors to search for a stronger "
101  "condition to use to thread over a weaker condition"),
102  cl::init(3), cl::Hidden);
103 
104 static cl::opt<bool> PrintLVIAfterJumpThreading(
105  "print-lvi-after-jump-threading",
106  cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
107  cl::Hidden);
108 
109 static cl::opt<bool> JumpThreadingFreezeSelectCond(
110  "jump-threading-freeze-select-cond",
111  cl::desc("Freeze the condition when unfolding select"), cl::init(false),
112  cl::Hidden);
113 
114 static cl::opt<bool> ThreadAcrossLoopHeaders(
115  "jump-threading-across-loop-headers",
116  cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
117  cl::init(false), cl::Hidden);
118 
119 
120 namespace {
121 
122  /// This pass performs 'jump threading', which looks at blocks that have
123  /// multiple predecessors and multiple successors. If one or more of the
124  /// predecessors of the block can be proven to always jump to one of the
125  /// successors, we forward the edge from the predecessor to the successor by
126  /// duplicating the contents of this block.
127  ///
128  /// An example of when this can occur is code like this:
129  ///
130  /// if () { ...
131  /// X = 4;
132  /// }
133  /// if (X < 3) {
134  ///
135  /// In this case, the unconditional branch at the end of the first if can be
136  /// revectored to the false side of the second if.
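 ///
 /// Illustrative sketch (assumed, for clarity): since "X = 4" makes "X < 3"
 /// false, the branch at the end of the first if-block can be sent directly to
 /// the false successor of the second if, duplicating any code between the two
 /// tests along that edge and bypassing the "X < 3" check on that path.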
137  class JumpThreading : public FunctionPass {
138  JumpThreadingPass Impl;
139 
140  public:
141  static char ID; // Pass identification
142 
143  JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
144  : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
145  initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
146  }
147 
148  bool runOnFunction(Function &F) override;
149 
150  void getAnalysisUsage(AnalysisUsage &AU) const override {
151  AU.addRequired<DominatorTreeWrapperPass>();
152  AU.addPreserved<DominatorTreeWrapperPass>();
153  AU.addRequired<AAResultsWrapperPass>();
154  AU.addRequired<LazyValueInfoWrapperPass>();
155  AU.addPreserved<LazyValueInfoWrapperPass>();
156  AU.addPreserved<GlobalsAAWrapperPass>();
157  AU.addRequired<TargetLibraryInfoWrapperPass>();
158  AU.addRequired<TargetTransformInfoWrapperPass>();
159  }
160 
161  void releaseMemory() override { Impl.releaseMemory(); }
162  };
163 
164 } // end anonymous namespace
165 
166 char JumpThreading::ID = 0;
167 
168 INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
169  "Jump Threading", false, false)
170 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
171 INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
172 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
173 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
174 INITIALIZE_PASS_END(JumpThreading, "jump-threading",
175  "Jump Threading", false, false)
176 
177 // Public interface to the Jump Threading pass
178 FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
179  return new JumpThreading(InsertFr, Threshold);
180 }
181 
182 JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
183  InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
184  DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
185 }
186 
187 // Update branch probability information according to conditional
188 // branch probability. This is usually made possible for cloned branches
189 // in inline instances by the context specific profile in the caller.
190 // For instance,
191 //
192 // [Block PredBB]
193 // [Branch PredBr]
194 // if (t) {
195 // Block A;
196 // } else {
197 // Block B;
198 // }
199 //
200 // [Block BB]
201 // cond = PN([true, %A], [..., %B]); // PHI node
202 // [Branch CondBr]
203 // if (cond) {
204 // ... // P(cond == true) = 1%
205 // }
206 //
207 // Here we know that when block A is taken, cond must be true, which means
208 // P(cond == true | A) = 1
209 //
210 // Given that P(cond == true) = P(cond == true | A) * P(A) +
211 // P(cond == true | B) * P(B)
212 // we get:
213 // P(cond == true ) = P(A) + P(cond == true | B) * P(B)
214 //
215 // which gives us:
216 // P(A) <= P(cond == true), i.e.
217 // P(t == true) <= P(cond == true)
218 //
219 // In other words, if we know P(cond == true) is unlikely, we know
220 // that P(t == true) is also unlikely.
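//
// Worked example with illustrative numbers (assumed here for clarity): if
// CondBr carries !prof branch_weights of 1 and 99, then P(cond == true) is
// 1/100. For a PHI operand that is "true" and comes from block A, BP = 1/100
// is below 50%, so the conditional branch in A's dominating predecessor
// (when it has no profile data of its own) receives new branch weights in
// the ratio 1:99, with the smaller weight on the edge toward A.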
221 //
222 static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
223  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
224  if (!CondBr)
225  return;
226 
227  uint64_t TrueWeight, FalseWeight;
228  if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
229  return;
230 
231  if (TrueWeight + FalseWeight == 0)
232  // Zero branch_weights do not give a hint for getting branch probabilities.
233  // Technically it would result in division by zero, since the denominator
234  // is TrueWeight + FalseWeight.
235  return;
236 
237  // Returns the outgoing edge of the dominating predecessor block
238  // that leads to the PhiNode's incoming block:
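 // Illustrative note (assumed, for clarity): if the PHI's incoming block
 // itself ends in a conditional branch, the lambda returns
 // {IncomingBB, PhiBB} right away; otherwise it walks up through single
 // predecessors until it finds one ending in a conditional branch, giving up
 // with {nullptr, nullptr} on a multi-predecessor block or an unreachable
 // loop.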
239  auto GetPredOutEdge =
240  [](BasicBlock *IncomingBB,
241  BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
242  auto *PredBB = IncomingBB;
243  auto *SuccBB = PhiBB;
244  SmallPtrSet<BasicBlock *, 16> Visited;
245  while (true) {
246  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
247  if (PredBr && PredBr->isConditional())
248  return {PredBB, SuccBB};
249  Visited.insert(PredBB);
250  auto *SinglePredBB = PredBB->getSinglePredecessor();
251  if (!SinglePredBB)
252  return {nullptr, nullptr};
253 
254  // Stop searching when SinglePredBB has been visited. It means we see
255  // an unreachable loop.
256  if (Visited.count(SinglePredBB))
257  return {nullptr, nullptr};
258 
259  SuccBB = PredBB;
260  PredBB = SinglePredBB;
261  }
262  };
263 
264  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
265  Value *PhiOpnd = PN->getIncomingValue(i);
266  ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
267 
268  if (!CI || !CI->getType()->isIntegerTy(1))
269  continue;
270 
271  BranchProbability BP =
272  (CI->getValue() == 1) ? BranchProbability::getBranchProbability(
273  TrueWeight, TrueWeight + FalseWeight)
274  : BranchProbability::getBranchProbability(
275  FalseWeight, TrueWeight + FalseWeight);
276 
277  auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
278  if (!PredOutEdge.first)
279  return;
280 
281  BasicBlock *PredBB = PredOutEdge.first;
282  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
283  if (!PredBr)
284  return;
285 
286  uint64_t PredTrueWeight, PredFalseWeight;
287  // FIXME: We currently only set the profile data when it is missing.
288  // With PGO, this can be used to refine even existing profile data with
289  // context information. This needs to be done after more performance
290  // testing.
291  if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
292  continue;
293 
294  // We can not infer anything useful when BP >= 50%, because BP is the
295  // upper bound probability value.
296  if (BP >= BranchProbability(50, 100))
297  continue;
298 
299  SmallVector<uint32_t, 2> Weights;
300  if (PredBr->getSuccessor(0) == PredOutEdge.second) {
301  Weights.push_back(BP.getNumerator());
302  Weights.push_back(BP.getCompl().getNumerator());
303  } else {
304  Weights.push_back(BP.getCompl().getNumerator());
305  Weights.push_back(BP.getNumerator());
306  }
307  PredBr->setMetadata(LLVMContext::MD_prof,
308  MDBuilder(PredBr->getParent()->getContext())
309  .createBranchWeights(Weights));
310  }
311 }
312 
313 /// runOnFunction - Toplevel algorithm.
314 bool JumpThreading::runOnFunction(Function &F) {
315  if (skipFunction(F))
316  return false;
317  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
318  // Jump threading makes no sense for targets with divergent control flow.
319  if (TTI->hasBranchDivergence())
320  return false;
321  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
322  auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
323  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
324  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
325  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
326  std::unique_ptr<BlockFrequencyInfo> BFI;
327  std::unique_ptr<BranchProbabilityInfo> BPI;
328  if (F.hasProfileData()) {
329  LoopInfo LI{DominatorTree(F)};
330  BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
331  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
332  }
333 
334  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
335  std::move(BFI), std::move(BPI));
336  if (PrintLVIAfterJumpThreading) {
337  dbgs() << "LVI for function '" << F.getName() << "':\n";
338  LVI->printLVI(F, DTU.getDomTree(), dbgs());
339  }
340  return Changed;
341 }
342 
343 PreservedAnalyses JumpThreadingPass::run(Function &F,
344  FunctionAnalysisManager &AM) {
345  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
346  // Jump threading makes no sense for targets with divergent control flow.
347  if (TTI.hasBranchDivergence())
348  return PreservedAnalyses::all();
349  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
350  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
351  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
352  auto &AA = AM.getResult<AAManager>(F);
353  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
354 
355  std::unique_ptr<BlockFrequencyInfo> BFI;
356  std::unique_ptr<BranchProbabilityInfo> BPI;
357  if (F.hasProfileData()) {
358  LoopInfo LI{DominatorTree(F)};
359  BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
360  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
361  }
362 
363  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
364  std::move(BFI), std::move(BPI));
365 
366  if (PrintLVIAfterJumpThreading) {
367  dbgs() << "LVI for function '" << F.getName() << "':\n";
368  LVI.printLVI(F, DTU.getDomTree(), dbgs());
369  }
370 
371  if (!Changed)
372  return PreservedAnalyses::all();
373  PreservedAnalyses PA;
374  PA.preserve<GlobalsAA>();
375  PA.preserve<DominatorTreeAnalysis>();
376  PA.preserve<LazyValueAnalysis>();
377  return PA;
378 }
379 
380 bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
381  LazyValueInfo *LVI_, AliasAnalysis *AA_,
382  DomTreeUpdater *DTU_, bool HasProfileData_,
383  std::unique_ptr<BlockFrequencyInfo> BFI_,
384  std::unique_ptr<BranchProbabilityInfo> BPI_) {
385  LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
386  TLI = TLI_;
387  LVI = LVI_;
388  AA = AA_;
389  DTU = DTU_;
390  BFI.reset();
391  BPI.reset();
392  // When profile data is available, we need to update edge weights after
393  // successful jump threading, which requires both BPI and BFI being available.
394  HasProfileData = HasProfileData_;
395  auto *GuardDecl = F.getParent()->getFunction(
396  Intrinsic::getName(Intrinsic::experimental_guard));
397  HasGuards = GuardDecl && !GuardDecl->use_empty();
398  if (HasProfileData) {
399  BPI = std::move(BPI_);
400  BFI = std::move(BFI_);
401  }
402 
403  // Reduce the number of instructions duplicated when optimizing strictly for
404  // size.
405  if (BBDuplicateThreshold.getNumOccurrences())
406  BBDupThreshold = BBDuplicateThreshold;
407  else if (F.hasFnAttribute(Attribute::MinSize))
408  BBDupThreshold = 3;
409  else
410  BBDupThreshold = DefaultBBDupThreshold;
411 
412  // JumpThreading must not process blocks unreachable from entry. It's a
413  // waste of compute time and can potentially lead to hangs.
414  SmallPtrSet<BasicBlock *, 16> Unreachable;
415  assert(DTU && "DTU isn't passed into JumpThreading before using it.");
416  assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
417  DominatorTree &DT = DTU->getDomTree();
418  for (auto &BB : F)
419  if (!DT.isReachableFromEntry(&BB))
420  Unreachable.insert(&BB);
421 
422  if (!ThreadAcrossLoopHeaders)
423  findLoopHeaders(F);
424 
425  bool EverChanged = false;
426  bool Changed;
427  do {
428  Changed = false;
429  for (auto &BB : F) {
430  if (Unreachable.count(&BB))
431  continue;
432  while (processBlock(&BB)) // Thread all of the branches we can over BB.
433  Changed = true;
434 
435  // Jump threading may have introduced redundant debug values into BB
436  // which should be removed.
437  // Remove redundant pseudo probes as well.
438  if (Changed)
439  RemoveRedundantDbgInstrs(&BB, true);
440 
441  // Stop processing BB if it's the entry or is now deleted. The following
442  // routines attempt to eliminate BB, and locating a suitable replacement
443  // for the entry is non-trivial.
444  if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
445  continue;
446 
447  if (pred_empty(&BB)) {
448  // When processBlock makes BB unreachable it doesn't bother to fix up
449  // the instructions in it. We must remove BB to prevent invalid IR.
450  LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
451  << "' with terminator: " << *BB.getTerminator()
452  << '\n');
453  LoopHeaders.erase(&BB);
454  LVI->eraseBlock(&BB);
455  DeleteDeadBlock(&BB, DTU);
456  Changed = true;
457  continue;
458  }
459 
460  // processBlock doesn't thread BBs with unconditional TIs. However, if BB
461  // is "almost empty", we attempt to merge BB with its sole successor.
462  auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
463  if (BI && BI->isUnconditional()) {
464  BasicBlock *Succ = BI->getSuccessor(0);
465  if (
466  // The terminator must be the only non-phi instruction in BB.
467  BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
468  // Don't alter Loop headers and latches to ensure another pass can
469  // detect and transform nested loops later.
470  !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
471  TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
472  RemoveRedundantDbgInstrs(Succ, true);
473  // BB is valid for cleanup here because we passed in DTU. F remains
474  // BB's parent until a DTU->getDomTree() event.
475  LVI->eraseBlock(&BB);
476  Changed = true;
477  }
478  }
479  }
480  EverChanged |= Changed;
481  } while (Changed);
482 
483  LoopHeaders.clear();
484  return EverChanged;
485 }
486 
487 // Replace uses of Cond with ToVal when safe to do so. If all uses are
488 // replaced, we can remove Cond. We cannot blindly replace all uses of Cond
489 // because we may incorrectly replace uses when guards/assumes are uses
490 // of `Cond` and we used the guards/assumes to reason about the `Cond` value
491 // at the end of the block. RAUW unconditionally replaces all uses
492 // including the guards/assumes themselves and the uses before the
493 // guard/assume.
494 static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
495  assert(Cond->getType() == ToVal->getType());
496  auto *BB = Cond->getParent();
497  // We can unconditionally replace all uses in non-local blocks (i.e. uses
498  // strictly dominated by BB), since LVI information is true from the
499  // terminator of BB.
500  replaceNonLocalUsesWith(Cond, ToVal);
501  for (Instruction &I : reverse(*BB)) {
502  // Reached the Cond whose uses we are trying to replace, so there are no
503  // more uses.
504  if (&I == Cond)
505  break;
506  // We only replace uses in instructions that are guaranteed to reach the end
507  // of BB, where we know Cond is ToVal.
508  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
509  break;
510  I.replaceUsesOfWith(Cond, ToVal);
511  }
512  if (Cond->use_empty() && !Cond->mayHaveSideEffects())
513  Cond->eraseFromParent();
514 }
515 
516 /// Return the cost of duplicating a piece of this block from first non-phi
517 /// and before StopAt instruction to thread across it. Stop scanning the block
518 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
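 ///
 /// Illustrative example (assumed numbers, for clarity): a block with two adds
 /// and one call to a non-intrinsic function, scanned up to its conditional
 /// branch, costs 1 + 1 + 4 = 6, which equals the default
 /// "jump-threading-threshold" of 6 and is therefore still considered cheap
 /// enough to duplicate.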
519 static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
520  Instruction *StopAt,
521  unsigned Threshold) {
522  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
523  /// Ignore PHI nodes, these will be flattened when duplication happens.
524  BasicBlock::const_iterator I(BB->getFirstNonPHI());
525 
526  // FIXME: THREADING will delete values that are just used to compute the
527  // branch, so they shouldn't count against the duplication cost.
528 
529  unsigned Bonus = 0;
530  if (BB->getTerminator() == StopAt) {
531  // Threading through a switch statement is particularly profitable. If this
532  // block ends in a switch, decrease its cost to make it more likely to
533  // happen.
534  if (isa<SwitchInst>(StopAt))
535  Bonus = 6;
536 
537  // The same holds for indirect branches, but slightly more so.
538  if (isa<IndirectBrInst>(StopAt))
539  Bonus = 8;
540  }
541 
542  // Bump the threshold up so the early exit from the loop doesn't skip the
543  // terminator-based Size adjustment at the end.
544  Threshold += Bonus;
545 
546  // Sum up the cost of each instruction until we get to the terminator. Don't
547  // include the terminator because the copy won't include it.
548  unsigned Size = 0;
549  for (; &*I != StopAt; ++I) {
550 
551  // Stop scanning the block if we've reached the threshold.
552  if (Size > Threshold)
553  return Size;
554 
555  // Debugger intrinsics don't incur code size.
556  if (isa<DbgInfoIntrinsic>(I)) continue;
557 
558  // Pseudo-probes don't incur code size.
559  if (isa<PseudoProbeInst>(I))
560  continue;
561 
562  // If this is a pointer->pointer bitcast, it is free.
563  if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
564  continue;
565 
566  // Freeze instruction is free, too.
567  if (isa<FreezeInst>(I))
568  continue;
569 
570  // Bail out if this instruction gives back a token type, it is not possible
571  // to duplicate it if it is used outside this BB.
572  if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
573  return ~0U;
574 
575  // All other instructions count for at least one unit.
576  ++Size;
577 
578  // Calls are more expensive. If they are non-intrinsic calls, we model them
579  // as having cost of 4. If they are a non-vector intrinsic, we model them
580  // as having cost of 2 total, and if they are a vector intrinsic, we model
581  // them as having cost 1.
582  if (const CallInst *CI = dyn_cast<CallInst>(I)) {
583  if (CI->cannotDuplicate() || CI->isConvergent())
584  // Blocks with NoDuplicate are modelled as having infinite cost, so they
585  // are never duplicated.
586  return ~0U;
587  else if (!isa<IntrinsicInst>(CI))
588  Size += 3;
589  else if (!CI->getType()->isVectorTy())
590  Size += 1;
591  }
592  }
593 
594  return Size > Bonus ? Size - Bonus : 0;
595 }
596 
597 /// findLoopHeaders - We do not want jump threading to turn proper loop
598 /// structures into irreducible loops. Doing this breaks up the loop nesting
599 /// hierarchy and pessimizes later transformations. To prevent this from
600 /// happening, we first have to find the loop headers. Here we approximate this
601 /// by finding targets of backedges in the CFG.
602 ///
603 /// Note that there definitely are cases when we want to allow threading of
604 /// edges across a loop header. For example, threading a jump from outside the
605 /// loop (the preheader) to an exit block of the loop is definitely profitable.
606 /// It is also almost always profitable to thread backedges from within the loop
607 /// to exit blocks, and is often profitable to thread backedges to other blocks
608 /// within the loop (forming a nested loop). This simple analysis is not rich
609 /// enough to track all of these properties and keep it up-to-date as the CFG
610 /// mutates, so we don't allow any of these transformations.
611 void JumpThreadingPass::findLoopHeaders(Function &F) {
612  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>,32> Edges;
613  FindFunctionBackedges(F, Edges);
614 
615  for (const auto &Edge : Edges)
616  LoopHeaders.insert(Edge.second);
617 }
618 
619 /// getKnownConstant - Helper method to determine if we can thread over a
620 /// terminator with the given value as its condition, and if so what value to
621 /// use for that. What kind of value this is depends on whether we want an
622 /// integer or a block address, but an undef is always accepted.
623 /// Returns null if Val is null or not an appropriate constant.
624 static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
625  if (!Val)
626  return nullptr;
627 
628  // Undef is "known" enough.
629  if (UndefValue *U = dyn_cast<UndefValue>(Val))
630  return U;
631 
632  if (Preference == WantBlockAddress)
633  return dyn_cast<BlockAddress>(Val->stripPointerCasts());
634 
635  return dyn_cast<ConstantInt>(Val);
636 }
637 
638 /// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
639 /// if we can infer that the value is a known ConstantInt/BlockAddress or undef
640 /// in any of our predecessors. If so, return the known list of value and pred
641 /// BB in the result vector.
642 ///
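 /// Illustrative example (assumed, for clarity): for
 ///   %v = phi i32 [ 7, %a ], [ %x, %b ]
 /// queried in block BB, the result would contain (i32 7, %a), plus an entry
 /// for %b if LazyValueInfo can prove %x constant on the %b -> BB edge.
 ///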
643 /// This returns true if there were any known values.
644 bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
645  Value *V, BasicBlock *BB, PredValueInfo &Result,
646  ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
647  Instruction *CxtI) {
648  // This method walks up use-def chains recursively. Because of this, we could
649  // get into an infinite loop going around loops in the use-def chain. To
650  // prevent this, keep track of what (value, block) pairs we've already visited
651  // and terminate the search if we loop back to them.
652  if (!RecursionSet.insert(V).second)
653  return false;
654 
655  // If V is a constant, then it is known in all predecessors.
656  if (Constant *KC = getKnownConstant(V, Preference)) {
657  for (BasicBlock *Pred : predecessors(BB))
658  Result.emplace_back(KC, Pred);
659 
660  return !Result.empty();
661  }
662 
663  // If V is a non-instruction value, or an instruction in a different block,
664  // then it can't be derived from a PHI.
665  Instruction *I = dyn_cast<Instruction>(V);
666  if (!I || I->getParent() != BB) {
667 
668  // Okay, if this is a live-in value, see if it has a known value at the end
669  // of any of our predecessors.
670  //
671  // FIXME: This should be an edge property, not a block end property.
672  /// TODO: Per PR2563, we could infer value range information about a
673  /// predecessor based on its terminator.
674  //
675  // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
676  // "I" is a non-local compare-with-a-constant instruction. This would be
677  // able to handle value inequalities better, for example if the compare is
678  // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
679  // Perhaps getConstantOnEdge should be smart enough to do this?
680  for (BasicBlock *P : predecessors(BB)) {
681  // If the value is known by LazyValueInfo to be a constant in a
682  // predecessor, use that information to try to thread this block.
683  Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
684  if (Constant *KC = getKnownConstant(PredCst, Preference))
685  Result.emplace_back(KC, P);
686  }
687 
688  return !Result.empty();
689  }
690 
691  /// If I is a PHI node, then we know the incoming values for any constants.
692  if (PHINode *PN = dyn_cast<PHINode>(I)) {
693  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
694  Value *InVal = PN->getIncomingValue(i);
695  if (Constant *KC = getKnownConstant(InVal, Preference)) {
696  Result.emplace_back(KC, PN->getIncomingBlock(i));
697  } else {
698  Constant *CI = LVI->getConstantOnEdge(InVal,
699  PN->getIncomingBlock(i),
700  BB, CxtI);
701  if (Constant *KC = getKnownConstant(CI, Preference))
702  Result.emplace_back(KC, PN->getIncomingBlock(i));
703  }
704  }
705 
706  return !Result.empty();
707  }
708 
709  // Handle Cast instructions.
710  if (CastInst *CI = dyn_cast<CastInst>(I)) {
711  Value *Source = CI->getOperand(0);
712  computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
713  RecursionSet, CxtI);
714  if (Result.empty())
715  return false;
716 
717  // Convert the known values.
718  for (auto &R : Result)
719  R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
720 
721  return true;
722  }
723 
724  if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
725  Value *Source = FI->getOperand(0);
726  computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
727  RecursionSet, CxtI);
728 
729  erase_if(Result, [](auto &Pair) {
730  return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
731  });
732 
733  return !Result.empty();
734  }
735 
736  // Handle some boolean conditions.
737  if (I->getType()->getPrimitiveSizeInBits() == 1) {
738  using namespace PatternMatch;
739 
740  assert(Preference == WantInteger && "One-bit non-integer type?");
741  // X | true -> true
742  // X & false -> false
743  Value *Op0, *Op1;
744  if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
745  match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
746  PredValueInfoTy LHSVals, RHSVals;
747 
748  computeValueKnownInPredecessorsImpl(Op0, BB, LHSVals, WantInteger,
749  RecursionSet, CxtI);
750  computeValueKnownInPredecessorsImpl(Op1, BB, RHSVals, WantInteger,
751  RecursionSet, CxtI);
752 
753  if (LHSVals.empty() && RHSVals.empty())
754  return false;
755 
756  ConstantInt *InterestingVal;
757  if (match(I, m_LogicalOr()))
758  InterestingVal = ConstantInt::getTrue(I->getContext());
759  else
760  InterestingVal = ConstantInt::getFalse(I->getContext());
761 
762  SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
763 
764  // Scan for the sentinel. If we find an undef, force it to the
765  // interesting value: x|undef -> true and x&undef -> false.
766  for (const auto &LHSVal : LHSVals)
767  if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
768  Result.emplace_back(InterestingVal, LHSVal.second);
769  LHSKnownBBs.insert(LHSVal.second);
770  }
771  for (const auto &RHSVal : RHSVals)
772  if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
773  // If we already inferred a value for this block on the LHS, don't
774  // re-add it.
775  if (!LHSKnownBBs.count(RHSVal.second))
776  Result.emplace_back(InterestingVal, RHSVal.second);
777  }
778 
779  return !Result.empty();
780  }
781 
782  // Handle the NOT form of XOR.
783  if (I->getOpcode() == Instruction::Xor &&
784  isa<ConstantInt>(I->getOperand(1)) &&
785  cast<ConstantInt>(I->getOperand(1))->isOne()) {
786  computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
787  WantInteger, RecursionSet, CxtI);
788  if (Result.empty())
789  return false;
790 
791  // Invert the known values.
792  for (auto &R : Result)
793  R.first = ConstantExpr::getNot(R.first);
794 
795  return true;
796  }
797 
798  // Try to simplify some other binary operator values.
799  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
800  assert(Preference != WantBlockAddress
801  && "A binary operator creating a block address?");
802  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
803  PredValueInfoTy LHSVals;
804  computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
805  WantInteger, RecursionSet, CxtI);
806 
807  // Try to use constant folding to simplify the binary operator.
808  for (const auto &LHSVal : LHSVals) {
809  Constant *V = LHSVal.first;
810  Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
811 
812  if (Constant *KC = getKnownConstant(Folded, WantInteger))
813  Result.emplace_back(KC, LHSVal.second);
814  }
815  }
816 
817  return !Result.empty();
818  }
819 
820  // Handle compare with phi operand, where the PHI is defined in this block.
821  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
822  assert(Preference == WantInteger && "Compares only produce integers");
823  Type *CmpType = Cmp->getType();
824  Value *CmpLHS = Cmp->getOperand(0);
825  Value *CmpRHS = Cmp->getOperand(1);
826  CmpInst::Predicate Pred = Cmp->getPredicate();
827 
828  PHINode *PN = dyn_cast<PHINode>(CmpLHS);
829  if (!PN)
830  PN = dyn_cast<PHINode>(CmpRHS);
831  if (PN && PN->getParent() == BB) {
832  const DataLayout &DL = PN->getModule()->getDataLayout();
833  // We can do this simplification if any comparisons fold to true or false.
834  // See if any do.
835  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
836  BasicBlock *PredBB = PN->getIncomingBlock(i);
837  Value *LHS, *RHS;
838  if (PN == CmpLHS) {
839  LHS = PN->getIncomingValue(i);
840  RHS = CmpRHS->DoPHITranslation(BB, PredBB);
841  } else {
842  LHS = CmpLHS->DoPHITranslation(BB, PredBB);
843  RHS = PN->getIncomingValue(i);
844  }
845  Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
846  if (!Res) {
847  if (!isa<Constant>(RHS))
848  continue;
849 
850  // getPredicateOnEdge call will make no sense if LHS is defined in BB.
851  auto LHSInst = dyn_cast<Instruction>(LHS);
852  if (LHSInst && LHSInst->getParent() == BB)
853  continue;
854 
855  LazyValueInfo::Tristate
856  ResT = LVI->getPredicateOnEdge(Pred, LHS,
857  cast<Constant>(RHS), PredBB, BB,
858  CxtI ? CxtI : Cmp);
859  if (ResT == LazyValueInfo::Unknown)
860  continue;
861  Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
862  }
863 
864  if (Constant *KC = getKnownConstant(Res, WantInteger))
865  Result.emplace_back(KC, PredBB);
866  }
867 
868  return !Result.empty();
869  }
870 
871  // If comparing a live-in value against a constant, see if we know the
872  // live-in value on any predecessors.
873  if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
874  Constant *CmpConst = cast<Constant>(CmpRHS);
875 
876  if (!isa<Instruction>(CmpLHS) ||
877  cast<Instruction>(CmpLHS)->getParent() != BB) {
878  for (BasicBlock *P : predecessors(BB)) {
879  // If the value is known by LazyValueInfo to be a constant in a
880  // predecessor, use that information to try to thread this block.
881  LazyValueInfo::Tristate Res =
882  LVI->getPredicateOnEdge(Pred, CmpLHS,
883  CmpConst, P, BB, CxtI ? CxtI : Cmp);
884  if (Res == LazyValueInfo::Unknown)
885  continue;
886 
887  Constant *ResC = ConstantInt::get(CmpType, Res);
888  Result.emplace_back(ResC, P);
889  }
890 
891  return !Result.empty();
892  }
893 
894  // InstCombine can fold some forms of constant range checks into
895  // (icmp (add (x, C1)), C2). See if we have such a thing with
896  // x as a live-in.
897  {
898  using namespace PatternMatch;
899 
900  Value *AddLHS;
901  ConstantInt *AddConst;
902  if (isa<ConstantInt>(CmpConst) &&
903  match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
904  if (!isa<Instruction>(AddLHS) ||
905  cast<Instruction>(AddLHS)->getParent() != BB) {
906  for (BasicBlock *P : predecessors(BB)) {
907  // If the value is known by LazyValueInfo to be a ConstantRange in
908  // a predecessor, use that information to try to thread this
909  // block.
910  ConstantRange CR = LVI->getConstantRangeOnEdge(
911  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
912  // Propagate the range through the addition.
913  CR = CR.add(AddConst->getValue());
914 
915  // Get the range where the compare returns true.
916  ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
917  Pred, cast<ConstantInt>(CmpConst)->getValue());
918 
919  Constant *ResC;
920  if (CmpRange.contains(CR))
921  ResC = ConstantInt::getTrue(CmpType);
922  else if (CmpRange.inverse().contains(CR))
923  ResC = ConstantInt::getFalse(CmpType);
924  else
925  continue;
926 
927  Result.emplace_back(ResC, P);
928  }
929 
930  return !Result.empty();
931  }
932  }
933  }
934 
935  // Try to find a constant value for the LHS of a comparison,
936  // and evaluate it statically if we can.
937  PredValueInfoTy LHSVals;
938  computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
939  WantInteger, RecursionSet, CxtI);
940 
941  for (const auto &LHSVal : LHSVals) {
942  Constant *V = LHSVal.first;
943  Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
944  if (Constant *KC = getKnownConstant(Folded, WantInteger))
945  Result.emplace_back(KC, LHSVal.second);
946  }
947 
948  return !Result.empty();
949  }
950  }
951 
952  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
953  // Handle select instructions where at least one operand is a known constant
954  // and we can figure out the condition value for any predecessor block.
955  Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
956  Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
957  PredValueInfoTy Conds;
958  if ((TrueVal || FalseVal) &&
959  computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
960  WantInteger, RecursionSet, CxtI)) {
961  for (auto &C : Conds) {
962  Constant *Cond = C.first;
963 
964  // Figure out what value to use for the condition.
965  bool KnownCond;
966  if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
967  // A known boolean.
968  KnownCond = CI->isOne();
969  } else {
970  assert(isa<UndefValue>(Cond) && "Unexpected condition value");
971  // Either operand will do, so be sure to pick the one that's a known
972  // constant.
973  // FIXME: Do this more cleverly if both values are known constants?
974  KnownCond = (TrueVal != nullptr);
975  }
976 
977  // See if the select has a known constant value for this predecessor.
978  if (Constant *Val = KnownCond ? TrueVal : FalseVal)
979  Result.emplace_back(Val, C.second);
980  }
981 
982  return !Result.empty();
983  }
984  }
985 
986  // If all else fails, see if LVI can figure out a constant value for us.
987  assert(CxtI->getParent() == BB && "CxtI should be in BB");
988  Constant *CI = LVI->getConstant(V, CxtI);
989  if (Constant *KC = getKnownConstant(CI, Preference)) {
990  for (BasicBlock *Pred : predecessors(BB))
991  Result.emplace_back(KC, Pred);
992  }
993 
994  return !Result.empty();
995 }
996 
997 /// getBestDestForJumpOnUndef - If we determine that the specified block ends
998 /// in an undefined jump, decide which block is best to revector to.
999 ///
1000 /// Since we can pick an arbitrary destination, we pick the successor with the
1001 /// fewest predecessors. This should reduce the in-degree of the others.
1002 unsigned JumpThreadingPass::getBestDestForJumpOnUndef(BasicBlock *BB) {
1003  Instruction *BBTerm = BB->getTerminator();
1004  unsigned MinSucc = 0;
1005  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
1006  // Compute the successor with the minimum number of predecessors.
1007  unsigned MinNumPreds = pred_size(TestBB);
1008  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1009  TestBB = BBTerm->getSuccessor(i);
1010  unsigned NumPreds = pred_size(TestBB);
1011  if (NumPreds < MinNumPreds) {
1012  MinSucc = i;
1013  MinNumPreds = NumPreds;
1014  }
1015  }
1016 
1017  return MinSucc;
1018 }
1019 
1020 static bool hasAddressTakenAndUsed(BasicBlock *BB) {
1021  if (!BB->hasAddressTaken()) return false;
1022 
1023  // If the block has its address taken, it may be a tree of dead constants
1024  // hanging off of it. These shouldn't keep the block alive.
1025  BlockAddress *BA = BlockAddress::get(BB);
1026  BA->removeDeadConstantUsers();
1027  return !BA->use_empty();
1028 }
1029 
1030 /// processBlock - If there are any predecessors whose control can be threaded
1031 /// through to a successor, transform them now.
1032 bool JumpThreadingPass::processBlock(BasicBlock *BB) {
1033  // If the block is trivially dead, just return and let the caller nuke it.
1034  // This simplifies other transformations.
1035  if (DTU->isBBPendingDeletion(BB) ||
1036  (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
1037  return false;
1038 
1039  // If this block has a single predecessor, and if that pred has a single
1040  // successor, merge the blocks. This encourages recursive jump threading
1041  // because now the condition in this block can be threaded through
1042  // predecessors of our predecessor block.
1043  if (maybeMergeBasicBlockIntoOnlyPred(BB))
1044  return true;
1045 
1046  if (tryToUnfoldSelectInCurrBB(BB))
1047  return true;
1048 
1049  // Look if we can propagate guards to predecessors.
1050  if (HasGuards && processGuards(BB))
1051  return true;
1052 
1053  // What kind of constant we're looking for.
1054  ConstantPreference Preference = WantInteger;
1055 
1056  // Look to see if the terminator is a conditional branch, switch or indirect
1057  // branch, if not we can't thread it.
1058  Value *Condition;
1059  Instruction *Terminator = BB->getTerminator();
1060  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
1061  // Can't thread an unconditional jump.
1062  if (BI->isUnconditional()) return false;
1063  Condition = BI->getCondition();
1064  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
1065  Condition = SI->getCondition();
1066  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
1067  // Can't thread indirect branch with no successors.
1068  if (IB->getNumSuccessors() == 0) return false;
1069  Condition = IB->getAddress()->stripPointerCasts();
1070  Preference = WantBlockAddress;
1071  } else {
1072  return false; // Must be an invoke or callbr.
1073  }
1074 
1075  // Keep track if we constant folded the condition in this invocation.
1076  bool ConstantFolded = false;
1077 
1078  // Run constant folding to see if we can reduce the condition to a simple
1079  // constant.
1080  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1081  Value *SimpleVal =
1082  ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
1083  if (SimpleVal) {
1084  I->replaceAllUsesWith(SimpleVal);
1085  if (isInstructionTriviallyDead(I, TLI))
1086  I->eraseFromParent();
1087  Condition = SimpleVal;
1088  ConstantFolded = true;
1089  }
1090  }
1091 
1092  // If the terminator is branching on an undef or freeze undef, we can pick any
1093  // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1094  auto *FI = dyn_cast<FreezeInst>(Condition);
1095  if (isa<UndefValue>(Condition) ||
1096  (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1097  unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1098  std::vector<DominatorTree::UpdateType> Updates;
1099 
1100  // Fold the branch/switch.
1101  Instruction *BBTerm = BB->getTerminator();
1102  Updates.reserve(BBTerm->getNumSuccessors());
1103  for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1104  if (i == BestSucc) continue;
1105  BasicBlock *Succ = BBTerm->getSuccessor(i);
1106  Succ->removePredecessor(BB, true);
1107  Updates.push_back({DominatorTree::Delete, BB, Succ});
1108  }
1109 
1110  LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1111  << "' folding undef terminator: " << *BBTerm << '\n');
1112  BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1113  ++NumFolds;
1114  BBTerm->eraseFromParent();
1115  DTU->applyUpdatesPermissive(Updates);
1116  if (FI)
1117  FI->eraseFromParent();
1118  return true;
1119  }
1120 
1121  // If the terminator of this block is branching on a constant, simplify the
1122  // terminator to an unconditional branch. This can occur due to threading in
1123  // other blocks.
1124  if (getKnownConstant(Condition, Preference)) {
1125  LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1126  << "' folding terminator: " << *BB->getTerminator()
1127  << '\n');
1128  ++NumFolds;
1129  ConstantFoldTerminator(BB, true, nullptr, DTU);
1130  if (HasProfileData)
1131  BPI->eraseBlock(BB);
1132  return true;
1133  }
1134 
1135  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1136 
1137  // All the rest of our checks depend on the condition being an instruction.
1138  if (!CondInst) {
1139  // FIXME: Unify this with code below.
1140  if (processThreadableEdges(Condition, BB, Preference, Terminator))
1141  return true;
1142  return ConstantFolded;
1143  }
1144 
1145  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
1146  // If we're branching on a conditional, LVI might be able to determine
1147  // it's value at the branch instruction. We only handle comparisons
1148  // against a constant at this time.
1149  // TODO: This should be extended to handle switches as well.
1150  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
1151  Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
1152  if (CondBr && CondConst) {
1153  // We should have returned as soon as we turned a conditional branch to
1154  // an unconditional one, because it is no longer interesting as far as
1155  // jump threading is concerned.
1156  assert(CondBr->isConditional() && "Threading on unconditional terminator");
1157 
1158  LazyValueInfo::Tristate Ret =
1159  LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1160  CondConst, CondBr, /*UseBlockValue=*/false);
1161  if (Ret != LazyValueInfo::Unknown) {
1162  unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
1163  unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
1164  BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
1165  ToRemoveSucc->removePredecessor(BB, true);
1166  BranchInst *UncondBr =
1167  BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
1168  UncondBr->setDebugLoc(CondBr->getDebugLoc());
1169  ++NumFolds;
1170  CondBr->eraseFromParent();
1171  if (CondCmp->use_empty())
1172  CondCmp->eraseFromParent();
1173  // We can safely replace *some* uses of the CondInst if it has
1174  // exactly one value as returned by LVI. RAUW is incorrect in the
1175  // presence of guards and assumes, that have the `Cond` as the use. This
1176  // is because we use the guards/assume to reason about the `Cond` value
1177  // at the end of block, but RAUW unconditionally replaces all uses
1178  // including the guards/assumes themselves and the uses before the
1179  // guard/assume.
1180  else if (CondCmp->getParent() == BB) {
1181  auto *CI = Ret == LazyValueInfo::True ?
1182  ConstantInt::getTrue(CondCmp->getType()) :
1183  ConstantInt::getFalse(CondCmp->getType());
1184  replaceFoldableUses(CondCmp, CI);
1185  }
1186  DTU->applyUpdatesPermissive(
1187  {{DominatorTree::Delete, BB, ToRemoveSucc}});
1188  if (HasProfileData)
1189  BPI->eraseBlock(BB);
1190  return true;
1191  }
1192 
1193  // We did not manage to simplify this branch, try to see whether
1194  // CondCmp depends on a known phi-select pattern.
1195  if (tryToUnfoldSelect(CondCmp, BB))
1196  return true;
1197  }
1198  }
1199 
1200  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1201  if (tryToUnfoldSelect(SI, BB))
1202  return true;
1203 
1204  // Check for some cases that are worth simplifying. Right now we want to look
1205  // for loads that are used by a switch or by the condition for the branch. If
1206  // we see one, check to see if it's partially redundant. If so, insert a PHI
1207  // which can then be used to thread the values.
1208  Value *SimplifyValue = CondInst;
1209 
1210  if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
1211  // Look into freeze's operand
1212  SimplifyValue = FI->getOperand(0);
1213 
1214  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1215  if (isa<Constant>(CondCmp->getOperand(1)))
1216  SimplifyValue = CondCmp->getOperand(0);
1217 
1218  // TODO: There are other places where load PRE would be profitable, such as
1219  // more complex comparisons.
1220  if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1221  if (simplifyPartiallyRedundantLoad(LoadI))
1222  return true;
1223 
1224  // Before threading, try to propagate profile data backwards:
1225  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1226  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1227  updatePredecessorProfileMetadata(PN, BB);
1228 
1229  // Handle a variety of cases where we are branching on something derived from
1230  // a PHI node in the current block. If we can prove that any predecessors
1231  // compute a predictable value based on a PHI node, thread those predecessors.
1232  if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1233  return true;
1234 
1235  // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1236  // the current block, see if we can simplify.
1237  PHINode *PN = dyn_cast<PHINode>(
1238  isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
1239  : CondInst);
1240 
1241  if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1242  return processBranchOnPHI(PN);
1243 
1244  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1245  if (CondInst->getOpcode() == Instruction::Xor &&
1246  CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1247  return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1248 
1249  // Search for a stronger dominating condition that can be used to simplify a
1250  // conditional branch leaving BB.
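  // Illustrative example (assumed, for clarity): if BB's single predecessor
  // branches into BB under the condition (x > 10) and BB itself branches on
  // (x > 5), the first condition implies the second, so BB's conditional
  // branch can be folded to always take its "true" successor.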
1251  if (processImpliedCondition(BB))
1252  return true;
1253 
1254  return false;
1255 }
1256 
1257 bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
1258  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1259  if (!BI || !BI->isConditional())
1260  return false;
1261 
1262  Value *Cond = BI->getCondition();
1263  BasicBlock *CurrentBB = BB;
1264  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1265  unsigned Iter = 0;
1266 
1267  auto &DL = BB->getModule()->getDataLayout();
1268 
1269  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1270  auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1271  if (!PBI || !PBI->isConditional())
1272  return false;
1273  if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1274  return false;
1275 
1276  bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1277  Optional<bool> Implication =
1278  isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1279  if (Implication) {
1280  BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1281  BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1282  RemoveSucc->removePredecessor(BB);
1283  BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
1284  UncondBI->setDebugLoc(BI->getDebugLoc());
1285  ++NumFolds;
1286  BI->eraseFromParent();
1287  DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1288  if (HasProfileData)
1289  BPI->eraseBlock(BB);
1290  return true;
1291  }
1292  CurrentBB = CurrentPred;
1293  CurrentPred = CurrentBB->getSinglePredecessor();
1294  }
1295 
1296  return false;
1297 }
1298 
1299 /// Return true if Op is an instruction defined in the given block.
1300 static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
1301  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1302  if (OpInst->getParent() == BB)
1303  return true;
1304  return false;
1305 }
1306 
1307 /// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1308 /// redundant load instruction, eliminate it by replacing it with a PHI node.
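 ///
 /// Illustrative sketch (assumed, for clarity): if one predecessor already
 /// contains a store to or load from the same pointer, the load in this block
 /// is redundant along that edge; a new load is inserted only in predecessors
 /// where no value is available, and a PHI of the per-predecessor values then
 /// replaces the original load.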
1309 /// This is an important optimization that encourages jump threading, and needs
1310 /// to be run interlaced with other jump threading tasks.
1311 bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
1312  // Don't hack volatile and ordered loads.
1313  if (!LoadI->isUnordered()) return false;
1314 
1315  // If the load is defined in a block with exactly one predecessor, it can't be
1316  // partially redundant.
1317  BasicBlock *LoadBB = LoadI->getParent();
1318  if (LoadBB->getSinglePredecessor())
1319  return false;
1320 
1321  // If the load is defined in an EH pad, it can't be partially redundant,
1322  // because the edges between the invoke and the EH pad cannot have other
1323  // instructions between them.
1324  if (LoadBB->isEHPad())
1325  return false;
1326 
1327  Value *LoadedPtr = LoadI->getOperand(0);
1328 
1329  // If the loaded operand is defined in the LoadBB and it's not a phi,
1330  // it can't be available in predecessors.
1331  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1332  return false;
1333 
1334  // Scan a few instructions up from the load, to see if it is obviously live at
1335  // the entry to its block.
1336  BasicBlock::iterator BBIt(LoadI);
1337  bool IsLoadCSE;
1338  if (Value *AvailableVal = FindAvailableLoadedValue(
1339  LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1340  // If the value of the load is locally available within the block, just use
1341  // it. This frequently occurs for reg2mem'd allocas.
1342 
1343  if (IsLoadCSE) {
1344  LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1345  combineMetadataForCSE(NLoadI, LoadI, false);
1346  };
1347 
1348  // If the returned value is the load itself, replace with an undef. This can
1349  // only happen in dead loops.
1350  if (AvailableVal == LoadI)
1351  AvailableVal = UndefValue::get(LoadI->getType());
1352  if (AvailableVal->getType() != LoadI->getType())
1353  AvailableVal = CastInst::CreateBitOrPointerCast(
1354  AvailableVal, LoadI->getType(), "", LoadI);
1355  LoadI->replaceAllUsesWith(AvailableVal);
1356  LoadI->eraseFromParent();
1357  return true;
1358  }
1359 
1360  // Otherwise, if we scanned the whole block and got to the top of the block,
1361  // we know the block is locally transparent to the load. If not, something
1362  // might clobber its value.
1363  if (BBIt != LoadBB->begin())
1364  return false;
1365 
1366  // If all of the loads and stores that feed the value have the same AA tags,
1367  // then we can propagate them onto any newly inserted loads.
1368  AAMDNodes AATags;
1369  LoadI->getAAMetadata(AATags);
1370 
1371  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1372 
1373  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1374 
1375  AvailablePredsTy AvailablePreds;
1376  BasicBlock *OneUnavailablePred = nullptr;
1377  SmallVector<LoadInst*, 8> CSELoads;
1378 
1379  // If we got here, the loaded value is transparent through to the start of the
1380  // block. Check to see if it is available in any of the predecessor blocks.
1381  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1382  // If we already scanned this predecessor, skip it.
1383  if (!PredsScanned.insert(PredBB).second)
1384  continue;
1385 
1386  BBIt = PredBB->end();
1387  unsigned NumScanedInst = 0;
1388  Value *PredAvailable = nullptr;
1389  // NOTE: We don't CSE loads that are volatile or anything stronger than
1390  // unordered; that should have been checked when we entered the function.
1391  assert(LoadI->isUnordered() &&
1392  "Attempting to CSE volatile or atomic loads");
1393  // If this is a load on a phi pointer, phi-translate it and search
1394  // for available load/store to the pointer in predecessors.
1395  Type *AccessTy = LoadI->getType();
1396  const auto &DL = LoadI->getModule()->getDataLayout();
1397  MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1398  LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1399  AATags);
1400  PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
1401  PredBB, BBIt, DefMaxInstsToScan,
1402  AA, &IsLoadCSE, &NumScanedInst);
1403 
1404  // If PredBB has a single predecessor, continue scanning through the
1405  // single predecessor.
1406  BasicBlock *SinglePredBB = PredBB;
1407  while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1408  NumScanedInst < DefMaxInstsToScan) {
1409  SinglePredBB = SinglePredBB->getSinglePredecessor();
1410  if (SinglePredBB) {
1411  BBIt = SinglePredBB->end();
1412  PredAvailable = findAvailablePtrLoadStore(
1413  Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1414  (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1415  &NumScanedInst);
1416  }
1417  }
1418 
1419  if (!PredAvailable) {
1420  OneUnavailablePred = PredBB;
1421  continue;
1422  }
1423 
1424  if (IsLoadCSE)
1425  CSELoads.push_back(cast<LoadInst>(PredAvailable));
1426 
1427  // If so, this load is partially redundant. Remember this info so that we
1428  // can create a PHI node.
1429  AvailablePreds.emplace_back(PredBB, PredAvailable);
1430  }
1431 
1432  // If the loaded value isn't available in any predecessor, it isn't partially
1433  // redundant.
1434  if (AvailablePreds.empty()) return false;
1435 
1436  // Okay, the loaded value is available in at least one (and maybe all!)
1437  // predecessors. If the value is unavailable in more than one unique
1438  // predecessor, we want to insert a merge block for those common predecessors.
1439  // This ensures that we only have to insert one reload, thus not increasing
1440  // code size.
1441  BasicBlock *UnavailablePred = nullptr;
1442 
1443  // If the value is unavailable in one of predecessors, we will end up
1444  // inserting a new instruction into them. It is only valid if all the
1445  // instructions before LoadI are guaranteed to pass execution to its
1446  // successor, or if LoadI is safe to speculate.
1447  // TODO: If this logic becomes more complex, and we will perform PRE insertion
1448  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1449  // It requires domination tree analysis, so for this simple case it is an
1450  // overkill.
1451  if (PredsScanned.size() != AvailablePreds.size() &&
1452  !isSafeToSpeculativelyExecute(LoadI))
1453  for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1454  if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
1455  return false;
1456 
1457  // If there is exactly one predecessor where the value is unavailable, the
1458  // already computed 'OneUnavailablePred' block is it. If it ends in an
1459  // unconditional branch, we know that it isn't a critical edge.
1460  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1461  OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1462  UnavailablePred = OneUnavailablePred;
1463  } else if (PredsScanned.size() != AvailablePreds.size()) {
1464  // Otherwise, we had multiple unavailable predecessors or we had a critical
1465  // edge from the one.
1466  SmallVector<BasicBlock*, 8> PredsToSplit;
1467  SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1468 
1469  for (const auto &AvailablePred : AvailablePreds)
1470  AvailablePredSet.insert(AvailablePred.first);
1471 
1472  // Add all the unavailable predecessors to the PredsToSplit list.
1473  for (BasicBlock *P : predecessors(LoadBB)) {
1474  // If the predecessor is an indirect goto, we can't split the edge.
1475  // Same for CallBr.
1476  if (isa<IndirectBrInst>(P->getTerminator()) ||
1477  isa<CallBrInst>(P->getTerminator()))
1478  return false;
1479 
1480  if (!AvailablePredSet.count(P))
1481  PredsToSplit.push_back(P);
1482  }
1483 
1484  // Split them out to their own block.
1485  UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1486  }
1487 
1488  // If the value isn't available in all predecessors, then there will be
1489  // exactly one where it isn't available. Insert a load on that edge and add
1490  // it to the AvailablePreds list.
1491  if (UnavailablePred) {
1492  assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1493  "Can't handle critical edge here!");
1494  LoadInst *NewVal = new LoadInst(
1495  LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1496  LoadI->getName() + ".pr", false, LoadI->getAlign(),
1497  LoadI->getOrdering(), LoadI->getSyncScopeID(),
1498  UnavailablePred->getTerminator());
1499  NewVal->setDebugLoc(LoadI->getDebugLoc());
1500  if (AATags)
1501  NewVal->setAAMetadata(AATags);
1502 
1503  AvailablePreds.emplace_back(UnavailablePred, NewVal);
1504  }
1505 
1506  // Now we know that each predecessor of this block has a value in
1507  // AvailablePreds, sort them for efficient access as we're walking the preds.
1508  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1509 
1510  // Create a PHI node at the start of the block for the PRE'd load value.
1511  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1512  PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "",
1513  &LoadBB->front());
1514  PN->takeName(LoadI);
1515  PN->setDebugLoc(LoadI->getDebugLoc());
1516 
1517  // Insert new entries into the PHI for each predecessor. A single block may
1518  // have multiple entries here.
1519  for (pred_iterator PI = PB; PI != PE; ++PI) {
1520  BasicBlock *P = *PI;
1521  AvailablePredsTy::iterator I =
1522  llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1523 
1524  assert(I != AvailablePreds.end() && I->first == P &&
1525  "Didn't find entry for predecessor!");
1526 
1527  // If we have an available predecessor but it requires casting, insert the
1528  // cast in the predecessor and use the cast. Note that we have to update the
1529  // AvailablePreds vector as we go so that all of the PHI entries for this
1530  // predecessor use the same bitcast.
1531  Value *&PredV = I->second;
1532  if (PredV->getType() != LoadI->getType())
1533  PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
1534  P->getTerminator());
1535 
1536  PN->addIncoming(PredV, I->first);
1537  }
1538 
1539  for (LoadInst *PredLoadI : CSELoads) {
1540  combineMetadataForCSE(PredLoadI, LoadI, true);
1541  }
1542 
1543  LoadI->replaceAllUsesWith(PN);
1544  LoadI->eraseFromParent();
1545 
1546  return true;
1547 }
1548 
1549 /// findMostPopularDest - The specified list contains multiple possible
1550 /// threadable destinations. Pick the one that occurs the most frequently in
1551 /// the list.
1552 static BasicBlock *
1553 findMostPopularDest(BasicBlock *BB,
1554  const SmallVectorImpl<std::pair<BasicBlock *,
1555  BasicBlock *>> &PredToDestList) {
1556  assert(!PredToDestList.empty());
1557 
1558  // Determine popularity. If there are multiple possible destinations, we
1559  // explicitly choose to ignore 'undef' destinations. We prefer to thread
1560  // blocks with known and real destinations to threading undef. We'll handle
1561  // them later if interesting.
1562  MapVector<BasicBlock *, unsigned> DestPopularity;
1563 
1564  // Populate DestPopularity with the successors in the order they appear in the
1565  // successor list. This way, we ensure determinism by iterating it in the
1566  // same order in std::max_element below. We map nullptr to 0 so that we can
1567  // return nullptr when PredToDestList contains nullptr only.
1568  DestPopularity[nullptr] = 0;
1569  for (auto *SuccBB : successors(BB))
1570  DestPopularity[SuccBB] = 0;
1571 
1572  for (const auto &PredToDest : PredToDestList)
1573  if (PredToDest.second)
1574  DestPopularity[PredToDest.second]++;
1575 
1576  // Find the most popular dest.
1577  using VT = decltype(DestPopularity)::value_type;
1578  auto MostPopular = std::max_element(
1579  DestPopularity.begin(), DestPopularity.end(),
1580  [](const VT &L, const VT &R) { return L.second < R.second; });
1581 
1582  // Okay, we have finally picked the most popular destination.
1583  return MostPopular->first;
1584 }
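// Illustrative example (not from the original source): for a switch in BB with
// successors {case1, case2, default} and
//   PredToDestList = { (P1, case1), (P2, case2), (P3, case2), (P4, nullptr) },
// DestPopularity ends up as {nullptr:0, case1:1, case2:2, default:0}, so case2
// is returned. If every entry mapped to nullptr, the seeded nullptr bucket
// would win and nullptr would be returned, as the comment above describes.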
1585 
1586 // Try to evaluate the value of V when the control flows from PredPredBB to
1587 // BB->getSinglePredecessor() and then on to BB.
1588 Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
1589  BasicBlock *PredPredBB,
1590  Value *V) {
1591  BasicBlock *PredBB = BB->getSinglePredecessor();
1592  assert(PredBB && "Expected a single predecessor");
1593 
1594  if (Constant *Cst = dyn_cast<Constant>(V)) {
1595  return Cst;
1596  }
1597 
1598  // Consult LVI if V is not an instruction in BB or PredBB.
1599  Instruction *I = dyn_cast<Instruction>(V);
1600  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1601  return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1602  }
1603 
1604  // Look into a PHI argument.
1605  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1606  if (PHI->getParent() == PredBB)
1607  return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1608  return nullptr;
1609  }
1610 
1611  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1612  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1613  if (CondCmp->getParent() == BB) {
1614  Constant *Op0 =
1615  evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
1616  Constant *Op1 =
1617  evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
1618  if (Op0 && Op1) {
1619  return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
1620  }
1621  }
1622  return nullptr;
1623  }
1624 
1625  return nullptr;
1626 }
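// Illustrative sketch (hypothetical IR, not from this file) of what this
// routine can conclude:
//
//   PredBB:                                ; sole predecessor of BB
//     %v = phi i32 [ 0, %PredPredA ], [ 1, %PredPredB ]
//     br label %BB
//   BB:
//     %c = icmp eq i32 %v, 0
//
// evaluateOnPredecessorEdge(BB, %PredPredA, %c) folds the PHI to 0 and the
// compare to true; on the %PredPredB edge the same query folds to false.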
1627 
1628 bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
1629  ConstantPreference Preference,
1630  Instruction *CxtI) {
1631  // If threading this would thread across a loop header, don't even try to
1632  // thread the edge.
1633  if (LoopHeaders.count(BB))
1634  return false;
1635 
1636  PredValueInfoTy PredValues;
1637  if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1638  CxtI)) {
1639  // We don't have known values in predecessors. See if we can thread through
1640  // BB and its sole predecessor.
1641  return maybethreadThroughTwoBasicBlocks(BB, Cond);
1642  }
1643 
1644  assert(!PredValues.empty() &&
1645  "computeValueKnownInPredecessors returned true with no values");
1646 
1647  LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1648  for (const auto &PredValue : PredValues) {
1649  dbgs() << " BB '" << BB->getName()
1650  << "': FOUND condition = " << *PredValue.first
1651  << " for pred '" << PredValue.second->getName() << "'.\n";
1652  });
1653 
1654  // Decide what we want to thread through. Convert our list of known values to
1655  // a list of known destinations for each pred. This also discards duplicate
1656  // predecessors and keeps track of the undefined inputs (which are represented
1657  // as a null dest in the PredToDestList).
1658  SmallPtrSet<BasicBlock*, 16> SeenPreds;
1659  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
1660 
1661  BasicBlock *OnlyDest = nullptr;
1662  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1663  Constant *OnlyVal = nullptr;
1664  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1665 
1666  for (const auto &PredValue : PredValues) {
1667  BasicBlock *Pred = PredValue.second;
1668  if (!SeenPreds.insert(Pred).second)
1669  continue; // Duplicate predecessor entry.
1670 
1671  Constant *Val = PredValue.first;
1672 
1673  BasicBlock *DestBB;
1674  if (isa<UndefValue>(Val))
1675  DestBB = nullptr;
1676  else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1677  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1678  DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1679  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1680  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1681  DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1682  } else {
1683  assert(isa<IndirectBrInst>(BB->getTerminator())
1684  && "Unexpected terminator");
1685  assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1686  DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1687  }
1688 
1689  // If we have exactly one destination, remember it for efficiency below.
1690  if (PredToDestList.empty()) {
1691  OnlyDest = DestBB;
1692  OnlyVal = Val;
1693  } else {
1694  if (OnlyDest != DestBB)
1695  OnlyDest = MultipleDestSentinel;
1696  // It's possible we have the same destination but a different value, e.g. the
1697  // default case in a switchinst.
1698  if (Val != OnlyVal)
1699  OnlyVal = MultipleVal;
1700  }
1701 
1702  // If the predecessor ends with an indirect goto, we can't change its
1703  // destination. Same for CallBr.
1704  if (isa<IndirectBrInst>(Pred->getTerminator()) ||
1705  isa<CallBrInst>(Pred->getTerminator()))
1706  continue;
1707 
1708  PredToDestList.emplace_back(Pred, DestBB);
1709  }
1710 
1711  // If all edges were unthreadable, we fail.
1712  if (PredToDestList.empty())
1713  return false;
1714 
1715  // If all the predecessors go to a single known successor, we want to fold,
1716  // not thread. By doing so, we do not need to duplicate the current block, and
1717  // we avoid missing potential opportunities in cases where we don't/can't duplicate.
1718  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1719  if (BB->hasNPredecessors(PredToDestList.size())) {
1720  bool SeenFirstBranchToOnlyDest = false;
1721  std::vector <DominatorTree::UpdateType> Updates;
1722  Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1723  for (BasicBlock *SuccBB : successors(BB)) {
1724  if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1725  SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1726  } else {
1727  SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1728  Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1729  }
1730  }
1731 
1732  // Finally update the terminator.
1733  Instruction *Term = BB->getTerminator();
1734  BranchInst::Create(OnlyDest, Term);
1735  ++NumFolds;
1736  Term->eraseFromParent();
1737  DTU->applyUpdatesPermissive(Updates);
1738  if (HasProfileData)
1739  BPI->eraseBlock(BB);
1740 
1741  // If the condition is now dead due to the removal of the old terminator,
1742  // erase it.
1743  if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1744  if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1745  CondInst->eraseFromParent();
1746  // We can safely replace *some* uses of the CondInst if it has
1747  // exactly one value as returned by LVI. RAUW is incorrect in the
1748  // presence of guards and assumes, that have the `Cond` as the use. This
1749  // is because we use the guards/assume to reason about the `Cond` value
1750  // at the end of block, but RAUW unconditionally replaces all uses
1751  // including the guards/assumes themselves and the uses before the
1752  // guard/assume.
1753  else if (OnlyVal && OnlyVal != MultipleVal &&
1754  CondInst->getParent() == BB)
1755  replaceFoldableUses(CondInst, OnlyVal);
1756  }
1757  return true;
1758  }
1759  }
1760 
1761  // Determine which is the most common successor. If we have many inputs and
1762  // this block is a switch, we want to start by threading the batch that goes
1763  // to the most popular destination first. If we only know about one
1764  // threadable destination (the common case) we can avoid this.
1765  BasicBlock *MostPopularDest = OnlyDest;
1766 
1767  if (MostPopularDest == MultipleDestSentinel) {
1768  // Remove any loop headers from the Dest list; threadEdge conservatively
1769  // won't process them, but we might have other destinations that are eligible
1770  // and that we still want to process.
1771  erase_if(PredToDestList,
1772  [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1773  return LoopHeaders.contains(PredToDest.second);
1774  });
1775 
1776  if (PredToDestList.empty())
1777  return false;
1778 
1779  MostPopularDest = findMostPopularDest(BB, PredToDestList);
1780  }
1781 
1782  // Now that we know what the most popular destination is, factor all
1783  // predecessors that will jump to it into a single predecessor.
1784  SmallVector<BasicBlock*, 16> PredsToFactor;
1785  for (const auto &PredToDest : PredToDestList)
1786  if (PredToDest.second == MostPopularDest) {
1787  BasicBlock *Pred = PredToDest.first;
1788 
1789  // This predecessor may be a switch or something else that has multiple
1790  // edges to the block. Factor each of these edges by listing them
1791  // according to # occurrences in PredsToFactor.
1792  for (BasicBlock *Succ : successors(Pred))
1793  if (Succ == BB)
1794  PredsToFactor.push_back(Pred);
1795  }
1796 
1797  // If the threadable edges are branching on an undefined value, we get to pick
1798  // the destination that these predecessors should get to.
1799  if (!MostPopularDest)
1800  MostPopularDest = BB->getTerminator()->
1801  getSuccessor(getBestDestForJumpOnUndef(BB));
1802 
1803  // Ok, try to thread it!
1804  return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1805 }
1806 
1807 /// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1808 /// a PHI node (or freeze PHI) in the current block. See if there are any
1809 /// simplifications we can do based on inputs to the phi node.
1810 bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {
1811  BasicBlock *BB = PN->getParent();
1812 
1813  // TODO: We could make use of this to do it once for blocks with common PHI
1814  // values.
1815  SmallVector<BasicBlock *, 1> PredBBs;
1816  PredBBs.resize(1);
1817 
1818  // If any of the predecessor blocks end in an unconditional branch, we can
1819  // *duplicate* the conditional branch into that block in order to further
1820  // encourage jump threading and to eliminate cases where we have branch on a
1821  // phi of an icmp (branch on icmp is much better).
1822  // This is still beneficial when a frozen phi is used as the branch condition
1823  // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1824  // to br(icmp(freeze ...)).
1825  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1826  BasicBlock *PredBB = PN->getIncomingBlock(i);
1827  if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1828  if (PredBr->isUnconditional()) {
1829  PredBBs[0] = PredBB;
1830  // Try to duplicate BB into PredBB.
1831  if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1832  return true;
1833  }
1834  }
1835 
1836  return false;
1837 }
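// Illustrative sketch (hypothetical IR, not from this file). Given
//
//   Pred:                                  ; ends in an unconditional branch
//     br label %BB
//   BB:
//     %p = phi i1 [ true, %Pred ], [ %x, %Other ]
//     br i1 %p, label %T, label %F
//
// duplicating BB's conditional branch into Pred lets Pred branch on a value
// that is known there (true), so Pred can jump straight to %T.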
1838 
1839 /// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1840 /// a xor instruction in the current block. See if there are any
1841 /// simplifications we can do based on inputs to the xor.
1842 bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
1843  BasicBlock *BB = BO->getParent();
1844 
1845  // If either the LHS or RHS of the xor is a constant, don't do this
1846  // optimization.
1847  if (isa<ConstantInt>(BO->getOperand(0)) ||
1848  isa<ConstantInt>(BO->getOperand(1)))
1849  return false;
1850 
1851  // If the first instruction in BB isn't a phi, we won't be able to infer
1852  // anything special about any particular predecessor.
1853  if (!isa<PHINode>(BB->front()))
1854  return false;
1855 
1856  // If this BB is a landing pad, we won't be able to split the edge into it.
1857  if (BB->isEHPad())
1858  return false;
1859 
1860  // If we have a xor as the branch input to this block, and we know that the
1861  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1862  // the condition into the predecessor and fix that value to true, saving some
1863  // logical ops on that path and encouraging other paths to simplify.
1864  //
1865  // This copies something like this:
1866  //
1867  // BB:
1868  // %X = phi i1 [1], [%X']
1869  // %Y = icmp eq i32 %A, %B
1870  // %Z = xor i1 %X, %Y
1871  // br i1 %Z, ...
1872  //
1873  // Into:
1874  // BB':
1875  // %Y = icmp ne i32 %A, %B
1876  // br i1 %Y, ...
1877 
1878  PredValueInfoTy XorOpValues;
1879  bool isLHS = true;
1880  if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1881  WantInteger, BO)) {
1882  assert(XorOpValues.empty());
1883  if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1884  WantInteger, BO))
1885  return false;
1886  isLHS = false;
1887  }
1888 
1889  assert(!XorOpValues.empty() &&
1890  "computeValueKnownInPredecessors returned true with no values");
1891 
1892  // Scan the information to see which is most popular: true or false. The
1893  // predecessors can be of the set true, false, or undef.
1894  unsigned NumTrue = 0, NumFalse = 0;
1895  for (const auto &XorOpValue : XorOpValues) {
1896  if (isa<UndefValue>(XorOpValue.first))
1897  // Ignore undefs for the count.
1898  continue;
1899  if (cast<ConstantInt>(XorOpValue.first)->isZero())
1900  ++NumFalse;
1901  else
1902  ++NumTrue;
1903  }
1904 
1905  // Determine which value to split on, true, false, or undef if neither.
1906  ConstantInt *SplitVal = nullptr;
1907  if (NumTrue > NumFalse)
1908  SplitVal = ConstantInt::getTrue(BB->getContext());
1909  else if (NumTrue != 0 || NumFalse != 0)
1910  SplitVal = ConstantInt::getFalse(BB->getContext());
1911 
1912  // Collect all of the blocks that this can be folded into so that we can
1913  // factor this once and clone it once.
1914  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1915  for (const auto &XorOpValue : XorOpValues) {
1916  if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1917  continue;
1918 
1919  BlocksToFoldInto.push_back(XorOpValue.second);
1920  }
1921 
1922  // If we inferred a value for all of the predecessors, then duplication won't
1923  // help us. However, we can just replace the LHS or RHS with the constant.
1924  if (BlocksToFoldInto.size() ==
1925  cast<PHINode>(BB->front()).getNumIncomingValues()) {
1926  if (!SplitVal) {
1927  // If all preds provide undef, just nuke the xor, because it is undef too.
1928  BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
1929  BO->eraseFromParent();
1930  } else if (SplitVal->isZero()) {
1931  // If all preds provide 0, replace the xor with the other input.
1932  BO->replaceAllUsesWith(BO->getOperand(isLHS));
1933  BO->eraseFromParent();
1934  } else {
1935  // If all preds provide 1, set the computed value to 1.
1936  BO->setOperand(!isLHS, SplitVal);
1937  }
1938 
1939  return true;
1940  }
1941 
1942  // If any of the predecessors ends with an indirect goto, we can't change its
1943  // destination. Same for CallBr.
1944  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1945  return isa<IndirectBrInst>(Pred->getTerminator()) ||
1946  isa<CallBrInst>(Pred->getTerminator());
1947  }))
1948  return false;
1949 
1950  // Try to duplicate BB into PredBB.
1951  return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1952 }
1953 
1954 /// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1955 /// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1956 /// NewPred using the entries from OldPred (suitably mapped).
1957 void JumpThreadingPass::addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
1958  BasicBlock *OldPred,
1959  BasicBlock *NewPred,
1960  DenseMap<Instruction *, Value *> &ValueMap) {
1961  for (PHINode &PN : PHIBB->phis()) {
1962  // Ok, we have a PHI node. Figure out what the incoming value was for the
1963  // DestBlock.
1964  Value *IV = PN.getIncomingValueForBlock(OldPred);
1965 
1966  // Remap the value if necessary.
1967  if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1968  DenseMap<Instruction *, Value *>::iterator I = ValueMap.find(Inst);
1969  if (I != ValueMap.end())
1970  IV = I->second;
1971  }
1972 
1973  PN.addIncoming(IV, NewPred);
1974  }
1975 }
1976 
1977 /// Merge basic block BB into its sole predecessor if possible.
1978 bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
1979  BasicBlock *SinglePred = BB->getSinglePredecessor();
1980  if (!SinglePred)
1981  return false;
1982 
1983  const Instruction *TI = SinglePred->getTerminator();
1984  if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
1985  SinglePred == BB || hasAddressTakenAndUsed(BB))
1986  return false;
1987 
1988  // If SinglePred was a loop header, BB becomes one.
1989  if (LoopHeaders.erase(SinglePred))
1990  LoopHeaders.insert(BB);
1991 
1992  LVI->eraseBlock(SinglePred);
1993  MergeBasicBlockIntoOnlyPred(BB, DTU);
1994 
1995  // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1996  // BB code within one basic block `BB`), we need to invalidate the LVI
1997  // information associated with BB, because the LVI information need not be
1998  // true for all of BB after the merge. For example,
1999  // Before the merge, LVI info and code is as follows:
2000  // SinglePred: <LVI info1 for %p val>
2001  // %y = use of %p
2002  // call @exit() // need not transfer execution to successor.
2003  // assume(%p) // from this point on %p is true
2004  // br label %BB
2005  // BB: <LVI info2 for %p val, i.e. %p is true>
2006  // %x = use of %p
2007  // br label exit
2008  //
2009  // Note that this LVI info for blocks BB and SinglePred is correct for %p
2010  // (info2 and info1 respectively). After the merge and the deletion of the
2011  // LVI info1 for SinglePred, we have the following code:
2012  // BB: <LVI info2 for %p val>
2013  // %y = use of %p
2014  // call @exit()
2015  // assume(%p)
2016  // %x = use of %p <-- LVI info2 is correct from here onwards.
2017  // br label exit
2018  // LVI info2 for BB is incorrect at the beginning of BB.
2019 
2020  // Invalidate LVI information for BB if the LVI is not provably true for
2021  // all of BB.
2022  if (!isGuaranteedToTransferExecutionToSuccessor(BB))
2023  LVI->eraseBlock(BB);
2024  return true;
2025 }
2026 
2027 /// Update the SSA form. NewBB contains instructions that are copied from BB.
2028 /// ValueMapping maps old values in BB to new ones in NewBB.
2029 void JumpThreadingPass::updateSSA(
2030  BasicBlock *BB, BasicBlock *NewBB,
2031  DenseMap<Instruction *, Value *> &ValueMapping) {
2032  // If there were values defined in BB that are used outside the block, then we
2033  // now have to update all uses of the value to use either the original value,
2034  // the cloned value, or some PHI derived value. This can require arbitrary
2035  // PHI insertion, which we are prepared to do; clean these up now.
2036  SSAUpdater SSAUpdate;
2037  SmallVector<Use *, 16> UsesToRename;
2038 
2039  for (Instruction &I : *BB) {
2040  // Scan all uses of this instruction to see if it is used outside of its
2041  // block, and if so, record them in UsesToRename.
2042  for (Use &U : I.uses()) {
2043  Instruction *User = cast<Instruction>(U.getUser());
2044  if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
2045  if (UserPN->getIncomingBlock(U) == BB)
2046  continue;
2047  } else if (User->getParent() == BB)
2048  continue;
2049 
2050  UsesToRename.push_back(&U);
2051  }
2052 
2053  // If there are no uses outside the block, we're done with this instruction.
2054  if (UsesToRename.empty())
2055  continue;
2056  LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
2057 
2058  // We found a use of I outside of BB. Rename all uses of I that are outside
2059  // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
2060  // with the two values we know.
2061  SSAUpdate.Initialize(I.getType(), I.getName());
2062  SSAUpdate.AddAvailableValue(BB, &I);
2063  SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
2064 
2065  while (!UsesToRename.empty())
2066  SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
2067  LLVM_DEBUG(dbgs() << "\n");
2068  }
2069 }
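// Illustrative sketch (hypothetical names, not from this file): if BB defines
//   %a = add i32 %x, 1
// and %a is used in a block reachable from both BB and its copy NewBB, the
// SSAUpdater places a PHI along the lines of
//   %a.merge = phi i32 [ %a, %BB ], [ %a.copy, %NewBB ]
// at the join point and rewrites the non-local use to %a.merge.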
2070 
2071 /// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2072 /// arguments that come from PredBB. Return the map from the variables in the
2073 /// source basic block to the variables in the newly created basic block.
2074 DenseMap<Instruction *, Value *> JumpThreadingPass::cloneInstructions(
2075  BasicBlock::iterator BI,
2076  BasicBlock::iterator BE, BasicBlock *NewBB,
2077  BasicBlock *PredBB) {
2078  // We are going to have to map operands from the source basic block to the new
2079  // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2080  // block, evaluate them to account for entry from PredBB.
2081  DenseMap<Instruction *, Value *> ValueMapping;
2082 
2083  // Clone the phi nodes of the source basic block into NewBB. The resulting
2084  // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2085  // might need to rewrite the operand of the cloned phi.
2086  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2087  PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2088  NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2089  ValueMapping[PN] = NewPN;
2090  }
2091 
2092  // Clone noalias scope declarations in the threaded block. When threading a
2093  // loop exit, we would otherwise end up with two identical scope declarations
2094  // visible at the same time.
2095  SmallVector<MDNode *> NoAliasScopes;
2096  DenseMap<MDNode *, MDNode *> ClonedScopes;
2097  LLVMContext &Context = PredBB->getContext();
2098  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2099  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2100 
2101  // Clone the non-phi instructions of the source basic block into NewBB,
2102  // keeping track of the mapping and using it to remap operands in the cloned
2103  // instructions.
2104  for (; BI != BE; ++BI) {
2105  Instruction *New = BI->clone();
2106  New->setName(BI->getName());
2107  NewBB->getInstList().push_back(New);
2108  ValueMapping[&*BI] = New;
2109  adaptNoAliasScopes(New, ClonedScopes, Context);
2110 
2111  // Remap operands to patch up intra-block references.
2112  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2113  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2114  DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
2115  if (I != ValueMapping.end())
2116  New->setOperand(i, I->second);
2117  }
2118  }
2119 
2120  return ValueMapping;
2121 }
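// Illustrative sketch (hypothetical IR, not from this file): when cloning
//   %p = phi i32 [ 0, %PredBB ], [ %q, %Other ]
// into a copy that is only reachable from PredBB, the cloned PHI keeps just
// the PredBB entry, i.e. phi i32 [ 0, %PredBB ], which later simplification
// can fold to the constant 0.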
2122 
2123 /// Attempt to thread through two successive basic blocks.
2124 bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
2125  Value *Cond) {
2126  // Consider:
2127  //
2128  // PredBB:
2129  // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2130  // %tobool = icmp eq i32 %cond, 0
2131  // br i1 %tobool, label %BB, label ...
2132  //
2133  // BB:
2134  // %cmp = icmp eq i32* %var, null
2135  // br i1 %cmp, label ..., label ...
2136  //
2137  // We don't know the value of %var at BB even if we know which incoming edge
2138  // we take to BB. However, once we duplicate PredBB for each of its incoming
2139  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2140  // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2141 
2142  // Require that BB end with a Branch for simplicity.
2143  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2144  if (!CondBr)
2145  return false;
2146 
2147  // BB must have exactly one predecessor.
2148  BasicBlock *PredBB = BB->getSinglePredecessor();
2149  if (!PredBB)
2150  return false;
2151 
2152  // Require that PredBB end with a conditional Branch. If PredBB ends with an
2153  // unconditional branch, we should be merging PredBB and BB instead. For
2154  // simplicity, we don't deal with a switch.
2155  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2156  if (!PredBBBranch || PredBBBranch->isUnconditional())
2157  return false;
2158 
2159  // If PredBB has exactly one incoming edge, we don't gain anything by copying
2160  // PredBB.
2161  if (PredBB->getSinglePredecessor())
2162  return false;
2163 
2164  // Don't thread through PredBB if it contains a successor edge to itself, in
2165  // which case we would infinite loop. Suppose we are threading an edge from
2166  // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2167  // successor edge to itself. If we allowed jump threading in this case, we
2168  // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2169  // PredBB.thread has a successor edge to PredBB, we would immediately come up
2170  // with another jump threading opportunity from PredBB.thread through PredBB
2171  // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2172  // would keep peeling one iteration from PredBB.
2173  if (llvm::is_contained(successors(PredBB), PredBB))
2174  return false;
2175 
2176  // Don't thread across a loop header.
2177  if (LoopHeaders.count(PredBB))
2178  return false;
2179 
2180  // Avoid complication with duplicating EH pads.
2181  if (PredBB->isEHPad())
2182  return false;
2183 
2184  // Find a predecessor that we can thread. For simplicity, we only consider a
2185  // successor edge out of BB to which we thread exactly one incoming edge into
2186  // PredBB.
2187  unsigned ZeroCount = 0;
2188  unsigned OneCount = 0;
2189  BasicBlock *ZeroPred = nullptr;
2190  BasicBlock *OnePred = nullptr;
2191  for (BasicBlock *P : predecessors(PredBB)) {
2192  if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2193  evaluateOnPredecessorEdge(BB, P, Cond))) {
2194  if (CI->isZero()) {
2195  ZeroCount++;
2196  ZeroPred = P;
2197  } else if (CI->isOne()) {
2198  OneCount++;
2199  OnePred = P;
2200  }
2201  }
2202  }
2203 
2204  // Disregard complicated cases where we have to thread multiple edges.
2205  BasicBlock *PredPredBB;
2206  if (ZeroCount == 1) {
2207  PredPredBB = ZeroPred;
2208  } else if (OneCount == 1) {
2209  PredPredBB = OnePred;
2210  } else {
2211  return false;
2212  }
2213 
2214  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2215 
2216  // If threading to the same block as we come from, we would infinite loop.
2217  if (SuccBB == BB) {
2218  LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2219  << "' - would thread to self!\n");
2220  return false;
2221  }
2222 
2223  // If threading this would thread across a loop header, don't thread the edge.
2224  // See the comments above findLoopHeaders for justifications and caveats.
2225  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2226  LLVM_DEBUG({
2227  bool BBIsHeader = LoopHeaders.count(BB);
2228  bool SuccIsHeader = LoopHeaders.count(SuccBB);
2229  dbgs() << " Not threading across "
2230  << (BBIsHeader ? "loop header BB '" : "block BB '")
2231  << BB->getName() << "' to dest "
2232  << (SuccIsHeader ? "loop header BB '" : "block BB '")
2233  << SuccBB->getName()
2234  << "' - it might create an irreducible loop!\n";
2235  });
2236  return false;
2237  }
2238 
2239  // Compute the cost of duplicating BB and PredBB.
2240  unsigned BBCost =
2241  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2242  unsigned PredBBCost = getJumpThreadDuplicationCost(
2243  PredBB, PredBB->getTerminator(), BBDupThreshold);
2244 
2245  // Give up if costs are too high. We need to check BBCost and PredBBCost
2246  // individually before checking their sum because getJumpThreadDuplicationCost
2247  // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
2248  if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2249  BBCost + PredBBCost > BBDupThreshold) {
2250  LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2251  << "' - Cost is too high: " << PredBBCost
2252  << " for PredBB, " << BBCost << " for BB\n");
2253  return false;
2254  }
2255 
2256  // Now we are ready to duplicate PredBB.
2257  threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2258  return true;
2259 }
2260 
2261 void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
2262  BasicBlock *PredBB,
2263  BasicBlock *BB,
2264  BasicBlock *SuccBB) {
2265  LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2266  << BB->getName() << "'\n");
2267 
2268  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2269  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2270 
2271  BasicBlock *NewBB =
2272  BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2273  PredBB->getParent(), PredBB);
2274  NewBB->moveAfter(PredBB);
2275 
2276  // Set the block frequency of NewBB.
2277  if (HasProfileData) {
2278  auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2279  BPI->getEdgeProbability(PredPredBB, PredBB);
2280  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2281  }
2282 
2283  // We are going to have to map operands from the original BB block to the new
2284  // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2285  // to account for entry from PredPredBB.
2286  DenseMap<Instruction *, Value *> ValueMapping =
2287  cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
2288 
2289  // Copy the edge probabilities from PredBB to NewBB.
2290  if (HasProfileData)
2291  BPI->copyEdgeProbabilities(PredBB, NewBB);
2292 
2293  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2294  // This eliminates predecessors from PredBB, which requires us to simplify
2295  // any PHI nodes in PredBB.
2296  Instruction *PredPredTerm = PredPredBB->getTerminator();
2297  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2298  if (PredPredTerm->getSuccessor(i) == PredBB) {
2299  PredBB->removePredecessor(PredPredBB, true);
2300  PredPredTerm->setSuccessor(i, NewBB);
2301  }
2302 
2303  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2304  ValueMapping);
2305  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2306  ValueMapping);
2307 
2308  DTU->applyUpdatesPermissive(
2309  {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2310  {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2311  {DominatorTree::Insert, PredPredBB, NewBB},
2312  {DominatorTree::Delete, PredPredBB, PredBB}});
2313 
2314  updateSSA(PredBB, NewBB, ValueMapping);
2315 
2316  // Clean up things like PHI nodes with single operands, dead instructions,
2317  // etc.
2318  SimplifyInstructionsInBlock(NewBB, TLI);
2319  SimplifyInstructionsInBlock(PredBB, TLI);
2320 
2321  SmallVector<BasicBlock *, 1> PredsToFactor;
2322  PredsToFactor.push_back(NewBB);
2323  threadEdge(BB, PredsToFactor, SuccBB);
2324 }
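// Illustrative sketch (not from this file) of the overall CFG effect:
//
//   Before:  PredPredBB -> PredBB -> BB -> SuccBB   (plus other edges)
//   After:   PredPredBB -> PredBB.thread, a copy of PredBB, and threadEdge
//            then duplicates BB so that PredBB.thread reaches SuccBB without
//            going through the original PredBB or BB.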
2325 
2326 /// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
2327 bool JumpThreadingPass::tryThreadEdge(
2328  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2329  BasicBlock *SuccBB) {
2330  // If threading to the same block as we come from, we would infinite loop.
2331  if (SuccBB == BB) {
2332  LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2333  << "' - would thread to self!\n");
2334  return false;
2335  }
2336 
2337  // If threading this would thread across a loop header, don't thread the edge.
2338  // See the comments above findLoopHeaders for justifications and caveats.
2339  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2340  LLVM_DEBUG({
2341  bool BBIsHeader = LoopHeaders.count(BB);
2342  bool SuccIsHeader = LoopHeaders.count(SuccBB);
2343  dbgs() << " Not threading across "
2344  << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2345  << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2346  << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2347  });
2348  return false;
2349  }
2350 
2351  unsigned JumpThreadCost =
2352  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2353  if (JumpThreadCost > BBDupThreshold) {
2354  LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2355  << "' - Cost is too high: " << JumpThreadCost << "\n");
2356  return false;
2357  }
2358 
2359  threadEdge(BB, PredBBs, SuccBB);
2360  return true;
2361 }
2362 
2363 /// threadEdge - We have decided that it is safe and profitable to factor the
2364 /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2365 /// across BB. Transform the IR to reflect this change.
2366 void JumpThreadingPass::threadEdge(BasicBlock *BB,
2367  const SmallVectorImpl<BasicBlock *> &PredBBs,
2368  BasicBlock *SuccBB) {
2369  assert(SuccBB != BB && "Don't create an infinite loop");
2370 
2371  assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2372  "Don't thread across loop headers");
2373 
2374  // And finally, do it! Start by factoring the predecessors if needed.
2375  BasicBlock *PredBB;
2376  if (PredBBs.size() == 1)
2377  PredBB = PredBBs[0];
2378  else {
2379  LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2380  << " common predecessors.\n");
2381  PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2382  }
2383 
2384  // And finally, do it!
2385  LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2386  << "' to '" << SuccBB->getName()
2387  << ", across block:\n " << *BB << "\n");
2388 
2389  LVI->threadEdge(PredBB, BB, SuccBB);
2390 
2391  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
2392  BB->getName()+".thread",
2393  BB->getParent(), BB);
2394  NewBB->moveAfter(PredBB);
2395 
2396  // Set the block frequency of NewBB.
2397  if (HasProfileData) {
2398  auto NewBBFreq =
2399  BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2400  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2401  }
2402 
2403  // Copy all the instructions from BB to NewBB except the terminator.
2404  DenseMap<Instruction *, Value *> ValueMapping =
2405  cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
2406 
2407  // We didn't copy the terminator from BB over to NewBB, because there is now
2408  // an unconditional jump to SuccBB. Insert the unconditional jump.
2409  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2410  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2411 
2412  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2413  // PHI nodes for NewBB now.
2414  addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2415 
2416  // Update the terminator of PredBB to jump to NewBB instead of BB. This
2417  // eliminates predecessors from BB, which requires us to simplify any PHI
2418  // nodes in BB.
2419  Instruction *PredTerm = PredBB->getTerminator();
2420  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2421  if (PredTerm->getSuccessor(i) == BB) {
2422  BB->removePredecessor(PredBB, true);
2423  PredTerm->setSuccessor(i, NewBB);
2424  }
2425 
2426  // Enqueue required DT updates.
2427  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2428  {DominatorTree::Insert, PredBB, NewBB},
2429  {DominatorTree::Delete, PredBB, BB}});
2430 
2431  updateSSA(BB, NewBB, ValueMapping);
2432 
2433  // At this point, the IR is fully up to date and consistent. Do a quick scan
2434  // over the new instructions and zap any that are constants or dead. This
2435  // frequently happens because of phi translation.
2436  SimplifyInstructionsInBlock(NewBB, TLI);
2437 
2438  // Update the edge weight from BB to SuccBB, which should be less than before.
2439  updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
2440 
2441  // Threaded an edge!
2442  ++NumThreads;
2443 }
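// Illustrative sketch (not from this file): for PredBB -> BB -> {SuccBB, Other}
// where the PredBB->BB edge is known to reach SuccBB, the transformation yields
//   PredBB -> BB.thread -> SuccBB
// with BB.thread holding copies of BB's non-terminator instructions, while the
// original BB keeps its remaining predecessors and its conditional terminator.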
2444 
2445 /// Create a new basic block that will be the predecessor of BB and successor of
2446 /// all blocks in Preds. When profile data is available, update the frequency of
2447 /// this new block.
2448 BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2449  ArrayRef<BasicBlock *> Preds,
2450  const char *Suffix) {
2451  SmallVector<BasicBlock *, 2> NewBBs;
2452 
2453  // Collect the frequencies of all predecessors of BB, which will be used to
2454  // update the edge weight of the result of splitting predecessors.
2455  DenseMap<BasicBlock *, BlockFrequency> FreqMap;
2456  if (HasProfileData)
2457  for (auto Pred : Preds)
2458  FreqMap.insert(std::make_pair(
2459  Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2460 
2461  // In the case when BB is a LandingPad block we create 2 new predecessors
2462  // instead of just one.
2463  if (BB->isLandingPad()) {
2464  std::string NewName = std::string(Suffix) + ".split-lp";
2465  SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2466  } else {
2467  NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2468  }
2469 
2470  std::vector<DominatorTree::UpdateType> Updates;
2471  Updates.reserve((2 * Preds.size()) + NewBBs.size());
2472  for (auto NewBB : NewBBs) {
2473  BlockFrequency NewBBFreq(0);
2474  Updates.push_back({DominatorTree::Insert, NewBB, BB});
2475  for (auto Pred : predecessors(NewBB)) {
2476  Updates.push_back({DominatorTree::Delete, Pred, BB});
2477  Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2478  if (HasProfileData) // Update frequencies between Pred -> NewBB.
2479  NewBBFreq += FreqMap.lookup(Pred);
2480  }
2481  if (HasProfileData) // Apply the summed frequency to NewBB.
2482  BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2483  }
2484 
2485  DTU->applyUpdatesPermissive(Updates);
2486  return NewBBs[0];
2487 }
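// Worked example (made-up numbers, not from this file): if P1 and P2 are split
// off from BB and profile data gives Freq(P1->BB) = 30 and Freq(P2->BB) = 10,
// the new common predecessor is assigned block frequency 30 + 10 = 40, matching
// the FreqMap summation above.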
2488 
2489 bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2490  const Instruction *TI = BB->getTerminator();
2491  assert(TI->getNumSuccessors() > 1 && "not a split");
2492 
2493  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
2494  if (!WeightsNode)
2495  return false;
2496 
2497  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
2498  if (MDName->getString() != "branch_weights")
2499  return false;
2500 
2501  // Ensure there are weights for all of the successors. Note that the first
2502  // operand to the metadata node is a name, not a weight.
2503  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
2504 }
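// Illustrative example (not from this file) of the metadata this checks for,
// attached to a two-successor terminator:
//   br i1 %c, label %T, label %F, !prof !0
//   !0 = !{!"branch_weights", i32 2000, i32 1}
// The node has one name operand plus two weights, i.e. three operands, which
// matches the "successors + 1" check above.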
2505 
2506 /// Update the block frequency of BB and branch weight and the metadata on the
2507 /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2508 /// Freq(PredBB->BB) / Freq(BB->SuccBB).
2509 void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2510  BasicBlock *BB,
2511  BasicBlock *NewBB,
2512  BasicBlock *SuccBB) {
2513  if (!HasProfileData)
2514  return;
2515 
2516  assert(BFI && BPI && "BFI & BPI should have been created here");
2517 
2518  // As the edge from PredBB to BB is deleted, we have to update the block
2519  // frequency of BB.
2520  auto BBOrigFreq = BFI->getBlockFreq(BB);
2521  auto NewBBFreq = BFI->getBlockFreq(NewBB);
2522  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2523  auto BBNewFreq = BBOrigFreq - NewBBFreq;
2524  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2525 
2526  // Collect updated outgoing edges' frequencies from BB and use them to update
2527  // edge probabilities.
2528  SmallVector<uint64_t, 4> BBSuccFreq;
2529  for (BasicBlock *Succ : successors(BB)) {
2530  auto SuccFreq = (Succ == SuccBB)
2531  ? BB2SuccBBFreq - NewBBFreq
2532  : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2533  BBSuccFreq.push_back(SuccFreq.getFrequency());
2534  }
2535 
2536  uint64_t MaxBBSuccFreq =
2537  *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2538 
2539  SmallVector<BranchProbability, 4> BBSuccProbs;
2540  if (MaxBBSuccFreq == 0)
2541  BBSuccProbs.assign(BBSuccFreq.size(),
2542  {1, static_cast<unsigned>(BBSuccFreq.size())});
2543  else {
2544  for (uint64_t Freq : BBSuccFreq)
2545  BBSuccProbs.push_back(
2546  BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2547  // Normalize edge probabilities so that they sum up to one.
2548  BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
2549  BBSuccProbs.end());
2550  }
2551 
2552  // Update edge probabilities in BPI.
2553  BPI->setEdgeProbability(BB, BBSuccProbs);
2554 
2555  // Update the profile metadata as well.
2556  //
2557  // Don't do this if the profile of the transformed blocks was statically
2558  // estimated. (This could occur despite the function having an entry
2559  // frequency in completely cold parts of the CFG.)
2560  //
2561  // In this case we don't want to suggest to subsequent passes that the
2562  // calculated weights are fully consistent. Consider this graph:
2563  //
2564  // check_1
2565  // 50% / |
2566  // eq_1 | 50%
2567  // \ |
2568  // check_2
2569  // 50% / |
2570  // eq_2 | 50%
2571  // \ |
2572  // check_3
2573  // 50% / |
2574  // eq_3 | 50%
2575  // \ |
2576  //
2577  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2578  // the overall probabilities are inconsistent; the total probability that the
2579  // value is either 1, 2 or 3 is 150%.
2580  //
2581  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2582  // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2583  // the loop exit edge. Then based solely on static estimation we would assume
2584  // the loop was extremely hot.
2585  //
2586  // FIXME: Fix this locally as well so that BPI and BFI stay consistent. We
2587  // shouldn't make edges extremely likely or unlikely based solely on static
2588  // estimation.
2589  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
2590  SmallVector<uint32_t, 4> Weights;
2591  for (auto Prob : BBSuccProbs)
2592  Weights.push_back(Prob.getNumerator());
2593 
2594  auto TI = BB->getTerminator();
2595  TI->setMetadata(
2596  LLVMContext::MD_prof,
2597  MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
2598  }
2599 }
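// Worked example (made-up numbers, not from this file): suppose Freq(BB) = 100,
// Freq(BB->SuccBB) = 60, and the threaded predecessor contributed
// Freq(PredBB->BB) = Freq(NewBB) = 40. Then BB's new frequency is 100 - 40 = 60
// and the BB->SuccBB frequency becomes 60 - 40 = 20 before the outgoing
// probabilities are renormalized and written back as branch weights.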
2600 
2601 /// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2602 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2603 /// If we can duplicate the contents of BB up into PredBB do so now, this
2604 /// improves the odds that the branch will be on an analyzable instruction like
2605 /// a compare.
2606 bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
2607  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2608  assert(!PredBBs.empty() && "Can't handle an empty set");
2609 
2610  // If BB is a loop header, then duplicating this block outside the loop would
2611  // cause us to transform this into an irreducible loop, don't do this.
2612  // See the comments above findLoopHeaders for justifications and caveats.
2613  if (LoopHeaders.count(BB)) {
2614  LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2615  << "' into predecessor block '" << PredBBs[0]->getName()
2616  << "' - it might create an irreducible loop!\n");
2617  return false;
2618  }
2619 
2620  unsigned DuplicationCost =
2621  getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2622  if (DuplicationCost > BBDupThreshold) {
2623  LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2624  << "' - Cost is too high: " << DuplicationCost << "\n");
2625  return false;
2626  }
2627 
2628  // And finally, do it! Start by factoring the predecessors if needed.
2629  std::vector<DominatorTree::UpdateType> Updates;
2630  BasicBlock *PredBB;
2631  if (PredBBs.size() == 1)
2632  PredBB = PredBBs[0];
2633  else {
2634  LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2635  << " common predecessors.\n");
2636  PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2637  }
2638  Updates.push_back({DominatorTree::Delete, PredBB, BB});
2639 
2640  // Okay, we decided to do this! Clone all the instructions in BB onto the end
2641  // of PredBB.
2642  LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2643  << "' into end of '" << PredBB->getName()
2644  << "' to eliminate branch on phi. Cost: "
2645  << DuplicationCost << " block is:" << *BB << "\n");
2646 
2647  // Unless PredBB ends with an unconditional branch, split the edge so that we
2648  // can just clone the bits from BB into the end of the new PredBB.
2649  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2650 
2651  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2652  BasicBlock *OldPredBB = PredBB;
2653  PredBB = SplitEdge(OldPredBB, BB);
2654  Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2655  Updates.push_back({DominatorTree::Insert, PredBB, BB});
2656  Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2657  OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2658  }
2659 
2660  // We are going to have to map operands from the original BB block into the
2661  // PredBB block. Evaluate PHI nodes in BB.
2662  DenseMap<Instruction*, Value*> ValueMapping;
2663 
2664  BasicBlock::iterator BI = BB->begin();
2665  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2666  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2667  // Clone the non-phi instructions of BB into PredBB, keeping track of the
2668  // mapping and using it to remap operands in the cloned instructions.
2669  for (; BI != BB->end(); ++BI) {
2670  Instruction *New = BI->clone();
2671 
2672  // Remap operands to patch up intra-block references.
2673  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2674  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2675  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2676  if (I != ValueMapping.end())
2677  New->setOperand(i, I->second);
2678  }
2679 
2680  // If this instruction can be simplified after the operands are updated,
2681  // just use the simplified value instead. This frequently happens due to
2682  // phi translation.
2683  if (Value *IV = SimplifyInstruction(
2684  New,
2685  {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2686  ValueMapping[&*BI] = IV;
2687  if (!New->mayHaveSideEffects()) {
2688  New->deleteValue();
2689  New = nullptr;
2690  }
2691  } else {
2692  ValueMapping[&*BI] = New;
2693  }
2694  if (New) {
2695  // Otherwise, insert the new instruction into the block.
2696  New->setName(BI->getName());
2697  PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
2698  // Update Dominance from simplified New instruction operands.
2699  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2700  if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2701  Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2702  }
2703  }
2704 
2705  // Check to see if the targets of the branch had PHI nodes. If so, we need to
2706  // add entries to the PHI nodes for branch from PredBB now.
2707  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2708  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2709  ValueMapping);
2710  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2711  ValueMapping);
2712 
2713  updateSSA(BB, PredBB, ValueMapping);
2714 
2715  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2716  // that we nuked.
2717  BB->removePredecessor(PredBB, true);
2718 
2719  // Remove the unconditional branch at the end of the PredBB block.
2720  OldPredBranch->eraseFromParent();
2721  if (HasProfileData)
2722  BPI->copyEdgeProbabilities(BB, PredBB);
2723  DTU->applyUpdatesPermissive(Updates);
2724 
2725  ++NumDupes;
2726  return true;
2727 }
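// Illustrative sketch (hypothetical IR, not from this file). For
//
//   PredBB:
//     br label %BB
//   BB:
//     %p = phi i1 [ true, %PredBB ], [ %x, %Other ]
//     br i1 %p, label %T, label %F
//
// the duplication appends a copy of BB's instructions to PredBB with %p mapped
// to true, so PredBB now ends in "br i1 true, label %T, label %F", which later
// cleanup can fold to an unconditional branch to %T. BB itself remains for its
// other predecessors.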
2728 
2729 // Pred is a predecessor of BB with an unconditional branch to BB. SI is
2730 // a Select instruction in Pred. BB has other predecessors and SI is used in
2731 // a PHI node in BB. SI has no other use.
2732 // A new basic block, NewBB, is created and SI is converted to compare and
2733 // conditional branch. SI is erased from parent.
2734 void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
2735  SelectInst *SI, PHINode *SIUse,
2736  unsigned Idx) {
2737  // Expand the select.
2738  //
2739  // Pred --
2740  // | v
2741  // | NewBB
2742  // | |
2743  // |-----
2744  // v
2745  // BB
2746  BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2747  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2748  BB->getParent(), BB);
2749  // Move the unconditional branch to NewBB.
2750  PredTerm->removeFromParent();
2751  NewBB->getInstList().insert(NewBB->end(), PredTerm);
2752  // Create a conditional branch and update PHI nodes.
2753  auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2754  BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2755  SIUse->setIncomingValue(Idx, SI->getFalseValue());
2756  SIUse->addIncoming(SI->getTrueValue(), NewBB);
2757 
2758  // The select is now dead.
2759  SI->eraseFromParent();
2760  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2761  {DominatorTree::Insert, Pred, NewBB}});
2762 
2763  // Update any other PHI nodes in BB.
2764  for (BasicBlock::iterator BI = BB->begin();
2765  PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2766  if (Phi != SIUse)
2767  Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2768 }
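// Illustrative sketch (hypothetical IR, not from this file). Given
//
//   Pred:
//     %s = select i1 %c, i32 1, i32 2
//     br label %BB
//   BB:
//     %phi = phi i32 [ %s, %Pred ], ...
//
// the unfolding rewrites Pred to end in "br i1 %c, label %select.unfold,
// label %BB", moves the unconditional branch into the new block, and the PHI
// becomes phi i32 [ 2, %Pred ], [ 1, %select.unfold ], ..., so each incoming
// edge now carries a constant.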
2769 
2770 bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
2771  PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2772 
2773  if (!CondPHI || CondPHI->getParent() != BB)
2774  return false;
2775 
2776  for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2777  BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2778  SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2779 
2780  // The second and third condition can be potentially relaxed. Currently
2781  // the conditions help to simplify the code and allow us to reuse existing
2782  // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2783  if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2784  continue;
2785 
2786  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2787  if (!PredTerm || !PredTerm->isUnconditional())
2788  continue;
2789 
2790  unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2791  return true;
2792  }
2793  return false;
2794 }
2795 
2796 /// tryToUnfoldSelect - Look for blocks of the form
2797 /// bb1:
2798 /// %a = select
2799 /// br bb2
2800 ///
2801 /// bb2:
2802 /// %p = phi [%a, %bb1] ...
2803 /// %c = icmp %p
2804 /// br i1 %c
2805 ///
2806 /// And expand the select into a branch structure if one of its arms allows %c
2807 /// to be folded. This later enables threading from bb1 over bb2.
2808 bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
2809  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2810  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2811  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2812 
2813  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2814  CondLHS->getParent() != BB)
2815  return false;
2816 
2817  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2818  BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2819  SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2820 
2821  // Look if one of the incoming values is a select in the corresponding
2822  // predecessor.
2823  if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2824  continue;
2825 
2826  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2827  if (!PredTerm || !PredTerm->isUnconditional())
2828  continue;
2829 
2830  // Now check if one of the select values would allow us to constant fold the
2831  // terminator in BB. We don't do the transform if both sides fold; those
2832  // cases will be threaded in any case.
2833  LazyValueInfo::Tristate LHSFolds =
2834  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2835  CondRHS, Pred, BB, CondCmp);
2836  LazyValueInfo::Tristate RHSFolds =
2837  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2838  CondRHS, Pred, BB, CondCmp);
2839  if ((LHSFolds != LazyValueInfo::Unknown ||
2840  RHSFolds != LazyValueInfo::Unknown) &&
2841  LHSFolds != RHSFolds) {
2842  unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2843  return true;
2844  }
2845  }
2846  return false;
2847 }
2848 
2849 /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2850 /// same BB in the form
2851 /// bb:
2852 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2853 /// %s = select %p, trueval, falseval
2854 ///
2855 /// or
2856 ///
2857 /// bb:
2858 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2859 /// %c = cmp %p, 0
2860 /// %s = select %c, trueval, falseval
2861 ///
2862 /// And expand the select into a branch structure. This later enables
2863 /// jump-threading over bb in this pass.
2864 ///
2865 /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2866 /// select if the associated PHI has at least one constant. If the unfolded
2867 /// select is not jump-threaded, it will be folded again in the later
2868 /// optimizations.
2869 bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
2870  // This transform would reduce the quality of msan diagnostics.
2871  // Disable this transform under MemorySanitizer.
2872  if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2873  return false;
2874 
2875  // If threading this would thread across a loop header, don't thread the edge.
2876  // See the comments above findLoopHeaders for justifications and caveats.
2877  if (LoopHeaders.count(BB))
2878  return false;
2879 
2880  for (BasicBlock::iterator BI = BB->begin();
2881  PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2882  // Look for a Phi having at least one constant incoming value.
2883  if (llvm::all_of(PN->incoming_values(),
2884  [](Value *V) { return !isa<ConstantInt>(V); }))
2885  continue;
2886 
2887  auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2888  using namespace PatternMatch;
2889 
2890  // Check if SI is in BB and uses V as its condition.
2891  if (SI->getParent() != BB)
2892  return false;
2893  Value *Cond = SI->getCondition();
2894  bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2895  return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2896  };
2897 
2898  SelectInst *SI = nullptr;
2899  for (Use &U : PN->uses()) {
2900  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2901  // Look for an ICmp in BB that compares PN with a constant and is the
2902  // condition of a Select.
2903  if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2904  isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2905  if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2906  if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2907  SI = SelectI;
2908  break;
2909  }
2910  } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2911  // Look for a Select in BB that uses PN as condition.
2912  if (isUnfoldCandidate(SelectI, U.get())) {
2913  SI = SelectI;
2914  break;
2915  }
2916  }
2917  }
2918 
2919  if (!SI)
2920  continue;
2921  // Expand the select.
2922  Value *Cond = SI->getCondition();
2923  if (InsertFreezeWhenUnfoldingSelect &&
2924  !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
2925  &DTU->getDomTree()))
2926  Cond = new FreezeInst(Cond, "cond.fr", SI);
2927  Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
2928  BasicBlock *SplitBB = SI->getParent();
2929  BasicBlock *NewBB = Term->getParent();
2930  PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2931  NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2932  NewPN->addIncoming(SI->getFalseValue(), BB);
2933  SI->replaceAllUsesWith(NewPN);
2934  SI->eraseFromParent();
2935  // NewBB and SplitBB are newly created blocks which require insertion.
2936  std::vector<DominatorTree::UpdateType> Updates;
2937  Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2938  Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2939  Updates.push_back({DominatorTree::Insert, BB, NewBB});
2940  Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2941  // BB's successors were moved to SplitBB, update DTU accordingly.
2942  for (auto *Succ : successors(SplitBB)) {
2943  Updates.push_back({DominatorTree::Delete, BB, Succ});
2944  Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2945  }
2946  DTU->applyUpdatesPermissive(Updates);
2947  return true;
2948  }
2949  return false;
2950 }
2951 
2952 /// Try to propagate a guard from the current BB into one of its predecessors
2953 /// in case another branch of execution implies that the condition of this
2954 /// guard is always true. Currently we only process the simplest case that
2955 /// looks like:
2956 ///
2957 /// Start:
2958 /// %cond = ...
2959 /// br i1 %cond, label %T1, label %F1
2960 /// T1:
2961 /// br label %Merge
2962 /// F1:
2963 /// br label %Merge
2964 /// Merge:
2965 /// %condGuard = ...
2966 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2967 ///
2968 /// And cond either implies condGuard or !condGuard. In this case all the
2969 /// instructions before the guard can be duplicated in both branches, and the
2970 /// guard is then threaded to one of them.
2971 bool JumpThreadingPass::processGuards(BasicBlock *BB) {
2972  using namespace PatternMatch;
2973 
2974  // We only want to deal with two predecessors.
2975  BasicBlock *Pred1, *Pred2;
2976  auto PI = pred_begin(BB), PE = pred_end(BB);
2977  if (PI == PE)
2978  return false;
2979  Pred1 = *PI++;
2980  if (PI == PE)
2981  return false;
2982  Pred2 = *PI++;
2983  if (PI != PE)
2984  return false;
2985  if (Pred1 == Pred2)
2986  return false;
2987 
2988  // Try to thread one of the guards of the block.
2989  // TODO: Look up deeper than to immediate predecessor?
2990  auto *Parent = Pred1->getSinglePredecessor();
2991  if (!Parent || Parent != Pred2->getSinglePredecessor())
2992  return false;
2993 
2994  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
2995  for (auto &I : *BB)
2996  if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
2997  return true;
2998 
2999  return false;
3000 }
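// Illustrative sketch (not part of the upstream source; values are
// hypothetical): in the CFG shown above, suppose
//
//   %cond      = icmp ult i32 %a, 10
//   %condGuard = icmp ult i32 %a, 20
//
// Then %cond being true implies %condGuard is true, so the guard can never
// fail on the %T1 arm. threadGuard (below) duplicates the instructions that
// precede the guard into both predecessors and keeps the guard only on the
// arm where the implication is not proved, here %F1.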
3001 
3002 /// Try to propagate the guard from BB which is the lower block of a diamond
3003 /// to one of its branches, in case the diamond's condition implies the guard's
3004 /// condition.
3005 bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
3006  BranchInst *BI) {
3007  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3008  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3009  Value *GuardCond = Guard->getArgOperand(0);
3010  Value *BranchCond = BI->getCondition();
3011  BasicBlock *TrueDest = BI->getSuccessor(0);
3012  BasicBlock *FalseDest = BI->getSuccessor(1);
3013 
3014  auto &DL = BB->getModule()->getDataLayout();
3015  bool TrueDestIsSafe = false;
3016  bool FalseDestIsSafe = false;
3017 
3018  // True dest is safe if BranchCond => GuardCond.
3019  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3020  if (Impl && *Impl)
3021  TrueDestIsSafe = true;
3022  else {
3023  // False dest is safe if !BranchCond => GuardCond.
3024  Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3025  if (Impl && *Impl)
3026  FalseDestIsSafe = true;
3027  }
3028 
3029  if (!TrueDestIsSafe && !FalseDestIsSafe)
3030  return false;
3031 
3032  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3033  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3034 
3035  ValueToValueMapTy UnguardedMapping, GuardedMapping;
3036  Instruction *AfterGuard = Guard->getNextNode();
3037  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
3038  if (Cost > BBDupThreshold)
3039  return false;
3040  // Duplicate all instructions before the guard and the guard itself to the
3041  // branch where the implication is not proved.
3042  BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
3043  BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3044  assert(GuardedBlock && "Could not create the guarded block?");
3045  // Duplicate all instructions before the guard in the unguarded branch.
3046  // Since we have successfully duplicated the guarded block and this block
3047  // has fewer instructions, we expect it to succeed.
3048  BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
3049  BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3050  assert(UnguardedBlock && "Could not create the unguarded block?");
3051  LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3052  << GuardedBlock->getName() << "\n");
3053  // Some instructions before the guard may still have uses. For them, we need
3054  // to create Phi nodes merging their copies in both guarded and unguarded
3055  // branches. Those instructions that have no uses can simply be removed.
3056  SmallVector<Instruction *, 4> ToRemove;
3057  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3058  if (!isa<PHINode>(&*BI))
3059  ToRemove.push_back(&*BI);
3060 
3061  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
3062  assert(InsertionPoint && "Empty block?");
3063  // Substitute with Phis & remove.
3064  for (auto *Inst : reverse(ToRemove)) {
3065  if (!Inst->use_empty()) {
3066  PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3067  NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3068  NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3069  NewPN->insertBefore(InsertionPoint);
3070  Inst->replaceAllUsesWith(NewPN);
3071  }
3072  Inst->eraseFromParent();
3073  }
3074  return true;
3075 }
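// Illustrative sketch (not part of the upstream source; value names are
// hypothetical): after the two duplications above, a pre-guard instruction
// such as
//
//   %v = add i32 %a, 1
//
// has one copy in UnguardedBlock and one in GuardedBlock. For each original
// instruction that still has uses, the loop over ToRemove builds a merge phi,
// e.g.
//
//   %v.merge = phi i32 [ %v.unguarded, %UnguardedBlock ],
//                      [ %v.guarded, %GuardedBlock ]
//
// at BB's first insertion point, redirects the remaining uses to it, and then
// erases the original copy from BB; originals with no uses are simply erased.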