LLVM  16.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1 //===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Jump Threading pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/CFG.h"
32 #include "llvm/Analysis/Loads.h"
33 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/CFG.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Intrinsics.h"
51 #include "llvm/IR/LLVMContext.h"
52 #include "llvm/IR/MDBuilder.h"
53 #include "llvm/IR/Metadata.h"
54 #include "llvm/IR/Module.h"
55 #include "llvm/IR/PassManager.h"
56 #include "llvm/IR/PatternMatch.h"
57 #include "llvm/IR/ProfDataUtils.h"
58 #include "llvm/IR/Type.h"
59 #include "llvm/IR/Use.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
65 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <iterator>
79 #include <memory>
80 #include <utility>
81 
82 using namespace llvm;
83 using namespace jumpthreading;
84 
85 #define DEBUG_TYPE "jump-threading"
86 
// Pass statistics, reported with -stats: how many jumps were threaded, how
// many terminators were folded to unconditional branches, and how many
// branch blocks were duplicated to eliminate a PHI node.
87 STATISTIC(NumThreads, "Number of jumps threaded");
88 STATISTIC(NumFolds, "Number of terminators folded");
89 STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
90 
// -jump-threading-threshold: upper bound on the duplication cost (as computed
// by getJumpThreadDuplicationCost) of a block that may be copied in order to
// thread a jump across it. Defaults to 6 cost units; a command-line
// occurrence overrides the per-function default chosen in runImpl.
91 static cl::opt<unsigned>
92 BBDuplicateThreshold("jump-threading-threshold",
93  cl::desc("Max block size to duplicate for jump threading"),
94  cl::init(6), cl::Hidden);
95 
96 static cl::opt<unsigned>
98  "jump-threading-implication-search-threshold",
99  cl::desc("The number of predecessors to search for a stronger "
100  "condition to use to thread over a weaker condition"),
101  cl::init(3), cl::Hidden);
102 
104  "jump-threading-phi-threshold",
105  cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
106  cl::Hidden);
107 
109  "print-lvi-after-jump-threading",
110  cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
111  cl::Hidden);
112 
114  "jump-threading-across-loop-headers",
115  cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
116  cl::init(false), cl::Hidden);
117 
118 
119 namespace {
120 
121  /// This pass performs 'jump threading', which looks at blocks that have
122  /// multiple predecessors and multiple successors. If one or more of the
123  /// predecessors of the block can be proven to always jump to one of the
124  /// successors, we forward the edge from the predecessor to the successor by
125  /// duplicating the contents of this block.
126  ///
127  /// An example of when this can occur is code like this:
128  ///
129  /// if () { ...
130  /// X = 4;
131  /// }
132  /// if (X < 3) {
133  ///
134  /// In this case, the unconditional branch at the end of the first if can be
135  /// revectored to the false side of the second if.
136  class JumpThreading : public FunctionPass {
  // Shared implementation used by both pass managers; this legacy wrapper
  // only gathers analyses and forwards to it.
137  JumpThreadingPass Impl;
138 
139  public:
140  static char ID; // Pass identification
141 
  // T is the block-duplication threshold; -1 selects the default
  // (BBDuplicateThreshold) inside JumpThreadingPass.
  // NOTE(review): the constructor body (normally the pass-registry
  // initialization call) appears to be elided in this rendering of the file.
142  JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
144  }
145 
146  bool runOnFunction(Function &F) override;
147 
  // NOTE(review): the addRequired/addPreserved analysis registrations that
  // normally populate this method appear to be elided in this rendering.
148  void getAnalysisUsage(AnalysisUsage &AU) const override {
157  }
158 
  // Drop per-function state held by the implementation between runs.
159  void releaseMemory() override { Impl.releaseMemory(); }
160  };
161 
162 } // end anonymous namespace
163 
// Address of this variable identifies the pass; the value is unused.
164 char JumpThreading::ID = 0;
165 
166 INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
167  "Jump Threading", false, false)
174 
175 // Public interface to the Jump Threading pass
177  return new JumpThreading(Threshold);
178 }
179 
181  DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
182 }
183 
184 // Update branch probability information according to conditional
185 // branch probability. This is usually made possible for cloned branches
186 // in inline instances by the context specific profile in the caller.
187 // For instance,
188 //
189 // [Block PredBB]
190 // [Branch PredBr]
191 // if (t) {
192 // Block A;
193 // } else {
194 // Block B;
195 // }
196 //
197 // [Block BB]
198 // cond = PN([true, %A], [..., %B]); // PHI node
199 // [Branch CondBr]
200 // if (cond) {
201 // ... // P(cond == true) = 1%
202 // }
203 //
204 // Here we know that when block A is taken, cond must be true, which means
205 // P(cond == true | A) = 1
206 //
207 // Given that P(cond == true) = P(cond == true | A) * P(A) +
208 // P(cond == true | B) * P(B)
209 // we get:
210 // P(cond == true ) = P(A) + P(cond == true | B) * P(B)
211 //
212 // which gives us:
213 // P(A) is less than P(cond == true), i.e.
214 // P(t == true) <= P(cond == true)
215 //
216 // In other words, if we know P(cond == true) is unlikely, we know
217 // that P(t == true) is also unlikely.
218 //
// NOTE(review): the signature line of this helper appears to be elided in
// this rendering (upstream it is
// static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
// — confirm against the original source). The body below starts
// mid-function: PN is the i1 PHI feeding BB's conditional branch, and BB is
// the block containing that branch.
220  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
221  if (!CondBr)
222  return;
223 
// Bail out unless CondBr carries usable !prof branch weights.
224  uint64_t TrueWeight, FalseWeight;
225  if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
226  return;
227 
228  if (TrueWeight + FalseWeight == 0)
229  // Zero branch_weights do not give a hint for getting branch probabilities.
230  // Technically it would result in division by zero denominator, which is
231  // TrueWeight + FalseWeight.
232  return;
233 
234  // Returns the outgoing edge of the dominating predecessor block
235  // that leads to the PhiNode's incoming block:
236  auto GetPredOutEdge =
237  [](BasicBlock *IncomingBB,
238  BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
239  auto *PredBB = IncomingBB;
240  auto *SuccBB = PhiBB;
  // NOTE(review): the declaration of the Visited set used below appears to
  // be elided here in this rendering.
242  while (true) {
243  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
244  if (PredBr && PredBr->isConditional())
245  return {PredBB, SuccBB};
246  Visited.insert(PredBB);
  // Walk up through single-predecessor chains looking for the dominating
  // conditional branch; give up if the chain fans in.
247  auto *SinglePredBB = PredBB->getSinglePredecessor();
248  if (!SinglePredBB)
249  return {nullptr, nullptr};
250 
251  // Stop searching when SinglePredBB has been visited. It means we see
252  // an unreachable loop.
253  if (Visited.count(SinglePredBB))
254  return {nullptr, nullptr};
255 
256  SuccBB = PredBB;
257  PredBB = SinglePredBB;
258  }
259  };
260 
// For each constant-bool incoming value of the PHI, push an upper-bound
// probability onto the corresponding predecessor's conditional branch.
261  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
262  Value *PhiOpnd = PN->getIncomingValue(i);
263  ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
264 
265  if (!CI || !CI->getType()->isIntegerTy(1))
266  continue;
267 
268  BranchProbability BP =
  // NOTE(review): the BranchProbability::getBranchProbability(...) call
  // lines selecting between the true- and false-weight based on the
  // incoming constant appear to be elided in this rendering.
270  TrueWeight, TrueWeight + FalseWeight)
272  FalseWeight, TrueWeight + FalseWeight));
273 
274  auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
275  if (!PredOutEdge.first)
276  return;
277 
278  BasicBlock *PredBB = PredOutEdge.first;
279  BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
280  if (!PredBr)
281  return;
282 
283  uint64_t PredTrueWeight, PredFalseWeight;
284  // FIXME: We currently only set the profile data when it is missing.
285  // With PGO, this can be used to refine even existing profile data with
286  // context information. This needs to be done after more performance
287  // testing.
288  if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
289  continue;
290 
291  // We can not infer anything useful when BP >= 50%, because BP is the
292  // upper bound probability value.
293  if (BP >= BranchProbability(50, 100))
294  continue;
295 
// Order the two weights to match the successor order of PredBr, then
// attach them as !prof metadata.
296  SmallVector<uint32_t, 2> Weights;
297  if (PredBr->getSuccessor(0) == PredOutEdge.second) {
298  Weights.push_back(BP.getNumerator());
299  Weights.push_back(BP.getCompl().getNumerator());
300  } else {
301  Weights.push_back(BP.getCompl().getNumerator());
302  Weights.push_back(BP.getNumerator());
303  }
304  PredBr->setMetadata(LLVMContext::MD_prof,
305  MDBuilder(PredBr->getParent()->getContext())
306  .createBranchWeights(Weights));
307  }
308 }
309 
310 /// runOnFunction - Toplevel algorithm.
// NOTE(review): the signature line (bool JumpThreading::runOnFunction(
// Function &F)) appears to be elided in this rendering. This is the
// legacy-PM entry point: it gathers the required analyses and forwards to
// the shared JumpThreadingPass implementation.
312  if (skipFunction(F))
313  return false;
314  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
315  // Jump Threading has no sense for the targets with divergent CF
316  if (TTI->hasBranchDivergence())
317  return false;
318  auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
319  auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
320  auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
321  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // NOTE(review): the DomTreeUpdater (DTU) construction appears to be
  // elided here in this rendering; DTU is used below.
  // BPI/BFI are built eagerly only when profile data exists, since they are
  // needed to keep edge weights updated after threading.
323  std::unique_ptr<BlockFrequencyInfo> BFI;
324  std::unique_ptr<BranchProbabilityInfo> BPI;
325  if (F.hasProfileData()) {
326  LoopInfo LI{*DT};
327  BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
328  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
329  }
330 
331  bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
332  std::move(BFI), std::move(BPI));
  // NOTE(review): the guard opening this debug-dump scope (presumably
  // if (PrintLVIAfterJumpThreading)) appears to be elided in this rendering.
334  dbgs() << "LVI for function '" << F.getName() << "':\n";
335  LVI->printLVI(F, DTU.getDomTree(), dbgs());
336  }
337  return Changed;
338 }
339 
// NOTE(review): the signature lines (PreservedAnalyses JumpThreadingPass::
// run(Function &F, FunctionAnalysisManager &AM)) appear to be elided in this
// rendering. New-PM entry point; mirrors the legacy runOnFunction above.
342  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
343  // Jump Threading has no sense for the targets with divergent CF
344  if (TTI.hasBranchDivergence())
345  return PreservedAnalyses::all();
346  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
347  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
348  auto &LVI = AM.getResult<LazyValueAnalysis>(F);
349  auto &AA = AM.getResult<AAManager>(F);
  // NOTE(review): the DomTreeUpdater (DTU) construction appears to be
  // elided here in this rendering; DTU is used below.
351 
  // Build BPI/BFI only when profile data exists (needed for edge-weight
  // updates after threading), same as the legacy path.
352  std::unique_ptr<BlockFrequencyInfo> BFI;
353  std::unique_ptr<BranchProbabilityInfo> BPI;
354  if (F.hasProfileData()) {
355  LoopInfo LI{DT};
356  BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
357  BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
358  }
359 
360  bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
361  std::move(BFI), std::move(BPI));
362 
  // NOTE(review): the guard opening this debug-dump scope (presumably
  // if (PrintLVIAfterJumpThreading)) appears to be elided in this rendering.
364  dbgs() << "LVI for function '" << F.getName() << "':\n";
365  LVI.printLVI(F, DTU.getDomTree(), dbgs());
366  }
367 
368  if (!Changed)
369  return PreservedAnalyses::all();
  // NOTE(review): the PreservedAnalyses (PA) construction lines appear to
  // be elided here in this rendering.
373  return PA;
374 }
375 
377  TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
378  AliasAnalysis *AA_, DomTreeUpdater *DTU_,
379  bool HasProfileData_,
380  std::unique_ptr<BlockFrequencyInfo> BFI_,
381  std::unique_ptr<BranchProbabilityInfo> BPI_) {
382  LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
383  TLI = TLI_;
384  TTI = TTI_;
385  LVI = LVI_;
386  AA = AA_;
387  DTU = DTU_;
388  BFI.reset();
389  BPI.reset();
390  // When profile data is available, we need to update edge weights after
391  // successful jump threading, which requires both BPI and BFI being available.
392  HasProfileData = HasProfileData_;
393  auto *GuardDecl = F.getParent()->getFunction(
394  Intrinsic::getName(Intrinsic::experimental_guard));
395  HasGuards = GuardDecl && !GuardDecl->use_empty();
396  if (HasProfileData) {
397  BPI = std::move(BPI_);
398  BFI = std::move(BFI_);
399  }
400 
401  // Reduce the number of instructions duplicated when optimizing strictly for
402  // size.
403  if (BBDuplicateThreshold.getNumOccurrences())
404  BBDupThreshold = BBDuplicateThreshold;
405  else if (F.hasFnAttribute(Attribute::MinSize))
406  BBDupThreshold = 3;
407  else
408  BBDupThreshold = DefaultBBDupThreshold;
409 
410  // JumpThreading must not process blocks unreachable from entry. It's a
411  // waste of compute time and can potentially lead to hangs.
412  SmallPtrSet<BasicBlock *, 16> Unreachable;
413  assert(DTU && "DTU isn't passed into JumpThreading before using it.");
414  assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
415  DominatorTree &DT = DTU->getDomTree();
416  for (auto &BB : F)
417  if (!DT.isReachableFromEntry(&BB))
418  Unreachable.insert(&BB);
419 
421  findLoopHeaders(F);
422 
423  bool EverChanged = false;
424  bool Changed;
425  do {
426  Changed = false;
427  for (auto &BB : F) {
428  if (Unreachable.count(&BB))
429  continue;
430  while (processBlock(&BB)) // Thread all of the branches we can over BB.
431  Changed = true;
432 
433  // Jump threading may have introduced redundant debug values into BB
434  // which should be removed.
435  if (Changed)
437 
438  // Stop processing BB if it's the entry or is now deleted. The following
439  // routines attempt to eliminate BB and locating a suitable replacement
440  // for the entry is non-trivial.
441  if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
442  continue;
443 
444  if (pred_empty(&BB)) {
445  // When processBlock makes BB unreachable it doesn't bother to fix up
446  // the instructions in it. We must remove BB to prevent invalid IR.
447  LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
448  << "' with terminator: " << *BB.getTerminator()
449  << '\n');
450  LoopHeaders.erase(&BB);
451  LVI->eraseBlock(&BB);
452  DeleteDeadBlock(&BB, DTU);
453  Changed = true;
454  continue;
455  }
456 
457  // processBlock doesn't thread BBs with unconditional TIs. However, if BB
458  // is "almost empty", we attempt to merge BB with its sole successor.
459  auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
460  if (BI && BI->isUnconditional()) {
461  BasicBlock *Succ = BI->getSuccessor(0);
462  if (
463  // The terminator must be the only non-phi instruction in BB.
464  BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
465  // Don't alter Loop headers and latches to ensure another pass can
466  // detect and transform nested loops later.
467  !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
470  // BB is valid for cleanup here because we passed in DTU. F remains
471  // BB's parent until a DTU->getDomTree() event.
472  LVI->eraseBlock(&BB);
473  Changed = true;
474  }
475  }
476  }
477  EverChanged |= Changed;
478  } while (Changed);
479 
480  LoopHeaders.clear();
481  return EverChanged;
482 }
483 
484 // Replace uses of Cond with ToVal when safe to do so. If all uses are
485 // replaced, we can remove Cond. We cannot blindly replace all uses of Cond
486 // because we may incorrectly replace uses when guards/assumes are uses of
487 // of `Cond` and we used the guards/assume to reason about the `Cond` value
488 // at the end of block. RAUW unconditionally replaces all uses
489 // including the guards/assumes themselves and the uses before the
490 // guard/assume.
// NOTE(review): the first signature line of this method appears to be
// elided in this rendering; the parameters visible from the body are Cond
// (the instruction whose value is known), ToVal (the known value), and
// KnownAtEndOfBB (the block at whose end the equivalence holds). Returns
// true if any use was replaced or Cond was erased.
492  BasicBlock *KnownAtEndOfBB) {
493  bool Changed = false;
494  assert(Cond->getType() == ToVal->getType());
495  // We can unconditionally replace all uses in non-local blocks (i.e. uses
496  // strictly dominated by BB), since LVI information is true from the
497  // terminator of BB.
498  if (Cond->getParent() == KnownAtEndOfBB)
499  Changed |= replaceNonLocalUsesWith(Cond, ToVal);
// Walk backwards from the end of the block so every visited instruction is
// known to execute after the point where Cond == ToVal was established.
500  for (Instruction &I : reverse(*KnownAtEndOfBB)) {
501  // Reached the Cond whose uses we are trying to replace, so there are no
502  // more uses.
503  if (&I == Cond)
504  break;
505  // We only replace uses in instructions that are guaranteed to reach the end
506  // of BB, where we know Cond is ToVal.
  // NOTE(review): the guard preceding this break (presumably
  // if (!isGuaranteedToTransferExecutionToSuccessor(&I))) appears to be
  // elided in this rendering.
508  break;
509  Changed |= I.replaceUsesOfWith(Cond, ToVal);
510  }
// If nothing uses Cond anymore and it is side-effect free, delete it.
511  if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
512  Cond->eraseFromParent();
513  Changed = true;
514  }
515  return Changed;
516 }
517 
518 /// Return the cost of duplicating a piece of this block from first non-phi
519 /// and before StopAt instruction to thread across it. Stop scanning the block
520 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
// NOTE(review): the opening signature line of this function appears to be
// elided in this rendering; the remaining parameters are BB (block to
// price), StopAt (scan terminates before this instruction), and Threshold
// (scan cutoff). Returns an approximate duplication cost, or ~0U when the
// block must never be duplicated.
522  BasicBlock *BB,
523  Instruction *StopAt,
524  unsigned Threshold) {
525  assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
526 
527  // Do not duplicate the BB if it has a lot of PHI nodes.
528  // If a threadable chain is too long then the number of PHI nodes can add up,
529  // leading to a substantial increase in compile time when rewriting the SSA.
530  unsigned PhiCount = 0;
531  Instruction *FirstNonPHI = nullptr;
532  for (Instruction &I : *BB) {
533  if (!isa<PHINode>(&I)) {
534  FirstNonPHI = &I;
535  break;
536  }
537  if (++PhiCount > PhiDuplicateThreshold)
538  return ~0U;
539  }
540 
541  /// Ignore PHI nodes, these will be flattened when duplication happens.
542  BasicBlock::const_iterator I(FirstNonPHI);
543 
544  // FIXME: THREADING will delete values that are just used to compute the
545  // branch, so they shouldn't count against the duplication cost.
546 
547  unsigned Bonus = 0;
548  if (BB->getTerminator() == StopAt) {
549  // Threading through a switch statement is particularly profitable. If this
550  // block ends in a switch, decrease its cost to make it more likely to
551  // happen.
552  if (isa<SwitchInst>(StopAt))
553  Bonus = 6;
554 
555  // The same holds for indirect branches, but slightly more so.
556  if (isa<IndirectBrInst>(StopAt))
557  Bonus = 8;
558  }
559 
560  // Bump the threshold up so the early exit from the loop doesn't skip the
561  // terminator-based Size adjustment at the end.
562  Threshold += Bonus;
563 
564  // Sum up the cost of each instruction until we get to the terminator. Don't
565  // include the terminator because the copy won't include it.
566  unsigned Size = 0;
567  for (; &*I != StopAt; ++I) {
568 
569  // Stop scanning the block if we've reached the threshold.
570  if (Size > Threshold)
571  return Size;
572 
573  // Bail out if this instruction gives back a token type, it is not possible
574  // to duplicate it if it is used outside this BB.
575  if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
576  return ~0U;
577 
578  // Blocks with NoDuplicate are modelled as having infinite cost, so they
579  // are never duplicated.
580  if (const CallInst *CI = dyn_cast<CallInst>(I))
581  if (CI->cannotDuplicate() || CI->isConvergent())
582  return ~0U;
583 
  // NOTE(review): the guard preceding this continue (presumably the check
  // skipping zero-cost instructions such as debug intrinsics) appears to
  // be elided in this rendering.
586  continue;
587 
588  // All other instructions count for at least one unit.
589  ++Size;
590 
591  // Calls are more expensive. If they are non-intrinsic calls, we model them
592  // as having cost of 4. If they are a non-vector intrinsic, we model them
593  // as having cost of 2 total, and if they are a vector intrinsic, we model
594  // them as having cost 1.
595  if (const CallInst *CI = dyn_cast<CallInst>(I)) {
596  if (!isa<IntrinsicInst>(CI))
597  Size += 3;
598  else if (!CI->getType()->isVectorTy())
599  Size += 1;
600  }
601  }
602 
// Apply the terminator bonus, clamping at zero since Size is unsigned.
603  return Size > Bonus ? Size - Bonus : 0;
604 }
605 
606 /// findLoopHeaders - We do not want jump threading to turn proper loop
607 /// structures into irreducible loops. Doing this breaks up the loop nesting
608 /// hierarchy and pessimizes later transformations. To prevent this from
609 /// happening, we first have to find the loop headers. Here we approximate this
610 /// by finding targets of backedges in the CFG.
611 ///
612 /// Note that there definitely are cases when we want to allow threading of
613 /// edges across a loop header. For example, threading a jump from outside the
614 /// loop (the preheader) to an exit block of the loop is definitely profitable.
615 /// It is also almost always profitable to thread backedges from within the loop
616 /// to exit blocks, and is often profitable to thread backedges to other blocks
617 /// within the loop (forming a nested loop). This simple analysis is not rich
618 /// enough to track all of these properties and keep it up-to-date as the CFG
619 /// mutates, so we don't allow any of these transformations.
// NOTE(review): the signature line and the declaration of the Edges vector
// (the backedge list filled in below) appear to be elided in this
// rendering. Collects every backedge target in F into LoopHeaders, the
// approximation of loop headers described in the comment above.
622  FindFunctionBackedges(F, Edges);
623 
// Edge.second is the backedge destination, i.e. the (approximate) header.
624  for (const auto &Edge : Edges)
625  LoopHeaders.insert(Edge.second);
626 }
627 
628 /// getKnownConstant - Helper method to determine if we can thread over a
629 /// terminator with the given value as its condition, and if so what value to
630 /// use for that. What kind of value this is depends on whether we want an
631 /// integer or a block address, but an undef is always accepted.
632 /// Returns null if Val is null or not an appropriate constant.
// NOTE(review): the signature line of this helper appears to be elided in
// this rendering; per the comment above it takes the candidate Value and a
// ConstantPreference and returns null when Val is null or not an
// appropriate constant.
634  if (!Val)
635  return nullptr;
636 
637  // Undef is "known" enough.
638  if (UndefValue *U = dyn_cast<UndefValue>(Val))
639  return U;
640 
  // NOTE(review): the guard preceding this return (presumably checking for
  // the block-address preference) appears to be elided in this rendering;
  // as shown, the two returns would otherwise be unconditional.
642  return dyn_cast<BlockAddress>(Val->stripPointerCasts());
643 
644  return dyn_cast<ConstantInt>(Val);
645 }
646 
647 /// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
648 /// if we can infer that the value is a known ConstantInt/BlockAddress or undef
649 /// in any of our predecessors. If so, return the known list of value and pred
650 /// BB in the result vector.
651 ///
652 /// This returns true if there were any known values.
654  Value *V, BasicBlock *BB, PredValueInfo &Result,
656  Instruction *CxtI) {
657  // This method walks up use-def chains recursively. Because of this, we could
658  // get into an infinite loop going around loops in the use-def chain. To
659  // prevent this, keep track of what (value, block) pairs we've already visited
660  // and terminate the search if we loop back to them
661  if (!RecursionSet.insert(V).second)
662  return false;
663 
664  // If V is a constant, then it is known in all predecessors.
665  if (Constant *KC = getKnownConstant(V, Preference)) {
666  for (BasicBlock *Pred : predecessors(BB))
667  Result.emplace_back(KC, Pred);
668 
669  return !Result.empty();
670  }
671 
672  // If V is a non-instruction value, or an instruction in a different block,
673  // then it can't be derived from a PHI.
674  Instruction *I = dyn_cast<Instruction>(V);
675  if (!I || I->getParent() != BB) {
676 
677  // Okay, if this is a live-in value, see if it has a known value at the any
678  // edge from our predecessors.
679  for (BasicBlock *P : predecessors(BB)) {
680  using namespace PatternMatch;
681  // If the value is known by LazyValueInfo to be a constant in a
682  // predecessor, use that information to try to thread this block.
683  Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
684  // If I is a non-local compare-with-constant instruction, use more-rich
685  // 'getPredicateOnEdge' method. This would be able to handle value
686  // inequalities better, for example if the compare is "X < 4" and "X < 3"
687  // is known true but "X < 4" itself is not available.
688  CmpInst::Predicate Pred;
689  Value *Val;
690  Constant *Cst;
691  if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
692  auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
693  if (Res != LazyValueInfo::Unknown)
694  PredCst = ConstantInt::getBool(V->getContext(), Res);
695  }
696  if (Constant *KC = getKnownConstant(PredCst, Preference))
697  Result.emplace_back(KC, P);
698  }
699 
700  return !Result.empty();
701  }
702 
703  /// If I is a PHI node, then we know the incoming values for any constants.
704  if (PHINode *PN = dyn_cast<PHINode>(I)) {
705  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
706  Value *InVal = PN->getIncomingValue(i);
707  if (Constant *KC = getKnownConstant(InVal, Preference)) {
708  Result.emplace_back(KC, PN->getIncomingBlock(i));
709  } else {
710  Constant *CI = LVI->getConstantOnEdge(InVal,
711  PN->getIncomingBlock(i),
712  BB, CxtI);
713  if (Constant *KC = getKnownConstant(CI, Preference))
714  Result.emplace_back(KC, PN->getIncomingBlock(i));
715  }
716  }
717 
718  return !Result.empty();
719  }
720 
721  // Handle Cast instructions.
722  if (CastInst *CI = dyn_cast<CastInst>(I)) {
723  Value *Source = CI->getOperand(0);
724  computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
725  RecursionSet, CxtI);
726  if (Result.empty())
727  return false;
728 
729  // Convert the known values.
730  for (auto &R : Result)
731  R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
732 
733  return true;
734  }
735 
736  if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
737  Value *Source = FI->getOperand(0);
738  computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
739  RecursionSet, CxtI);
740 
741  erase_if(Result, [](auto &Pair) {
742  return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
743  });
744 
745  return !Result.empty();
746  }
747 
748  // Handle some boolean conditions.
749  if (I->getType()->getPrimitiveSizeInBits() == 1) {
750  using namespace PatternMatch;
751  if (Preference != WantInteger)
752  return false;
753  // X | true -> true
754  // X & false -> false
755  Value *Op0, *Op1;
756  if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
757  match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
758  PredValueInfoTy LHSVals, RHSVals;
759 
760  computeValueKnownInPredecessorsImpl(Op0, BB, LHSVals, WantInteger,
761  RecursionSet, CxtI);
762  computeValueKnownInPredecessorsImpl(Op1, BB, RHSVals, WantInteger,
763  RecursionSet, CxtI);
764 
765  if (LHSVals.empty() && RHSVals.empty())
766  return false;
767 
768  ConstantInt *InterestingVal;
769  if (match(I, m_LogicalOr()))
770  InterestingVal = ConstantInt::getTrue(I->getContext());
771  else
772  InterestingVal = ConstantInt::getFalse(I->getContext());
773 
774  SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
775 
776  // Scan for the sentinel. If we find an undef, force it to the
777  // interesting value: x|undef -> true and x&undef -> false.
778  for (const auto &LHSVal : LHSVals)
779  if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
780  Result.emplace_back(InterestingVal, LHSVal.second);
781  LHSKnownBBs.insert(LHSVal.second);
782  }
783  for (const auto &RHSVal : RHSVals)
784  if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
785  // If we already inferred a value for this block on the LHS, don't
786  // re-add it.
787  if (!LHSKnownBBs.count(RHSVal.second))
788  Result.emplace_back(InterestingVal, RHSVal.second);
789  }
790 
791  return !Result.empty();
792  }
793 
794  // Handle the NOT form of XOR.
795  if (I->getOpcode() == Instruction::Xor &&
796  isa<ConstantInt>(I->getOperand(1)) &&
797  cast<ConstantInt>(I->getOperand(1))->isOne()) {
798  computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
799  WantInteger, RecursionSet, CxtI);
800  if (Result.empty())
801  return false;
802 
803  // Invert the known values.
804  for (auto &R : Result)
805  R.first = ConstantExpr::getNot(R.first);
806 
807  return true;
808  }
809 
810  // Try to simplify some other binary operator values.
811  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
812  if (Preference != WantInteger)
813  return false;
814  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
815  const DataLayout &DL = BO->getModule()->getDataLayout();
816  PredValueInfoTy LHSVals;
817  computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
818  WantInteger, RecursionSet, CxtI);
819 
820  // Try to use constant folding to simplify the binary operator.
821  for (const auto &LHSVal : LHSVals) {
822  Constant *V = LHSVal.first;
823  Constant *Folded =
824  ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
825 
826  if (Constant *KC = getKnownConstant(Folded, WantInteger))
827  Result.emplace_back(KC, LHSVal.second);
828  }
829  }
830 
831  return !Result.empty();
832  }
833 
834  // Handle compare with phi operand, where the PHI is defined in this block.
835  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
836  if (Preference != WantInteger)
837  return false;
838  Type *CmpType = Cmp->getType();
839  Value *CmpLHS = Cmp->getOperand(0);
840  Value *CmpRHS = Cmp->getOperand(1);
841  CmpInst::Predicate Pred = Cmp->getPredicate();
842 
843  PHINode *PN = dyn_cast<PHINode>(CmpLHS);
844  if (!PN)
845  PN = dyn_cast<PHINode>(CmpRHS);
846  if (PN && PN->getParent() == BB) {
847  const DataLayout &DL = PN->getModule()->getDataLayout();
848  // We can do this simplification if any comparisons fold to true or false.
849  // See if any do.
850  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
851  BasicBlock *PredBB = PN->getIncomingBlock(i);
852  Value *LHS, *RHS;
853  if (PN == CmpLHS) {
854  LHS = PN->getIncomingValue(i);
855  RHS = CmpRHS->DoPHITranslation(BB, PredBB);
856  } else {
857  LHS = CmpLHS->DoPHITranslation(BB, PredBB);
858  RHS = PN->getIncomingValue(i);
859  }
860  Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
861  if (!Res) {
862  if (!isa<Constant>(RHS))
863  continue;
864 
865  // getPredicateOnEdge call will make no sense if LHS is defined in BB.
866  auto LHSInst = dyn_cast<Instruction>(LHS);
867  if (LHSInst && LHSInst->getParent() == BB)
868  continue;
869 
871  ResT = LVI->getPredicateOnEdge(Pred, LHS,
872  cast<Constant>(RHS), PredBB, BB,
873  CxtI ? CxtI : Cmp);
874  if (ResT == LazyValueInfo::Unknown)
875  continue;
877  }
878 
879  if (Constant *KC = getKnownConstant(Res, WantInteger))
880  Result.emplace_back(KC, PredBB);
881  }
882 
883  return !Result.empty();
884  }
885 
886  // If comparing a live-in value against a constant, see if we know the
887  // live-in value on any predecessors.
888  if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
889  Constant *CmpConst = cast<Constant>(CmpRHS);
890 
891  if (!isa<Instruction>(CmpLHS) ||
892  cast<Instruction>(CmpLHS)->getParent() != BB) {
893  for (BasicBlock *P : predecessors(BB)) {
894  // If the value is known by LazyValueInfo to be a constant in a
895  // predecessor, use that information to try to thread this block.
897  LVI->getPredicateOnEdge(Pred, CmpLHS,
898  CmpConst, P, BB, CxtI ? CxtI : Cmp);
899  if (Res == LazyValueInfo::Unknown)
900  continue;
901 
902  Constant *ResC = ConstantInt::get(CmpType, Res);
903  Result.emplace_back(ResC, P);
904  }
905 
906  return !Result.empty();
907  }
908 
909  // InstCombine can fold some forms of constant range checks into
910  // (icmp (add (x, C1)), C2). See if we have such a thing with
911  // x as a live-in.
912  {
913  using namespace PatternMatch;
914 
915  Value *AddLHS;
916  ConstantInt *AddConst;
917  if (isa<ConstantInt>(CmpConst) &&
918  match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
919  if (!isa<Instruction>(AddLHS) ||
920  cast<Instruction>(AddLHS)->getParent() != BB) {
921  for (BasicBlock *P : predecessors(BB)) {
922  // If the value is known by LazyValueInfo to be a ConstantRange in
923  // a predecessor, use that information to try to thread this
924  // block.
925  ConstantRange CR = LVI->getConstantRangeOnEdge(
926  AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
927  // Propagate the range through the addition.
928  CR = CR.add(AddConst->getValue());
929 
930  // Get the range where the compare returns true.
932  Pred, cast<ConstantInt>(CmpConst)->getValue());
933 
934  Constant *ResC;
935  if (CmpRange.contains(CR))
936  ResC = ConstantInt::getTrue(CmpType);
937  else if (CmpRange.inverse().contains(CR))
938  ResC = ConstantInt::getFalse(CmpType);
939  else
940  continue;
941 
942  Result.emplace_back(ResC, P);
943  }
944 
945  return !Result.empty();
946  }
947  }
948  }
949 
950  // Try to find a constant value for the LHS of a comparison,
951  // and evaluate it statically if we can.
952  PredValueInfoTy LHSVals;
953  computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
954  WantInteger, RecursionSet, CxtI);
955 
956  for (const auto &LHSVal : LHSVals) {
957  Constant *V = LHSVal.first;
958  Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
959  if (Constant *KC = getKnownConstant(Folded, WantInteger))
960  Result.emplace_back(KC, LHSVal.second);
961  }
962 
963  return !Result.empty();
964  }
965  }
966 
967  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
968  // Handle select instructions where at least one operand is a known constant
969  // and we can figure out the condition value for any predecessor block.
970  Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
971  Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
972  PredValueInfoTy Conds;
973  if ((TrueVal || FalseVal) &&
974  computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
975  WantInteger, RecursionSet, CxtI)) {
976  for (auto &C : Conds) {
977  Constant *Cond = C.first;
978 
979  // Figure out what value to use for the condition.
980  bool KnownCond;
981  if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
982  // A known boolean.
983  KnownCond = CI->isOne();
984  } else {
985  assert(isa<UndefValue>(Cond) && "Unexpected condition value");
986  // Either operand will do, so be sure to pick the one that's a known
987  // constant.
988  // FIXME: Do this more cleverly if both values are known constants?
989  KnownCond = (TrueVal != nullptr);
990  }
991 
992  // See if the select has a known constant value for this predecessor.
993  if (Constant *Val = KnownCond ? TrueVal : FalseVal)
994  Result.emplace_back(Val, C.second);
995  }
996 
997  return !Result.empty();
998  }
999  }
1000 
1001  // If all else fails, see if LVI can figure out a constant value for us.
1002  assert(CxtI->getParent() == BB && "CxtI should be in BB");
1003  Constant *CI = LVI->getConstant(V, CxtI);
1004  if (Constant *KC = getKnownConstant(CI, Preference)) {
1005  for (BasicBlock *Pred : predecessors(BB))
1006  Result.emplace_back(KC, Pred);
1007  }
1008 
1009  return !Result.empty();
1010 }
1011 
1012 /// GetBestDestForBranchOnUndef - If we determine that the specified block ends
1013 /// in an undefined jump, decide which block is best to revector to.
1014 ///
1015 /// Since we can pick an arbitrary destination, we pick the successor with the
1016 /// fewest predecessors. This should reduce the in-degree of the others.
// NOTE(review): original line 1017 — the function signature — was dropped by
// the doc extraction (presumably
// `unsigned JumpThreadingPass::getBestDestForJumpOnUndef(BasicBlock *BB) {`);
// confirm against the real source file.
1018  Instruction *BBTerm = BB->getTerminator();
1019  unsigned MinSucc = 0;
1020  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
1021  // Compute the successor with the minimum number of predecessors.
1022  unsigned MinNumPreds = pred_size(TestBB);
// Linear scan over the remaining successors; keep the index of the one with
// the fewest predecessors seen so far (ties keep the earliest index).
1023  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1024  TestBB = BBTerm->getSuccessor(i);
1025  unsigned NumPreds = pred_size(TestBB);
1026  if (NumPreds < MinNumPreds) {
1027  MinSucc = i;
1028  MinNumPreds = NumPreds;
1029  }
1030  }
1031 
// Returns the successor *index* on the terminator, not the block itself.
1032  return MinSucc;
1033 }
1034 
// Returns true when BB's address is taken AND that blockaddress still has
// live (non-dead-constant) users.
// NOTE(review): original line 1035 (the signature, presumably
// `static bool hasAddressTakenAndUsed(BasicBlock *BB) {`) and lines 1040-1041
// (looking up the BlockAddress for BB and dropping its dead constant users,
// which is what defines `BA` used below) were dropped by the doc extraction —
// confirm against the real source file.
1036  if (!BB->hasAddressTaken()) return false;
1037 
1038  // If the block has its address taken, it may be a tree of dead constants
1039  // hanging off of it. These shouldn't keep the block alive.
1042  return !BA->use_empty();
1043 }
1044 
1045 /// processBlock - If there are any predecessors whose control can be threaded
1046 /// through to a successor, transform them now.
// Returns true if any change was made to the IR (the driver loop re-runs the
// block until it reaches a fixed point).
// NOTE(review): original line 1047 (the signature, presumably
// `bool JumpThreadingPass::processBlock(BasicBlock *BB) {`) was dropped by the
// doc extraction — confirm against the real source file.
1048  // If the block is trivially dead, just return and let the caller nuke it.
1049  // This simplifies other transformations.
1050  if (DTU->isBBPendingDeletion(BB) ||
1051  (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
1052  return false;
1053 
1054  // If this block has a single predecessor, and if that pred has a single
1055  // successor, merge the blocks. This encourages recursive jump threading
1056  // because now the condition in this block can be threaded through
1057  // predecessors of our predecessor block.
1058  if (maybeMergeBasicBlockIntoOnlyPred(BB))
1059  return true;
1060 
1061  if (tryToUnfoldSelectInCurrBB(BB))
1062  return true;
1063 
1064  // Look if we can propagate guards to predecessors.
1065  if (HasGuards && processGuards(BB))
1066  return true;
1067 
1068  // What kind of constant we're looking for.
// NOTE(review): original line 1069 was dropped here — it presumably declares
// and initializes `Preference` (e.g. `ConstantPreference Preference =
// WantInteger;`), which the rest of this function reads.
1070 
1071  // Look to see if the terminator is a conditional branch, switch or indirect
1072  // branch, if not we can't thread it.
1073  Value *Condition;
1074  Instruction *Terminator = BB->getTerminator();
1075  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
1076  // Can't thread an unconditional jump.
1077  if (BI->isUnconditional()) return false;
1078  Condition = BI->getCondition();
1079  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
1080  Condition = SI->getCondition();
1081  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
1082  // Can't thread indirect branch with no successors.
1083  if (IB->getNumSuccessors() == 0) return false;
1084  Condition = IB->getAddress()->stripPointerCasts();
// NOTE(review): original line 1085 was dropped here — presumably it switches
// `Preference` to block-address mode for the indirectbr case.
1086  } else {
1087  return false; // Must be an invoke or callbr.
1088  }
1089 
1090  // Keep track if we constant folded the condition in this invocation.
1091  bool ConstantFolded = false;
1092 
1093  // Run constant folding to see if we can reduce the condition to a simple
1094  // constant.
1095  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1096  Value *SimpleVal =
1097  ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
1098  if (SimpleVal) {
1099  I->replaceAllUsesWith(SimpleVal);
// Only erase the folded instruction when nothing else uses it and it has no
// side effects; otherwise leave it for later cleanup.
1100  if (isInstructionTriviallyDead(I, TLI))
1101  I->eraseFromParent();
1102  Condition = SimpleVal;
1103  ConstantFolded = true;
1104  }
1105  }
1106 
1107  // If the terminator is branching on an undef or freeze undef, we can pick any
1108  // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
1109  auto *FI = dyn_cast<FreezeInst>(Condition);
1110  if (isa<UndefValue>(Condition) ||
1111  (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1112  unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1113  std::vector<DominatorTree::UpdateType> Updates;
1114 
1115  // Fold the branch/switch.
1116  Instruction *BBTerm = BB->getTerminator();
1117  Updates.reserve(BBTerm->getNumSuccessors());
// Detach every successor except the chosen one, recording the deleted CFG
// edges so the (deferred) dominator tree can be updated in one batch.
1118  for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1119  if (i == BestSucc) continue;
1120  BasicBlock *Succ = BBTerm->getSuccessor(i);
1121  Succ->removePredecessor(BB, true);
1122  Updates.push_back({DominatorTree::Delete, BB, Succ});
1123  }
1124 
1125  LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1126  << "' folding undef terminator: " << *BBTerm << '\n');
1127  BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1128  ++NumFolds;
1129  BBTerm->eraseFromParent();
1130  DTU->applyUpdatesPermissive(Updates);
// The freeze fed only this (now erased) terminator, so it is dead too.
1131  if (FI)
1132  FI->eraseFromParent();
1133  return true;
1134  }
1135 
1136  // If the terminator of this block is branching on a constant, simplify the
1137  // terminator to an unconditional branch. This can occur due to threading in
1138  // other blocks.
1139  if (getKnownConstant(Condition, Preference)) {
1140  LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1141  << "' folding terminator: " << *BB->getTerminator()
1142  << '\n');
1143  ++NumFolds;
1144  ConstantFoldTerminator(BB, true, nullptr, DTU);
1145  if (HasProfileData)
1146  BPI->eraseBlock(BB);
1147  return true;
1148  }
1149 
1150  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1151 
1152  // All the rest of our checks depend on the condition being an instruction.
1153  if (!CondInst) {
1154  // FIXME: Unify this with code below.
1155  if (processThreadableEdges(Condition, BB, Preference, Terminator))
1156  return true;
1157  return ConstantFolded;
1158  }
1159 
1160  // Some of the following optimization can safely work on the unfrozen cond.
1161  Value *CondWithoutFreeze = CondInst;
1162  if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1163  CondWithoutFreeze = FI->getOperand(0);
1164 
1165  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1166  // If we're branching on a conditional, LVI might be able to determine
1167  // it's value at the branch instruction. We only handle comparisons
1168  // against a constant at this time.
1169  if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
// NOTE(review): original line 1170 was dropped here — it presumably declares
// `Ret` (a LazyValueInfo tristate) initialized from the call below.
1171  LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1172  CondConst, BB->getTerminator(),
1173  /*UseBlockValue=*/false);
1174  if (Ret != LazyValueInfo::Unknown) {
1175  // We can safely replace *some* uses of the CondInst if it has
1176  // exactly one value as returned by LVI. RAUW is incorrect in the
1177  // presence of guards and assumes, that have the `Cond` as the use. This
1178  // is because we use the guards/assume to reason about the `Cond` value
1179  // at the end of block, but RAUW unconditionally replaces all uses
1180  // including the guards/assumes themselves and the uses before the
1181  // guard/assume.
1182  auto *CI = Ret == LazyValueInfo::True ?
1183  ConstantInt::getTrue(CondCmp->getType()) :
1184  ConstantInt::getFalse(CondCmp->getType());
1185  if (replaceFoldableUses(CondCmp, CI, BB))
1186  return true;
1187  }
1188 
1189  // We did not manage to simplify this branch, try to see whether
1190  // CondCmp depends on a known phi-select pattern.
1191  if (tryToUnfoldSelect(CondCmp, BB))
1192  return true;
1193  }
1194  }
1195 
1196  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1197  if (tryToUnfoldSelect(SI, BB))
1198  return true;
1199 
1200  // Check for some cases that are worth simplifying. Right now we want to look
1201  // for loads that are used by a switch or by the condition for the branch. If
1202  // we see one, check to see if it's partially redundant. If so, insert a PHI
1203  // which can then be used to thread the values.
1204  Value *SimplifyValue = CondWithoutFreeze;
1205 
// For `icmp x, C` look through to the LHS: a partially redundant load feeding
// the compare is just as threadable as one feeding the branch directly.
1206  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1207  if (isa<Constant>(CondCmp->getOperand(1)))
1208  SimplifyValue = CondCmp->getOperand(0);
1209 
1210  // TODO: There are other places where load PRE would be profitable, such as
1211  // more complex comparisons.
1212  if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1213  if (simplifyPartiallyRedundantLoad(LoadI))
1214  return true;
1215 
1216  // Before threading, try to propagate profile data backwards:
1217  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1218  if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
// NOTE(review): original line 1219 — the body of this two-level if, presumably
// a call that pushes branch profile metadata to predecessors — was dropped by
// the doc extraction; confirm against the real source file.
1220 
1221  // Handle a variety of cases where we are branching on something derived from
1222  // a PHI node in the current block. If we can prove that any predecessors
1223  // compute a predictable value based on a PHI node, thread those predecessors.
1224  if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1225  return true;
1226 
1227  // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1228  // the current block, see if we can simplify.
1229  PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1230  if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1231  return processBranchOnPHI(PN);
1232 
1233  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1234  if (CondInst->getOpcode() == Instruction::Xor &&
1235  CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1236  return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1237 
1238  // Search for a stronger dominating condition that can be used to simplify a
1239  // conditional branch leaving BB.
1240  if (processImpliedCondition(BB))
1241  return true;
1242 
1243  return false;
1244 }
1245 
// Walk up a chain of single predecessors looking for a dominating conditional
// branch whose condition implies this block's branch condition; if one is
// found, fold this block's conditional branch to its implied successor.
// Returns true if the terminator was rewritten.
// NOTE(review): original line 1246 (the signature, presumably
// `bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {`) was
// dropped by the doc extraction — confirm against the real source file.
1247  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1248  if (!BI || !BI->isConditional())
1249  return false;
1250 
1251  Value *Cond = BI->getCondition();
1252  // Assuming that predecessor's branch was taken, if pred's branch condition
1253  // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1254  // freeze(Cond) is either true or a nondeterministic value.
1255  // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1256  // without affecting other instructions.
1257  auto *FICond = dyn_cast<FreezeInst>(Cond);
1258  if (FICond && FICond->hasOneUse())
1259  Cond = FICond->getOperand(0);
1260  else
// Multi-use freeze: keep reasoning about the freeze itself, not its operand.
1261  FICond = nullptr;
1262 
1263  BasicBlock *CurrentBB = BB;
1264  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1265  unsigned Iter = 0;
1266 
1267  auto &DL = BB->getModule()->getDataLayout();
1268 
// Bounded walk up the single-predecessor chain (ImplicationSearchThreshold
// caps how far we look).
1269  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1270  auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1271  if (!PBI || !PBI->isConditional())
1272  return false;
1273  if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1274  return false;
1275 
1276  bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1277  Optional<bool> Implication =
1278  isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1279 
1280  // If the branch condition of BB (which is Cond) and CurrentPred are
1281  // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1282  if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1283  if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1284  FICond->getOperand(0))
1285  Implication = CondIsTrue;
1286  }
1287 
1288  if (Implication) {
// Replace the conditional branch with an unconditional branch to the implied
// successor, detach the other edge, and keep DT/BPI consistent.
1289  BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1290  BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1291  RemoveSucc->removePredecessor(BB);
1292  BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
1293  UncondBI->setDebugLoc(BI->getDebugLoc());
1294  ++NumFolds;
1295  BI->eraseFromParent();
// The one-use freeze fed only the erased branch, so it is dead now.
1296  if (FICond)
1297  FICond->eraseFromParent();
1298 
1299  DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1300  if (HasProfileData)
1301  BPI->eraseBlock(BB);
1302  return true;
1303  }
1304  CurrentBB = CurrentPred;
1305  CurrentPred = CurrentBB->getSinglePredecessor();
1306  }
1307 
1308  return false;
1309 }
1310 
1311 /// Return true if Op is an instruction defined in the given block.
// NOTE(review): original line 1312 (the signature, presumably
// `static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {`) was dropped
// by the doc extraction — confirm against the real source file.
// Non-instruction values (constants, arguments, globals) always return false.
1313  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1314  if (OpInst->getParent() == BB)
1315  return true;
1316  return false;
1317 }
1318 
1319 /// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1320 /// redundant load instruction, eliminate it by replacing it with a PHI node.
1321 /// This is an important optimization that encourages jump threading, and needs
1322 /// to be run interlaced with other jump threading tasks.
// Returns true if LoadI was eliminated (either by local CSE or by PRE into a
// PHI over the predecessors).
// NOTE(review): original line 1323 (the signature, presumably
// `bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {`)
// was dropped by the doc extraction — confirm against the real source file.
1324  // Don't hack volatile and ordered loads.
1325  if (!LoadI->isUnordered()) return false;
1326 
1327  // If the load is defined in a block with exactly one predecessor, it can't be
1328  // partially redundant.
1329  BasicBlock *LoadBB = LoadI->getParent();
1330  if (LoadBB->getSinglePredecessor())
1331  return false;
1332 
1333  // If the load is defined in an EH pad, it can't be partially redundant,
1334  // because the edges between the invoke and the EH pad cannot have other
1335  // instructions between them.
1336  if (LoadBB->isEHPad())
1337  return false;
1338 
1339  Value *LoadedPtr = LoadI->getOperand(0);
1340 
1341  // If the loaded operand is defined in the LoadBB and its not a phi,
1342  // it can't be available in predecessors.
1343  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1344  return false;
1345 
1346  // Scan a few instructions up from the load, to see if it is obviously live at
1347  // the entry to its block.
1348  BasicBlock::iterator BBIt(LoadI);
1349  bool IsLoadCSE;
1350  if (Value *AvailableVal = FindAvailableLoadedValue(
1351  LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1352  // If the value of the load is locally available within the block, just use
1353  // it. This frequently occurs for reg2mem'd allocas.
1354 
1355  if (IsLoadCSE) {
1356  LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1357  combineMetadataForCSE(NLoadI, LoadI, false);
// NOTE(review): the trailing `;` after this brace is a stray empty statement
// in the original source — harmless, kept byte-identical here.
1358  };
1359 
1360  // If the returned value is the load itself, replace with poison. This can
1361  // only happen in dead loops.
1362  if (AvailableVal == LoadI)
1363  AvailableVal = PoisonValue::get(LoadI->getType());
// Reconcile types (e.g. ptr vs int of the same width) before RAUW.
1364  if (AvailableVal->getType() != LoadI->getType())
1365  AvailableVal = CastInst::CreateBitOrPointerCast(
1366  AvailableVal, LoadI->getType(), "", LoadI);
1367  LoadI->replaceAllUsesWith(AvailableVal);
1368  LoadI->eraseFromParent();
1369  return true;
1370  }
1371 
1372  // Otherwise, if we scanned the whole block and got to the top of the block,
1373  // we know the block is locally transparent to the load. If not, something
1374  // might clobber its value.
1375  if (BBIt != LoadBB->begin())
1376  return false;
1377 
1378  // If all of the loads and stores that feed the value have the same AA tags,
1379  // then we can propagate them onto any newly inserted loads.
1380  AAMDNodes AATags = LoadI->getAAMetadata();
1381 
1382  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1383 
1384  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1385 
1386  AvailablePredsTy AvailablePreds;
1387  BasicBlock *OneUnavailablePred = nullptr;
1388  SmallVector<LoadInst*, 8> CSELoads;
1389 
1390  // If we got here, the loaded value is transparent through to the start of the
1391  // block. Check to see if it is available in any of the predecessor blocks.
1392  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1393  // If we already scanned this predecessor, skip it.
1394  if (!PredsScanned.insert(PredBB).second)
1395  continue;
1396 
1397  BBIt = PredBB->end();
1398  unsigned NumScanedInst = 0;
1399  Value *PredAvailable = nullptr;
1400  // NOTE: We don't CSE load that is volatile or anything stronger than
1401  // unordered, that should have been checked when we entered the function.
1402  assert(LoadI->isUnordered() &&
1403  "Attempting to CSE volatile or atomic loads");
1404  // If this is a load on a phi pointer, phi-translate it and search
1405  // for available load/store to the pointer in predecessors.
1406  Type *AccessTy = LoadI->getType();
1407  const auto &DL = LoadI->getModule()->getDataLayout();
1408  MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1409  LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1410  AATags);
1411  PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
1412  PredBB, BBIt, DefMaxInstsToScan,
1413  AA, &IsLoadCSE, &NumScanedInst);
1414 
1415  // If PredBB has a single predecessor, continue scanning through the
1416  // single predecessor.
1417  BasicBlock *SinglePredBB = PredBB;
// Keep walking up single-predecessor chains, sharing the overall
// DefMaxInstsToScan instruction budget across the whole chain.
1418  while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1419  NumScanedInst < DefMaxInstsToScan) {
1420  SinglePredBB = SinglePredBB->getSinglePredecessor();
1421  if (SinglePredBB) {
1422  BBIt = SinglePredBB->end();
1423  PredAvailable = findAvailablePtrLoadStore(
1424  Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1425  (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1426  &NumScanedInst);
1427  }
1428  }
1429 
1430  if (!PredAvailable) {
// Remember *a* predecessor with no available value; only meaningful when it
// turns out to be the unique one (checked below).
1431  OneUnavailablePred = PredBB;
1432  continue;
1433  }
1434 
1435  if (IsLoadCSE)
1436  CSELoads.push_back(cast<LoadInst>(PredAvailable));
1437 
1438  // If so, this load is partially redundant. Remember this info so that we
1439  // can create a PHI node.
1440  AvailablePreds.emplace_back(PredBB, PredAvailable);
1441  }
1442 
1443  // If the loaded value isn't available in any predecessor, it isn't partially
1444  // redundant.
1445  if (AvailablePreds.empty()) return false;
1446 
1447  // Okay, the loaded value is available in at least one (and maybe all!)
1448  // predecessors. If the value is unavailable in more than one unique
1449  // predecessor, we want to insert a merge block for those common predecessors.
1450  // This ensures that we only have to insert one reload, thus not increasing
1451  // code size.
1452  BasicBlock *UnavailablePred = nullptr;
1453 
1454  // If the value is unavailable in one of predecessors, we will end up
1455  // inserting a new instruction into them. It is only valid if all the
1456  // instructions before LoadI are guaranteed to pass execution to its
1457  // successor, or if LoadI is safe to speculate.
1458  // TODO: If this logic becomes more complex, and we will perform PRE insertion
1459  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1460  // It requires domination tree analysis, so for this simple case it is an
1461  // overkill.
1462  if (PredsScanned.size() != AvailablePreds.size() &&
// NOTE(review): original line 1463 — the second operand of this `&&`,
// presumably a speculation-safety check on LoadI — was dropped by the doc
// extraction; confirm against the real source file.
1464  for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
// NOTE(review): original line 1465 — the loop body's condition, presumably a
// guaranteed-transfer-to-successor check on each instruction before LoadI —
// was dropped by the doc extraction; confirm against the real source file.
1466  return false;
1467 
1468  // If there is exactly one predecessor where the value is unavailable, the
1469  // already computed 'OneUnavailablePred' block is it. If it ends in an
1470  // unconditional branch, we know that it isn't a critical edge.
1471  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1472  OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1473  UnavailablePred = OneUnavailablePred;
1474  } else if (PredsScanned.size() != AvailablePreds.size()) {
1475  // Otherwise, we had multiple unavailable predecessors or we had a critical
1476  // edge from the one.
1477  SmallVector<BasicBlock*, 8> PredsToSplit;
1478  SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1479 
1480  for (const auto &AvailablePred : AvailablePreds)
1481  AvailablePredSet.insert(AvailablePred.first);
1482 
1483  // Add all the unavailable predecessors to the PredsToSplit list.
1484  for (BasicBlock *P : predecessors(LoadBB)) {
1485  // If the predecessor is an indirect goto, we can't split the edge.
1486  if (isa<IndirectBrInst>(P->getTerminator()))
1487  return false;
1488 
1489  if (!AvailablePredSet.count(P))
1490  PredsToSplit.push_back(P);
1491  }
1492 
1493  // Split them out to their own block.
1494  UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1495  }
1496 
1497  // If the value isn't available in all predecessors, then there will be
1498  // exactly one where it isn't available. Insert a load on that edge and add
1499  // it to the AvailablePreds list.
1500  if (UnavailablePred) {
1501  assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1502  "Can't handle critical edge here!");
// The PRE'd reload: same alignment/ordering/scope as the original load, with
// the pointer phi-translated into the unavailable predecessor.
1503  LoadInst *NewVal = new LoadInst(
1504  LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1505  LoadI->getName() + ".pr", false, LoadI->getAlign(),
1506  LoadI->getOrdering(), LoadI->getSyncScopeID(),
1507  UnavailablePred->getTerminator());
1508  NewVal->setDebugLoc(LoadI->getDebugLoc());
1509  if (AATags)
1510  NewVal->setAAMetadata(AATags);
1511 
1512  AvailablePreds.emplace_back(UnavailablePred, NewVal);
1513  }
1514 
1515  // Now we know that each predecessor of this block has a value in
1516  // AvailablePreds, sort them for efficient access as we're walking the preds.
1517  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1518 
1519  // Create a PHI node at the start of the block for the PRE'd load value.
1520  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1521  PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "",
1522  &LoadBB->front());
1523  PN->takeName(LoadI);
1524  PN->setDebugLoc(LoadI->getDebugLoc());
1525 
1526  // Insert new entries into the PHI for each predecessor. A single block may
1527  // have multiple entries here.
1528  for (pred_iterator PI = PB; PI != PE; ++PI) {
1529  BasicBlock *P = *PI;
// Binary search works because AvailablePreds was pod-sorted above.
1530  AvailablePredsTy::iterator I =
1531  llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1532 
1533  assert(I != AvailablePreds.end() && I->first == P &&
1534  "Didn't find entry for predecessor!");
1535 
1536  // If we have an available predecessor but it requires casting, insert the
1537  // cast in the predecessor and use the cast. Note that we have to update the
1538  // AvailablePreds vector as we go so that all of the PHI entries for this
1539  // predecessor use the same bitcast.
1540  Value *&PredV = I->second;
1541  if (PredV->getType() != LoadI->getType())
1542  PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
1543  P->getTerminator());
1544 
1545  PN->addIncoming(PredV, I->first);
1546  }
1547 
1548  for (LoadInst *PredLoadI : CSELoads) {
1549  combineMetadataForCSE(PredLoadI, LoadI, true);
1550  }
1551 
1552  LoadI->replaceAllUsesWith(PN);
1553  LoadI->eraseFromParent();
1554 
1555  return true;
1556 }
1557 
1558 /// findMostPopularDest - The specified list contains multiple possible
1559 /// threadable destinations. Pick the one that occurs the most frequently in
1560 /// the list.
// May return nullptr when every entry in PredToDestList maps to a null
// (undef) destination — see the DestPopularity[nullptr] seeding below.
1561 static BasicBlock *
// NOTE(review): original line 1562 — the rest of the signature, presumably
// `findMostPopularDest(BasicBlock *BB,` — was dropped by the doc extraction.
1563  const SmallVectorImpl<std::pair<BasicBlock *,
1564  BasicBlock *>> &PredToDestList) {
1565  assert(!PredToDestList.empty());
1566 
1567  // Determine popularity. If there are multiple possible destinations, we
1568  // explicitly choose to ignore 'undef' destinations. We prefer to thread
1569  // blocks with known and real destinations to threading undef. We'll handle
1570  // them later if interesting.
1571  MapVector<BasicBlock *, unsigned> DestPopularity;
1572 
1573  // Populate DestPopularity with the successors in the order they appear in the
1574  // successor list. This way, we ensure determinism by iterating it in the
1575  // same order in std::max_element below. We map nullptr to 0 so that we can
1576  // return nullptr when PredToDestList contains nullptr only.
1577  DestPopularity[nullptr] = 0;
1578  for (auto *SuccBB : successors(BB))
1579  DestPopularity[SuccBB] = 0;
1580 
// Count only non-null (known, non-undef) destinations.
1581  for (const auto &PredToDest : PredToDestList)
1582  if (PredToDest.second)
1583  DestPopularity[PredToDest.second]++;
1584 
1585  // Find the most popular dest.
1586  auto MostPopular = std::max_element(
1587  DestPopularity.begin(), DestPopularity.end(), llvm::less_second());
1588 
1589  // Okay, we have finally picked the most popular destination.
1590  return MostPopular->first;
1591 }
1592 
1593 // Try to evaluate the value of V when the control flows from PredPredBB to
1594 // BB->getSinglePredecessor() and then on to BB.
// Returns the folded Constant, or nullptr when no constant can be determined.
// NOTE(review): original line 1595 — the start of the signature, presumably
// `Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,` —
// was dropped by the doc extraction; confirm against the real source file.
1596  BasicBlock *PredPredBB,
1597  Value *V) {
1598  BasicBlock *PredBB = BB->getSinglePredecessor();
1599  assert(PredBB && "Expected a single predecessor");
1600 
// Constants evaluate to themselves regardless of the edge taken.
1601  if (Constant *Cst = dyn_cast<Constant>(V)) {
1602  return Cst;
1603  }
1604 
1605  // Consult LVI if V is not an instruction in BB or PredBB.
1606  Instruction *I = dyn_cast<Instruction>(V);
1607  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1608  return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1609  }
1610 
1611  // Look into a PHI argument.
1612  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
// A PHI in PredBB selects its PredPredBB incoming value on this path;
// dyn_cast yields nullptr when that incoming value is not a constant.
1613  if (PHI->getParent() == PredBB)
1614  return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1615  return nullptr;
1616  }
1617 
1618  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1619  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1620  if (CondCmp->getParent() == BB) {
// Recurse on both operands; the compare folds only if both become constants.
1621  Constant *Op0 =
1622  evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
1623  Constant *Op1 =
1624  evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
1625  if (Op0 && Op1) {
1626  return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
1627  }
1628  }
1629  return nullptr;
1630  }
1631 
1632  return nullptr;
1633 }
1634 
1637  Instruction *CxtI) {
1638  // If threading this would thread across a loop header, don't even try to
1639  // thread the edge.
1640  if (LoopHeaders.count(BB))
1641  return false;
1642 
1643  PredValueInfoTy PredValues;
1644  if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1645  CxtI)) {
1646  // We don't have known values in predecessors. See if we can thread through
1647  // BB and its sole predecessor.
1648  return maybethreadThroughTwoBasicBlocks(BB, Cond);
1649  }
1650 
1651  assert(!PredValues.empty() &&
1652  "computeValueKnownInPredecessors returned true with no values");
1653 
1654  LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1655  for (const auto &PredValue : PredValues) {
1656  dbgs() << " BB '" << BB->getName()
1657  << "': FOUND condition = " << *PredValue.first
1658  << " for pred '" << PredValue.second->getName() << "'.\n";
1659  });
1660 
1661  // Decide what we want to thread through. Convert our list of known values to
1662  // a list of known destinations for each pred. This also discards duplicate
1663  // predecessors and keeps track of the undefined inputs (which are represented
1664  // as a null dest in the PredToDestList).
1665  SmallPtrSet<BasicBlock*, 16> SeenPreds;
1667 
1668  BasicBlock *OnlyDest = nullptr;
1669  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1670  Constant *OnlyVal = nullptr;
1671  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1672 
1673  for (const auto &PredValue : PredValues) {
1674  BasicBlock *Pred = PredValue.second;
1675  if (!SeenPreds.insert(Pred).second)
1676  continue; // Duplicate predecessor entry.
1677 
1678  Constant *Val = PredValue.first;
1679 
1680  BasicBlock *DestBB;
1681  if (isa<UndefValue>(Val))
1682  DestBB = nullptr;
1683  else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1684  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1685  DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1686  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1687  assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1688  DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1689  } else {
1690  assert(isa<IndirectBrInst>(BB->getTerminator())
1691  && "Unexpected terminator");
1692  assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1693  DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1694  }
1695 
1696  // If we have exactly one destination, remember it for efficiency below.
1697  if (PredToDestList.empty()) {
1698  OnlyDest = DestBB;
1699  OnlyVal = Val;
1700  } else {
1701  if (OnlyDest != DestBB)
1702  OnlyDest = MultipleDestSentinel;
1703  // It possible we have same destination, but different value, e.g. default
1704  // case in switchinst.
1705  if (Val != OnlyVal)
1706  OnlyVal = MultipleVal;
1707  }
1708 
1709  // If the predecessor ends with an indirect goto, we can't change its
1710  // destination.
1711  if (isa<IndirectBrInst>(Pred->getTerminator()))
1712  continue;
1713 
1714  PredToDestList.emplace_back(Pred, DestBB);
1715  }
1716 
1717  // If all edges were unthreadable, we fail.
1718  if (PredToDestList.empty())
1719  return false;
1720 
1721  // If all the predecessors go to a single known successor, we want to fold,
1722  // not thread. By doing so, we do not need to duplicate the current block and
1723  // also miss potential opportunities in case we dont/cant duplicate.
1724  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1725  if (BB->hasNPredecessors(PredToDestList.size())) {
1726  bool SeenFirstBranchToOnlyDest = false;
1727  std::vector <DominatorTree::UpdateType> Updates;
1728  Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1729  for (BasicBlock *SuccBB : successors(BB)) {
1730  if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1731  SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1732  } else {
1733  SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1734  Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1735  }
1736  }
1737 
1738  // Finally update the terminator.
1739  Instruction *Term = BB->getTerminator();
1740  BranchInst::Create(OnlyDest, Term);
1741  ++NumFolds;
1742  Term->eraseFromParent();
1743  DTU->applyUpdatesPermissive(Updates);
1744  if (HasProfileData)
1745  BPI->eraseBlock(BB);
1746 
1747  // If the condition is now dead due to the removal of the old terminator,
1748  // erase it.
1749  if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1750  if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1751  CondInst->eraseFromParent();
1752  // We can safely replace *some* uses of the CondInst if it has
1753  // exactly one value as returned by LVI. RAUW is incorrect in the
1754  // presence of guards and assumes, that have the `Cond` as the use. This
1755  // is because we use the guards/assume to reason about the `Cond` value
1756  // at the end of block, but RAUW unconditionally replaces all uses
1757  // including the guards/assumes themselves and the uses before the
1758  // guard/assume.
1759  else if (OnlyVal && OnlyVal != MultipleVal)
1760  replaceFoldableUses(CondInst, OnlyVal, BB);
1761  }
1762  return true;
1763  }
1764  }
1765 
1766  // Determine which is the most common successor. If we have many inputs and
1767  // this block is a switch, we want to start by threading the batch that goes
1768  // to the most popular destination first. If we only know about one
1769  // threadable destination (the common case) we can avoid this.
1770  BasicBlock *MostPopularDest = OnlyDest;
1771 
1772  if (MostPopularDest == MultipleDestSentinel) {
1773  // Remove any loop headers from the Dest list, threadEdge conservatively
1774  // won't process them, but we might have other destination that are eligible
1775  // and we still want to process.
1776  erase_if(PredToDestList,
1777  [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1778  return LoopHeaders.contains(PredToDest.second);
1779  });
1780 
1781  if (PredToDestList.empty())
1782  return false;
1783 
1784  MostPopularDest = findMostPopularDest(BB, PredToDestList);
1785  }
1786 
1787  // Now that we know what the most popular destination is, factor all
1788  // predecessors that will jump to it into a single predecessor.
1789  SmallVector<BasicBlock*, 16> PredsToFactor;
1790  for (const auto &PredToDest : PredToDestList)
1791  if (PredToDest.second == MostPopularDest) {
1792  BasicBlock *Pred = PredToDest.first;
1793 
1794  // This predecessor may be a switch or something else that has multiple
1795  // edges to the block. Factor each of these edges by listing them
1796  // according to # occurrences in PredsToFactor.
1797  for (BasicBlock *Succ : successors(Pred))
1798  if (Succ == BB)
1799  PredsToFactor.push_back(Pred);
1800  }
1801 
1802  // If the threadable edges are branching on an undefined value, we get to pick
1803  // the destination that these predecessors should get to.
1804  if (!MostPopularDest)
1805  MostPopularDest = BB->getTerminator()->
1806  getSuccessor(getBestDestForJumpOnUndef(BB));
1807 
1808  // Ok, try to thread it!
1809  return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1810 }
1811 
/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node (or freeze PHI) in the current block. See if there are any
/// simplifications we can do based on inputs to the phi node.
// NOTE(review): the signature line is missing from this extract (likely
// "bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {") -- verify
// against upstream before applying.
  BasicBlock *BB = PN->getParent();

  // TODO: We could make use of this to do it once for blocks with common PHI
  // values.
  // NOTE(review): a declaration line is missing here in this extract (likely
  // "SmallVector<BasicBlock *, 1> PredBBs;") -- verify against upstream.
  PredBBs.resize(1);

  // If any of the predecessor blocks end in an unconditional branch, we can
  // *duplicate* the conditional branch into that block in order to further
  // encourage jump threading and to eliminate cases where we have branch on a
  // phi of an icmp (branch on icmp is much better).
  // This is still beneficial when a frozen phi is used as the branch condition
  // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
  // to br(icmp(freeze ...)).
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *PredBB = PN->getIncomingBlock(i);
    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
      if (PredBr->isUnconditional()) {
        PredBBs[0] = PredBB;
        // Try to duplicate BB into PredBB.
        if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
          return true;
      }
  }

  return false;
}
1843 
/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
// NOTE(review): the signature line is missing from this extract (likely
// "bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {") --
// verify against upstream before applying.
  BasicBlock *BB = BO->getParent();

  // If either the LHS or RHS of the xor is a constant, don't do this
  // optimization.
  if (isa<ConstantInt>(BO->getOperand(0)) ||
      isa<ConstantInt>(BO->getOperand(1)))
    return false;

  // If the first instruction in BB isn't a phi, we won't be able to infer
  // anything special about any particular predecessor.
  if (!isa<PHINode>(BB->front()))
    return false;

  // If this BB is a landing pad, we won't be able to split the edge into it.
  if (BB->isEHPad())
    return false;

  // If we have a xor as the branch input to this block, and we know that the
  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
  // the condition into the predecessor and fix that value to true, saving some
  // logical ops on that path and encouraging other paths to simplify.
  //
  // This copies something like this:
  //
  //  BB:
  //    %X = phi i1 [1],  [%X']
  //    %Y = icmp eq i32 %A, %B
  //    %Z = xor i1 %X, %Y
  //    br i1 %Z, ...
  //
  // Into:
  //  BB':
  //    %Y = icmp ne i32 %A, %B
  //    br i1 %Y, ...

  PredValueInfoTy XorOpValues;
  bool isLHS = true;
  // First try to learn the LHS of the xor across predecessor edges; if that
  // yields nothing, try the RHS instead.
  if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
                                       WantInteger, BO)) {
    assert(XorOpValues.empty());
    if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
                                         WantInteger, BO))
      return false;
    isLHS = false;
  }

  assert(!XorOpValues.empty() &&
         "computeValueKnownInPredecessors returned true with no values");

  // Scan the information to see which is most popular: true or false.  The
  // predecessors can be of the set true, false, or undef.
  unsigned NumTrue = 0, NumFalse = 0;
  for (const auto &XorOpValue : XorOpValues) {
    if (isa<UndefValue>(XorOpValue.first))
      // Ignore undefs for the count.
      continue;
    if (cast<ConstantInt>(XorOpValue.first)->isZero())
      ++NumFalse;
    else
      ++NumTrue;
  }

  // Determine which value to split on, true, false, or undef if neither.
  ConstantInt *SplitVal = nullptr;
  if (NumTrue > NumFalse)
    SplitVal = ConstantInt::getTrue(BB->getContext());
  else if (NumTrue != 0 || NumFalse != 0)
    SplitVal = ConstantInt::getFalse(BB->getContext());

  // Collect all of the blocks that this can be folded into so that we can
  // factor this once and clone it once.
  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
  for (const auto &XorOpValue : XorOpValues) {
    // Undef entries fold to either destination, so they always qualify.
    if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
      continue;

    BlocksToFoldInto.push_back(XorOpValue.second);
  }

  // If we inferred a value for all of the predecessors, then duplication won't
  // help us.  However, we can just replace the LHS or RHS with the constant.
  if (BlocksToFoldInto.size() ==
      cast<PHINode>(BB->front()).getNumIncomingValues()) {
    if (!SplitVal) {
      // If all preds provide undef, just nuke the xor, because it is undef too.
      // NOTE(review): a statement line is missing here in this extract (likely
      // "BO->replaceAllUsesWith(UndefValue::get(BO->getType()));") -- verify
      // against upstream.
      BO->eraseFromParent();
    } else if (SplitVal->isZero()) {
      // If all preds provide 0, replace the xor with the other input.
      BO->replaceAllUsesWith(BO->getOperand(isLHS));
      BO->eraseFromParent();
    } else {
      // If all preds provide 1, set the computed value to 1.
      BO->setOperand(!isLHS, SplitVal);
    }

    return true;
  }

  // If any of predecessors end with an indirect goto, we can't change its
  // destination.
  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
        return isa<IndirectBrInst>(Pred->getTerminator());
      }))
    return false;

  // Try to duplicate BB into PredBB.
  return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
1957 
/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
// NOTE(review): the first signature line and the final parameter line are
// missing from this extract (likely
// "static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB," and
// "DenseMap<Instruction *, Value *> &ValueMap) {") -- verify against upstream.
                                            BasicBlock *OldPred,
                                            BasicBlock *NewPred,
  for (PHINode &PN : PHIBB->phis()) {
    // Ok, we have a PHI node.  Figure out what the incoming value was for the
    // DestBlock.
    Value *IV = PN.getIncomingValueForBlock(OldPred);

    // Remap the value if necessary: instructions cloned into the new block
    // must be referenced through their clone, not the original.
    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
      // NOTE(review): a lookup line is missing here in this extract (likely
      // "DenseMap<Instruction *, Value *>::iterator I = ValueMap.find(Inst);")
      // -- verify against upstream.
      if (I != ValueMap.end())
        IV = I->second;
    }

    PN.addIncoming(IV, NewPred);
  }
}
1980 
/// Merge basic block BB into its sole predecessor if possible.
// NOTE(review): the signature line is missing from this extract (likely
// "bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {")
// -- verify against upstream before applying.
  BasicBlock *SinglePred = BB->getSinglePredecessor();
  if (!SinglePred)
    return false;

  // Only merge when the predecessor falls through unconditionally into BB and
  // merging cannot break EH structure or an address-taken block.
  const Instruction *TI = SinglePred->getTerminator();
  if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
      SinglePred == BB || hasAddressTakenAndUsed(BB))
    return false;

  // If SinglePred was a loop header, BB becomes one.
  if (LoopHeaders.erase(SinglePred))
    LoopHeaders.insert(BB);

  LVI->eraseBlock(SinglePred);
  // NOTE(review): the actual merge call is missing here in this extract
  // (likely "MergeBasicBlockIntoOnlyPred(BB, DTU);") -- verify against
  // upstream.

  // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
  // BB code within one basic block `BB`), we need to invalidate the LVI
  // information associated with BB, because the LVI information need not be
  // true for all of BB after the merge. For example,
  // Before the merge, LVI info and code is as follows:
  // SinglePred: <LVI info1 for %p val>
  // %y = use of %p
  // call @exit() // need not transfer execution to successor.
  // assume(%p) // from this point on %p is true
  // br label %BB
  // BB: <LVI info2 for %p val, i.e. %p is true>
  // %x = use of %p
  // br label exit
  //
  // Note that this LVI info for blocks BB and SinglPred is correct for %p
  // (info2 and info1 respectively). After the merge and the deletion of the
  // LVI info1 for SinglePred. We have the following code:
  // BB: <LVI info2 for %p val>
  // %y = use of %p
  // call @exit()
  // assume(%p)
  // %x = use of %p <-- LVI info2 is correct from here onwards.
  // br label exit
  // LVI info2 for BB is incorrect at the beginning of BB.

  // Invalidate LVI information for BB if the LVI is not provably true for
  // all of BB.
  // NOTE(review): the guard condition line is missing here in this extract
  // (likely "if (!isGuaranteedToTransferExecutionToSuccessor(...))" guarding
  // the erase below) -- verify against upstream.
    LVI->eraseBlock(BB);
  return true;
}
2030 
/// Update the SSA form.  NewBB contains instructions that are copied from BB.
/// ValueMapping maps old values in BB to new ones in NewBB.
// NOTE(review): the opening signature line is missing from this extract
// (likely "void JumpThreadingPass::updateSSA(") -- verify against upstream.
                                  BasicBlock *BB, BasicBlock *NewBB,
                                  DenseMap<Instruction *, Value *> &ValueMapping) {
  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value.  This can require arbitrary
  // PHI insertion, of which we are prepared to do, clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use *, 16> UsesToRename;

  for (Instruction &I : *BB) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Use &U : I.uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        // A PHI use is "inside" BB only if the incoming edge is from BB.
        if (UserPN->getIncomingBlock(U) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;

      UsesToRename.push_back(&U);
    }

    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;
    LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");

    // We found a use of I outside of BB.  Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I.getType(), I.getName());
    SSAUpdate.AddAvailableValue(BB, &I);
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);

    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
    LLVM_DEBUG(dbgs() << "\n");
  }
}
2074 
/// Clone instructions in range [BI, BE) to NewBB.  For PHI nodes, we only
/// clone arguments that come from PredBB.  Return the map from the variables
/// in the source basic block to the variables in the newly created basic
/// block.
// NOTE(review): the opening signature lines are missing from this extract
// (likely "DenseMap<Instruction *, Value *> JumpThreadingPass::
// cloneInstructions(BasicBlock::iterator BI,") -- verify against upstream.
    BasicBlock::iterator BE, BasicBlock *NewBB,
    BasicBlock *PredBB) {
  // We are going to have to map operands from the source basic block to the new
  // copy of the block 'NewBB'.  If there are PHI nodes in the source basic
  // block, evaluate them to account for entry from PredBB.
  DenseMap<Instruction *, Value *> ValueMapping;

  // Clone the phi nodes of the source basic block into NewBB.  The resulting
  // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
  // might need to rewrite the operand of the cloned phi.
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
    PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
    NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
    ValueMapping[PN] = NewPN;
  }

  // Clone noalias scope declarations in the threaded block.  When threading a
  // loop exit, we would otherwise end up with two identical scope declarations
  // visible at the same time.
  SmallVector<MDNode *> NoAliasScopes;
  DenseMap<MDNode *, MDNode *> ClonedScopes;
  LLVMContext &Context = PredBB->getContext();
  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);

  // Clone the non-phi instructions of the source basic block into NewBB,
  // keeping track of the mapping and using it to remap operands in the cloned
  // instructions.
  for (; BI != BE; ++BI) {
    Instruction *New = BI->clone();
    New->setName(BI->getName());
    NewBB->getInstList().push_back(New);
    ValueMapping[&*BI] = New;
    adaptNoAliasScopes(New, ClonedScopes, Context);

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }
  }

  return ValueMapping;
}
2126 
/// Attempt to thread through two successive basic blocks.
// NOTE(review): the opening signature line is missing from this extract
// (likely "bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(
// BasicBlock *BB,") -- verify against upstream.
                                                   Value *Cond) {
  // Consider:
  //
  // PredBB:
  //   %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
  //   %tobool = icmp eq i32 %cond, 0
  //   br i1 %tobool, label %BB, label ...
  //
  // BB:
  //   %cmp = icmp eq i32* %var, null
  //   br i1 %cmp, label ..., label ...
  //
  // We don't know the value of %var at BB even if we know which incoming edge
  // we take to BB.  However, once we duplicate PredBB for each of its incoming
  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
  // PredBB.  Then we can thread edges PredBB1->BB and PredBB2->BB through BB.

  // Require that BB end with a Branch for simplicity.
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  if (!CondBr)
    return false;

  // BB must have exactly one predecessor.
  BasicBlock *PredBB = BB->getSinglePredecessor();
  if (!PredBB)
    return false;

  // Require that PredBB end with a conditional Branch. If PredBB ends with an
  // unconditional branch, we should be merging PredBB and BB instead. For
  // simplicity, we don't deal with a switch.
  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
  if (!PredBBBranch || PredBBBranch->isUnconditional())
    return false;

  // If PredBB has exactly one incoming edge, we don't gain anything by copying
  // PredBB.
  if (PredBB->getSinglePredecessor())
    return false;

  // Don't thread through PredBB if it contains a successor edge to itself, in
  // which case we would infinite loop.  Suppose we are threading an edge from
  // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
  // successor edge to itself.  If we allowed jump threading in this case, we
  // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread.  Since
  // PredBB.thread has a successor edge to PredBB, we would immediately come up
  // with another jump threading opportunity from PredBB.thread through PredBB
  // and BB to SuccBB.  This jump threading would repeatedly occur.  That is, we
  // would keep peeling one iteration from PredBB.
  if (llvm::is_contained(successors(PredBB), PredBB))
    return false;

  // Don't thread across a loop header.
  if (LoopHeaders.count(PredBB))
    return false;

  // Avoid complication with duplicating EH pads.
  if (PredBB->isEHPad())
    return false;

  // Find a predecessor that we can thread.  For simplicity, we only consider a
  // successor edge out of BB to which we thread exactly one incoming edge into
  // PredBB.
  unsigned ZeroCount = 0;
  unsigned OneCount = 0;
  BasicBlock *ZeroPred = nullptr;
  BasicBlock *OnePred = nullptr;
  for (BasicBlock *P : predecessors(PredBB)) {
    // If PredPred ends with IndirectBrInst, we can't handle it.
    if (isa<IndirectBrInst>(P->getTerminator()))
      continue;
    if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
            evaluateOnPredecessorEdge(BB, P, Cond))) {
      if (CI->isZero()) {
        ZeroCount++;
        ZeroPred = P;
      } else if (CI->isOne()) {
        OneCount++;
        OnePred = P;
      }
    }
  }

  // Disregard complicated cases where we have to thread multiple edges.
  BasicBlock *PredPredBB;
  if (ZeroCount == 1) {
    PredPredBB = ZeroPred;
  } else if (OneCount == 1) {
    PredPredBB = OnePred;
  } else {
    return false;
  }

  // Successor index 1 (false edge) when the condition evaluated to zero on
  // the chosen edge, index 0 (true edge) otherwise.
  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);

  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << "  Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '")
             << BB->getName() << "' to dest "
             << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName()
             << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  // Compute the cost of duplicating BB and PredBB.
  unsigned BBCost = getJumpThreadDuplicationCost(
      TTI, BB, BB->getTerminator(), BBDupThreshold);
  unsigned PredBBCost = getJumpThreadDuplicationCost(
      TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);

  // Give up if costs are too high.  We need to check BBCost and PredBBCost
  // individually before checking their sum because getJumpThreadDuplicationCost
  // return (unsigned)~0 for those basic blocks that cannot be duplicated.
  if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
      BBCost + PredBBCost > BBDupThreshold) {
    // NOTE(review): debug message below is missing a space before "for BB" --
    // cosmetic only; fixing it would change a runtime string, so only flagged.
    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << PredBBCost
                      << " for PredBB, " << BBCost << "for BB\n");
    return false;
  }

  // Now we are ready to duplicate PredBB.
  threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
  return true;
}
2267 
// Thread the edge PredPredBB -> PredBB -> BB -> SuccBB by duplicating PredBB
// into a new block reachable only from PredPredBB, then threading BB itself.
// NOTE(review): the opening signature line is missing from this extract
// (likely "void JumpThreadingPass::threadThroughTwoBasicBlocks(
// BasicBlock *PredPredBB,") -- verify against upstream.
                                                    BasicBlock *PredBB,
                                                    BasicBlock *BB,
                                                    BasicBlock *SuccBB) {
  LLVM_DEBUG(dbgs() << "  Threading through '" << PredBB->getName() << "' and '"
                    << BB->getName() << "'\n");

  // Both terminators were validated as conditional branches by the caller.
  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());

  BasicBlock *NewBB =
      BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
                         PredBB->getParent(), PredBB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
                     BPI->getEdgeProbability(PredPredBB, PredBB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'.  If there are PHI nodes in PredBB, evaluate them
  // to account for entry from PredPredBB.
  DenseMap<Instruction *, Value *> ValueMapping =
      cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);

  // Copy the edge probabilities from PredBB to NewBB.
  if (HasProfileData)
    BPI->copyEdgeProbabilities(PredBB, NewBB);

  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
  // This eliminates predecessors from PredPredBB, which requires us to simplify
  // any PHI nodes in PredBB.
  Instruction *PredPredTerm = PredPredBB->getTerminator();
  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
    if (PredPredTerm->getSuccessor(i) == PredBB) {
      PredBB->removePredecessor(PredPredBB, true);
      PredPredTerm->setSuccessor(i, NewBB);
    }

  // NewBB inherits PredBB's (conditional) terminator, so both of PredBB's
  // successors gain NewBB as a new predecessor; patch their PHIs.
  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
                                  ValueMapping);
  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
                                  ValueMapping);

  DTU->applyUpdatesPermissive(
      {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
       {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
       {DominatorTree::Insert, PredPredBB, NewBB},
       {DominatorTree::Delete, PredPredBB, PredBB}});

  updateSSA(PredBB, NewBB, ValueMapping);

  // Clean up things like PHI nodes with single operands, dead instructions,
  // etc.
  SimplifyInstructionsInBlock(NewBB, TLI);
  SimplifyInstructionsInBlock(PredBB, TLI);

  // Finally, thread BB itself along the NewBB -> BB edge.
  SmallVector<BasicBlock *, 1> PredsToFactor;
  PredsToFactor.push_back(NewBB);
  threadEdge(BB, PredsToFactor, SuccBB);
}
2332 
/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
// NOTE(review): the opening signature line is missing from this extract
// (likely "bool JumpThreadingPass::tryThreadEdge(") -- verify against
// upstream.
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
    BasicBlock *SuccBB) {
  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above findLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << "  Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
             << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName() << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  // Reject if duplicating BB would exceed the size budget.
  unsigned JumpThreadCost = getJumpThreadDuplicationCost(
      TTI, BB, BB->getTerminator(), BBDupThreshold);
  if (JumpThreadCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << JumpThreadCost << "\n");
    return false;
  }

  threadEdge(BB, PredBBs, SuccBB);
  return true;
}
2369 
/// threadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB.  Transform the IR to reflect this change.
// NOTE(review): the opening signature line is missing from this extract
// (likely "void JumpThreadingPass::threadEdge(BasicBlock *BB,") -- verify
// against upstream.
                                   const SmallVectorImpl<BasicBlock *> &PredBBs,
                                   BasicBlock *SuccBB) {
  assert(SuccBB != BB && "Don't create an infinite loop");

  assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
         "Don't thread across loop headers");

  // And finally, do it!  Start by factoring the predecessors if needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    LLVM_DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
                      << " common predecessors.\n");
    PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
  }

  // And finally, do it!
  LLVM_DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName()
                    << "' to '" << SuccBB->getName()
                    << ", across block:\n    " << *BB << "\n");

  // Tell LVI which way the threaded edge resolves so later queries stay
  // precise.
  LVI->threadEdge(PredBB, BB, SuccBB);

  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
                                         BB->getName()+".thread",
                                         BB->getParent(), BB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq =
        BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // Copy all the instructions from BB to NewBB except the terminator.
  DenseMap<Instruction *, Value *> ValueMapping =
      cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);

  // We didn't copy the terminator from BB over to NewBB, because there is now
  // an unconditional jump to SuccBB.  Insert the unconditional jump.
  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());

  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
  // PHI nodes for NewBB now.
  addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);

  // Update the terminator of PredBB to jump to NewBB instead of BB.  This
  // eliminates predecessors from BB, which requires us to simplify any PHI
  // nodes in BB.
  Instruction *PredTerm = PredBB->getTerminator();
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
    if (PredTerm->getSuccessor(i) == BB) {
      BB->removePredecessor(PredBB, true);
      PredTerm->setSuccessor(i, NewBB);
    }

  // Enqueue required DT updates.
  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
                               {DominatorTree::Insert, PredBB, NewBB},
                               {DominatorTree::Delete, PredBB, BB}});

  updateSSA(BB, NewBB, ValueMapping);

  // At this point, the IR is fully up to date and consistent.  Do a quick scan
  // over the new instructions and zap any that are constants or dead.  This
  // frequently happens because of phi translation.
  SimplifyInstructionsInBlock(NewBB, TLI);

  // Update the edge weight from BB to SuccBB, which should be less than before.
  updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);

  // Threaded an edge!
  ++NumThreads;
}
2451 
/// Create a new basic block that will be the predecessor of BB and successor of
/// all blocks in Preds. When profile data is available, update the frequency of
/// this new block.
BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
                                               ArrayRef<BasicBlock *> Preds,
                                               const char *Suffix) {
  // NOTE(review): a declaration line is missing here in this extract (likely
  // "SmallVector<BasicBlock *, 2> NewBBs;") -- verify against upstream.

  // Collect the frequencies of all predecessors of BB, which will be used to
  // update the edge weight of the result of splitting predecessors.
  // NOTE(review): a declaration line is missing here in this extract (likely
  // "DenseMap<BasicBlock *, BlockFrequency> FreqMap;") -- verify against
  // upstream.
  if (HasProfileData)
    for (auto *Pred : Preds)
      FreqMap.insert(std::make_pair(
          Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));

  // In the case when BB is a LandingPad block we create 2 new predecessors
  // instead of just one.
  if (BB->isLandingPad()) {
    std::string NewName = std::string(Suffix) + ".split-lp";
    SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
  } else {
    NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
  }

  // Rewire the dominator tree: each Pred now reaches BB only via its NewBB.
  std::vector<DominatorTree::UpdateType> Updates;
  Updates.reserve((2 * Preds.size()) + NewBBs.size());
  for (auto *NewBB : NewBBs) {
    BlockFrequency NewBBFreq(0);
    Updates.push_back({DominatorTree::Insert, NewBB, BB});
    for (auto *Pred : predecessors(NewBB)) {
      Updates.push_back({DominatorTree::Delete, Pred, BB});
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
      if (HasProfileData) // Update frequencies between Pred -> NewBB.
        NewBBFreq += FreqMap.lookup(Pred);
    }
    if (HasProfileData) // Apply the summed frequency to NewBB.
      BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  DTU->applyUpdatesPermissive(Updates);
  return NewBBs[0];
}
2495 
2496 bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2497  const Instruction *TI = BB->getTerminator();
2498  assert(TI->getNumSuccessors() > 1 && "not a split");
2499 
2500  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
2501  if (!WeightsNode)
2502  return false;
2503 
2504  MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
2505  if (MDName->getString() != "branch_weights")
2506  return false;
2507 
2508  // Ensure there are weights for all of the successors. Note that the first
2509  // operand to the metadata node is a name, not a weight.
2510  return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
2511 }
2512 
2513 /// Update the block frequency of BB and branch weight and the metadata on the
2514 /// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2515 /// Freq(PredBB->BB) / Freq(BB->SuccBB).
2516 void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2517  BasicBlock *BB,
2518  BasicBlock *NewBB,
2519  BasicBlock *SuccBB) {
2520  if (!HasProfileData)
2521  return;
2522 
2523  assert(BFI && BPI && "BFI & BPI should have been created here");
2524 
2525  // As the edge from PredBB to BB is deleted, we have to update the block
2526  // frequency of BB.
2527  auto BBOrigFreq = BFI->getBlockFreq(BB);
2528  auto NewBBFreq = BFI->getBlockFreq(NewBB);
2529  auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2530  auto BBNewFreq = BBOrigFreq - NewBBFreq;
2531  BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2532 
2533  // Collect updated outgoing edges' frequencies from BB and use them to update
2534  // edge probabilities.
2535  SmallVector<uint64_t, 4> BBSuccFreq;
2536  for (BasicBlock *Succ : successors(BB)) {
2537  auto SuccFreq = (Succ == SuccBB)
2538  ? BB2SuccBBFreq - NewBBFreq
2539  : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2540  BBSuccFreq.push_back(SuccFreq.getFrequency());
2541  }
2542 
2543  uint64_t MaxBBSuccFreq =
2544  *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2545 
2547  if (MaxBBSuccFreq == 0)
2548  BBSuccProbs.assign(BBSuccFreq.size(),
2549  {1, static_cast<unsigned>(BBSuccFreq.size())});
2550  else {
2551  for (uint64_t Freq : BBSuccFreq)
2552  BBSuccProbs.push_back(
2553  BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2554  // Normalize edge probabilities so that they sum up to one.
2555  BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
2556  BBSuccProbs.end());
2557  }
2558 
2559  // Update edge probabilities in BPI.
2560  BPI->setEdgeProbability(BB, BBSuccProbs);
2561 
2562  // Update the profile metadata as well.
2563  //
2564  // Don't do this if the profile of the transformed blocks was statically
2565  // estimated. (This could occur despite the function having an entry
2566  // frequency in completely cold parts of the CFG.)
2567  //
2568  // In this case we don't want to suggest to subsequent passes that the
2569  // calculated weights are fully consistent. Consider this graph:
2570  //
2571  // check_1
2572  // 50% / |
2573  // eq_1 | 50%
2574  // \ |
2575  // check_2
2576  // 50% / |
2577  // eq_2 | 50%
2578  // \ |
2579  // check_3
2580  // 50% / |
2581  // eq_3 | 50%
2582  // \ |
2583  //
2584  // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2585  // the overall probabilities are inconsistent; the total probability that the
2586  // value is either 1, 2 or 3 is 150%.
2587  //
2588  // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2589  // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2590  // the loop exit edge. Then based solely on static estimation we would assume
2591  // the loop was extremely hot.
2592  //
2593  // FIXME this locally as well so that BPI and BFI are consistent as well. We
2594  // shouldn't make edges extremely likely or unlikely based solely on static
2595  // estimation.
2596  if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
2597  SmallVector<uint32_t, 4> Weights;
2598  for (auto Prob : BBSuccProbs)
2599  Weights.push_back(Prob.getNumerator());
2600 
2601  auto TI = BB->getTerminator();
2602  TI->setMetadata(
2603  LLVMContext::MD_prof,
2604  MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
2605  }
2606 }
2607 
2608 /// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2609 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2610 /// If we can duplicate the contents of BB up into PredBB do so now, this
2611 /// improves the odds that the branch will be on an analyzable instruction like
2612 /// a compare.
2614  BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2615  assert(!PredBBs.empty() && "Can't handle an empty set");
2616 
2617  // If BB is a loop header, then duplicating this block outside the loop would
2618  // cause us to transform this into an irreducible loop, don't do this.
2619  // See the comments above findLoopHeaders for justifications and caveats.
2620  if (LoopHeaders.count(BB)) {
2621  LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2622  << "' into predecessor block '" << PredBBs[0]->getName()
2623  << "' - it might create an irreducible loop!\n");
2624  return false;
2625  }
2626 
2627  unsigned DuplicationCost = getJumpThreadDuplicationCost(
2628  TTI, BB, BB->getTerminator(), BBDupThreshold);
2629  if (DuplicationCost > BBDupThreshold) {
2630  LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2631  << "' - Cost is too high: " << DuplicationCost << "\n");
2632  return false;
2633  }
2634 
2635  // And finally, do it! Start by factoring the predecessors if needed.
2636  std::vector<DominatorTree::UpdateType> Updates;
2637  BasicBlock *PredBB;
2638  if (PredBBs.size() == 1)
2639  PredBB = PredBBs[0];
2640  else {
2641  LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2642  << " common predecessors.\n");
2643  PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2644  }
2645  Updates.push_back({DominatorTree::Delete, PredBB, BB});
2646 
2647  // Okay, we decided to do this! Clone all the instructions in BB onto the end
2648  // of PredBB.
2649  LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2650  << "' into end of '" << PredBB->getName()
2651  << "' to eliminate branch on phi. Cost: "
2652  << DuplicationCost << " block is:" << *BB << "\n");
2653 
2654  // Unless PredBB ends with an unconditional branch, split the edge so that we
2655  // can just clone the bits from BB into the end of the new PredBB.
2656  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2657 
2658  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2659  BasicBlock *OldPredBB = PredBB;
2660  PredBB = SplitEdge(OldPredBB, BB);
2661  Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2662  Updates.push_back({DominatorTree::Insert, PredBB, BB});
2663  Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2664  OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2665  }
2666 
2667  // We are going to have to map operands from the original BB block into the
2668  // PredBB block. Evaluate PHI nodes in BB.
2669  DenseMap<Instruction*, Value*> ValueMapping;
2670 
2671  BasicBlock::iterator BI = BB->begin();
2672  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2673  ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2674  // Clone the non-phi instructions of BB into PredBB, keeping track of the
2675  // mapping and using it to remap operands in the cloned instructions.
2676  for (; BI != BB->end(); ++BI) {
2677  Instruction *New = BI->clone();
2678 
2679  // Remap operands to patch up intra-block references.
2680  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2681  if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2682  DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2683  if (I != ValueMapping.end())
2684  New->setOperand(i, I->second);
2685  }
2686 
2687  // If this instruction can be simplified after the operands are updated,
2688  // just use the simplified value instead. This frequently happens due to
2689  // phi translation.
2690  if (Value *IV = simplifyInstruction(
2691  New,
2692  {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2693  ValueMapping[&*BI] = IV;
2694  if (!New->mayHaveSideEffects()) {
2695  New->deleteValue();
2696  New = nullptr;
2697  }
2698  } else {
2699  ValueMapping[&*BI] = New;
2700  }
2701  if (New) {
2702  // Otherwise, insert the new instruction into the block.
2703  New->setName(BI->getName());
2704  PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
2705  // Update Dominance from simplified New instruction operands.
2706  for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2707  if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2708  Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2709  }
2710  }
2711 
2712  // Check to see if the targets of the branch had PHI nodes. If so, we need to
2713  // add entries to the PHI nodes for branch from PredBB now.
2714  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2715  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2716  ValueMapping);
2717  addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2718  ValueMapping);
2719 
2720  updateSSA(BB, PredBB, ValueMapping);
2721 
2722  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2723  // that we nuked.
2724  BB->removePredecessor(PredBB, true);
2725 
2726  // Remove the unconditional branch at the end of the PredBB block.
2727  OldPredBranch->eraseFromParent();
2728  if (HasProfileData)
2729  BPI->copyEdgeProbabilities(BB, PredBB);
2730  DTU->applyUpdatesPermissive(Updates);
2731 
2732  ++NumDupes;
2733  return true;
2734 }
2735 
2736 // Pred is a predecessor of BB with an unconditional branch to BB. SI is
2737 // a Select instruction in Pred. BB has other predecessors and SI is used in
2738 // a PHI node in BB. SI has no other use.
2739 // A new basic block, NewBB, is created and SI is converted to compare and
2740 // conditional branch. SI is erased from parent.
2742  SelectInst *SI, PHINode *SIUse,
2743  unsigned Idx) {
2744  // Expand the select.
2745  //
2746  // Pred --
2747  // | v
2748  // | NewBB
2749  // | |
2750  // |-----
2751  // v
2752  // BB
2753  BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2754  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2755  BB->getParent(), BB);
2756  // Move the unconditional branch to NewBB.
2757  PredTerm->removeFromParent();
2758  NewBB->getInstList().insert(NewBB->end(), PredTerm);
2759  // Create a conditional branch and update PHI nodes.
2760  auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2761  BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2762  SIUse->setIncomingValue(Idx, SI->getFalseValue());
2763  SIUse->addIncoming(SI->getTrueValue(), NewBB);
2764 
2765  // The select is now dead.
2766  SI->eraseFromParent();
2767  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2768  {DominatorTree::Insert, Pred, NewBB}});
2769 
2770  // Update any other PHI nodes in BB.
2771  for (BasicBlock::iterator BI = BB->begin();
2772  PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2773  if (Phi != SIUse)
2774  Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2775 }
2776 
2778  PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2779 
2780  if (!CondPHI || CondPHI->getParent() != BB)
2781  return false;
2782 
2783  for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2784  BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2785  SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2786 
2787  // The second and third condition can be potentially relaxed. Currently
2788  // the conditions help to simplify the code and allow us to reuse existing
2789  // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2790  if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2791  continue;
2792 
2793  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2794  if (!PredTerm || !PredTerm->isUnconditional())
2795  continue;
2796 
2797  unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2798  return true;
2799  }
2800  return false;
2801 }
2802 
2803 /// tryToUnfoldSelect - Look for blocks of the form
2804 /// bb1:
2805 /// %a = select
2806 /// br bb2
2807 ///
2808 /// bb2:
2809 /// %p = phi [%a, %bb1] ...
2810 /// %c = icmp %p
2811 /// br i1 %c
2812 ///
2813 /// And expand the select into a branch structure if one of its arms allows %c
2814 /// to be folded. This later enables threading from bb1 over bb2.
2816  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2817  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2818  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2819 
2820  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2821  CondLHS->getParent() != BB)
2822  return false;
2823 
2824  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2825  BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2826  SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2827 
2828  // Look if one of the incoming values is a select in the corresponding
2829  // predecessor.
2830  if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2831  continue;
2832 
2833  BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2834  if (!PredTerm || !PredTerm->isUnconditional())
2835  continue;
2836 
2837  // Now check if one of the select values would allow us to constant fold the
2838  // terminator in BB. We don't do the transform if both sides fold, those
2839  // cases will be threaded in any case.
2840  LazyValueInfo::Tristate LHSFolds =
2841  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2842  CondRHS, Pred, BB, CondCmp);
2843  LazyValueInfo::Tristate RHSFolds =
2844  LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2845  CondRHS, Pred, BB, CondCmp);
2846  if ((LHSFolds != LazyValueInfo::Unknown ||
2847  RHSFolds != LazyValueInfo::Unknown) &&
2848  LHSFolds != RHSFolds) {
2849  unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2850  return true;
2851  }
2852  }
2853  return false;
2854 }
2855 
2856 /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2857 /// same BB in the form
2858 /// bb:
2859 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2860 /// %s = select %p, trueval, falseval
2861 ///
2862 /// or
2863 ///
2864 /// bb:
2865 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2866 /// %c = cmp %p, 0
2867 /// %s = select %c, trueval, falseval
2868 ///
2869 /// And expand the select into a branch structure. This later enables
2870 /// jump-threading over bb in this pass.
2871 ///
2872 /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2873 /// select if the associated PHI has at least one constant. If the unfolded
2874 /// select is not jump-threaded, it will be folded again in the later
2875 /// optimizations.
2877  // This transform would reduce the quality of msan diagnostics.
2878  // Disable this transform under MemorySanitizer.
2879  if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2880  return false;
2881 
2882  // If threading this would thread across a loop header, don't thread the edge.
2883  // See the comments above findLoopHeaders for justifications and caveats.
2884  if (LoopHeaders.count(BB))
2885  return false;
2886 
2887  for (BasicBlock::iterator BI = BB->begin();
2888  PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2889  // Look for a Phi having at least one constant incoming value.
2890  if (llvm::all_of(PN->incoming_values(),
2891  [](Value *V) { return !isa<ConstantInt>(V); }))
2892  continue;
2893 
2894  auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2895  using namespace PatternMatch;
2896 
2897  // Check if SI is in BB and use V as condition.
2898  if (SI->getParent() != BB)
2899  return false;
2900  Value *Cond = SI->getCondition();
2901  bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2902  return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2903  };
2904 
2905  SelectInst *SI = nullptr;
2906  for (Use &U : PN->uses()) {
2907  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2908  // Look for a ICmp in BB that compares PN with a constant and is the
2909  // condition of a Select.
2910  if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2911  isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2912  if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2913  if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2914  SI = SelectI;
2915  break;
2916  }
2917  } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2918  // Look for a Select in BB that uses PN as condition.
2919  if (isUnfoldCandidate(SelectI, U.get())) {
2920  SI = SelectI;
2921  break;
2922  }
2923  }
2924  }
2925 
2926  if (!SI)
2927  continue;
2928  // Expand the select.
2929  Value *Cond = SI->getCondition();
2930  if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
2931  Cond = new FreezeInst(Cond, "cond.fr", SI);
2933  BasicBlock *SplitBB = SI->getParent();
2934  BasicBlock *NewBB = Term->getParent();
2935  PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2936  NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2937  NewPN->addIncoming(SI->getFalseValue(), BB);
2938  SI->replaceAllUsesWith(NewPN);
2939  SI->eraseFromParent();
2940  // NewBB and SplitBB are newly created blocks which require insertion.
2941  std::vector<DominatorTree::UpdateType> Updates;
2942  Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2943  Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2944  Updates.push_back({DominatorTree::Insert, BB, NewBB});
2945  Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2946  // BB's successors were moved to SplitBB, update DTU accordingly.
2947  for (auto *Succ : successors(SplitBB)) {
2948  Updates.push_back({DominatorTree::Delete, BB, Succ});
2949  Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2950  }
2951  DTU->applyUpdatesPermissive(Updates);
2952  return true;
2953  }
2954  return false;
2955 }
2956 
2957 /// Try to propagate a guard from the current BB into one of its predecessors
2958 /// in case if another branch of execution implies that the condition of this
2959 /// guard is always true. Currently we only process the simplest case that
2960 /// looks like:
2961 ///
2962 /// Start:
2963 /// %cond = ...
2964 /// br i1 %cond, label %T1, label %F1
2965 /// T1:
2966 /// br label %Merge
2967 /// F1:
2968 /// br label %Merge
2969 /// Merge:
2970 /// %condGuard = ...
2971 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2972 ///
2973 /// And cond either implies condGuard or !condGuard. In this case all the
2974 /// instructions before the guard can be duplicated in both branches, and the
2975 /// guard is then threaded to one of them.
2977  using namespace PatternMatch;
2978 
2979  // We only want to deal with two predecessors.
2980  BasicBlock *Pred1, *Pred2;
2981  auto PI = pred_begin(BB), PE = pred_end(BB);
2982  if (PI == PE)
2983  return false;
2984  Pred1 = *PI++;
2985  if (PI == PE)
2986  return false;
2987  Pred2 = *PI++;
2988  if (PI != PE)
2989  return false;
2990  if (Pred1 == Pred2)
2991  return false;
2992 
2993  // Try to thread one of the guards of the block.
2994  // TODO: Look up deeper than to immediate predecessor?
2995  auto *Parent = Pred1->getSinglePredecessor();
2996  if (!Parent || Parent != Pred2->getSinglePredecessor())
2997  return false;
2998 
2999  if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3000  for (auto &I : *BB)
3001  if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3002  return true;
3003 
3004  return false;
3005 }
3006 
3007 /// Try to propagate the guard from BB which is the lower block of a diamond
3008 /// to one of its branches, in case if diamond's condition implies guard's
3009 /// condition.
3011  BranchInst *BI) {
3012  assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3013  assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3014  Value *GuardCond = Guard->getArgOperand(0);
3015  Value *BranchCond = BI->getCondition();
3016  BasicBlock *TrueDest = BI->getSuccessor(0);
3017  BasicBlock *FalseDest = BI->getSuccessor(1);
3018 
3019  auto &DL = BB->getModule()->getDataLayout();
3020  bool TrueDestIsSafe = false;
3021  bool FalseDestIsSafe = false;
3022 
3023  // True dest is safe if BranchCond => GuardCond.
3024  auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3025  if (Impl && *Impl)
3026  TrueDestIsSafe = true;
3027  else {
3028  // False dest is safe if !BranchCond => GuardCond.
3029  Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3030  if (Impl && *Impl)
3031  FalseDestIsSafe = true;
3032  }
3033 
3034  if (!TrueDestIsSafe && !FalseDestIsSafe)
3035  return false;
3036 
3037  BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3038  BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3039 
3040  ValueToValueMapTy UnguardedMapping, GuardedMapping;
3041  Instruction *AfterGuard = Guard->getNextNode();
3042  unsigned Cost =
3043  getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3044  if (Cost > BBDupThreshold)
3045  return false;
3046  // Duplicate all instructions before the guard and the guard itself to the
3047  // branch where implication is not proved.
3049  BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3050  assert(GuardedBlock && "Could not create the guarded block?");
3051  // Duplicate all instructions before the guard in the unguarded branch.
3052  // Since we have successfully duplicated the guarded block and this block
3053  // has fewer instructions, we expect it to succeed.
3055  BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3056  assert(UnguardedBlock && "Could not create the unguarded block?");
3057  LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3058  << GuardedBlock->getName() << "\n");
3059  // Some instructions before the guard may still have uses. For them, we need
3060  // to create Phi nodes merging their copies in both guarded and unguarded
3061  // branches. Those instructions that have no uses can be just removed.
3063  for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3064  if (!isa<PHINode>(&*BI))
3065  ToRemove.push_back(&*BI);
3066 
3067  Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
3068  assert(InsertionPoint && "Empty block?");
3069  // Substitute with Phis & remove.
3070  for (auto *Inst : reverse(ToRemove)) {
3071  if (!Inst->use_empty()) {
3072  PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3073  NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3074  NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3075  NewPN->insertBefore(InsertionPoint);
3076  Inst->replaceAllUsesWith(NewPN);
3077  }
3078  Inst->eraseFromParent();
3079  }
3080  return true;
3081 }
i
i
Definition: README.txt:29
llvm::SSAUpdater::Initialize
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:52
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::array_pod_sort
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1642
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:876
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:308
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2592
llvm::less_second
Function object to check whether the second component of a std::pair compares less than the second co...
Definition: STLExtras.h:1484
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
Definition: Instruction.cpp:814
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
getBestDestForJumpOnUndef
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
Definition: JumpThreading.cpp:1017
llvm::SplitLandingPadPredecessors
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Definition: BasicBlockUtils.cpp:1374
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:225
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:69
llvm::ConstantExpr::getNot
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2624
Optional.h
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
intptr_t
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Metadata.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::JumpThreadingPass::unfoldSelectInstr
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
Definition: JumpThreading.cpp:2741
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::FindFunctionBackedges
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:34
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::ValueMap::end
iterator end()
Definition: ValueMap.h:136
Scalar.h
llvm::JumpThreadingPass::findLoopHeaders
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
Definition: JumpThreading.cpp:620
replaceFoldableUses
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
Definition: JumpThreading.cpp:491
Loads.h
T
llvm::Function
Definition: Function.h:60
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1922
llvm::BranchProbability::getNumerator
uint32_t getNumerator() const
Definition: BranchProbability.h:65
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::isImpliedCondition
Optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
Definition: ValueTracking.cpp:6831
PhiDuplicateThreshold
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
llvm::JumpThreadingPass::processBranchOnXOR
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
Definition: JumpThreading.cpp:1847
hasAddressTakenAndUsed
static bool hasAddressTakenAndUsed(BasicBlock *BB)
Definition: JumpThreading.cpp:1035
ImplicationSearchThreshold
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5440
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::LazyValueAnalysis
Analysis to compute lazy value information.
Definition: LazyValueInfo.h:134
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::pred_size
unsigned pred_size(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::jumpthreading::WantBlockAddress
@ WantBlockAddress
Definition: JumpThreading.h:58
llvm::replaceNonLocalUsesWith
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:2816
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:547
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:942
MapVector.h
DomTreeUpdater.h
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1997
ValueTracking.h
Local.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:87
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
getJumpThreadDuplicationCost
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
Definition: JumpThreading.cpp:521
isOpDefinedInBlock
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
Definition: JumpThreading.cpp:1312
llvm::jumpthreading::ConstantPreference
ConstantPreference
Definition: JumpThreading.h:58
llvm::JumpThreadingPass::maybethreadThroughTwoBasicBlocks
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
Definition: JumpThreading.cpp:2128
llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:652
llvm::DenseMapIterator
Definition: DenseMap.h:57
BlockFrequency.h
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
Module.h
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
llvm::JumpThreadingPass::evaluateOnPredecessorEdge
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond)
Definition: JumpThreading.cpp:1595
JumpThreading.h
llvm::Optional< bool >
llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition: BranchProbability.cpp:53
llvm::BranchInst::getNumSuccessors
unsigned getNumSuccessors() const
Definition: Instructions.h:3223
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:62
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::JumpThreadingPass::processImpliedCondition
bool processImpliedCondition(BasicBlock *BB)
Definition: JumpThreading.cpp:1246
LazyValueInfo.h
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:285
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:217
llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:2796
ConstantFolding.h
llvm::JumpThreadingPass::tryThreadEdge
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
Definition: JumpThreading.cpp:2334
llvm::Instruction::isExceptionalTerminator
bool isExceptionalTerminator() const
Definition: Instruction.h:179
Use.h
llvm::combineMetadataForCSE
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:2702
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1456
llvm::JumpThreadingPass::duplicateCondBranchOnPHIIntoPred
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
Definition: JumpThreading.cpp:2613
llvm::DomTreeUpdater::UpdateStrategy::Lazy
@ Lazy
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
getKnownConstant
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
Definition: JumpThreading.cpp:633
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
AliasAnalysis.h
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::JumpThreadingPass::threadEdge
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
Definition: JumpThreading.cpp:2373
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:169
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::MapVector::begin
iterator begin()
Definition: MapVector.h:70
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:710
BBDuplicateThreshold
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::LazyValueInfo::Unknown
@ Unknown
Definition: LazyValueInfo.h:61
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:524
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2793
llvm::AAResults
Definition: AliasAnalysis.h:294
InlinePriorityMode::Cost
@ Cost
llvm::initializeJumpThreadingPass
void initializeJumpThreadingPass(PassRegistry &)
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::ConstantFoldBinaryOpOperands
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
Definition: ConstantFolding.cpp:1339
llvm::User
Definition: User.h:44
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::Instruction::isAtomic
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:653
InstrTypes.h
llvm::JumpThreadingPass::releaseMemory
void releaseMemory()
Definition: JumpThreading.h:107
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:238
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:113
llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
llvm::jumpthreading::WantInteger
@ WantInteger
Definition: JumpThreading.h:58
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
DenseSet.h
false
Definition: StackSlotColoring.cpp:141
llvm::LazyValueInfo::True
@ True
Definition: LazyValueInfo.h:61
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2882
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:306
MDBuilder.h
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:364
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
Threading
jump Jump Threading
Definition: JumpThreading.cpp:173
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:642
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::LocationSize::precise
static LocationSize precise(uint64_t Value)
Definition: MemoryLocation.h:102
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
threading
jump threading
Definition: JumpThreading.cpp:172
SmallPtrSet.h
llvm::Instruction::getAAMetadata
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1500
llvm::SplitBlockPredecessors
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
Definition: BasicBlockUtils.cpp:1254
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:77
llvm::ConstantRange::add
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
Definition: ConstantRange.cpp:989
PatternMatch.h
llvm::RemoveRedundantDbgInstrs
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
Definition: BasicBlockUtils.cpp:520
llvm::JumpThreadingPass::tryToUnfoldSelect
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
Definition: JumpThreading.cpp:2815
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2789
llvm::JumpThreadingPass::tryToUnfoldSelectInCurrBB
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
Definition: JumpThreading.cpp:2876
llvm::LoadInst::getSyncScopeID
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:236
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
BranchProbability.h
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:276
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3213
jump
The object format emitted by the WebAssembly backed is documented see the home and packaging for producing WebAssembly applications that can run in browsers and other environments wasi sdk provides a more minimal C C SDK based on llvm and a libc based on for producing WebAssemmbly applictions that use the WASI ABI Rust provides WebAssembly support integrated into Cargo There are two main which provides a relatively minimal environment that has an emphasis on being native wasm32 unknown which uses Emscripten internally and provides standard C C filesystem GL and SDL bindings For more and br_table instructions can support having a value on the value stack across the jump(sometimes). We should(a) model this
llvm::findAvailablePtrLoadStore
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:558
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:710
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::DenseSet< Value * >
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::DuplicateInstructionsInSplitBetween
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
Definition: CloneFunction.cpp:1026
llvm::TryToSimplifyUncondBranchFromEmptyBlock
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1055
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2392
llvm::ConstantRange::inverse
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
Definition: ConstantRange.cpp:1623
llvm::BlockFrequency
Definition: BlockFrequency.h:23
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:826
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
BranchProbabilityInfo.h
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:475
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2648
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2561
llvm::LazyValueInfoWrapperPass
Wrapper around LazyValueInfo.
Definition: LazyValueInfo.h:145
llvm::JumpThreadingPass::simplifyPartiallyRedundantLoad
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
Definition: JumpThreading.cpp:1323
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2847
findMostPopularDest
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
Definition: JumpThreading.cpp:1562
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
ProfDataUtils.h
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3188
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AArch64PACKey::IB
@ IB
Definition: AArch64BaseInfo.h:820
Cloning.h
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1356
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::adaptNoAliasScopes
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
Definition: CloneFunction.cpp:1096
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6600
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:356
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::JumpThreadingPass
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:77
llvm::ConstantExpr::getCast
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1946
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1735
llvm::cloneNoAliasScopes
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
Definition: CloneFunction.cpp:1071
llvm::LazyValueInfo
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:31
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3210
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::JumpThreadingPass::processGuards
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
Definition: JumpThreading.cpp:2976
CFG.h
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:244
llvm::BlockAddress
The address of a basic block.
Definition: Constants.h:849
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:396
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:702
llvm::BinaryOperator
Definition: InstrTypes.h:189
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
llvm::BasicBlock::moveAfter
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:141
DataLayout.h
llvm::JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
Definition: JumpThreading.cpp:1982
llvm::JumpThreadingPass::threadGuard
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
Definition: JumpThreading.cpp:3010
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::identifyNoAliasScopesToClone
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
Definition: CloneFunction.cpp:1167
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:838
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:222
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::JumpThreadingPass::threadThroughTwoBasicBlocks
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
Definition: JumpThreading.cpp:2268
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:805
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
ConstantRange.h
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
updatePredecessorProfileMetadata
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
Definition: JumpThreading.cpp:219
SSAUpdater.h
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:432
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::LoadInst::getOrdering
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:226
llvm::PredIterator
Definition: CFG.h:42
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::ValueMap
See the file comment.
Definition: ValueMap.h:85
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:834
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::MapVector::end
iterator end()
Definition: MapVector.h:72
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:318
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
addPHINodeEntriesForMappedBlock
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap< Instruction *, Value * > &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
Definition: JumpThreading.cpp:1961
Constant.h
llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition: BasicBlockUtils.cpp:597
llvm::ConstantFoldTerminator
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:126
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:827
PrintLVIAfterJumpThreading
static cl::opt< bool > PrintLVIAfterJumpThreading("print-lvi-after-jump-threading", cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden)
llvm::JumpThreadingPass::processThreadableEdges
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
Definition: JumpThreading.cpp:1635
llvm::CastInst::CreateBitOrPointerCast
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
Definition: Instructions.cpp:3496
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:708
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2739
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:841
llvm::pred_end
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
llvm::isGuaranteedToTransferExecutionToSuccessor
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
Definition: ValueTracking.cpp:5471
Casting.h
llvm::Instruction::setAAMetadata
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1514
Function.h
llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:428
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::ValueMap::find
iterator find(const KeyT &Val)
Definition: ValueMap.h:156
llvm::JumpThreadingPass::runImpl
bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData, std::unique_ptr< BlockFrequencyInfo > BFI, std::unique_ptr< BranchProbabilityInfo > BPI)
Definition: JumpThreading.cpp:376
llvm::ConstantRange::contains
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
Definition: ConstantRange.cpp:449
runImpl
static bool runImpl(Function &F, const TargetLowering &TLI)
Definition: ExpandLargeDivRem.cpp:56
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_END(JumpThreading
llvm::IndirectBrInst
Indirect Branch Instruction.
Definition: Instructions.h:3674
llvm::BranchProbability::getCompl
BranchProbability getCompl() const
Definition: BranchProbability.h:69
llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
GuardUtils.h
llvm::MDBuilder
Definition: MDBuilder.h:36
llvm::JumpThreadingPass::updateSSA
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, DenseMap< Instruction *, Value * > &ValueMapping)
Update the SSA form.
Definition: JumpThreading.cpp:2033
llvm::pred_begin
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:271
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:373
Instructions.h
llvm::JumpThreadingPass::computeValueKnownInPredecessorsImpl
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, DenseSet< Value * > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
Definition: JumpThreading.cpp:653
llvm::BranchProbability::normalizeProbabilities
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Definition: BranchProbability.h:205
SmallVector.h
llvm::LazyValueInfo::Tristate
Tristate
This is used to return true/false/dunno results.
Definition: LazyValueInfo.h:60
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:359
llvm::Value::DoPHITranslation
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:986
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1342
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition: Instructions.h:5435
llvm::createJumpThreadingPass
FunctionPass * createJumpThreadingPass(int Threshold=-1)
Definition: JumpThreading.cpp:176
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:924
llvm::SSAUpdater::RewriteUse
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:186
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
InstructionSimplify.h
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:810
llvm::BlockAddress::get
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1751
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:142
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2813
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2697
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:342
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::ConstantRange::makeExactICmpRegion
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
Definition: ConstantRange.cpp:156
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:399
llvm::DeleteDeadBlock
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Definition: BasicBlockUtils.cpp:95
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
ThreadAcrossLoopHeaders
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::ConstantInt::isOne
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
llvm::extractBranchWeights
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
Definition: ProfDataUtils.cpp:104
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::LoadInst::isUnordered
bool isUnordered() const
Definition: Instructions.h:255
PB
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
llvm::MergeBasicBlockIntoOnlyPred
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:750
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3276
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1524
llvm::Sched::Preference
Preference
Definition: TargetLowering.h:97
llvm::SSAUpdater::AddAvailableValue
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:69
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::MDString::getString
StringRef getString() const
Definition: Metadata.cpp:508
llvm::simplifyCmpInst
Value * simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
Definition: InstructionSimplify.cpp:5724
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
llvm::ConstantFoldInstruction
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Definition: ConstantFolding.cpp:1159
raw_ostream.h
llvm::SSAUpdater
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
llvm::JumpThreadingPass::cloneInstructions
DenseMap< Instruction *, Value * > cloneInstructions(BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
Definition: JumpThreading.cpp:2079
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:667
BasicBlockUtils.h
llvm::JumpThreadingPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: JumpThreading.cpp:340
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:62
Value.h
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
InitializePasses.h
llvm::BasicBlock::isEHPad
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:501
llvm::JumpThreadingPass::processBlock
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
Definition: JumpThreading.cpp:1047
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4731
llvm::MCID::Terminator
@ Terminator
Definition: MCInstrDesc.h:157
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:450
llvm::JumpThreadingPass::processBranchOnPHI
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
Definition: JumpThreading.cpp:1815
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3211
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2543
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3225
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:211
llvm::JumpThreadingPass::JumpThreadingPass
JumpThreadingPass(int T=-1)
Definition: JumpThreading.cpp:180
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:88
llvm::DefMaxInstsToScan
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1732