// LLVM 18.0.0git
// JumpThreading.cpp
// (doxygen page banner retained as a comment so the file parses as C++)
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>
78
79using namespace llvm;
80using namespace jumpthreading;
81
82#define DEBUG_TYPE "jump-threading"
83
84STATISTIC(NumThreads, "Number of jumps threaded");
85STATISTIC(NumFolds, "Number of terminators folded");
86STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
87
89BBDuplicateThreshold("jump-threading-threshold",
90 cl::desc("Max block size to duplicate for jump threading"),
92
95 "jump-threading-implication-search-threshold",
96 cl::desc("The number of predecessors to search for a stronger "
97 "condition to use to thread over a weaker condition"),
99
101 "jump-threading-phi-threshold",
102 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
103 cl::Hidden);
104
106 "print-lvi-after-jump-threading",
107 cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
108 cl::Hidden);
109
111 "jump-threading-across-loop-headers",
112 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
113 cl::init(false), cl::Hidden);
114
116 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
117}
118
119// Update branch probability information according to conditional
120// branch probability. This is usually made possible for cloned branches
121// in inline instances by the context specific profile in the caller.
122// For instance,
123//
124// [Block PredBB]
125// [Branch PredBr]
126// if (t) {
127// Block A;
128// } else {
129// Block B;
130// }
131//
132// [Block BB]
133// cond = PN([true, %A], [..., %B]); // PHI node
134// [Branch CondBr]
135// if (cond) {
136// ... // P(cond == true) = 1%
137// }
138//
139// Here we know that when block A is taken, cond must be true, which means
140// P(cond == true | A) = 1
141//
142// Given that P(cond == true) = P(cond == true | A) * P(A) +
143// P(cond == true | B) * P(B)
144// we get:
145// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
146//
147// which gives us:
148// P(A) is less than P(cond == true), i.e.
149// P(t == true) <= P(cond == true)
150//
151// In other words, if we know P(cond == true) is unlikely, we know
152// that P(t == true) is also unlikely.
153//
155 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
156 if (!CondBr)
157 return;
158
159 uint64_t TrueWeight, FalseWeight;
160 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
161 return;
162
163 if (TrueWeight + FalseWeight == 0)
164 // Zero branch_weights do not give a hint for getting branch probabilities.
165 // Technically it would result in division by zero denominator, which is
166 // TrueWeight + FalseWeight.
167 return;
168
169 // Returns the outgoing edge of the dominating predecessor block
170 // that leads to the PhiNode's incoming block:
171 auto GetPredOutEdge =
172 [](BasicBlock *IncomingBB,
173 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
174 auto *PredBB = IncomingBB;
175 auto *SuccBB = PhiBB;
177 while (true) {
178 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
179 if (PredBr && PredBr->isConditional())
180 return {PredBB, SuccBB};
181 Visited.insert(PredBB);
182 auto *SinglePredBB = PredBB->getSinglePredecessor();
183 if (!SinglePredBB)
184 return {nullptr, nullptr};
185
186 // Stop searching when SinglePredBB has been visited. It means we see
187 // an unreachable loop.
188 if (Visited.count(SinglePredBB))
189 return {nullptr, nullptr};
190
191 SuccBB = PredBB;
192 PredBB = SinglePredBB;
193 }
194 };
195
196 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
197 Value *PhiOpnd = PN->getIncomingValue(i);
198 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
199
200 if (!CI || !CI->getType()->isIntegerTy(1))
201 continue;
202
205 TrueWeight, TrueWeight + FalseWeight)
207 FalseWeight, TrueWeight + FalseWeight));
208
209 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
210 if (!PredOutEdge.first)
211 return;
212
213 BasicBlock *PredBB = PredOutEdge.first;
214 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
215 if (!PredBr)
216 return;
217
218 uint64_t PredTrueWeight, PredFalseWeight;
219 // FIXME: We currently only set the profile data when it is missing.
220 // With PGO, this can be used to refine even existing profile data with
221 // context information. This needs to be done after more performance
222 // testing.
223 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
224 continue;
225
226 // We can not infer anything useful when BP >= 50%, because BP is the
227 // upper bound probability value.
228 if (BP >= BranchProbability(50, 100))
229 continue;
230
232 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
233 Weights.push_back(BP.getNumerator());
234 Weights.push_back(BP.getCompl().getNumerator());
235 } else {
236 Weights.push_back(BP.getCompl().getNumerator());
237 Weights.push_back(BP.getNumerator());
238 }
239 PredBr->setMetadata(LLVMContext::MD_prof,
240 MDBuilder(PredBr->getParent()->getContext())
241 .createBranchWeights(Weights));
242 }
243}
244
247 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
248 // Jump Threading has no sense for the targets with divergent CF
250 return PreservedAnalyses::all();
251 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
252 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
253 auto &AA = AM.getResult<AAManager>(F);
254 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
255
256 bool Changed =
257 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
258 std::make_unique<DomTreeUpdater>(
260 std::nullopt, std::nullopt);
261
263 dbgs() << "LVI for function '" << F.getName() << "':\n";
264 LVI.printLVI(F, getDomTreeUpdater()->getDomTree(), dbgs());
265 }
266
267 if (!Changed)
268 return PreservedAnalyses::all();
269
270
272
273#if defined(EXPENSIVE_CHECKS)
274 assert(getDomTreeUpdater()->getDomTree().verify(
275 DominatorTree::VerificationLevel::Full) &&
276 "DT broken after JumpThreading");
277 assert((!getDomTreeUpdater()->hasPostDomTree() ||
278 getDomTreeUpdater()->getPostDomTree().verify(
280 "PDT broken after JumpThreading");
281#else
282 assert(getDomTreeUpdater()->getDomTree().verify(
283 DominatorTree::VerificationLevel::Fast) &&
284 "DT broken after JumpThreading");
285 assert((!getDomTreeUpdater()->hasPostDomTree() ||
286 getDomTreeUpdater()->getPostDomTree().verify(
288 "PDT broken after JumpThreading");
289#endif
290
291 return getPreservedAnalysis();
292}
293
295 TargetLibraryInfo *TLI_,
297 AliasAnalysis *AA_,
298 std::unique_ptr<DomTreeUpdater> DTU_,
299 std::optional<BlockFrequencyInfo *> BFI_,
300 std::optional<BranchProbabilityInfo *> BPI_) {
301 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
302 F = &F_;
303 FAM = FAM_;
304 TLI = TLI_;
305 TTI = TTI_;
306 LVI = LVI_;
307 AA = AA_;
308 DTU = std::move(DTU_);
309 BFI = BFI_;
310 BPI = BPI_;
311 auto *GuardDecl = F->getParent()->getFunction(
312 Intrinsic::getName(Intrinsic::experimental_guard));
313 HasGuards = GuardDecl && !GuardDecl->use_empty();
314
315 // Reduce the number of instructions duplicated when optimizing strictly for
316 // size.
317 if (BBDuplicateThreshold.getNumOccurrences())
318 BBDupThreshold = BBDuplicateThreshold;
319 else if (F->hasFnAttribute(Attribute::MinSize))
320 BBDupThreshold = 3;
321 else
322 BBDupThreshold = DefaultBBDupThreshold;
323
324 // JumpThreading must not processes blocks unreachable from entry. It's a
325 // waste of compute time and can potentially lead to hangs.
327 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
328 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
329 DominatorTree &DT = DTU->getDomTree();
330 for (auto &BB : *F)
331 if (!DT.isReachableFromEntry(&BB))
332 Unreachable.insert(&BB);
333
336
337 bool EverChanged = false;
338 bool Changed;
339 do {
340 Changed = false;
341 for (auto &BB : *F) {
342 if (Unreachable.count(&BB))
343 continue;
344 while (processBlock(&BB)) // Thread all of the branches we can over BB.
345 Changed = ChangedSinceLastAnalysisUpdate = true;
346
347 // Jump threading may have introduced redundant debug values into BB
348 // which should be removed.
349 if (Changed)
351
352 // Stop processing BB if it's the entry or is now deleted. The following
353 // routines attempt to eliminate BB and locating a suitable replacement
354 // for the entry is non-trivial.
355 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
356 continue;
357
358 if (pred_empty(&BB)) {
359 // When processBlock makes BB unreachable it doesn't bother to fix up
360 // the instructions in it. We must remove BB to prevent invalid IR.
361 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
362 << "' with terminator: " << *BB.getTerminator()
363 << '\n');
364 LoopHeaders.erase(&BB);
365 LVI->eraseBlock(&BB);
366 DeleteDeadBlock(&BB, DTU.get());
367 Changed = ChangedSinceLastAnalysisUpdate = true;
368 continue;
369 }
370
371 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
372 // is "almost empty", we attempt to merge BB with its sole successor.
373 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
374 if (BI && BI->isUnconditional()) {
375 BasicBlock *Succ = BI->getSuccessor(0);
376 if (
377 // The terminator must be the only non-phi instruction in BB.
378 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
379 // Don't alter Loop headers and latches to ensure another pass can
380 // detect and transform nested loops later.
381 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
384 // BB is valid for cleanup here because we passed in DTU. F remains
385 // BB's parent until a DTU->getDomTree() event.
386 LVI->eraseBlock(&BB);
387 Changed = ChangedSinceLastAnalysisUpdate = true;
388 }
389 }
390 }
391 EverChanged |= Changed;
392 } while (Changed);
393
394 LoopHeaders.clear();
395 return EverChanged;
396}
397
398// Replace uses of Cond with ToVal when safe to do so. If all uses are
399// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
400// because we may incorrectly replace uses when guards/assumes are uses of
401// of `Cond` and we used the guards/assume to reason about the `Cond` value
402// at the end of block. RAUW unconditionally replaces all uses
403// including the guards/assumes themselves and the uses before the
404// guard/assume.
406 BasicBlock *KnownAtEndOfBB) {
407 bool Changed = false;
408 assert(Cond->getType() == ToVal->getType());
409 // We can unconditionally replace all uses in non-local blocks (i.e. uses
410 // strictly dominated by BB), since LVI information is true from the
411 // terminator of BB.
412 if (Cond->getParent() == KnownAtEndOfBB)
413 Changed |= replaceNonLocalUsesWith(Cond, ToVal);
414 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
415 // Reached the Cond whose uses we are trying to replace, so there are no
416 // more uses.
417 if (&I == Cond)
418 break;
419 // We only replace uses in instructions that are guaranteed to reach the end
420 // of BB, where we know Cond is ToVal.
422 break;
423 Changed |= I.replaceUsesOfWith(Cond, ToVal);
424 }
425 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
426 Cond->eraseFromParent();
427 Changed = true;
428 }
429 return Changed;
430}
431
432/// Return the cost of duplicating a piece of this block from first non-phi
433/// and before StopAt instruction to thread across it. Stop scanning the block
434/// when exceeding the threshold. If duplication is impossible, returns ~0U.
436 BasicBlock *BB,
437 Instruction *StopAt,
438 unsigned Threshold) {
439 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
440
441 // Do not duplicate the BB if it has a lot of PHI nodes.
442 // If a threadable chain is too long then the number of PHI nodes can add up,
443 // leading to a substantial increase in compile time when rewriting the SSA.
444 unsigned PhiCount = 0;
445 Instruction *FirstNonPHI = nullptr;
446 for (Instruction &I : *BB) {
447 if (!isa<PHINode>(&I)) {
448 FirstNonPHI = &I;
449 break;
450 }
451 if (++PhiCount > PhiDuplicateThreshold)
452 return ~0U;
453 }
454
455 /// Ignore PHI nodes, these will be flattened when duplication happens.
456 BasicBlock::const_iterator I(FirstNonPHI);
457
458 // FIXME: THREADING will delete values that are just used to compute the
459 // branch, so they shouldn't count against the duplication cost.
460
461 unsigned Bonus = 0;
462 if (BB->getTerminator() == StopAt) {
463 // Threading through a switch statement is particularly profitable. If this
464 // block ends in a switch, decrease its cost to make it more likely to
465 // happen.
466 if (isa<SwitchInst>(StopAt))
467 Bonus = 6;
468
469 // The same holds for indirect branches, but slightly more so.
470 if (isa<IndirectBrInst>(StopAt))
471 Bonus = 8;
472 }
473
474 // Bump the threshold up so the early exit from the loop doesn't skip the
475 // terminator-based Size adjustment at the end.
476 Threshold += Bonus;
477
478 // Sum up the cost of each instruction until we get to the terminator. Don't
479 // include the terminator because the copy won't include it.
480 unsigned Size = 0;
481 for (; &*I != StopAt; ++I) {
482
483 // Stop scanning the block if we've reached the threshold.
484 if (Size > Threshold)
485 return Size;
486
487 // Bail out if this instruction gives back a token type, it is not possible
488 // to duplicate it if it is used outside this BB.
489 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
490 return ~0U;
491
492 // Blocks with NoDuplicate are modelled as having infinite cost, so they
493 // are never duplicated.
494 if (const CallInst *CI = dyn_cast<CallInst>(I))
495 if (CI->cannotDuplicate() || CI->isConvergent())
496 return ~0U;
497
500 continue;
501
502 // All other instructions count for at least one unit.
503 ++Size;
504
505 // Calls are more expensive. If they are non-intrinsic calls, we model them
506 // as having cost of 4. If they are a non-vector intrinsic, we model them
507 // as having cost of 2 total, and if they are a vector intrinsic, we model
508 // them as having cost 1.
509 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
510 if (!isa<IntrinsicInst>(CI))
511 Size += 3;
512 else if (!CI->getType()->isVectorTy())
513 Size += 1;
514 }
515 }
516
517 return Size > Bonus ? Size - Bonus : 0;
518}
519
520/// findLoopHeaders - We do not want jump threading to turn proper loop
521/// structures into irreducible loops. Doing this breaks up the loop nesting
522/// hierarchy and pessimizes later transformations. To prevent this from
523/// happening, we first have to find the loop headers. Here we approximate this
524/// by finding targets of backedges in the CFG.
525///
526/// Note that there definitely are cases when we want to allow threading of
527/// edges across a loop header. For example, threading a jump from outside the
528/// loop (the preheader) to an exit block of the loop is definitely profitable.
529/// It is also almost always profitable to thread backedges from within the loop
530/// to exit blocks, and is often profitable to thread backedges to other blocks
531/// within the loop (forming a nested loop). This simple analysis is not rich
532/// enough to track all of these properties and keep it up-to-date as the CFG
533/// mutates, so we don't allow any of these transformations.
536 FindFunctionBackedges(F, Edges);
537
538 for (const auto &Edge : Edges)
539 LoopHeaders.insert(Edge.second);
540}
541
542/// getKnownConstant - Helper method to determine if we can thread over a
543/// terminator with the given value as its condition, and if so what value to
544/// use for that. What kind of value this is depends on whether we want an
545/// integer or a block address, but an undef is always accepted.
546/// Returns null if Val is null or not an appropriate constant.
548 if (!Val)
549 return nullptr;
550
551 // Undef is "known" enough.
552 if (UndefValue *U = dyn_cast<UndefValue>(Val))
553 return U;
554
555 if (Preference == WantBlockAddress)
556 return dyn_cast<BlockAddress>(Val->stripPointerCasts());
557
558 return dyn_cast<ConstantInt>(Val);
559}
560
561/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
562/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
563/// in any of our predecessors. If so, return the known list of value and pred
564/// BB in the result vector.
565///
566/// This returns true if there were any known values.
568 Value *V, BasicBlock *BB, PredValueInfo &Result,
569 ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
570 Instruction *CxtI) {
571 // This method walks up use-def chains recursively. Because of this, we could
572 // get into an infinite loop going around loops in the use-def chain. To
573 // prevent this, keep track of what (value, block) pairs we've already visited
574 // and terminate the search if we loop back to them
575 if (!RecursionSet.insert(V).second)
576 return false;
577
578 // If V is a constant, then it is known in all predecessors.
579 if (Constant *KC = getKnownConstant(V, Preference)) {
580 for (BasicBlock *Pred : predecessors(BB))
581 Result.emplace_back(KC, Pred);
582
583 return !Result.empty();
584 }
585
586 // If V is a non-instruction value, or an instruction in a different block,
587 // then it can't be derived from a PHI.
588 Instruction *I = dyn_cast<Instruction>(V);
589 if (!I || I->getParent() != BB) {
590
591 // Okay, if this is a live-in value, see if it has a known value at the any
592 // edge from our predecessors.
593 for (BasicBlock *P : predecessors(BB)) {
594 using namespace PatternMatch;
595 // If the value is known by LazyValueInfo to be a constant in a
596 // predecessor, use that information to try to thread this block.
597 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
598 // If I is a non-local compare-with-constant instruction, use more-rich
599 // 'getPredicateOnEdge' method. This would be able to handle value
600 // inequalities better, for example if the compare is "X < 4" and "X < 3"
601 // is known true but "X < 4" itself is not available.
603 Value *Val;
604 Constant *Cst;
605 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
606 auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
607 if (Res != LazyValueInfo::Unknown)
608 PredCst = ConstantInt::getBool(V->getContext(), Res);
609 }
610 if (Constant *KC = getKnownConstant(PredCst, Preference))
611 Result.emplace_back(KC, P);
612 }
613
614 return !Result.empty();
615 }
616
617 /// If I is a PHI node, then we know the incoming values for any constants.
618 if (PHINode *PN = dyn_cast<PHINode>(I)) {
619 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
620 Value *InVal = PN->getIncomingValue(i);
621 if (Constant *KC = getKnownConstant(InVal, Preference)) {
622 Result.emplace_back(KC, PN->getIncomingBlock(i));
623 } else {
624 Constant *CI = LVI->getConstantOnEdge(InVal,
625 PN->getIncomingBlock(i),
626 BB, CxtI);
627 if (Constant *KC = getKnownConstant(CI, Preference))
628 Result.emplace_back(KC, PN->getIncomingBlock(i));
629 }
630 }
631
632 return !Result.empty();
633 }
634
635 // Handle Cast instructions.
636 if (CastInst *CI = dyn_cast<CastInst>(I)) {
637 Value *Source = CI->getOperand(0);
638 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
639 RecursionSet, CxtI);
640 if (Result.empty())
641 return false;
642
643 // Convert the known values.
644 for (auto &R : Result)
645 R.first = ConstantExpr::getCast(CI->getOpcode(), R.first, CI->getType());
646
647 return true;
648 }
649
650 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
651 Value *Source = FI->getOperand(0);
652 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
653 RecursionSet, CxtI);
654
655 erase_if(Result, [](auto &Pair) {
656 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
657 });
658
659 return !Result.empty();
660 }
661
662 // Handle some boolean conditions.
663 if (I->getType()->getPrimitiveSizeInBits() == 1) {
664 using namespace PatternMatch;
665 if (Preference != WantInteger)
666 return false;
667 // X | true -> true
668 // X & false -> false
669 Value *Op0, *Op1;
670 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
671 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
672 PredValueInfoTy LHSVals, RHSVals;
673
675 RecursionSet, CxtI);
677 RecursionSet, CxtI);
678
679 if (LHSVals.empty() && RHSVals.empty())
680 return false;
681
682 ConstantInt *InterestingVal;
683 if (match(I, m_LogicalOr()))
684 InterestingVal = ConstantInt::getTrue(I->getContext());
685 else
686 InterestingVal = ConstantInt::getFalse(I->getContext());
687
688 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
689
690 // Scan for the sentinel. If we find an undef, force it to the
691 // interesting value: x|undef -> true and x&undef -> false.
692 for (const auto &LHSVal : LHSVals)
693 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
694 Result.emplace_back(InterestingVal, LHSVal.second);
695 LHSKnownBBs.insert(LHSVal.second);
696 }
697 for (const auto &RHSVal : RHSVals)
698 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
699 // If we already inferred a value for this block on the LHS, don't
700 // re-add it.
701 if (!LHSKnownBBs.count(RHSVal.second))
702 Result.emplace_back(InterestingVal, RHSVal.second);
703 }
704
705 return !Result.empty();
706 }
707
708 // Handle the NOT form of XOR.
709 if (I->getOpcode() == Instruction::Xor &&
710 isa<ConstantInt>(I->getOperand(1)) &&
711 cast<ConstantInt>(I->getOperand(1))->isOne()) {
712 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
713 WantInteger, RecursionSet, CxtI);
714 if (Result.empty())
715 return false;
716
717 // Invert the known values.
718 for (auto &R : Result)
719 R.first = ConstantExpr::getNot(R.first);
720
721 return true;
722 }
723
724 // Try to simplify some other binary operator values.
725 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
726 if (Preference != WantInteger)
727 return false;
728 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
729 const DataLayout &DL = BO->getModule()->getDataLayout();
730 PredValueInfoTy LHSVals;
731 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
732 WantInteger, RecursionSet, CxtI);
733
734 // Try to use constant folding to simplify the binary operator.
735 for (const auto &LHSVal : LHSVals) {
736 Constant *V = LHSVal.first;
737 Constant *Folded =
738 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
739
740 if (Constant *KC = getKnownConstant(Folded, WantInteger))
741 Result.emplace_back(KC, LHSVal.second);
742 }
743 }
744
745 return !Result.empty();
746 }
747
748 // Handle compare with phi operand, where the PHI is defined in this block.
749 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
750 if (Preference != WantInteger)
751 return false;
752 Type *CmpType = Cmp->getType();
753 Value *CmpLHS = Cmp->getOperand(0);
754 Value *CmpRHS = Cmp->getOperand(1);
755 CmpInst::Predicate Pred = Cmp->getPredicate();
756
757 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
758 if (!PN)
759 PN = dyn_cast<PHINode>(CmpRHS);
760 if (PN && PN->getParent() == BB) {
761 const DataLayout &DL = PN->getModule()->getDataLayout();
762 // We can do this simplification if any comparisons fold to true or false.
763 // See if any do.
764 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
765 BasicBlock *PredBB = PN->getIncomingBlock(i);
766 Value *LHS, *RHS;
767 if (PN == CmpLHS) {
768 LHS = PN->getIncomingValue(i);
769 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
770 } else {
771 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
772 RHS = PN->getIncomingValue(i);
773 }
774 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
775 if (!Res) {
776 if (!isa<Constant>(RHS))
777 continue;
778
779 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
780 auto LHSInst = dyn_cast<Instruction>(LHS);
781 if (LHSInst && LHSInst->getParent() == BB)
782 continue;
783
785 ResT = LVI->getPredicateOnEdge(Pred, LHS,
786 cast<Constant>(RHS), PredBB, BB,
787 CxtI ? CxtI : Cmp);
788 if (ResT == LazyValueInfo::Unknown)
789 continue;
791 }
792
793 if (Constant *KC = getKnownConstant(Res, WantInteger))
794 Result.emplace_back(KC, PredBB);
795 }
796
797 return !Result.empty();
798 }
799
800 // If comparing a live-in value against a constant, see if we know the
801 // live-in value on any predecessors.
802 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
803 Constant *CmpConst = cast<Constant>(CmpRHS);
804
805 if (!isa<Instruction>(CmpLHS) ||
806 cast<Instruction>(CmpLHS)->getParent() != BB) {
807 for (BasicBlock *P : predecessors(BB)) {
808 // If the value is known by LazyValueInfo to be a constant in a
809 // predecessor, use that information to try to thread this block.
811 LVI->getPredicateOnEdge(Pred, CmpLHS,
812 CmpConst, P, BB, CxtI ? CxtI : Cmp);
813 if (Res == LazyValueInfo::Unknown)
814 continue;
815
816 Constant *ResC = ConstantInt::get(CmpType, Res);
817 Result.emplace_back(ResC, P);
818 }
819
820 return !Result.empty();
821 }
822
823 // InstCombine can fold some forms of constant range checks into
824 // (icmp (add (x, C1)), C2). See if we have we have such a thing with
825 // x as a live-in.
826 {
827 using namespace PatternMatch;
828
829 Value *AddLHS;
830 ConstantInt *AddConst;
831 if (isa<ConstantInt>(CmpConst) &&
832 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
833 if (!isa<Instruction>(AddLHS) ||
834 cast<Instruction>(AddLHS)->getParent() != BB) {
835 for (BasicBlock *P : predecessors(BB)) {
836 // If the value is known by LazyValueInfo to be a ConstantRange in
837 // a predecessor, use that information to try to thread this
838 // block.
840 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
841 // Propagate the range through the addition.
842 CR = CR.add(AddConst->getValue());
843
844 // Get the range where the compare returns true.
846 Pred, cast<ConstantInt>(CmpConst)->getValue());
847
848 Constant *ResC;
849 if (CmpRange.contains(CR))
850 ResC = ConstantInt::getTrue(CmpType);
851 else if (CmpRange.inverse().contains(CR))
852 ResC = ConstantInt::getFalse(CmpType);
853 else
854 continue;
855
856 Result.emplace_back(ResC, P);
857 }
858
859 return !Result.empty();
860 }
861 }
862 }
863
864 // Try to find a constant value for the LHS of a comparison,
865 // and evaluate it statically if we can.
866 PredValueInfoTy LHSVals;
867 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
868 WantInteger, RecursionSet, CxtI);
869
870 for (const auto &LHSVal : LHSVals) {
871 Constant *V = LHSVal.first;
872 Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
873 if (Constant *KC = getKnownConstant(Folded, WantInteger))
874 Result.emplace_back(KC, LHSVal.second);
875 }
876
877 return !Result.empty();
878 }
879 }
880
881 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
882 // Handle select instructions where at least one operand is a known constant
883 // and we can figure out the condition value for any predecessor block.
884 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
885 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
886 PredValueInfoTy Conds;
887 if ((TrueVal || FalseVal) &&
888 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
889 WantInteger, RecursionSet, CxtI)) {
890 for (auto &C : Conds) {
891 Constant *Cond = C.first;
892
893 // Figure out what value to use for the condition.
894 bool KnownCond;
895 if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
896 // A known boolean.
897 KnownCond = CI->isOne();
898 } else {
899 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
900 // Either operand will do, so be sure to pick the one that's a known
901 // constant.
902 // FIXME: Do this more cleverly if both values are known constants?
903 KnownCond = (TrueVal != nullptr);
904 }
905
906 // See if the select has a known constant value for this predecessor.
907 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
908 Result.emplace_back(Val, C.second);
909 }
910
911 return !Result.empty();
912 }
913 }
914
915 // If all else fails, see if LVI can figure out a constant value for us.
916 assert(CxtI->getParent() == BB && "CxtI should be in BB");
917 Constant *CI = LVI->getConstant(V, CxtI);
918 if (Constant *KC = getKnownConstant(CI, Preference)) {
919 for (BasicBlock *Pred : predecessors(BB))
920 Result.emplace_back(KC, Pred);
921 }
922
923 return !Result.empty();
924}
925
926/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
927/// in an undefined jump, decide which block is best to revector to.
928///
929/// Since we can pick an arbitrary destination, we pick the successor with the
930/// fewest predecessors. This should reduce the in-degree of the others.
932 Instruction *BBTerm = BB->getTerminator();
933 unsigned MinSucc = 0;
934 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
935 // Compute the successor with the minimum number of predecessors.
936 unsigned MinNumPreds = pred_size(TestBB);
937 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
938 TestBB = BBTerm->getSuccessor(i);
939 unsigned NumPreds = pred_size(TestBB);
940 if (NumPreds < MinNumPreds) {
941 MinSucc = i;
942 MinNumPreds = NumPreds;
943 }
944 }
945
946 return MinSucc;
947}
948
950 if (!BB->hasAddressTaken()) return false;
951
952 // If the block has its address taken, it may be a tree of dead constants
953 // hanging off of it. These shouldn't keep the block alive.
956 return !BA->use_empty();
957}
958
959/// processBlock - If there are any predecessors whose control can be threaded
960/// through to a successor, transform them now.
// NOTE(review): the defining signature (original line 961) is absent from this
// extraction; the body reads member state (DTU, LVI, TLI, HasGuards, ...) and
// returns true iff it changed the IR.
962  // If the block is trivially dead, just return and let the caller nuke it.
963  // This simplifies other transformations.
964  if (DTU->isBBPendingDeletion(BB) ||
965      (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
966    return false;
967
968  // If this block has a single predecessor, and if that pred has a single
969  // successor, merge the blocks.  This encourages recursive jump threading
970  // because now the condition in this block can be threaded through
971  // predecessors of our predecessor block.
  // NOTE(review): the condition line (original line 972, presumably the
  // merge-with-only-pred helper call) is absent from this extraction.
973    return true;
974
  // NOTE(review): another early-out condition (original line 975) is absent
  // from this extraction.
976    return true;
977
978  // Look if we can propagate guards to predecessors.
979  if (HasGuards && processGuards(BB))
980    return true;
981
982  // What kind of constant we're looking for.
983  ConstantPreference Preference = WantInteger;
984
985  // Look to see if the terminator is a conditional branch, switch or indirect
986  // branch, if not we can't thread it.
987  Value *Condition;
988  Instruction *Terminator = BB->getTerminator();
989  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
990    // Can't thread an unconditional jump.
991    if (BI->isUnconditional()) return false;
992    Condition = BI->getCondition();
993  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
994    Condition = SI->getCondition();
995  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
996    // Can't thread indirect branch with no successors.
997    if (IB->getNumSuccessors() == 0) return false;
998    Condition = IB->getAddress()->stripPointerCasts();
    // Indirect branches are threaded on blockaddress constants rather than
    // integers.
999    Preference = WantBlockAddress;
1000  } else {
1001    return false; // Must be an invoke or callbr.
1002  }
1003
1004  // Keep track if we constant folded the condition in this invocation.
1005  bool ConstantFolded = false;
1006
1007  // Run constant folding to see if we can reduce the condition to a simple
1008  // constant.
1009  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1010    Value *SimpleVal =
    // NOTE(review): the initializer (original line 1011, presumably a
    // constant-folding call on I) is absent from this extraction.
1012    if (SimpleVal) {
1013      I->replaceAllUsesWith(SimpleVal);
1014      if (isInstructionTriviallyDead(I, TLI))
1015        I->eraseFromParent();
1016      Condition = SimpleVal;
1017      ConstantFolded = true;
1018    }
1019  }
1020
1021  // If the terminator is branching on an undef or freeze undef, we can pick any
1022  // of the successors to branch to.  Let getBestDestForJumpOnUndef decide.
1023  auto *FI = dyn_cast<FreezeInst>(Condition);
1024  if (isa<UndefValue>(Condition) ||
1025      (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1026    unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1027    std::vector<DominatorTree::UpdateType> Updates;
1028
1029    // Fold the branch/switch.
1030    Instruction *BBTerm = BB->getTerminator();
1031    Updates.reserve(BBTerm->getNumSuccessors());
1032    for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1033      if (i == BestSucc) continue;
1034      BasicBlock *Succ = BBTerm->getSuccessor(i);
      // Drop PHI entries for the edges we are about to delete.
1035      Succ->removePredecessor(BB, true);
1036      Updates.push_back({DominatorTree::Delete, BB, Succ});
1037    }
1038
1039    LLVM_DEBUG(dbgs() << "  In block '" << BB->getName()
1040                      << "' folding undef terminator: " << *BBTerm << '\n');
1041    BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1042    ++NumFolds;
1043    BBTerm->eraseFromParent();
1044    DTU->applyUpdatesPermissive(Updates);
    // The freeze's only use was the erased terminator, so it is dead now.
1045    if (FI)
1046      FI->eraseFromParent();
1047    return true;
1048  }
1049
1050  // If the terminator of this block is branching on a constant, simplify the
1051  // terminator to an unconditional branch.  This can occur due to threading in
1052  // other blocks.
1053  if (getKnownConstant(Condition, Preference)) {
1054    LLVM_DEBUG(dbgs() << "  In block '" << BB->getName()
1055                      << "' folding terminator: " << *BB->getTerminator()
1056                      << '\n');
1057    ++NumFolds;
1058    ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1059    if (auto *BPI = getBPI())
1060      BPI->eraseBlock(BB);
1061    return true;
1062  }
1063
1064  Instruction *CondInst = dyn_cast<Instruction>(Condition);
1065
1066  // All the rest of our checks depend on the condition being an instruction.
1067  if (!CondInst) {
1068    // FIXME: Unify this with code below.
1069    if (processThreadableEdges(Condition, BB, Preference, Terminator))
1070      return true;
1071    return ConstantFolded;
1072  }
1073
1074  // Some of the following optimization can safely work on the unfrozen cond.
1075  Value *CondWithoutFreeze = CondInst;
1076  if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1077    CondWithoutFreeze = FI->getOperand(0);
1078
1079  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1080    // If we're branching on a conditional, LVI might be able to determine
1081    // it's value at the branch instruction.  We only handle comparisons
1082    // against a constant at this time.
1083    if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
      // NOTE(review): the declaration of `Ret` (original line 1084,
      // presumably a LazyValueInfo tristate) is absent from this extraction.
1085          LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1086                              CondConst, BB->getTerminator(),
1087                              /*UseBlockValue=*/false);
1088      if (Ret != LazyValueInfo::Unknown) {
1089        // We can safely replace *some* uses of the CondInst if it has
1090        // exactly one value as returned by LVI. RAUW is incorrect in the
1091        // presence of guards and assumes, that have the `Cond` as the use. This
1092        // is because we use the guards/assume to reason about the `Cond` value
1093        // at the end of block, but RAUW unconditionally replaces all uses
1094        // including the guards/assumes themselves and the uses before the
1095        // guard/assume.
1096        auto *CI = Ret == LazyValueInfo::True ?
1097          ConstantInt::getTrue(CondCmp->getType()) :
1098          ConstantInt::getFalse(CondCmp->getType());
1099        if (replaceFoldableUses(CondCmp, CI, BB))
1100          return true;
1101      }
1102
1103      // We did not manage to simplify this branch, try to see whether
1104      // CondCmp depends on a known phi-select pattern.
1105      if (tryToUnfoldSelect(CondCmp, BB))
1106        return true;
1107    }
1108  }
1109
1110  if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1111    if (tryToUnfoldSelect(SI, BB))
1112      return true;
1113
1114  // Check for some cases that are worth simplifying.  Right now we want to look
1115  // for loads that are used by a switch or by the condition for the branch.  If
1116  // we see one, check to see if it's partially redundant.  If so, insert a PHI
1117  // which can then be used to thread the values.
1118  Value *SimplifyValue = CondWithoutFreeze;
1119
1120  if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1121    if (isa<Constant>(CondCmp->getOperand(1)))
1122      SimplifyValue = CondCmp->getOperand(0);
1123
1124  // TODO: There are other places where load PRE would be profitable, such as
1125  // more complex comparisons.
1126  if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
    // NOTE(review): the condition line (original line 1127, presumably the
    // partially-redundant-load simplification call) is absent from this
    // extraction.
1128      return true;
1129
1130  // Before threading, try to propagate profile data backwards:
1131  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1132    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
      // NOTE(review): the call on original line 1133 (profile-metadata
      // propagation) is absent from this extraction.
1134
1135  // Handle a variety of cases where we are branching on something derived from
1136  // a PHI node in the current block.  If we can prove that any predecessors
1137  // compute a predictable value based on a PHI node, thread those predecessors.
1138  if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1139    return true;
1140
1141  // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1142  // the current block, see if we can simplify.
1143  PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1144  if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1145    return processBranchOnPHI(PN);
1146
1147  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1148  if (CondInst->getOpcode() == Instruction::Xor &&
1149      CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1150    return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1151
1152  // Search for a stronger dominating condition that can be used to simplify a
1153  // conditional branch leaving BB.
  // NOTE(review): the condition line (original line 1154, presumably the
  // implied-condition check) is absent from this extraction.
1155    return true;
1156
1157  return false;
1158}
1159
// Walk up the single-predecessor chain from BB looking for a dominating
// conditional branch whose taken condition implies BB's branch condition; if
// one is found, fold BB's conditional branch to its implied successor.
// NOTE(review): the defining signature (original line 1160) is absent from
// this extraction.
1161  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1162  if (!BI || !BI->isConditional())
1163    return false;
1164
1165  Value *Cond = BI->getCondition();
1166  // Assuming that predecessor's branch was taken, if pred's branch condition
1167  // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1168  // freeze(Cond) is either true or a nondeterministic value.
1169  // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1170  // without affecting other instructions.
1171  auto *FICond = dyn_cast<FreezeInst>(Cond);
1172  if (FICond && FICond->hasOneUse())
1173    Cond = FICond->getOperand(0);
1174  else
  // FICond is kept non-null only when it is safe to look through the freeze.
1175    FICond = nullptr;
1176
1177  BasicBlock *CurrentBB = BB;
1178  BasicBlock *CurrentPred = BB->getSinglePredecessor();
1179  unsigned Iter = 0;
1180
1181  auto &DL = BB->getModule()->getDataLayout();
1182
  // Bounded walk: give up after ImplicationSearchThreshold predecessors.
1183  while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1184    auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1185    if (!PBI || !PBI->isConditional())
1186      return false;
1187    if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1188      return false;
1189
1190    bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1191    std::optional<bool> Implication =
1192        isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1193
1194    // If the branch condition of BB (which is Cond) and CurrentPred are
1195    // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1196    if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1197      if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1198          FICond->getOperand(0))
1199        Implication = CondIsTrue;
1200    }
1201
1202    if (Implication) {
1203      BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1204      BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1205      RemoveSucc->removePredecessor(BB);
1206      BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
1207      UncondBI->setDebugLoc(BI->getDebugLoc());
1208      ++NumFolds;
1209      BI->eraseFromParent();
      // The freeze's sole use (BI) is gone, so it is dead now.
1210      if (FICond)
1211        FICond->eraseFromParent();
1212
1213      DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1214      if (auto *BPI = getBPI())
1215        BPI->eraseBlock(BB);
1216      return true;
1217    }
1218    CurrentBB = CurrentPred;
1219    CurrentPred = CurrentBB->getSinglePredecessor();
1220  }
1221
1222  return false;
1223}
1224
1225/// Return true if Op is an instruction defined in the given block.
// NOTE(review): the defining signature (original line 1226) is absent from
// this extraction. Non-instruction values (constants, arguments) always
// yield false.
1227  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1228    if (OpInst->getParent() == BB)
1229      return true;
1230  return false;
1231}
1232
1233/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1234/// redundant load instruction, eliminate it by replacing it with a PHI node.
1235/// This is an important optimization that encourages jump threading, and needs
1236/// to be run interlaced with other jump threading tasks.
// NOTE(review): the defining signature (original line 1237) is absent from
// this extraction.
1238  // Don't hack volatile and ordered loads.
1239  if (!LoadI->isUnordered()) return false;
1240
1241  // If the load is defined in a block with exactly one predecessor, it can't be
1242  // partially redundant.
1243  BasicBlock *LoadBB = LoadI->getParent();
1244  if (LoadBB->getSinglePredecessor())
1245    return false;
1246
1247  // If the load is defined in an EH pad, it can't be partially redundant,
1248  // because the edges between the invoke and the EH pad cannot have other
1249  // instructions between them.
1250  if (LoadBB->isEHPad())
1251    return false;
1252
1253  Value *LoadedPtr = LoadI->getOperand(0);
1254
1255  // If the loaded operand is defined in the LoadBB and its not a phi,
1256  // it can't be available in predecessors.
1257  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1258    return false;
1259
1260  // Scan a few instructions up from the load, to see if it is obviously live at
1261  // the entry to its block.
1262  BasicBlock::iterator BBIt(LoadI);
1263  bool IsLoadCSE;
1264  if (Value *AvailableVal = FindAvailableLoadedValue(
1265          LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1266    // If the value of the load is locally available within the block, just use
1267    // it.  This frequently occurs for reg2mem'd allocas.
1268
1269    if (IsLoadCSE) {
1270      LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1271      combineMetadataForCSE(NLoadI, LoadI, false);
      // LVI cached facts about the surviving load may be stale after the
      // metadata merge.
1272      LVI->forgetValue(NLoadI);
1273    };
1274
1275    // If the returned value is the load itself, replace with poison. This can
1276    // only happen in dead loops.
1277    if (AvailableVal == LoadI)
1278      AvailableVal = PoisonValue::get(LoadI->getType());
1279    if (AvailableVal->getType() != LoadI->getType())
1280      AvailableVal = CastInst::CreateBitOrPointerCast(
1281          AvailableVal, LoadI->getType(), "", LoadI);
1282    LoadI->replaceAllUsesWith(AvailableVal);
1283    LoadI->eraseFromParent();
1284    return true;
1285  }
1286
1287  // Otherwise, if we scanned the whole block and got to the top of the block,
1288  // we know the block is locally transparent to the load.  If not, something
1289  // might clobber its value.
1290  if (BBIt != LoadBB->begin())
1291    return false;
1292
1293  // If all of the loads and stores that feed the value have the same AA tags,
1294  // then we can propagate them onto any newly inserted loads.
1295  AAMDNodes AATags = LoadI->getAAMetadata();
1296
1297  SmallPtrSet<BasicBlock*, 8> PredsScanned;
1298
1299  using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1300
1301  AvailablePredsTy AvailablePreds;
1302  BasicBlock *OneUnavailablePred = nullptr;
  // NOTE(review): the declaration of `CSELoads` (original line 1303, used
  // below to collect CSE-able loads) is absent from this extraction.
1304
1305  // If we got here, the loaded value is transparent through to the start of the
1306  // block.  Check to see if it is available in any of the predecessor blocks.
1307  for (BasicBlock *PredBB : predecessors(LoadBB)) {
1308    // If we already scanned this predecessor, skip it.
1309    if (!PredsScanned.insert(PredBB).second)
1310      continue;
1311
1312    BBIt = PredBB->end();
1313    unsigned NumScanedInst = 0;
1314    Value *PredAvailable = nullptr;
1315    // NOTE: We don't CSE load that is volatile or anything stronger than
1316    // unordered, that should have been checked when we entered the function.
1317    assert(LoadI->isUnordered() &&
1318           "Attempting to CSE volatile or atomic loads");
1319    // If this is a load on a phi pointer, phi-translate it and search
1320    // for available load/store to the pointer in predecessors.
1321    Type *AccessTy = LoadI->getType();
1322    const auto &DL = LoadI->getModule()->getDataLayout();
1323    MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1324                       LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1325                       AATags);
1326    PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
1327                                              PredBB, BBIt, DefMaxInstsToScan,
1328                                              AA, &IsLoadCSE, &NumScanedInst);
1329
1330    // If PredBB has a single predecessor, continue scanning through the
1331    // single predecessor.
1332    BasicBlock *SinglePredBB = PredBB;
1333    while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1334           NumScanedInst < DefMaxInstsToScan) {
1335      SinglePredBB = SinglePredBB->getSinglePredecessor();
1336      if (SinglePredBB) {
1337        BBIt = SinglePredBB->end();
1338        PredAvailable = findAvailablePtrLoadStore(
1339            Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1340            (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
1341            &NumScanedInst);
1342      }
1343    }
1344
1345    if (!PredAvailable) {
1346      OneUnavailablePred = PredBB;
1347      continue;
1348    }
1349
1350    if (IsLoadCSE)
1351      CSELoads.push_back(cast<LoadInst>(PredAvailable));
1352
1353    // If so, this load is partially redundant.  Remember this info so that we
1354    // can create a PHI node.
1355    AvailablePreds.emplace_back(PredBB, PredAvailable);
1356  }
1357
1358  // If the loaded value isn't available in any predecessor, it isn't partially
1359  // redundant.
1360  if (AvailablePreds.empty()) return false;
1361
1362  // Okay, the loaded value is available in at least one (and maybe all!)
1363  // predecessors.  If the value is unavailable in more than one unique
1364  // predecessor, we want to insert a merge block for those common predecessors.
1365  // This ensures that we only have to insert one reload, thus not increasing
1366  // code size.
1367  BasicBlock *UnavailablePred = nullptr;
1368
1369  // If the value is unavailable in one of predecessors, we will end up
1370  // inserting a new instruction into them. It is only valid if all the
1371  // instructions before LoadI are guaranteed to pass execution to its
1372  // successor, or if LoadI is safe to speculate.
1373  // TODO: If this logic becomes more complex, and we will perform PRE insertion
1374  // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1375  // It requires domination tree analysis, so for this simple case it is an
1376  // overkill.
1377  if (PredsScanned.size() != AvailablePreds.size() &&
      // NOTE(review): the second conjunct (original line 1378, presumably a
      // speculation-safety check on LoadI) is absent from this extraction.
1379    for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
      // NOTE(review): the loop condition body (original line 1380, presumably
      // a guaranteed-transfer-of-execution check) is absent from this
      // extraction.
1381        return false;
1382
1383  // If there is exactly one predecessor where the value is unavailable, the
1384  // already computed 'OneUnavailablePred' block is it.  If it ends in an
1385  // unconditional branch, we know that it isn't a critical edge.
1386  if (PredsScanned.size() == AvailablePreds.size()+1 &&
1387      OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1388    UnavailablePred = OneUnavailablePred;
1389  } else if (PredsScanned.size() != AvailablePreds.size()) {
1390    // Otherwise, we had multiple unavailable predecessors or we had a critical
1391    // edge from the one.
1392    SmallVector<BasicBlock*, 8> PredsToSplit;
1393    SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1394
1395    for (const auto &AvailablePred : AvailablePreds)
1396      AvailablePredSet.insert(AvailablePred.first);
1397
1398    // Add all the unavailable predecessors to the PredsToSplit list.
1399    for (BasicBlock *P : predecessors(LoadBB)) {
1400      // If the predecessor is an indirect goto, we can't split the edge.
1401      if (isa<IndirectBrInst>(P->getTerminator()))
1402        return false;
1403
1404      if (!AvailablePredSet.count(P))
1405        PredsToSplit.push_back(P);
1406    }
1407
1408    // Split them out to their own block.
1409    UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1410  }
1411
1412  // If the value isn't available in all predecessors, then there will be
1413  // exactly one where it isn't available.  Insert a load on that edge and add
1414  // it to the AvailablePreds list.
1415  if (UnavailablePred) {
1416    assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1417           "Can't handle critical edge here!");
1418    LoadInst *NewVal = new LoadInst(
1419        LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1420        LoadI->getName() + ".pr", false, LoadI->getAlign(),
1421        LoadI->getOrdering(), LoadI->getSyncScopeID(),
1422        UnavailablePred->getTerminator());
1423    NewVal->setDebugLoc(LoadI->getDebugLoc());
1424    if (AATags)
1425      NewVal->setAAMetadata(AATags);
1426
1427    AvailablePreds.emplace_back(UnavailablePred, NewVal);
1428  }
1429
1430  // Now we know that each predecessor of this block has a value in
1431  // AvailablePreds, sort them for efficient access as we're walking the preds.
1432  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1433
1434  // Create a PHI node at the start of the block for the PRE'd load value.
1435  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
1436  PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "");
1437  PN->insertBefore(LoadBB->begin());
1438  PN->takeName(LoadI);
1439  PN->setDebugLoc(LoadI->getDebugLoc());
1440
1441  // Insert new entries into the PHI for each predecessor.  A single block may
1442  // have multiple entries here.
1443  for (pred_iterator PI = PB; PI != PE; ++PI) {
1444    BasicBlock *P = *PI;
1445    AvailablePredsTy::iterator I =
      // lower_bound over the sorted vector; nullptr compares below any value
      // for the same block, so this lands on P's entry.
1446        llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1447
1448    assert(I != AvailablePreds.end() && I->first == P &&
1449           "Didn't find entry for predecessor!");
1450
1451    // If we have an available predecessor but it requires casting, insert the
1452    // cast in the predecessor and use the cast. Note that we have to update the
1453    // AvailablePreds vector as we go so that all of the PHI entries for this
1454    // predecessor use the same bitcast.
1455    Value *&PredV = I->second;
1456    if (PredV->getType() != LoadI->getType())
1457      PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
1458                                               P->getTerminator());
1459
1460    PN->addIncoming(PredV, I->first);
1461  }
1462
1463  for (LoadInst *PredLoadI : CSELoads) {
1464    combineMetadataForCSE(PredLoadI, LoadI, true);
1465    LVI->forgetValue(PredLoadI);
1466  }
1467
1468  LoadI->replaceAllUsesWith(PN);
1469  LoadI->eraseFromParent();
1470
1471  return true;
1472}
1473
1474/// findMostPopularDest - The specified list contains multiple possible
1475/// threadable destinations.  Pick the one that occurs the most frequently in
1476/// the list.
// NOTE(review): the second signature line (original line 1478, carrying the
// function name and the BasicBlock *BB parameter) is absent from this
// extraction.
1477static BasicBlock *
1479                    const SmallVectorImpl<std::pair<BasicBlock *,
1480                                          BasicBlock *>> &PredToDestList) {
1481  assert(!PredToDestList.empty());
1482
1483  // Determine popularity.  If there are multiple possible destinations, we
1484  // explicitly choose to ignore 'undef' destinations.  We prefer to thread
1485  // blocks with known and real destinations to threading undef.  We'll handle
1486  // them later if interesting.
1487  MapVector<BasicBlock *, unsigned> DestPopularity;
1488
1489  // Populate DestPopularity with the successors in the order they appear in the
1490  // successor list.  This way, we ensure determinism by iterating it in the
1491  // same order in std::max_element below.  We map nullptr to 0 so that we can
1492  // return nullptr when PredToDestList contains nullptr only.
1493  DestPopularity[nullptr] = 0;
1494  for (auto *SuccBB : successors(BB))
1495    DestPopularity[SuccBB] = 0;
1496
1497  for (const auto &PredToDest : PredToDestList)
1498    if (PredToDest.second)
1499      DestPopularity[PredToDest.second]++;
1500
1501  // Find the most popular dest.
  // MapVector preserves insertion order, so max_element's tie-breaking is
  // deterministic.
1502  auto MostPopular = std::max_element(
1503      DestPopularity.begin(), DestPopularity.end(), llvm::less_second());
1504
1505  // Okay, we have finally picked the most popular destination.
1506  return MostPopular->first;
1507}
1508
1509// Try to evaluate the value of V when the control flows from PredPredBB to
1510// BB->getSinglePredecessor() and then on to BB.
// NOTE(review): the first signature line (original line 1511, carrying the
// function name, return type and the BasicBlock *BB parameter) is absent
// from this extraction. Returns nullptr when no constant can be determined.
1512                                                       BasicBlock *PredPredBB,
1513                                                       Value *V) {
1514  BasicBlock *PredBB = BB->getSinglePredecessor();
1515  assert(PredBB && "Expected a single predecessor");
1516
1517  if (Constant *Cst = dyn_cast<Constant>(V)) {
1518    return Cst;
1519  }
1520
1521  // Consult LVI if V is not an instruction in BB or PredBB.
1522  Instruction *I = dyn_cast<Instruction>(V);
1523  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1524    return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1525  }
1526
1527  // Look into a PHI argument.
1528  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1529    if (PHI->getParent() == PredBB)
1530      return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1531    return nullptr;
1532  }
1533
1534  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1535  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1536    if (CondCmp->getParent() == BB) {
      // Recurse on both operands; each may itself be a phi/constant visible
      // on the PredPredBB -> PredBB edge.
1537      Constant *Op0 =
1538          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
1539      Constant *Op1 =
1540          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
1541      if (Op0 && Op1) {
1542        return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
1543      }
1544    }
1545    return nullptr;
1546  }
1547
1548  return nullptr;
1549}
1550
// NOTE(review): the first signature line (original line 1551, carrying the
// function name and the Value *Cond / BasicBlock *BB parameters) is absent
// from this extraction; only the trailing parameters are visible below.
1552                                               ConstantPreference Preference,
1553                                               Instruction *CxtI) {
1554  // If threading this would thread across a loop header, don't even try to
1555  // thread the edge.
1556  if (LoopHeaders.count(BB))
1557    return false;
1558
1559  PredValueInfoTy PredValues;
1560  if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1561                                       CxtI)) {
1562    // We don't have known values in predecessors.  See if we can thread through
1563    // BB and its sole predecessor.
    // NOTE(review): the return statement (original line 1564, presumably the
    // thread-through-two-blocks fallback) is absent from this extraction.
1565  }
1566
1567  assert(!PredValues.empty() &&
1568         "computeValueKnownInPredecessors returned true with no values");
1569
1570  LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1571             for (const auto &PredValue : PredValues) {
1572               dbgs() << "  BB '" << BB->getName()
1573                      << "': FOUND condition = " << *PredValue.first
1574                      << " for pred '" << PredValue.second->getName() << "'.\n";
1575             });
1576
1577  // Decide what we want to thread through.  Convert our list of known values to
1578  // a list of known destinations for each pred.  This also discards duplicate
1579  // predecessors and keeps track of the undefined inputs (which are represented
1580  // as a null dest in the PredToDestList).
  // NOTE(review): the declarations of `SeenPreds` and `PredToDestList`
  // (original lines 1581-1582) are absent from this extraction; both are used
  // below.
1583
1584  BasicBlock *OnlyDest = nullptr;
1585  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1586  Constant *OnlyVal = nullptr;
1587  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1588
1589  for (const auto &PredValue : PredValues) {
1590    BasicBlock *Pred = PredValue.second;
1591    if (!SeenPreds.insert(Pred).second)
1592      continue; // Duplicate predecessor entry.
1593
1594    Constant *Val = PredValue.first;
1595
1596    BasicBlock *DestBB;
1597    if (isa<UndefValue>(Val))
      // Null destination encodes "undef input; we may pick any successor".
1598      DestBB = nullptr;
1599    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1600      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1601      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1602    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1603      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1604      DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1605    } else {
1606      assert(isa<IndirectBrInst>(BB->getTerminator())
1607              && "Unexpected terminator");
1608      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1609      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1610    }
1611
1612    // If we have exactly one destination, remember it for efficiency below.
1613    if (PredToDestList.empty()) {
1614      OnlyDest = DestBB;
1615      OnlyVal = Val;
1616    } else {
1617      if (OnlyDest != DestBB)
1618        OnlyDest = MultipleDestSentinel;
1619      // It possible we have same destination, but different value, e.g. default
1620      // case in switchinst.
1621      if (Val != OnlyVal)
1622        OnlyVal = MultipleVal;
1623    }
1624
1625    // If the predecessor ends with an indirect goto, we can't change its
1626    // destination.
1627    if (isa<IndirectBrInst>(Pred->getTerminator()))
1628      continue;
1629
1630    PredToDestList.emplace_back(Pred, DestBB);
1631  }
1632
1633  // If all edges were unthreadable, we fail.
1634  if (PredToDestList.empty())
1635    return false;
1636
1637  // If all the predecessors go to a single known successor, we want to fold,
1638  // not thread. By doing so, we do not need to duplicate the current block and
1639  // also miss potential opportunities in case we dont/cant duplicate.
1640  if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1641    if (BB->hasNPredecessors(PredToDestList.size())) {
1642      bool SeenFirstBranchToOnlyDest = false;
1643      std::vector <DominatorTree::UpdateType> Updates;
1644      Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1645      for (BasicBlock *SuccBB : successors(BB)) {
1646        if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1647          SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1648        } else {
1649          SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1650          Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1651        }
1652      }
1653
1654      // Finally update the terminator.
1655      Instruction *Term = BB->getTerminator();
1656      BranchInst::Create(OnlyDest, Term);
1657      ++NumFolds;
1658      Term->eraseFromParent();
1659      DTU->applyUpdatesPermissive(Updates);
1660      if (auto *BPI = getBPI())
1661        BPI->eraseBlock(BB);
1662
1663      // If the condition is now dead due to the removal of the old terminator,
1664      // erase it.
1665      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1666        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1667          CondInst->eraseFromParent();
1668        // We can safely replace *some* uses of the CondInst if it has
1669        // exactly one value as returned by LVI. RAUW is incorrect in the
1670        // presence of guards and assumes, that have the `Cond` as the use. This
1671        // is because we use the guards/assume to reason about the `Cond` value
1672        // at the end of block, but RAUW unconditionally replaces all uses
1673        // including the guards/assumes themselves and the uses before the
1674        // guard/assume.
1675        else if (OnlyVal && OnlyVal != MultipleVal)
1676          replaceFoldableUses(CondInst, OnlyVal, BB);
1677      }
1678      return true;
1679    }
1680  }
1681
1682  // Determine which is the most common successor.  If we have many inputs and
1683  // this block is a switch, we want to start by threading the batch that goes
1684  // to the most popular destination first.  If we only know about one
1685  // threadable destination (the common case) we can avoid this.
1686  BasicBlock *MostPopularDest = OnlyDest;
1687
1688  if (MostPopularDest == MultipleDestSentinel) {
1689    // Remove any loop headers from the Dest list, threadEdge conservatively
1690    // won't process them, but we might have other destination that are eligible
1691    // and we still want to process.
1692    erase_if(PredToDestList,
1693             [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1694               return LoopHeaders.contains(PredToDest.second);
1695             });
1696
1697    if (PredToDestList.empty())
1698      return false;
1699
1700    MostPopularDest = findMostPopularDest(BB, PredToDestList);
1701  }
1702
1703  // Now that we know what the most popular destination is, factor all
1704  // predecessors that will jump to it into a single predecessor.
1705  SmallVector<BasicBlock*, 16> PredsToFactor;
1706  for (const auto &PredToDest : PredToDestList)
1707    if (PredToDest.second == MostPopularDest) {
1708      BasicBlock *Pred = PredToDest.first;
1709
1710      // This predecessor may be a switch or something else that has multiple
1711      // edges to the block.  Factor each of these edges by listing them
1712      // according to # occurrences in PredsToFactor.
1713      for (BasicBlock *Succ : successors(Pred))
1714        if (Succ == BB)
1715          PredsToFactor.push_back(Pred);
1716    }
1717
1718  // If the threadable edges are branching on an undefined value, we get to pick
1719  // the destination that these predecessors should get to.
1720  if (!MostPopularDest)
1721    MostPopularDest = BB->getTerminator()->
1722                            getSuccessor(getBestDestForJumpOnUndef(BB));
1723
1724  // Ok, try to thread it!
1725  return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1726}
1727
1728/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1729/// a PHI node (or freeze PHI) in the current block.  See if there are any
1730/// simplifications we can do based on inputs to the phi node.
// NOTE(review): the defining signature (original line 1731) and the
// declaration of `PredBBs` (original line 1736, presumably a small vector of
// predecessor blocks) are absent from this extraction.
1732  BasicBlock *BB = PN->getParent();
1733
1734  // TODO: We could make use of this to do it once for blocks with common PHI
1735  // values.
  // Only one slot is used: each candidate predecessor is tried individually.
1737  PredBBs.resize(1);
1738
1739  // If any of the predecessor blocks end in an unconditional branch, we can
1740  // *duplicate* the conditional branch into that block in order to further
1741  // encourage jump threading and to eliminate cases where we have branch on a
1742  // phi of an icmp (branch on icmp is much better).
1743  // This is still beneficial when a frozen phi is used as the branch condition
1744  // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1745  // to br(icmp(freeze ...)).
1746  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1747    BasicBlock *PredBB = PN->getIncomingBlock(i);
1748    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1749      if (PredBr->isUnconditional()) {
1750        PredBBs[0] = PredBB;
1751        // Try to duplicate BB into PredBB.
1752        if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1753          return true;
1754      }
1755  }
1756
1757  return false;
1758}
1759
1760/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1761/// a xor instruction in the current block.  See if there are any
1762/// simplifications we can do based on inputs to the xor.
1764  BasicBlock *BB = BO->getParent();
1765
1766  // If either the LHS or RHS of the xor is a constant, don't do this
1767  // optimization.
1768  if (isa<ConstantInt>(BO->getOperand(0)) ||
1769      isa<ConstantInt>(BO->getOperand(1)))
1770    return false;
1771
1772  // If the first instruction in BB isn't a phi, we won't be able to infer
1773  // anything special about any particular predecessor.
1774  if (!isa<PHINode>(BB->front()))
1775    return false;
1776
1777  // If this BB is a landing pad, we won't be able to split the edge into it.
1778  if (BB->isEHPad())
1779    return false;
1780
1781  // If we have a xor as the branch input to this block, and we know that the
1782  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1783  // the condition into the predecessor and fix that value to true, saving some
1784  // logical ops on that path and encouraging other paths to simplify.
1785  //
1786  // This copies something like this:
1787  //
1788  //  BB:
1789  //    %X = phi i1 [1],  [%X']
1790  //    %Y = icmp eq i32 %A, %B
1791  //    %Z = xor i1 %X, %Y
1792  //    br i1 %Z, ...
1793  //
1794  // Into:
1795  //  BB':
1796  //    %Y = icmp ne i32 %A, %B
1797  //    br i1 %Y, ...
1798
  // isLHS records which xor operand the predecessor constants in XorOpValues
  // were computed for: true => operand 0 (LHS), false => operand 1 (RHS).
1799  PredValueInfoTy XorOpValues;
1800  bool isLHS = true;
1801  if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1802                                       WantInteger, BO)) {
1803    assert(XorOpValues.empty());
1804    if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1805                                         WantInteger, BO))
1806      return false;
1807    isLHS = false;
1808  }
1809
1810  assert(!XorOpValues.empty() &&
1811         "computeValueKnownInPredecessors returned true with no values");
1812
1813  // Scan the information to see which is most popular: true or false.  The
1814  // predecessors can be of the set true, false, or undef.
1815  unsigned NumTrue = 0, NumFalse = 0;
1816  for (const auto &XorOpValue : XorOpValues) {
1817    if (isa<UndefValue>(XorOpValue.first))
1818      // Ignore undefs for the count.
1819      continue;
1820    if (cast<ConstantInt>(XorOpValue.first)->isZero())
1821      ++NumFalse;
1822    else
1823      ++NumTrue;
1824  }
1825
1826  // Determine which value to split on, true, false, or undef if neither.
1827  ConstantInt *SplitVal = nullptr;
1828  if (NumTrue > NumFalse)
1829    SplitVal = ConstantInt::getTrue(BB->getContext());
1830  else if (NumTrue != 0 || NumFalse != 0)
1831    SplitVal = ConstantInt::getFalse(BB->getContext());
1832
1833  // Collect all of the blocks that this can be folded into so that we can
1834  // factor this once and clone it once.
1835  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1836  for (const auto &XorOpValue : XorOpValues) {
1837    if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1838      continue;
1839
1840    BlocksToFoldInto.push_back(XorOpValue.second);
1841  }
1842
1843  // If we inferred a value for all of the predecessors, then duplication won't
1844  // help us.  However, we can just replace the LHS or RHS with the constant.
1845  if (BlocksToFoldInto.size() ==
1846      cast<PHINode>(BB->front()).getNumIncomingValues()) {
1847    if (!SplitVal) {
1848      // If all preds provide undef, just nuke the xor, because it is undef too.
1850      BO->eraseFromParent();
1851    } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
      // Note: getOperand(isLHS) selects the *other* operand (isLHS==true
      // indexes operand 1).  The BO != getOperand(isLHS) check avoids the
      // degenerate self-referential case where the xor would be RAUW'd with
      // itself.
1852      // If all preds provide 0, replace the xor with the other input.
1853      BO->replaceAllUsesWith(BO->getOperand(isLHS));
1854      BO->eraseFromParent();
1855    } else {
1856      // If all preds provide 1, set the computed value to 1.
1857      BO->setOperand(!isLHS, SplitVal);
1858    }
1859
1860    return true;
1861  }
1862
1863  // If any of predecessors end with an indirect goto, we can't change its
1864  // destination.
1865  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1866        return isa<IndirectBrInst>(Pred->getTerminator());
1867      }))
1868    return false;
1869
1870  // Try to duplicate BB into PredBB.
1871  return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1872}
1873
1874/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1875/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
1876/// NewPred using the entries from OldPred (suitably mapped).
1878                                            BasicBlock *OldPred,
1879                                            BasicBlock *NewPred,
1881  for (PHINode &PN : PHIBB->phis()) {
1882    // Ok, we have a PHI node.  Figure out what the incoming value was for the
1883    // DestBlock.
1884    Value *IV = PN.getIncomingValueForBlock(OldPred);
1885
1886    // Remap the value if necessary.
    // Values with no entry in ValueMap (constants, arguments, instructions
    // defined outside the duplicated region) are reused unchanged.
1887    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1889      if (I != ValueMap.end())
1890        IV = I->second;
1891    }
1892
1893    PN.addIncoming(IV, NewPred);
1894  }
1895}
1896
1897/// Merge basic block BB into its sole predecessor if possible.
1899  BasicBlock *SinglePred = BB->getSinglePredecessor();
1900  if (!SinglePred)
1901    return false;
1902
  // Only merge when the sole predecessor ends in an ordinary terminator with
  // exactly one successor, is not BB itself, and BB's address is not taken
  // and used.
1903  const Instruction *TI = SinglePred->getTerminator();
1904  if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1905      SinglePred == BB || hasAddressTakenAndUsed(BB))
1906    return false;
1907
1908  // If SinglePred was a loop header, BB becomes one.
1909  if (LoopHeaders.erase(SinglePred))
1910    LoopHeaders.insert(BB);
1911
1912  LVI->eraseBlock(SinglePred);
1913  MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1914
1915  // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1916  // BB code within one basic block `BB`), we need to invalidate the LVI
1917  // information associated with BB, because the LVI information need not be
1918  // true for all of BB after the merge. For example,
1919  // Before the merge, LVI info and code is as follows:
1920  // SinglePred: <LVI info1 for %p val>
1921  // %y = use of %p
1922  // call @exit() // need not transfer execution to successor.
1923  // assume(%p) // from this point on %p is true
1924  // br label %BB
1925  // BB: <LVI info2 for %p val, i.e. %p is true>
1926  // %x = use of %p
1927  // br label exit
1928  //
1929  // Note that this LVI info for blocks BB and SinglePred is correct for %p
1930  // (info2 and info1 respectively). After the merge and the deletion of the
1931  // LVI info1 for SinglePred. We have the following code:
1932  // BB: <LVI info2 for %p val>
1933  // %y = use of %p
1934  // call @exit()
1935  // assume(%p)
1936  // %x = use of %p <-- LVI info2 is correct from here onwards.
1937  // br label exit
1938  // LVI info2 for BB is incorrect at the beginning of BB.
1939
1940  // Invalidate LVI information for BB if the LVI is not provably true for
1941  // all of BB.
1943    LVI->eraseBlock(BB);
1944  return true;
1945}
1946
1947/// Update the SSA form.  NewBB contains instructions that are copied from BB.
1948/// ValueMapping maps old values in BB to new ones in NewBB.
1950                                BasicBlock *BB, BasicBlock *NewBB,
1951                                DenseMap<Instruction *, Value *> &ValueMapping) {
1952  // If there were values defined in BB that are used outside the block, then we
1953  // now have to update all uses of the value to use either the original value,
1954  // the cloned value, or some PHI derived value.  This can require arbitrary
1955  // PHI insertion, of which we are prepared to do, clean these up now.
1956  SSAUpdater SSAUpdate;
1957  SmallVector<Use *, 16> UsesToRename;
1959
1960  for (Instruction &I : *BB) {
1961    // Scan all uses of this instruction to see if it is used outside of its
1962    // block, and if so, record them in UsesToRename.
1963    for (Use &U : I.uses()) {
1964      Instruction *User = cast<Instruction>(U.getUser());
      // A use in a PHI "occurs" at the end of the corresponding incoming
      // block, so it is only local to BB when that incoming block is BB.
1965      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1966        if (UserPN->getIncomingBlock(U) == BB)
1967          continue;
1968      } else if (User->getParent() == BB)
1969        continue;
1970
1971      UsesToRename.push_back(&U);
1972    }
1973
1974    // Find debug values outside of the block
1975    findDbgValues(DbgValues, &I);
1976    DbgValues.erase(remove_if(DbgValues,
1977                              [&](const DbgValueInst *DbgVal) {
1978                                return DbgVal->getParent() == BB;
1979                              }),
1980                    DbgValues.end());
1981
1982    // If there are no uses outside the block, we're done with this instruction.
1983    if (UsesToRename.empty() && DbgValues.empty())
1984      continue;
1985    LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1986
1987    // We found a use of I outside of BB.  Rename all uses of I that are outside
1988    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
1989    // with the two values we know.
1990    SSAUpdate.Initialize(I.getType(), I.getName());
1991    SSAUpdate.AddAvailableValue(BB, &I);
1992    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1993
1994    while (!UsesToRename.empty())
1995      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1996    if (!DbgValues.empty()) {
1997      SSAUpdate.UpdateDebugValues(&I, DbgValues);
1998      DbgValues.clear();
1999    }
2000
2001    LLVM_DEBUG(dbgs() << "\n");
2002  }
2003}
2004
2005/// Clone instructions in range [BI, BE) to NewBB.  For PHI nodes, we only clone
2006/// arguments that come from PredBB.  Return the map from the variables in the
2007/// source basic block to the variables in the newly created basic block.
2011    BasicBlock *PredBB) {
2012  // We are going to have to map operands from the source basic block to the new
2013  // copy of the block 'NewBB'.  If there are PHI nodes in the source basic
2014  // block, evaluate them to account for entry from PredBB.
2016
2017  // Retargets llvm.dbg.value to any renamed variables.
  // Returns true iff NewInst is an llvm.dbg.value (whether or not any of its
  // location operands actually needed remapping).
2018  auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
2019    auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
2020    if (!DbgInstruction)
2021      return false;
2022
2023    SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2024    for (auto DbgOperand : DbgInstruction->location_ops()) {
2025      auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
2026      if (!DbgOperandInstruction)
2027        continue;
2028
2029      auto I = ValueMapping.find(DbgOperandInstruction);
2030      if (I != ValueMapping.end()) {
2031        OperandsToRemap.insert(
2032            std::pair<Value *, Value *>(DbgOperand, I->second));
2033      }
2034    }
2035
2036    for (auto &[OldOp, MappedOp] : OperandsToRemap)
2037      DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
2038    return true;
2039  };
2040
2041  // Clone the phi nodes of the source basic block into NewBB.  The resulting
2042  // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2043  // might need to rewrite the operand of the cloned phi.
2044  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2045    PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2046    NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2047    ValueMapping[PN] = NewPN;
2048  }
2049
2050  // Clone noalias scope declarations in the threaded block. When threading a
2051  // loop exit, we would otherwise end up with two identical scope declarations
2052  // visible at the same time.
2053  SmallVector<MDNode *> NoAliasScopes;
2054  DenseMap<MDNode *, MDNode *> ClonedScopes;
2055  LLVMContext &Context = PredBB->getContext();
2056  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2057  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2058
2059  // Clone the non-phi instructions of the source basic block into NewBB,
2060  // keeping track of the mapping and using it to remap operands in the cloned
2061  // instructions.
2062  for (; BI != BE; ++BI) {
2063    Instruction *New = BI->clone();
2064    New->setName(BI->getName());
2065    New->insertInto(NewBB, NewBB->end());
2066    ValueMapping[&*BI] = New;
2067    adaptNoAliasScopes(New, ClonedScopes, Context);
2068
2069    if (RetargetDbgValueIfPossible(New))
2070      continue;
2071
2072    // Remap operands to patch up intra-block references.
2073    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2074      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2076        if (I != ValueMapping.end())
2077          New->setOperand(i, I->second);
2078      }
2079  }
2080
2081  return ValueMapping;
2082}
2083
2084/// Attempt to thread through two successive basic blocks.
2086                                                    Value *Cond) {
2087  // Consider:
2088  //
2089  // PredBB:
2090  //   %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2091  //   %tobool = icmp eq i32 %cond, 0
2092  //   br i1 %tobool, label %BB, label ...
2093  //
2094  // BB:
2095  //   %cmp = icmp eq i32* %var, null
2096  //   br i1 %cmp, label ..., label ...
2097  //
2098  // We don't know the value of %var at BB even if we know which incoming edge
2099  // we take to BB.  However, once we duplicate PredBB for each of its incoming
2100  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2101  // PredBB.  Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2102
2103  // Require that BB end with a Branch for simplicity.
2104  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2105  if (!CondBr)
2106    return false;
2107
2108  // BB must have exactly one predecessor.
2109  BasicBlock *PredBB = BB->getSinglePredecessor();
2110  if (!PredBB)
2111    return false;
2112
2113  // Require that PredBB end with a conditional Branch. If PredBB ends with an
2114  // unconditional branch, we should be merging PredBB and BB instead. For
2115  // simplicity, we don't deal with a switch.
2116  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2117  if (!PredBBBranch || PredBBBranch->isUnconditional())
2118    return false;
2119
2120  // If PredBB has exactly one incoming edge, we don't gain anything by copying
2121  // PredBB.
2122  if (PredBB->getSinglePredecessor())
2123    return false;
2124
2125  // Don't thread through PredBB if it contains a successor edge to itself, in
2126  // which case we would infinite loop.  Suppose we are threading an edge from
2127  // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2128  // successor edge to itself.  If we allowed jump threading in this case, we
2129  // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread.  Since
2130  // PredBB.thread has a successor edge to PredBB, we would immediately come up
2131  // with another jump threading opportunity from PredBB.thread through PredBB
2132  // and BB to SuccBB.  This jump threading would repeatedly occur.  That is, we
2133  // would keep peeling one iteration from PredBB.
2134  if (llvm::is_contained(successors(PredBB), PredBB))
2135    return false;
2136
2137  // Don't thread across a loop header.
2138  if (LoopHeaders.count(PredBB))
2139    return false;
2140
2141  // Avoid complication with duplicating EH pads.
2142  if (PredBB->isEHPad())
2143    return false;
2144
2145  // Find a predecessor that we can thread.  For simplicity, we only consider a
2146  // successor edge out of BB to which we thread exactly one incoming edge into
2147  // PredBB.
2148  unsigned ZeroCount = 0;
2149  unsigned OneCount = 0;
2150  BasicBlock *ZeroPred = nullptr;
2151  BasicBlock *OnePred = nullptr;
2152  for (BasicBlock *P : predecessors(PredBB)) {
2153    // If PredPred ends with IndirectBrInst, we can't handle it.
2154    if (isa<IndirectBrInst>(P->getTerminator()))
2155      continue;
2156    if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2158      if (CI->isZero()) {
2159        ZeroCount++;
2160        ZeroPred = P;
2161      } else if (CI->isOne()) {
2162        OneCount++;
2163        OnePred = P;
2164      }
2165    }
2166  }
2167
2168  // Disregard complicated cases where we have to thread multiple edges.
2169  BasicBlock *PredPredBB;
2170  if (ZeroCount == 1) {
2171    PredPredBB = ZeroPred;
2172  } else if (OneCount == 1) {
2173    PredPredBB = OnePred;
2174  } else {
2175    return false;
2176  }
2177
  // A false (zero) branch condition takes successor 1, a true condition takes
  // successor 0 -- hence the index (PredPredBB == ZeroPred).
2178  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2179
2180  // If threading to the same block as we come from, we would infinite loop.
2181  if (SuccBB == BB) {
2182    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
2183                      << "' - would thread to self!\n");
2184    return false;
2185  }
2186
2187  // If threading this would thread across a loop header, don't thread the edge.
2188  // See the comments above findLoopHeaders for justifications and caveats.
2189  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2190    LLVM_DEBUG({
2191      bool BBIsHeader = LoopHeaders.count(BB);
2192      bool SuccIsHeader = LoopHeaders.count(SuccBB);
2193      dbgs() << "  Not threading across "
2194             << (BBIsHeader ? "loop header BB '" : "block BB '")
2195             << BB->getName() << "' to dest "
2196             << (SuccIsHeader ? "loop header BB '" : "block BB '")
2197             << SuccBB->getName()
2198             << "' - it might create an irreducible loop!\n";
2199    });
2200    return false;
2201  }
2202
2203  // Compute the cost of duplicating BB and PredBB.
2204  unsigned BBCost = getJumpThreadDuplicationCost(
2205      TTI, BB, BB->getTerminator(), BBDupThreshold);
2206  unsigned PredBBCost = getJumpThreadDuplicationCost(
2207      TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2208
2209  // Give up if costs are too high.  We need to check BBCost and PredBBCost
2210  // individually before checking their sum because getJumpThreadDuplicationCost
2211  // return (unsigned)~0 for those basic blocks that cannot be duplicated.
2212  if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2213      BBCost + PredBBCost > BBDupThreshold) {
    // NOTE(review): this debug message is missing a space before "for BB".
2214    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
2215                      << "' - Cost is too high: " << PredBBCost
2216                      << " for PredBB, " << BBCost << "for BB\n");
2217    return false;
2218  }
2219
2220  // Now we are ready to duplicate PredBB.
2221  threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2222  return true;
2223}
2224
2226                                                   BasicBlock *PredBB,
2227                                                   BasicBlock *BB,
2228                                                   BasicBlock *SuccBB) {
2229  LLVM_DEBUG(dbgs() << "  Threading through '" << PredBB->getName() << "' and '"
2230                    << BB->getName() << "'\n");
2231
2232  // Build BPI/BFI before any changes are made to IR.
2233  bool HasProfile = doesBlockHaveProfileData(BB);
2234  auto *BFI = getOrCreateBFI(HasProfile);
2235  auto *BPI = getOrCreateBPI(BFI != nullptr);
2236
2237  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2238  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2239
2240  BasicBlock *NewBB =
2241      BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2242                         PredBB->getParent(), PredBB);
2243  NewBB->moveAfter(PredBB);
2244
2245  // Set the block frequency of NewBB.
2246  if (BFI) {
2247    assert(BPI && "It's expected BPI to exist along with BFI");
2248    auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2249                     BPI->getEdgeProbability(PredPredBB, PredBB);
2250    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2251  }
2252
2253  // We are going to have to map operands from the original BB block to the new
2254  // copy of the block 'NewBB'.  If there are PHI nodes in PredBB, evaluate them
2255  // to account for entry from PredPredBB.
2257      cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
2258
2259  // Copy the edge probabilities from PredBB to NewBB.
2260  if (BPI)
2261    BPI->copyEdgeProbabilities(PredBB, NewBB);
2262
2263  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2264  // This eliminates predecessors from PredPredBB, which requires us to simplify
2265  // any PHI nodes in PredBB.
2266  Instruction *PredPredTerm = PredPredBB->getTerminator();
2267  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2268    if (PredPredTerm->getSuccessor(i) == PredBB) {
2269      PredBB->removePredecessor(PredPredBB, true);
2270      PredPredTerm->setSuccessor(i, NewBB);
2271    }
2272
  // NewBB (the clone of PredBB) now feeds both of PredBB's successors; give
  // their PHIs entries for NewBB using PredBB's values remapped through
  // ValueMapping.
2273  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2274                                  ValueMapping);
2275  addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2276                                  ValueMapping);
2277
2278  DTU->applyUpdatesPermissive(
2279      {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2280       {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2281       {DominatorTree::Insert, PredPredBB, NewBB},
2282       {DominatorTree::Delete, PredPredBB, PredBB}});
2283
2284  updateSSA(PredBB, NewBB, ValueMapping);
2285
2286  // Clean up things like PHI nodes with single operands, dead instructions,
2287  // etc.
2288  SimplifyInstructionsInBlock(NewBB, TLI);
2289  SimplifyInstructionsInBlock(PredBB, TLI);
2290
2291  SmallVector<BasicBlock *, 1> PredsToFactor;
2292  PredsToFactor.push_back(NewBB);
2293  threadEdge(BB, PredsToFactor, SuccBB);
2294}
2295
2296/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
2298    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2299    BasicBlock *SuccBB) {
2300  // If threading to the same block as we come from, we would infinite loop.
2301  if (SuccBB == BB) {
2302    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
2303                      << "' - would thread to self!\n");
2304    return false;
2305  }
2306
2307  // If threading this would thread across a loop header, don't thread the edge.
2308  // See the comments above findLoopHeaders for justifications and caveats.
2309  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2310    LLVM_DEBUG({
2311      bool BBIsHeader = LoopHeaders.count(BB);
2312      bool SuccIsHeader = LoopHeaders.count(SuccBB);
2313      dbgs() << "  Not threading across "
2314             << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2315             << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2316             << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2317    });
2318    return false;
2319  }
2320
  // getJumpThreadDuplicationCost returns (unsigned)~0 for unduplicatable
  // blocks, which always exceeds BBDupThreshold.
2321  unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2322      TTI, BB, BB->getTerminator(), BBDupThreshold);
2323  if (JumpThreadCost > BBDupThreshold) {
2324    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
2325                      << "' - Cost is too high: " << JumpThreadCost << "\n");
2326    return false;
2327  }
2328
2329  threadEdge(BB, PredBBs, SuccBB);
2330  return true;
2331}
2332
2333/// threadEdge - We have decided that it is safe and profitable to factor the
2334/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2335/// across BB.  Transform the IR to reflect this change.
2337                                 const SmallVectorImpl<BasicBlock *> &PredBBs,
2338                                 BasicBlock *SuccBB) {
2339  assert(SuccBB != BB && "Don't create an infinite loop");
2340
2341  assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2342         "Don't thread across loop headers");
2343
2344  // Build BPI/BFI before any changes are made to IR.
2345  bool HasProfile = doesBlockHaveProfileData(BB);
2346  auto *BFI = getOrCreateBFI(HasProfile);
2347  auto *BPI = getOrCreateBPI(BFI != nullptr);
2348
2349  // And finally, do it!  Start by factoring the predecessors if needed.
2350  BasicBlock *PredBB;
2351  if (PredBBs.size() == 1)
2352    PredBB = PredBBs[0];
2353  else {
2354    LLVM_DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
2355                      << " common predecessors.\n");
2356    PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2357  }
2358
2359  // And finally, do it!
  // NOTE(review): the quote opened before SuccBB->getName() in this debug
  // message is never closed.
2360  LLVM_DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName()
2361                    << "' to '" << SuccBB->getName()
2362                    << ", across block:\n    " << *BB << "\n");
2363
2364  LVI->threadEdge(PredBB, BB, SuccBB);
2365
2367                                         BB->getName()+".thread",
2368                                         BB->getParent(), BB);
2369  NewBB->moveAfter(PredBB);
2370
2371  // Set the block frequency of NewBB.
2372  if (BFI) {
2373    assert(BPI && "It's expected BPI to exist along with BFI");
2374    auto NewBBFreq =
2375        BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2376    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2377  }
2378
2379  // Copy all the instructions from BB to NewBB except the terminator.
2381      cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
2382
2383  // We didn't copy the terminator from BB over to NewBB, because there is now
2384  // an unconditional jump to SuccBB.  Insert the unconditional jump.
2385  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2386  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2387
2388  // Check to see if SuccBB has PHI nodes.  If so, we need to add entries to the
2389  // PHI nodes for NewBB now.
2390  addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2391
2392  // Update the terminator of PredBB to jump to NewBB instead of BB.  This
2393  // eliminates predecessors from BB, which requires us to simplify any PHI
2394  // nodes in BB.
2395  Instruction *PredTerm = PredBB->getTerminator();
2396  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2397    if (PredTerm->getSuccessor(i) == BB) {
2398      BB->removePredecessor(PredBB, true);
2399      PredTerm->setSuccessor(i, NewBB);
2400    }
2401
2402  // Enqueue required DT updates.
2403  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2404                               {DominatorTree::Insert, PredBB, NewBB},
2405                               {DominatorTree::Delete, PredBB, BB}});
2406
2407  updateSSA(BB, NewBB, ValueMapping);
2408
2409  // At this point, the IR is fully up to date and consistent.  Do a quick scan
2410  // over the new instructions and zap any that are constants or dead.  This
2411  // frequently happens because of phi translation.
2412  SimplifyInstructionsInBlock(NewBB, TLI);
2413
2414  // Update the edge weight from BB to SuccBB, which should be less than before.
2415  updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2416
2417  // Threaded an edge!
2418  ++NumThreads;
2419}
2420
2421/// Create a new basic block that will be the predecessor of BB and successor of
2422/// all blocks in Preds.  When profile data is available, update the frequency of
2423/// this new block.
2424BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2426                                               const char *Suffix) {
2428
2429  // Collect the frequencies of all predecessors of BB, which will be used to
2430  // update the edge weight of the result of splitting predecessors.
2432  auto *BFI = getBFI();
2433  if (BFI) {
2434    auto *BPI = getOrCreateBPI(true);
2435    for (auto *Pred : Preds)
2436      FreqMap.insert(std::make_pair(
2437          Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2438  }
2439
2440  // In the case when BB is a LandingPad block we create 2 new predecessors
2441  // instead of just one.
2442  if (BB->isLandingPad()) {
2443    std::string NewName = std::string(Suffix) + ".split-lp";
2444    SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2445  } else {
2446    NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2447  }
2448
  // For every new block, record the DT edge flips (Pred->BB becomes
  // Pred->NewBB->BB) and accumulate the frequency flowing into NewBB.
2449  std::vector<DominatorTree::UpdateType> Updates;
2450  Updates.reserve((2 * Preds.size()) + NewBBs.size());
2451  for (auto *NewBB : NewBBs) {
2452    BlockFrequency NewBBFreq(0);
2453    Updates.push_back({DominatorTree::Insert, NewBB, BB});
2454    for (auto *Pred : predecessors(NewBB)) {
2455      Updates.push_back({DominatorTree::Delete, Pred, BB});
2456      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2457      if (BFI) // Update frequencies between Pred -> NewBB.
2458        NewBBFreq += FreqMap.lookup(Pred);
2459    }
2460    if (BFI) // Apply the summed frequency to NewBB.
2461      BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2462  }
2463
2464  DTU->applyUpdatesPermissive(Updates);
2465  return NewBBs[0];
2466}
2467
2468bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2469 const Instruction *TI = BB->getTerminator();
2470 if (!TI || TI->getNumSuccessors() < 2)
2471 return false;
2472
2473 return hasValidBranchWeightMD(*TI);
2474}
2475
2476/// Update the block frequency of BB and branch weight and the metadata on the
2477/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2478/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2479void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2480 BasicBlock *BB,
2481 BasicBlock *NewBB,
2482 BasicBlock *SuccBB,
2483 BlockFrequencyInfo *BFI,
2485 bool HasProfile) {
2486 assert(((BFI && BPI) || (!BFI && !BFI)) &&
2487 "Both BFI & BPI should either be set or unset");
2488
2489 if (!BFI) {
2490 assert(!HasProfile &&
2491 "It's expected to have BFI/BPI when profile info exists");
2492 return;
2493 }
2494
2495 // As the edge from PredBB to BB is deleted, we have to update the block
2496 // frequency of BB.
2497 auto BBOrigFreq = BFI->getBlockFreq(BB);
2498 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2499 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2500 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2501 BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
2502
2503 // Collect updated outgoing edges' frequencies from BB and use them to update
2504 // edge probabilities.
2505 SmallVector<uint64_t, 4> BBSuccFreq;
2506 for (BasicBlock *Succ : successors(BB)) {
2507 auto SuccFreq = (Succ == SuccBB)
2508 ? BB2SuccBBFreq - NewBBFreq
2509 : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2510 BBSuccFreq.push_back(SuccFreq.getFrequency());
2511 }
2512
2513 uint64_t MaxBBSuccFreq =
2514 *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2515
2517 if (MaxBBSuccFreq == 0)
2518 BBSuccProbs.assign(BBSuccFreq.size(),
2519 {1, static_cast<unsigned>(BBSuccFreq.size())});
2520 else {
2521 for (uint64_t Freq : BBSuccFreq)
2522 BBSuccProbs.push_back(
2523 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2524 // Normalize edge probabilities so that they sum up to one.
2526 BBSuccProbs.end());
2527 }
2528
2529 // Update edge probabilities in BPI.
2530 BPI->setEdgeProbability(BB, BBSuccProbs);
2531
2532 // Update the profile metadata as well.
2533 //
2534 // Don't do this if the profile of the transformed blocks was statically
2535 // estimated. (This could occur despite the function having an entry
2536 // frequency in completely cold parts of the CFG.)
2537 //
2538 // In this case we don't want to suggest to subsequent passes that the
2539 // calculated weights are fully consistent. Consider this graph:
2540 //
2541 // check_1
2542 // 50% / |
2543 // eq_1 | 50%
2544 // \ |
2545 // check_2
2546 // 50% / |
2547 // eq_2 | 50%
2548 // \ |
2549 // check_3
2550 // 50% / |
2551 // eq_3 | 50%
2552 // \ |
2553 //
2554 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2555 // the overall probabilities are inconsistent; the total probability that the
2556 // value is either 1, 2 or 3 is 150%.
2557 //
2558 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2559 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2560 // the loop exit edge. Then based solely on static estimation we would assume
2561 // the loop was extremely hot.
2562 //
2563 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2564 // shouldn't make edges extremely likely or unlikely based solely on static
2565 // estimation.
2566 if (BBSuccProbs.size() >= 2 && HasProfile) {
2568 for (auto Prob : BBSuccProbs)
2569 Weights.push_back(Prob.getNumerator());
2570
2571 auto TI = BB->getTerminator();
2572 TI->setMetadata(
2573 LLVMContext::MD_prof,
2575 }
2576}
2577
2578/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2579/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2580/// If we can duplicate the contents of BB up into PredBB do so now, this
2581/// improves the odds that the branch will be on an analyzable instruction like
2582/// a compare.
2584 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2585 assert(!PredBBs.empty() && "Can't handle an empty set");
2586
2587 // If BB is a loop header, then duplicating this block outside the loop would
2588 // cause us to transform this into an irreducible loop, don't do this.
2589 // See the comments above findLoopHeaders for justifications and caveats.
2590 if (LoopHeaders.count(BB)) {
2591 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2592 << "' into predecessor block '" << PredBBs[0]->getName()
2593 << "' - it might create an irreducible loop!\n");
2594 return false;
2595 }
2596
// Respect the duplication budget: cloning an expensive block into a
// predecessor trades code size for threading opportunity, so bail out when
// the TTI-based cost exceeds the configured threshold.
2597 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2598 TTI, BB, BB->getTerminator(), BBDupThreshold);
2599 if (DuplicationCost > BBDupThreshold) {
2600 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2601 << "' - Cost is too high: " << DuplicationCost << "\n");
2602 return false;
2603 }
2604
2605 // And finally, do it! Start by factoring the predecessors if needed.
2606 std::vector<DominatorTree::UpdateType> Updates;
2607 BasicBlock *PredBB;
2608 if (PredBBs.size() == 1)
2609 PredBB = PredBBs[0];
2610 else {
2611 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2612 << " common predecessors.\n");
2613 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2614 }
// After duplication PredBB will no longer branch to BB, so queue the edge
// deletion for the (deferred) dominator-tree update.
2615 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2616
2617 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2618 // of PredBB.
2619 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2620 << "' into end of '" << PredBB->getName()
2621 << "' to eliminate branch on phi. Cost: "
2622 << DuplicationCost << " block is:" << *BB << "\n");
2623
2624 // Unless PredBB ends with an unconditional branch, split the edge so that we
2625 // can just clone the bits from BB into the end of the new PredBB.
2626 BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2627
2628 if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2629 BasicBlock *OldPredBB = PredBB;
2630 PredBB = SplitEdge(OldPredBB, BB);
2631 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2632 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2633 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2634 OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2635 }
2636
2637 // We are going to have to map operands from the original BB block into the
2638 // PredBB block. Evaluate PHI nodes in BB.
2639 DenseMap<Instruction*, Value*> ValueMapping;
2640
// PHIs are not cloned; coming from PredBB each PHI resolves to exactly one
// incoming value, so record that value directly in the mapping.
2641 BasicBlock::iterator BI = BB->begin();
2642 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2643 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2644 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2645 // mapping and using it to remap operands in the cloned instructions.
2646 for (; BI != BB->end(); ++BI) {
2647 Instruction *New = BI->clone();
2648 New->insertInto(PredBB, OldPredBranch->getIterator());
2649
2650 // Remap operands to patch up intra-block references.
2651 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2652 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2653 DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2654 if (I != ValueMapping.end())
2655 New->setOperand(i, I->second);
2656 }
2657
2658 // If this instruction can be simplified after the operands are updated,
2659 // just use the simplified value instead. This frequently happens due to
2660 // phi translation.
// NOTE(review): line 2661 — the `if (Value *IV = simplifyInstruction(`
// opener for the condition below — was lost in this excerpt.
2662 New,
2663 {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2664 ValueMapping[&*BI] = IV;
// A simplified clone is redundant; drop it unless it has side effects
// that must be kept (e.g. a call).
2665 if (!New->mayHaveSideEffects()) {
2666 New->eraseFromParent();
2667 New = nullptr;
2668 }
2669 } else {
2670 ValueMapping[&*BI] = New;
2671 }
2672 if (New) {
2673 // Otherwise, insert the new instruction into the block.
2674 New->setName(BI->getName());
2675 // Update Dominance from simplified New instruction operands.
2676 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2677 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2678 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2679 }
2680 }
2681
2682 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2683 // add entries to the PHI nodes for branch from PredBB now.
2684 BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2685 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2686 ValueMapping);
2687 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2688 ValueMapping);
2689
// Values defined in BB may have uses outside it; updateSSA inserts the PHI
// nodes needed to merge the original and cloned definitions.
2690 updateSSA(BB, PredBB, ValueMapping);
2691
2692 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2693 // that we nuked.
2694 BB->removePredecessor(PredBB, true);
2695
2696 // Remove the unconditional branch at the end of the PredBB block.
2697 OldPredBranch->eraseFromParent();
// Keep branch-probability info consistent: PredBB now ends with a copy of
// BB's terminator, so it inherits BB's outgoing edge probabilities.
2698 if (auto *BPI = getBPI())
2699 BPI->copyEdgeProbabilities(BB, PredBB);
2700 DTU->applyUpdatesPermissive(Updates);
2701
2702 ++NumDupes;
2703 return true;
2704}
2705
2706 // Pred is a predecessor of BB with an unconditional branch to BB. SI is
2707 // a Select instruction in Pred. BB has other predecessors and SI is used in
2708 // a PHI node in BB. SI has no other use.
2709 // A new basic block, NewBB, is created and SI is converted to compare and
2710 // conditional branch. SI is erased from parent.
// NOTE(review): the signature line (2711) is missing from this excerpt;
// the remaining parameters are visible below.
2712 SelectInst *SI, PHINode *SIUse,
2713 unsigned Idx) {
2714 // Expand the select.
2715 //
2716 // Pred --
2717 // | v
2718 // | NewBB
2719 // | |
2720 // |-----
2721 // v
2722 // BB
2723 BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2724 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2725 BB->getParent(), BB);
2726 // Move the unconditional branch to NewBB.
2727 PredTerm->removeFromParent();
2728 PredTerm->insertInto(NewBB, NewBB->end());
2729 // Create a conditional branch and update PHI nodes.
2730 auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
// Attribute the new branch to both the old terminator and the select it
// replaces, and carry over any !prof metadata from the select.
2731 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2732 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
// Rewire the PHI user: the direct Pred->BB edge now carries the select's
// false value, while the detour through NewBB carries the true value.
2733 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2734 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2735
2736 uint64_t TrueWeight = 1;
2737 uint64_t FalseWeight = 1;
2738 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2739 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2740 (TrueWeight + FalseWeight) != 0) {
// NOTE(review): lines 2741-2742 and 2744 (construction of the
// BranchProbability vector `BP` from the extracted weights) are missing
// from this excerpt.
2743 TrueWeight, TrueWeight + FalseWeight));
2745 FalseWeight, TrueWeight + FalseWeight));
2746 // Update BPI if exists.
2747 if (auto *BPI = getBPI())
2748 BPI->setEdgeProbability(Pred, BP);
2749 }
2750 // Set the block frequency of NewBB.
2751 if (auto *BFI = getBFI()) {
// Degenerate/absent weights: fall back to a 50/50 split so the division
// below is well defined.
2752 if ((TrueWeight + FalseWeight) == 0) {
2753 TrueWeight = 1;
2754 FalseWeight = 1;
2755 }
// NOTE(review): line 2756 (the `BranchProbability PredToNewBBProb =`
// opener) is missing from this excerpt.
2757 TrueWeight, TrueWeight + FalseWeight);
2758 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2759 BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2760 }
2761
2762 // The select is now dead.
2763 SI->eraseFromParent();
2764 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2765 {DominatorTree::Insert, Pred, NewBB}});
2766
2767 // Update any other PHI nodes in BB.
// NewBB is a brand-new predecessor of BB; every other PHI gets the same
// value on the NewBB edge that it already had on the Pred edge.
2768 for (BasicBlock::iterator BI = BB->begin();
2769 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2770 if (Phi != SIUse)
2771 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2772}
2773
// NOTE(review): the signature line (2774) is missing from this excerpt;
// the body reads the condition of a terminator `SI` and unfolds selects
// feeding its condition PHI — confirm the exact parameter list upstream.
2775 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2776
// Only handle a condition that is a PHI defined in BB itself.
2777 if (!CondPHI || CondPHI->getParent() != BB)
2778 return false;
2779
// Scan each incoming edge of the condition PHI for a select defined in
// the corresponding predecessor that we can unfold into control flow.
2780 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2781 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2782 SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2783
2784 // The second and third condition can be potentially relaxed. Currently
2785 // the conditions help to simplify the code and allow us to reuse existing
2786 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2787 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2788 continue;
2789
// unfoldSelectInstr requires Pred to end in an unconditional branch.
2790 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2791 if (!PredTerm || !PredTerm->isUnconditional())
2792 continue;
2793
// Unfold at most one select per invocation; report the change.
2794 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2795 return true;
2796 }
2797 return false;
2798}
2799
2800 /// tryToUnfoldSelect - Look for blocks of the form
2801 /// bb1:
2802 /// %a = select
2803 /// br bb2
2804 ///
2805 /// bb2:
2806 /// %p = phi [%a, %bb1] ...
2807 /// %c = icmp %p
2808 /// br i1 %c
2809 ///
2810 /// And expand the select into a branch structure if one of its arms allows %c
2811 /// to be folded. This later enables threading from bb1 over bb2.
// NOTE(review): the signature line (2812) is missing from this excerpt;
// the body uses parameters `CondCmp` (a compare) and `BB`.
2813 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2814 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
2815 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2816
// Require the shape above: a conditional branch in BB whose compare takes
// a PHI defined in BB on the LHS.
2817 if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2818 CondLHS->getParent() != BB)
2819 return false;
2820
2821 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2822 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2823 SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2824
2825 // Look if one of the incoming values is a select in the corresponding
2826 // predecessor.
2827 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2828 continue;
2829
// unfoldSelectInstr requires Pred to end in an unconditional branch.
2830 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2831 if (!PredTerm || !PredTerm->isUnconditional())
2832 continue;
2833
2834 // Now check if one of the select values would allow us to constant fold the
2835 // terminator in BB. We don't do the transform if both sides fold, those
2836 // cases will be threaded in any case.
// Ask LVI, on the Pred->BB edge, whether substituting each select arm for
// the PHI makes the compare's outcome known.
2837 LazyValueInfo::Tristate LHSFolds =
2838 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2839 CondRHS, Pred, BB, CondCmp);
2840 LazyValueInfo::Tristate RHSFolds =
2841 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2842 CondRHS, Pred, BB, CondCmp);
// Unfold only when at least one arm folds and the two arms disagree.
2843 if ((LHSFolds != LazyValueInfo::Unknown ||
2844 RHSFolds != LazyValueInfo::Unknown) &&
2845 LHSFolds != RHSFolds) {
2846 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2847 return true;
2848 }
2849 }
2850 return false;
2851}
2852
2853 /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2854 /// same BB in the form
2855 /// bb:
2856 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2857 /// %s = select %p, trueval, falseval
2858 ///
2859 /// or
2860 ///
2861 /// bb:
2862 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2863 /// %c = cmp %p, 0
2864 /// %s = select %c, trueval, falseval
2865 ///
2866 /// And expand the select into a branch structure. This later enables
2867 /// jump-threading over bb in this pass.
2868 ///
2869 /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2870 /// select if the associated PHI has at least one constant. If the unfolded
2871 /// select is not jump-threaded, it will be folded again in the later
2872 /// optimizations.
// NOTE(review): the signature line (2873) is missing from this excerpt.
2874 // This transform would reduce the quality of msan diagnostics.
2875 // Disable this transform under MemorySanitizer.
2876 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2877 return false;
2878
2879 // If threading this would thread across a loop header, don't thread the edge.
2880 // See the comments above findLoopHeaders for justifications and caveats.
2881 if (LoopHeaders.count(BB))
2882 return false;
2883
2884 for (BasicBlock::iterator BI = BB->begin();
2885 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2886 // Look for a Phi having at least one constant incoming value.
2887 if (llvm::all_of(PN->incoming_values(),
2888 [](Value *V) { return !isa<ConstantInt>(V); }))
2889 continue;
2890
2891 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2892 using namespace PatternMatch;
2893
2894 // Check if SI is in BB and use V as condition.
2895 if (SI->getParent() != BB)
2896 return false;
2897 Value *Cond = SI->getCondition();
// Logical and/or selects are excluded: unfolding them here is handled
// differently (they encode short-circuit semantics).
2898 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2899 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2900 };
2901
// Find a select fed (directly, or through a single-use compare against a
// constant) by this PHI.
2902 SelectInst *SI = nullptr;
2903 for (Use &U : PN->uses()) {
2904 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2905 // Look for a ICmp in BB that compares PN with a constant and is the
2906 // condition of a Select.
2907 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2908 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2909 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2910 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2911 SI = SelectI;
2912 break;
2913 }
2914 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2915 // Look for a Select in BB that uses PN as condition.
2916 if (isUnfoldCandidate(SelectI, U.get())) {
2917 SI = SelectI;
2918 break;
2919 }
2920 }
2921 }
2922
2923 if (!SI)
2924 continue;
2925 // Expand the select.
// Branching on a possibly-undef/poison condition would be UB where the
// original select was not, so freeze it unless provably well-defined.
2926 Value *Cond = SI->getCondition();
2927 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
2928 Cond = new FreezeInst(Cond, "cond.fr", SI);
2929 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
// Split BB at the select: `Term` is the terminator of the new then-block
// and SplitBB receives everything after the select.
2930 Instruction *Term =
2931 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
2932 BasicBlock *SplitBB = SI->getParent();
2933 BasicBlock *NewBB = Term->getParent();
// Replace the select with a PHI merging its two arms along the new edges.
2934 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2935 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2936 NewPN->addIncoming(SI->getFalseValue(), BB);
2937 SI->replaceAllUsesWith(NewPN);
2938 SI->eraseFromParent();
2939 // NewBB and SplitBB are newly created blocks which require insertion.
2940 std::vector<DominatorTree::UpdateType> Updates;
2941 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2942 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2943 Updates.push_back({DominatorTree::Insert, BB, NewBB});
2944 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2945 // BB's successors were moved to SplitBB, update DTU accordingly.
2946 for (auto *Succ : successors(SplitBB)) {
2947 Updates.push_back({DominatorTree::Delete, BB, Succ});
2948 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2949 }
2950 DTU->applyUpdatesPermissive(Updates);
2951 return true;
2952 }
2953 return false;
2954}
2955
2956 /// Try to propagate a guard from the current BB into one of its predecessors
2957 /// in case if another branch of execution implies that the condition of this
2958 /// guard is always true. Currently we only process the simplest case that
2959 /// looks like:
2960 ///
2961 /// Start:
2962 /// %cond = ...
2963 /// br i1 %cond, label %T1, label %F1
2964 /// T1:
2965 /// br label %Merge
2966 /// F1:
2967 /// br label %Merge
2968 /// Merge:
2969 /// %condGuard = ...
2970 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
2971 ///
2972 /// And cond either implies condGuard or !condGuard. In this case all the
2973 /// instructions before the guard can be duplicated in both branches, and the
2974 /// guard is then threaded to one of them.
// NOTE(review): the signature line (2975) is missing from this excerpt.
2976 using namespace PatternMatch;
2977
2978 // We only want to deal with two predecessors.
// Walk the predecessor list manually so we can reject <2 or >2 preds (and
// the degenerate case of the same block appearing twice).
2979 BasicBlock *Pred1, *Pred2;
2980 auto PI = pred_begin(BB), PE = pred_end(BB);
2981 if (PI == PE)
2982 return false;
2983 Pred1 = *PI++;
2984 if (PI == PE)
2985 return false;
2986 Pred2 = *PI++;
2987 if (PI != PE)
2988 return false;
2989 if (Pred1 == Pred2)
2990 return false;
2991
2992 // Try to thread one of the guards of the block.
2993 // TODO: Look up deeper than to immediate predecessor?
// Require the diamond shape: both predecessors must share one single
// predecessor, the branching block.
2994 auto *Parent = Pred1->getSinglePredecessor();
2995 if (!Parent || Parent != Pred2->getSinglePredecessor())
2996 return false;
2997
// Thread the first guard in BB that the diamond's branch condition decides.
2998 if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
2999 for (auto &I : *BB)
3000 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3001 return true;
3002
3003 return false;
3004}
3005
3006 /// Try to propagate the guard from BB which is the lower block of a diamond
3007 /// to one of its branches, in case if diamond's condition implies guard's
3008 /// condition.
// NOTE(review): the first signature line (3009) is missing from this
// excerpt; parameters `BB` and `Guard` are used below alongside `BI`.
3010 BranchInst *BI) {
3011 assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3012 assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3013 Value *GuardCond = Guard->getArgOperand(0);
3014 Value *BranchCond = BI->getCondition();
3015 BasicBlock *TrueDest = BI->getSuccessor(0);
3016 BasicBlock *FalseDest = BI->getSuccessor(1);
3017
3018 auto &DL = BB->getModule()->getDataLayout();
3019 bool TrueDestIsSafe = false;
3020 bool FalseDestIsSafe = false;
3021
3022 // True dest is safe if BranchCond => GuardCond.
3023 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3024 if (Impl && *Impl)
3025 TrueDestIsSafe = true;
3026 else {
3027 // False dest is safe if !BranchCond => GuardCond.
3028 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3029 if (Impl && *Impl)
3030 FalseDestIsSafe = true;
3031 }
3032
3033 if (!TrueDestIsSafe && !FalseDestIsSafe)
3034 return false;
3035
// The "unguarded" side is the one where the guard is known to pass; the
// other side keeps ("is guarded by") the guard.
3036 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3037 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3038
3039 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3040 Instruction *AfterGuard = Guard->getNextNode();
// Duplication budget check mirrors the one in the threading paths above.
3041 unsigned Cost =
3042 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3043 if (Cost > BBDupThreshold)
3044 return false;
3045 // Duplicate all instructions before the guard and the guard itself to the
3046 // branch where implication is not proved.
// NOTE(review): line 3047 (the `BasicBlock *GuardedBlock =
// DuplicateInstructionsInSplitBetween(` opener) is missing from this
// excerpt.
3048 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3049 assert(GuardedBlock && "Could not create the guarded block?");
3050 // Duplicate all instructions before the guard in the unguarded branch.
3051 // Since we have successfully duplicated the guarded block and this block
3052 // has fewer instructions, we expect it to succeed.
// NOTE(review): line 3053 (the matching opener assigning
// `UnguardedBlock`) is missing from this excerpt.
3054 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3055 assert(UnguardedBlock && "Could not create the unguarded block?");
3056 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3057 << GuardedBlock->getName() << "\n");
3058 // Some instructions before the guard may still have uses. For them, we need
3059 // to create Phi nodes merging their copies in both guarded and unguarded
3060 // branches. Those instructions that have no uses can be just removed.
// NOTE(review): line 3061 (declaration of the `ToRemove` vector) is
// missing from this excerpt.
3062 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3063 if (!isa<PHINode>(&*BI))
3064 ToRemove.push_back(&*BI);
3065
3066 Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
3067 assert(InsertionPoint && "Empty block?");
3068 // Substitute with Phis & remove.
// Iterate in reverse so each instruction's uses (which occur later in the
// block) are rewritten before it is erased.
3069 for (auto *Inst : reverse(ToRemove)) {
3070 if (!Inst->use_empty()) {
3071 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3072 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3073 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3074 NewPN->insertBefore(InsertionPoint);
3075 Inst->replaceAllUsesWith(NewPN);
3076 }
3077 Inst->eraseFromParent();
3078 }
3079 return true;
3080}
3081
// Returns the set of analyses this pass reports as preserved after making
// changes (used by runExternalAnalysis to invalidate everything else).
3082 PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
// NOTE(review): lines 3083-3085 (construction of `PA` and its
// `PA.preserve<...>()` calls) are missing from this excerpt.
3086
3087 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3088 // TODO: Would be nice to verify BPI/BFI consistency as well.
3089 return PA;
3090 }
3091
// Run an analysis through the FunctionAnalysisManager on behalf of this
// pass, first flushing pending CFG updates and invalidating any cached
// results the pass's changes may have stale-ed.
3092 template <typename AnalysisT>
3093 typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3094 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3095
3096 // If there were no changes since last call to 'runExternalAnalysis' then all
3097 // analysis is either up to date or explicitly invalidated. Just go ahead and
3098 // run the "external" analysis.
3099 if (!ChangedSinceLastAnalysisUpdate) {
3100 assert(!DTU->hasPendingUpdates() &&
3101 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3102 // Run the "external" analysis.
3103 return &FAM->getResult<AnalysisT>(*F);
3104 }
// Changes happened since the last sync: clear the flag, then bring the
// analysis manager's view of the function up to date before querying it.
3105 ChangedSinceLastAnalysisUpdate = false;
3106
3107 auto PA = getPreservedAnalysis();
3108 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3109 // as preserved.
3110 PA.preserve<BranchProbabilityAnalysis>();
3111 PA.preserve<BlockFrequencyAnalysis>();
3112 // Report everything except explicitly preserved as invalid.
3113 FAM->invalidate(*F, PA);
3114 // Update DT/PDT.
3115 DTU->flush();
3116 // Make sure DT/PDT are valid before running "external" analysis.
3117 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
// NOTE(review): line 3120 (the PostDominatorTree verification-level
// argument closing this assert) is missing from this excerpt.
3118 assert((!DTU->hasPostDomTree() ||
3119 DTU->getPostDomTree().verify(
3121 // Run the "external" analysis.
3122 auto *Result = &FAM->getResult<AnalysisT>(*F);
3123 // Update analysis JumpThreading depends on and not explicitly preserved.
3124 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3125 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3126 AA = &FAM->getResult<AAManager>(*F);
3127
3128 return Result;
3129}
3130
// Lazily initialize the BPI member from the analysis manager's cache; may
// yield nullptr when no cached result exists (callers handle that).
3131 BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3132 if (!BPI) {
3133 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
// NOTE(review): line 3134 (the `BPI = FAM->getCachedResult<...>` call)
// is missing from this excerpt.
3135 }
3136 return *BPI;
3137 }
3138
// Lazily initialize the BFI member from the analysis manager's cache; may
// yield nullptr when no cached result exists (callers handle that).
3139 BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3140 if (!BFI) {
3141 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
// NOTE(review): line 3142 (the `BFI = FAM->getCachedResult<...>` call)
// is missing from this excerpt.
3143 }
3144 return *BFI;
3145 }
3146
3147// Important note on validity of BPI/BFI. JumpThreading tries to preserve
3148// BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3149// Otherwise, new instance of BPI/BFI is created (up to date by definition).
3150BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3151 auto *Res = getBPI();
3152 if (Res)
3153 return Res;
3154
3155 if (Force)
3156 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3157
3158 return *BPI;
3159}
3160
3161BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3162 auto *Res = getBFI();
3163 if (Res)
3164 return Res;
3165
3166 if (Force)
3167 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3168
3169 return *BFI;
3170}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
ReachingDefAnalysis InstSet & ToRemove
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< bool > PrintLVIAfterJumpThreading("print-lvi-after-jump-threading", cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap< Instruction *, Value * > &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:526
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
ppc ctr loops verify
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:793
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:337
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:393
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:516
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:88
const Instruction & front() const
Definition: BasicBlock.h:347
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:140
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:318
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:296
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:526
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:533
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:145
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:353
The address of a basic block.
Definition: Constants.h:874
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1762
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1357
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:701
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:801
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1957
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2560
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2357
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:203
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:176
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:197
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:847
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition: Constant.h:41
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:708
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
This represents the llvm.dbg.value instruction.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This class represents a freeze function that returns random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition: Function.h:747
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:645
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:79
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:89
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:392
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:71
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1610
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
Definition: Instruction.h:90
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1596
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:195
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Definition: Instruction.cpp:99
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:389
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
bool isSpecialTerminator() const
Definition: Instruction.h:205
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
DenseMap< Instruction *, Value * > cloneInstructions(BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, std::optional< BlockFrequencyInfo * > BFI, std::optional< BranchProbabilityInfo * > BPI)
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond)
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, DenseSet< Value * > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
bool processImpliedCondition(BasicBlock *BB)
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, DenseMap< Instruction *, Value * > &ValueMapping)
Update the SSA form.
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:31
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Tristate
This is used to return true/false/dunno results.
Definition: LazyValueInfo.h:64
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRange constraint that is known to hold for the specified value on the specified edg...
Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
An instruction for reading from memory.
Definition: Instructions.h:177
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:229
bool isUnordered() const
Definition: Instructions.h:258
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:239
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:220
static LocationSize precise(uint64_t Value)
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:950
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
iterator begin()
Definition: MapVector.h:69
Representation for a specific memory location.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:175
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:254
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:39
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:188
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:53
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
Definition: SSAUpdater.cpp:200
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:70
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:93
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:708
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
'undef' values are things that do not have specified contents.
Definition: Constants.h:1368
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1724
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
See the file comment.
Definition: ValueMap.h:84
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1061
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:688
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
self_iterator getIterator()
Definition: ilist_node.h:82
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:988
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:982
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1727
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:126
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:2918
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:453
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:717
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:583
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1062
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instructions has valid Branch Weight Metadata.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:99
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:2800
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:757
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1946
Value * simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2021
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1884
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1612
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
unsigned pred_size(const MachineBasicBlock *BB)
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:34
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:651
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1464