LLVM 19.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/CFG.h"
30#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/InstrTypes.h"
47#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/MDBuilder.h"
53#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Module.h"
55#include "llvm/IR/PassManager.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/Use.h"
60#include "llvm/IR/Value.h"
65#include "llvm/Support/Debug.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <memory>
77#include <utility>
78
79using namespace llvm;
80using namespace jumpthreading;
81
82#define DEBUG_TYPE "jump-threading"
83
84STATISTIC(NumThreads, "Number of jumps threaded");
85STATISTIC(NumFolds, "Number of terminators folded");
86STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
87
89BBDuplicateThreshold("jump-threading-threshold",
90 cl::desc("Max block size to duplicate for jump threading"),
92
95 "jump-threading-implication-search-threshold",
96 cl::desc("The number of predecessors to search for a stronger "
97 "condition to use to thread over a weaker condition"),
99
101 "jump-threading-phi-threshold",
102 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
103 cl::Hidden);
104
106 "jump-threading-across-loop-headers",
107 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
108 cl::init(false), cl::Hidden);
109
111 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
112}
113
114// Update branch probability information according to conditional
115// branch probability. This is usually made possible for cloned branches
116// in inline instances by the context specific profile in the caller.
117// For instance,
118//
119// [Block PredBB]
120// [Branch PredBr]
121// if (t) {
122// Block A;
123// } else {
124// Block B;
125// }
126//
127// [Block BB]
128// cond = PN([true, %A], [..., %B]); // PHI node
129// [Branch CondBr]
130// if (cond) {
131// ... // P(cond == true) = 1%
132// }
133//
134// Here we know that when block A is taken, cond must be true, which means
135// P(cond == true | A) = 1
136//
137// Given that P(cond == true) = P(cond == true | A) * P(A) +
138// P(cond == true | B) * P(B)
139// we get:
140// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
141//
142// which gives us:
143// P(A) is less than or equal to P(cond == true), i.e.
144// P(t == true) <= P(cond == true)
145//
146// In other words, if we know P(cond == true) is unlikely, we know
147// that P(t == true) is also unlikely.
148//
150 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
151 if (!CondBr)
152 return;
153
154 uint64_t TrueWeight, FalseWeight;
155 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
156 return;
157
158 if (TrueWeight + FalseWeight == 0)
159 // Zero branch_weights do not give a hint for getting branch probabilities.
160 // Technically it would result in division by zero denominator, which is
161 // TrueWeight + FalseWeight.
162 return;
163
164 // Returns the outgoing edge of the dominating predecessor block
165 // that leads to the PhiNode's incoming block:
166 auto GetPredOutEdge =
167 [](BasicBlock *IncomingBB,
168 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
169 auto *PredBB = IncomingBB;
170 auto *SuccBB = PhiBB;
172 while (true) {
173 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
174 if (PredBr && PredBr->isConditional())
175 return {PredBB, SuccBB};
176 Visited.insert(PredBB);
177 auto *SinglePredBB = PredBB->getSinglePredecessor();
178 if (!SinglePredBB)
179 return {nullptr, nullptr};
180
181 // Stop searching when SinglePredBB has been visited. It means we see
182 // an unreachable loop.
183 if (Visited.count(SinglePredBB))
184 return {nullptr, nullptr};
185
186 SuccBB = PredBB;
187 PredBB = SinglePredBB;
188 }
189 };
190
191 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
192 Value *PhiOpnd = PN->getIncomingValue(i);
193 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
194
195 if (!CI || !CI->getType()->isIntegerTy(1))
196 continue;
197
200 TrueWeight, TrueWeight + FalseWeight)
202 FalseWeight, TrueWeight + FalseWeight));
203
204 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
205 if (!PredOutEdge.first)
206 return;
207
208 BasicBlock *PredBB = PredOutEdge.first;
209 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
210 if (!PredBr)
211 return;
212
213 uint64_t PredTrueWeight, PredFalseWeight;
214 // FIXME: We currently only set the profile data when it is missing.
215 // With PGO, this can be used to refine even existing profile data with
216 // context information. This needs to be done after more performance
217 // testing.
218 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
219 continue;
220
221 // We cannot infer anything useful when BP >= 50%, because BP is the
222 // upper bound probability value.
223 if (BP >= BranchProbability(50, 100))
224 continue;
225
226 uint32_t Weights[2];
227 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
228 Weights[0] = BP.getNumerator();
229 Weights[1] = BP.getCompl().getNumerator();
230 } else {
231 Weights[0] = BP.getCompl().getNumerator();
232 Weights[1] = BP.getNumerator();
233 }
234 setBranchWeights(*PredBr, Weights);
235 }
236}
237
240 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
241 // Jump threading makes no sense for targets with divergent control flow.
243 return PreservedAnalyses::all();
244 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
245 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
246 auto &AA = AM.getResult<AAManager>(F);
247 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
248
249 bool Changed =
250 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
251 std::make_unique<DomTreeUpdater>(
253 std::nullopt, std::nullopt);
254
255 if (!Changed)
256 return PreservedAnalyses::all();
257
258
260
261#if defined(EXPENSIVE_CHECKS)
262 assert(getDomTreeUpdater()->getDomTree().verify(
263 DominatorTree::VerificationLevel::Full) &&
264 "DT broken after JumpThreading");
265 assert((!getDomTreeUpdater()->hasPostDomTree() ||
266 getDomTreeUpdater()->getPostDomTree().verify(
268 "PDT broken after JumpThreading");
269#else
270 assert(getDomTreeUpdater()->getDomTree().verify(
271 DominatorTree::VerificationLevel::Fast) &&
272 "DT broken after JumpThreading");
273 assert((!getDomTreeUpdater()->hasPostDomTree() ||
274 getDomTreeUpdater()->getPostDomTree().verify(
276 "PDT broken after JumpThreading");
277#endif
278
279 return getPreservedAnalysis();
280}
281
283 TargetLibraryInfo *TLI_,
285 AliasAnalysis *AA_,
286 std::unique_ptr<DomTreeUpdater> DTU_,
287 std::optional<BlockFrequencyInfo *> BFI_,
288 std::optional<BranchProbabilityInfo *> BPI_) {
289 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
290 F = &F_;
291 FAM = FAM_;
292 TLI = TLI_;
293 TTI = TTI_;
294 LVI = LVI_;
295 AA = AA_;
296 DTU = std::move(DTU_);
297 BFI = BFI_;
298 BPI = BPI_;
299 auto *GuardDecl = F->getParent()->getFunction(
300 Intrinsic::getName(Intrinsic::experimental_guard));
301 HasGuards = GuardDecl && !GuardDecl->use_empty();
302
303 // Reduce the number of instructions duplicated when optimizing strictly for
304 // size.
305 if (BBDuplicateThreshold.getNumOccurrences())
306 BBDupThreshold = BBDuplicateThreshold;
307 else if (F->hasFnAttribute(Attribute::MinSize))
308 BBDupThreshold = 3;
309 else
310 BBDupThreshold = DefaultBBDupThreshold;
311
312 // JumpThreading must not process blocks unreachable from entry. It's a
313 // waste of compute time and can potentially lead to hangs.
315 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
316 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
317 DominatorTree &DT = DTU->getDomTree();
318 for (auto &BB : *F)
319 if (!DT.isReachableFromEntry(&BB))
320 Unreachable.insert(&BB);
321
324
325 bool EverChanged = false;
326 bool Changed;
327 do {
328 Changed = false;
329 for (auto &BB : *F) {
330 if (Unreachable.count(&BB))
331 continue;
332 while (processBlock(&BB)) // Thread all of the branches we can over BB.
333 Changed = ChangedSinceLastAnalysisUpdate = true;
334
335 // Jump threading may have introduced redundant debug values into BB
336 // which should be removed.
337 if (Changed)
339
340 // Stop processing BB if it's the entry or is now deleted. The following
341 // routines attempt to eliminate BB and locating a suitable replacement
342 // for the entry is non-trivial.
343 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
344 continue;
345
346 if (pred_empty(&BB)) {
347 // When processBlock makes BB unreachable it doesn't bother to fix up
348 // the instructions in it. We must remove BB to prevent invalid IR.
349 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
350 << "' with terminator: " << *BB.getTerminator()
351 << '\n');
352 LoopHeaders.erase(&BB);
353 LVI->eraseBlock(&BB);
354 DeleteDeadBlock(&BB, DTU.get());
355 Changed = ChangedSinceLastAnalysisUpdate = true;
356 continue;
357 }
358
359 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
360 // is "almost empty", we attempt to merge BB with its sole successor.
361 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
362 if (BI && BI->isUnconditional()) {
363 BasicBlock *Succ = BI->getSuccessor(0);
364 if (
365 // The terminator must be the only non-phi instruction in BB.
366 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
367 // Don't alter Loop headers and latches to ensure another pass can
368 // detect and transform nested loops later.
369 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
372 // BB is valid for cleanup here because we passed in DTU. F remains
373 // BB's parent until a DTU->getDomTree() event.
374 LVI->eraseBlock(&BB);
375 Changed = ChangedSinceLastAnalysisUpdate = true;
376 }
377 }
378 }
379 EverChanged |= Changed;
380 } while (Changed);
381
382 LoopHeaders.clear();
383 return EverChanged;
384}
385
386// Replace uses of Cond with ToVal when safe to do so. If all uses are
387// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
388// because we may incorrectly replace uses when guards/assumes are uses of
389// `Cond` and we used the guards/assume to reason about the `Cond` value
390// at the end of block. RAUW unconditionally replaces all uses
391// including the guards/assumes themselves and the uses before the
392// guard/assume.
394 BasicBlock *KnownAtEndOfBB) {
395 bool Changed = false;
396 assert(Cond->getType() == ToVal->getType());
397 // We can unconditionally replace all uses in non-local blocks (i.e. uses
398 // strictly dominated by BB), since LVI information is true from the
399 // terminator of BB.
400 if (Cond->getParent() == KnownAtEndOfBB)
401 Changed |= replaceNonLocalUsesWith(Cond, ToVal);
402 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
403 // Replace any debug-info record users of Cond with ToVal.
404 for (DPValue &DPV : DPValue::filter(I.getDbgValueRange()))
405 DPV.replaceVariableLocationOp(Cond, ToVal, true);
406
407 // Reached the Cond whose uses we are trying to replace, so there are no
408 // more uses.
409 if (&I == Cond)
410 break;
411 // We only replace uses in instructions that are guaranteed to reach the end
412 // of BB, where we know Cond is ToVal.
414 break;
415 Changed |= I.replaceUsesOfWith(Cond, ToVal);
416 }
417 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
418 Cond->eraseFromParent();
419 Changed = true;
420 }
421 return Changed;
422}
423
424/// Return the cost of duplicating a piece of this block from first non-phi
425/// and before StopAt instruction to thread across it. Stop scanning the block
426/// when exceeding the threshold. If duplication is impossible, returns ~0U.
428 BasicBlock *BB,
429 Instruction *StopAt,
430 unsigned Threshold) {
431 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
432
433 // Do not duplicate the BB if it has a lot of PHI nodes.
434 // If a threadable chain is too long then the number of PHI nodes can add up,
435 // leading to a substantial increase in compile time when rewriting the SSA.
436 unsigned PhiCount = 0;
437 Instruction *FirstNonPHI = nullptr;
438 for (Instruction &I : *BB) {
439 if (!isa<PHINode>(&I)) {
440 FirstNonPHI = &I;
441 break;
442 }
443 if (++PhiCount > PhiDuplicateThreshold)
444 return ~0U;
445 }
446
447 /// Ignore PHI nodes, these will be flattened when duplication happens.
448 BasicBlock::const_iterator I(FirstNonPHI);
449
450 // FIXME: THREADING will delete values that are just used to compute the
451 // branch, so they shouldn't count against the duplication cost.
452
453 unsigned Bonus = 0;
454 if (BB->getTerminator() == StopAt) {
455 // Threading through a switch statement is particularly profitable. If this
456 // block ends in a switch, decrease its cost to make it more likely to
457 // happen.
458 if (isa<SwitchInst>(StopAt))
459 Bonus = 6;
460
461 // The same holds for indirect branches, but slightly more so.
462 if (isa<IndirectBrInst>(StopAt))
463 Bonus = 8;
464 }
465
466 // Bump the threshold up so the early exit from the loop doesn't skip the
467 // terminator-based Size adjustment at the end.
468 Threshold += Bonus;
469
470 // Sum up the cost of each instruction until we get to the terminator. Don't
471 // include the terminator because the copy won't include it.
472 unsigned Size = 0;
473 for (; &*I != StopAt; ++I) {
474
475 // Stop scanning the block if we've reached the threshold.
476 if (Size > Threshold)
477 return Size;
478
479 // Bail out if this instruction gives back a token type, it is not possible
480 // to duplicate it if it is used outside this BB.
481 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
482 return ~0U;
483
484 // Blocks with NoDuplicate are modelled as having infinite cost, so they
485 // are never duplicated.
486 if (const CallInst *CI = dyn_cast<CallInst>(I))
487 if (CI->cannotDuplicate() || CI->isConvergent())
488 return ~0U;
489
492 continue;
493
494 // All other instructions count for at least one unit.
495 ++Size;
496
497 // Calls are more expensive. If they are non-intrinsic calls, we model them
498 // as having cost of 4. If they are a non-vector intrinsic, we model them
499 // as having cost of 2 total, and if they are a vector intrinsic, we model
500 // them as having cost 1.
501 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
502 if (!isa<IntrinsicInst>(CI))
503 Size += 3;
504 else if (!CI->getType()->isVectorTy())
505 Size += 1;
506 }
507 }
508
509 return Size > Bonus ? Size - Bonus : 0;
510}
511
512/// findLoopHeaders - We do not want jump threading to turn proper loop
513/// structures into irreducible loops. Doing this breaks up the loop nesting
514/// hierarchy and pessimizes later transformations. To prevent this from
515/// happening, we first have to find the loop headers. Here we approximate this
516/// by finding targets of backedges in the CFG.
517///
518/// Note that there definitely are cases when we want to allow threading of
519/// edges across a loop header. For example, threading a jump from outside the
520/// loop (the preheader) to an exit block of the loop is definitely profitable.
521/// It is also almost always profitable to thread backedges from within the loop
522/// to exit blocks, and is often profitable to thread backedges to other blocks
523/// within the loop (forming a nested loop). This simple analysis is not rich
524/// enough to track all of these properties and keep it up-to-date as the CFG
525/// mutates, so we don't allow any of these transformations.
528 FindFunctionBackedges(F, Edges);
529
530 for (const auto &Edge : Edges)
531 LoopHeaders.insert(Edge.second);
532}
533
534/// getKnownConstant - Helper method to determine if we can thread over a
535/// terminator with the given value as its condition, and if so what value to
536/// use for that. What kind of value this is depends on whether we want an
537/// integer or a block address, but an undef is always accepted.
538/// Returns null if Val is null or not an appropriate constant.
540 if (!Val)
541 return nullptr;
542
543 // Undef is "known" enough.
544 if (UndefValue *U = dyn_cast<UndefValue>(Val))
545 return U;
546
547 if (Preference == WantBlockAddress)
548 return dyn_cast<BlockAddress>(Val->stripPointerCasts());
549
550 return dyn_cast<ConstantInt>(Val);
551}
552
553/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
554/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
555/// in any of our predecessors. If so, return the known list of value and pred
556/// BB in the result vector.
557///
558/// This returns true if there were any known values.
560 Value *V, BasicBlock *BB, PredValueInfo &Result,
561 ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
562 Instruction *CxtI) {
563 const DataLayout &DL = BB->getModule()->getDataLayout();
564
565 // This method walks up use-def chains recursively. Because of this, we could
566 // get into an infinite loop going around loops in the use-def chain. To
567 // prevent this, keep track of which values we've already visited (in
568 // RecursionSet) and terminate the search if we loop back to one of them.
569 if (!RecursionSet.insert(V).second)
570 return false;
571
572 // If V is a constant, then it is known in all predecessors.
573 if (Constant *KC = getKnownConstant(V, Preference)) {
574 for (BasicBlock *Pred : predecessors(BB))
575 Result.emplace_back(KC, Pred);
576
577 return !Result.empty();
578 }
579
580 // If V is a non-instruction value, or an instruction in a different block,
581 // then it can't be derived from a PHI.
582 Instruction *I = dyn_cast<Instruction>(V);
583 if (!I || I->getParent() != BB) {
584
585 // Okay, if this is a live-in value, see if it has a known value at any
586 // edge from our predecessors.
587 for (BasicBlock *P : predecessors(BB)) {
588 using namespace PatternMatch;
589 // If the value is known by LazyValueInfo to be a constant in a
590 // predecessor, use that information to try to thread this block.
591 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
592 // If I is a non-local compare-with-constant instruction, use the richer
593 // 'getPredicateOnEdge' method. This would be able to handle value
594 // inequalities better, for example if the compare is "X < 4" and "X < 3"
595 // is known true but "X < 4" itself is not available.
597 Value *Val;
598 Constant *Cst;
599 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
600 auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
601 if (Res != LazyValueInfo::Unknown)
602 PredCst = ConstantInt::getBool(V->getContext(), Res);
603 }
604 if (Constant *KC = getKnownConstant(PredCst, Preference))
605 Result.emplace_back(KC, P);
606 }
607
608 return !Result.empty();
609 }
610
611 /// If I is a PHI node, then we know the incoming values for any constants.
612 if (PHINode *PN = dyn_cast<PHINode>(I)) {
613 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
614 Value *InVal = PN->getIncomingValue(i);
615 if (Constant *KC = getKnownConstant(InVal, Preference)) {
616 Result.emplace_back(KC, PN->getIncomingBlock(i));
617 } else {
618 Constant *CI = LVI->getConstantOnEdge(InVal,
619 PN->getIncomingBlock(i),
620 BB, CxtI);
621 if (Constant *KC = getKnownConstant(CI, Preference))
622 Result.emplace_back(KC, PN->getIncomingBlock(i));
623 }
624 }
625
626 return !Result.empty();
627 }
628
629 // Handle Cast instructions.
630 if (CastInst *CI = dyn_cast<CastInst>(I)) {
631 Value *Source = CI->getOperand(0);
632 PredValueInfoTy Vals;
633 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
634 RecursionSet, CxtI);
635 if (Vals.empty())
636 return false;
637
638 // Convert the known values.
639 for (auto &Val : Vals)
640 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
641 CI->getType(), DL))
642 Result.emplace_back(Folded, Val.second);
643
644 return !Result.empty();
645 }
646
647 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
648 Value *Source = FI->getOperand(0);
649 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
650 RecursionSet, CxtI);
651
652 erase_if(Result, [](auto &Pair) {
653 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
654 });
655
656 return !Result.empty();
657 }
658
659 // Handle some boolean conditions.
660 if (I->getType()->getPrimitiveSizeInBits() == 1) {
661 using namespace PatternMatch;
662 if (Preference != WantInteger)
663 return false;
664 // X | true -> true
665 // X & false -> false
666 Value *Op0, *Op1;
667 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
668 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
669 PredValueInfoTy LHSVals, RHSVals;
670
672 RecursionSet, CxtI);
674 RecursionSet, CxtI);
675
676 if (LHSVals.empty() && RHSVals.empty())
677 return false;
678
679 ConstantInt *InterestingVal;
680 if (match(I, m_LogicalOr()))
681 InterestingVal = ConstantInt::getTrue(I->getContext());
682 else
683 InterestingVal = ConstantInt::getFalse(I->getContext());
684
685 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
686
687 // Scan for the sentinel. If we find an undef, force it to the
688 // interesting value: x|undef -> true and x&undef -> false.
689 for (const auto &LHSVal : LHSVals)
690 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
691 Result.emplace_back(InterestingVal, LHSVal.second);
692 LHSKnownBBs.insert(LHSVal.second);
693 }
694 for (const auto &RHSVal : RHSVals)
695 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
696 // If we already inferred a value for this block on the LHS, don't
697 // re-add it.
698 if (!LHSKnownBBs.count(RHSVal.second))
699 Result.emplace_back(InterestingVal, RHSVal.second);
700 }
701
702 return !Result.empty();
703 }
704
705 // Handle the NOT form of XOR.
706 if (I->getOpcode() == Instruction::Xor &&
707 isa<ConstantInt>(I->getOperand(1)) &&
708 cast<ConstantInt>(I->getOperand(1))->isOne()) {
709 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
710 WantInteger, RecursionSet, CxtI);
711 if (Result.empty())
712 return false;
713
714 // Invert the known values.
715 for (auto &R : Result)
716 R.first = ConstantExpr::getNot(R.first);
717
718 return true;
719 }
720
721 // Try to simplify some other binary operator values.
722 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
723 if (Preference != WantInteger)
724 return false;
725 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
726 PredValueInfoTy LHSVals;
727 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
728 WantInteger, RecursionSet, CxtI);
729
730 // Try to use constant folding to simplify the binary operator.
731 for (const auto &LHSVal : LHSVals) {
732 Constant *V = LHSVal.first;
733 Constant *Folded =
734 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
735
736 if (Constant *KC = getKnownConstant(Folded, WantInteger))
737 Result.emplace_back(KC, LHSVal.second);
738 }
739 }
740
741 return !Result.empty();
742 }
743
744 // Handle compare with phi operand, where the PHI is defined in this block.
745 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
746 if (Preference != WantInteger)
747 return false;
748 Type *CmpType = Cmp->getType();
749 Value *CmpLHS = Cmp->getOperand(0);
750 Value *CmpRHS = Cmp->getOperand(1);
751 CmpInst::Predicate Pred = Cmp->getPredicate();
752
753 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
754 if (!PN)
755 PN = dyn_cast<PHINode>(CmpRHS);
756 // Do not perform phi translation across a loop header phi, because this
757 // may result in comparison of values from two different loop iterations.
758 // FIXME: This check is broken if LoopHeaders is not populated.
759 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
760 const DataLayout &DL = PN->getModule()->getDataLayout();
761 // We can do this simplification if any comparisons fold to true or false.
762 // See if any do.
763 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
764 BasicBlock *PredBB = PN->getIncomingBlock(i);
765 Value *LHS, *RHS;
766 if (PN == CmpLHS) {
767 LHS = PN->getIncomingValue(i);
768 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
769 } else {
770 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
771 RHS = PN->getIncomingValue(i);
772 }
773 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
774 if (!Res) {
775 if (!isa<Constant>(RHS))
776 continue;
777
778 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
779 auto LHSInst = dyn_cast<Instruction>(LHS);
780 if (LHSInst && LHSInst->getParent() == BB)
781 continue;
782
784 ResT = LVI->getPredicateOnEdge(Pred, LHS,
785 cast<Constant>(RHS), PredBB, BB,
786 CxtI ? CxtI : Cmp);
787 if (ResT == LazyValueInfo::Unknown)
788 continue;
789 Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
790 }
791
792 if (Constant *KC = getKnownConstant(Res, WantInteger))
793 Result.emplace_back(KC, PredBB);
794 }
795
796 return !Result.empty();
797 }
798
799 // If comparing a live-in value against a constant, see if we know the
800 // live-in value on any predecessors.
801 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
802 Constant *CmpConst = cast<Constant>(CmpRHS);
803
804 if (!isa<Instruction>(CmpLHS) ||
805 cast<Instruction>(CmpLHS)->getParent() != BB) {
806 for (BasicBlock *P : predecessors(BB)) {
807 // If the value is known by LazyValueInfo to be a constant in a
808 // predecessor, use that information to try to thread this block.
810 LVI->getPredicateOnEdge(Pred, CmpLHS,
811 CmpConst, P, BB, CxtI ? CxtI : Cmp);
812 if (Res == LazyValueInfo::Unknown)
813 continue;
814
815 Constant *ResC = ConstantInt::get(CmpType, Res);
816 Result.emplace_back(ResC, P);
817 }
818
819 return !Result.empty();
820 }
821
822 // InstCombine can fold some forms of constant range checks into
823 // (icmp (add (x, C1)), C2). See if we have such a thing with
824 // x as a live-in.
825 {
826 using namespace PatternMatch;
827
828 Value *AddLHS;
829 ConstantInt *AddConst;
830 if (isa<ConstantInt>(CmpConst) &&
831 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
832 if (!isa<Instruction>(AddLHS) ||
833 cast<Instruction>(AddLHS)->getParent() != BB) {
834 for (BasicBlock *P : predecessors(BB)) {
835 // If the value is known by LazyValueInfo to be a ConstantRange in
836 // a predecessor, use that information to try to thread this
837 // block.
839 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
840 // Propagate the range through the addition.
841 CR = CR.add(AddConst->getValue());
842
843 // Get the range where the compare returns true.
845 Pred, cast<ConstantInt>(CmpConst)->getValue());
846
847 Constant *ResC;
848 if (CmpRange.contains(CR))
849 ResC = ConstantInt::getTrue(CmpType);
850 else if (CmpRange.inverse().contains(CR))
851 ResC = ConstantInt::getFalse(CmpType);
852 else
853 continue;
854
855 Result.emplace_back(ResC, P);
856 }
857
858 return !Result.empty();
859 }
860 }
861 }
862
863 // Try to find a constant value for the LHS of a comparison,
864 // and evaluate it statically if we can.
865 PredValueInfoTy LHSVals;
866 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
867 WantInteger, RecursionSet, CxtI);
868
869 for (const auto &LHSVal : LHSVals) {
870 Constant *V = LHSVal.first;
871 Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
872 if (Constant *KC = getKnownConstant(Folded, WantInteger))
873 Result.emplace_back(KC, LHSVal.second);
874 }
875
876 return !Result.empty();
877 }
878 }
879
880 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
881 // Handle select instructions where at least one operand is a known constant
882 // and we can figure out the condition value for any predecessor block.
883 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
884 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
885 PredValueInfoTy Conds;
886 if ((TrueVal || FalseVal) &&
887 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
888 WantInteger, RecursionSet, CxtI)) {
889 for (auto &C : Conds) {
890 Constant *Cond = C.first;
891
892 // Figure out what value to use for the condition.
893 bool KnownCond;
894 if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
895 // A known boolean.
896 KnownCond = CI->isOne();
897 } else {
898 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
899 // Either operand will do, so be sure to pick the one that's a known
900 // constant.
901 // FIXME: Do this more cleverly if both values are known constants?
902 KnownCond = (TrueVal != nullptr);
903 }
904
905 // See if the select has a known constant value for this predecessor.
906 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
907 Result.emplace_back(Val, C.second);
908 }
909
910 return !Result.empty();
911 }
912 }
913
914 // If all else fails, see if LVI can figure out a constant value for us.
915 assert(CxtI->getParent() == BB && "CxtI should be in BB");
916 Constant *CI = LVI->getConstant(V, CxtI);
917 if (Constant *KC = getKnownConstant(CI, Preference)) {
918 for (BasicBlock *Pred : predecessors(BB))
919 Result.emplace_back(KC, Pred);
920 }
921
922 return !Result.empty();
923}
924
925/// getBestDestForJumpOnUndef - If we determine that the specified block ends
926/// in an undefined jump, decide which block is best to revector to.
927///
928/// Since we can pick an arbitrary destination, we pick the successor with the
929/// fewest predecessors. This should reduce the in-degree of the others.
931 Instruction *BBTerm = BB->getTerminator();
932 unsigned MinSucc = 0;
933 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
934 // Compute the successor with the minimum number of predecessors.
935 unsigned MinNumPreds = pred_size(TestBB);
936 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
937 TestBB = BBTerm->getSuccessor(i);
938 unsigned NumPreds = pred_size(TestBB);
939 if (NumPreds < MinNumPreds) {
940 MinSucc = i;
941 MinNumPreds = NumPreds;
942 }
943 }
944
945 return MinSucc;
946}
947
949 if (!BB->hasAddressTaken()) return false;
950
951 // If the block has its address taken, it may be a tree of dead constants
952 // hanging off of it. These shouldn't keep the block alive.
955 return !BA->use_empty();
956}
957
958 /// processBlock - If there are any predecessors whose control can be threaded
959 /// through to a successor, transform them now.
///
/// The function tries a sequence of simplifications in order and returns true
/// as soon as one of them reports a change. It returns false when the block is
/// dead, has an unsupported terminator, or nothing applied (see the
/// ConstantFolded special case below).
// NOTE(review): listing line 960 (the function signature) is missing from this
// extract.
961 // If the block is trivially dead, just return and let the caller nuke it.
962 // This simplifies other transformations.
963 if (DTU->isBBPendingDeletion(BB) ||
964 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
965 return false;
966
967 // If this block has a single predecessor, and if that pred has a single
968 // successor, merge the blocks. This encourages recursive jump threading
969 // because now the condition in this block can be threaded through
970 // predecessors of our predecessor block.
// NOTE(review): listing line 971 (the condition guarding this return) is
// missing from this extract.
972 return true;
973
// NOTE(review): listing line 974 (the condition guarding this return) is
// missing from this extract.
975 return true;
976
977 // Look if we can propagate guards to predecessors.
978 if (HasGuards && processGuards(BB))
979 return true;
980
981 // What kind of constant we're looking for.
982 ConstantPreference Preference = WantInteger;
983
984 // Look to see if the terminator is a conditional branch, switch or indirect
985 // branch, if not we can't thread it.
986 Value *Condition;
987 Instruction *Terminator = BB->getTerminator();
988 if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
989 // Can't thread an unconditional jump.
990 if (BI->isUnconditional()) return false;
991 Condition = BI->getCondition();
992 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
993 Condition = SI->getCondition();
994 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
995 // Can't thread indirect branch with no successors.
996 if (IB->getNumSuccessors() == 0) return false;
// For an indirect branch the "condition" is the branch address, so we look
// for block addresses rather than integers.
997 Condition = IB->getAddress()->stripPointerCasts();
998 Preference = WantBlockAddress;
999 } else {
1000 return false; // Must be an invoke or callbr.
1001 }
1002
1003 // Keep track if we constant folded the condition in this invocation.
1004 bool ConstantFolded = false;
1005
1006 // Run constant folding to see if we can reduce the condition to a simple
1007 // constant.
1008 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
1009 Value *SimpleVal =
// NOTE(review): listing line 1010 (the initializer of SimpleVal) is missing
// from this extract.
1011 if (SimpleVal) {
1012 I->replaceAllUsesWith(SimpleVal);
1013 if (isInstructionTriviallyDead(I, TLI))
1014 I->eraseFromParent();
1015 Condition = SimpleVal;
1016 ConstantFolded = true;
1017 }
1018 }
1019
1020 // If the terminator is branching on an undef or freeze undef, we can pick any
1021 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
// A freeze of undef is only handled when the branch is its sole use, because
// the freeze instruction is erased below.
1022 auto *FI = dyn_cast<FreezeInst>(Condition);
1023 if (isa<UndefValue>(Condition) ||
1024 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1025 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1026 std::vector<DominatorTree::UpdateType> Updates;
1027
1028 // Fold the branch/switch.
1029 Instruction *BBTerm = BB->getTerminator();
1030 Updates.reserve(BBTerm->getNumSuccessors());
// Detach BB from every successor except the chosen one and queue the matching
// dominator-tree edge deletions.
1031 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1032 if (i == BestSucc) continue;
1033 BasicBlock *Succ = BBTerm->getSuccessor(i);
1034 Succ->removePredecessor(BB, true);
1035 Updates.push_back({DominatorTree::Delete, BB, Succ});
1036 }
1037
1038 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1039 << "' folding undef terminator: " << *BBTerm << '\n');
1040 BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
1041 ++NumFolds;
1042 BBTerm->eraseFromParent();
1043 DTU->applyUpdatesPermissive(Updates);
1044 if (FI)
1045 FI->eraseFromParent();
1046 return true;
1047 }
1048
1049 // If the terminator of this block is branching on a constant, simplify the
1050 // terminator to an unconditional branch. This can occur due to threading in
1051 // other blocks.
1052 if (getKnownConstant(Condition, Preference)) {
1053 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1054 << "' folding terminator: " << *BB->getTerminator()
1055 << '\n');
1056 ++NumFolds;
1057 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1058 if (auto *BPI = getBPI())
1059 BPI->eraseBlock(BB);
1060 return true;
1061 }
1062
1063 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1064
1065 // All the rest of our checks depend on the condition being an instruction.
1066 if (!CondInst) {
1067 // FIXME: Unify this with code below.
1068 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1069 return true;
// Even if nothing was threaded, the constant folding above may already have
// changed the IR, so report that instead of a plain false.
1070 return ConstantFolded;
1071 }
1072
1073 // Some of the following optimization can safely work on the unfrozen cond.
1074 Value *CondWithoutFreeze = CondInst;
1075 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1076 CondWithoutFreeze = FI->getOperand(0);
1077
1078 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1079 // If we're branching on a conditional, LVI might be able to determine
1080 // its value at the branch instruction. We only handle comparisons
1081 // against a constant at this time.
1082 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
// NOTE(review): listing line 1083 (the start of the statement that defines
// Ret) is missing from this extract.
1084 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1085 CondConst, BB->getTerminator(),
1086 /*UseBlockValue=*/false);
1087 if (Ret != LazyValueInfo::Unknown) {
1088 // We can safely replace *some* uses of the CondInst if it has
1089 // exactly one value as returned by LVI. RAUW is incorrect in the
1090 // presence of guards and assumes, that have the `Cond` as the use. This
1091 // is because we use the guards/assume to reason about the `Cond` value
1092 // at the end of block, but RAUW unconditionally replaces all uses
1093 // including the guards/assumes themselves and the uses before the
1094 // guard/assume.
1095 auto *CI = Ret == LazyValueInfo::True ?
1096 ConstantInt::getTrue(CondCmp->getType()) :
1097 ConstantInt::getFalse(CondCmp->getType());
1098 if (replaceFoldableUses(CondCmp, CI, BB))
1099 return true;
1100 }
1101
1102 // We did not manage to simplify this branch, try to see whether
1103 // CondCmp depends on a known phi-select pattern.
1104 if (tryToUnfoldSelect(CondCmp, BB))
1105 return true;
1106 }
1107 }
1108
1109 if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1110 if (tryToUnfoldSelect(SI, BB))
1111 return true;
1112
1113 // Check for some cases that are worth simplifying. Right now we want to look
1114 // for loads that are used by a switch or by the condition for the branch. If
1115 // we see one, check to see if it's partially redundant. If so, insert a PHI
1116 // which can then be used to thread the values.
1117 Value *SimplifyValue = CondWithoutFreeze;
1118
// For a comparison against a constant, look at the value being compared.
1119 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1120 if (isa<Constant>(CondCmp->getOperand(1)))
1121 SimplifyValue = CondCmp->getOperand(0);
1122
1123 // TODO: There are other places where load PRE would be profitable, such as
1124 // more complex comparisons.
1125 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
// NOTE(review): listing line 1126 (the nested condition guarding this return)
// is missing from this extract.
1127 return true;
1128
1129 // Before threading, try to propagate profile data backwards:
1130 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1131 if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
// NOTE(review): listing line 1132 (the statement executed under the two
// conditions above) is missing from this extract.
1133
1134 // Handle a variety of cases where we are branching on something derived from
1135 // a PHI node in the current block. If we can prove that any predecessors
1136 // compute a predictable value based on a PHI node, thread those predecessors.
1137 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1138 return true;
1139
1140 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1141 // the current block, see if we can simplify.
1142 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1143 if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1144 return processBranchOnPHI(PN);
1145
1146 // If this is an otherwise-unfoldable branch on an XOR, see if we can simplify.
1147 if (CondInst->getOpcode() == Instruction::Xor &&
1148 CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1149 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1150
1151 // Search for a stronger dominating condition that can be used to simplify a
1152 // conditional branch leaving BB.
// NOTE(review): listing line 1153 (the condition guarding this return) is
// missing from this extract.
1154 return true;
1155
1156 return false;
1157}
1158
// Tries to fold BB's conditional branch using the branch conditions of the
// blocks above it. Starting at BB's single predecessor and walking up through
// single predecessors (at most ImplicationSearchThreshold steps), it asks
// isImpliedCondition whether the predecessor's branch condition, together with
// the edge that was taken, decides BB's condition. On success the conditional
// branch is replaced by an unconditional one and true is returned. Every path
// that returns false leaves the IR untouched.
// NOTE(review): listing line 1159 (the function signature) is missing from
// this extract.
1160 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1161 if (!BI || !BI->isConditional())
1162 return false;
1163
1164 Value *Cond = BI->getCondition();
1165 // Assuming that predecessor's branch was taken, if pred's branch condition
1166 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1167 // freeze(Cond) is either true or a nondeterministic value.
1168 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1169 // without affecting other instructions.
1170 auto *FICond = dyn_cast<FreezeInst>(Cond);
1171 if (FICond && FICond->hasOneUse())
1172 Cond = FICond->getOperand(0);
1173 else
// Either not a freeze or a freeze with other users: clear the pointer so the
// freeze is neither matched nor erased below.
1174 FICond = nullptr;
1175
1176 BasicBlock *CurrentBB = BB;
1177 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1178 unsigned Iter = 0;
1179
1180 auto &DL = BB->getModule()->getDataLayout();
1181
1182 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1183 auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1184 if (!PBI || !PBI->isConditional())
1185 return false;
// CurrentPred must branch directly to CurrentBB on one of its two edges.
1186 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1187 return false;
1188
// Successor 0 is the taken-when-true edge, so this records whether CurrentBB
// is reached with the predecessor's condition being true.
1189 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1190 std::optional<bool> Implication =
1191 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1192
1193 // If the branch condition of BB (which is Cond) and CurrentPred are
1194 // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1195 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1196 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1197 FICond->getOperand(0))
1198 Implication = CondIsTrue;
1199 }
1200
1201 if (Implication) {
// Keep the successor selected by the implied value, drop the other one, and
// replace the conditional branch with an unconditional branch.
1202 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1203 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1204 RemoveSucc->removePredecessor(BB);
1205 BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
1206 UncondBI->setDebugLoc(BI->getDebugLoc());
1207 ++NumFolds;
1208 BI->eraseFromParent();
1209 if (FICond)
1210 FICond->eraseFromParent();
1211
1212 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1213 if (auto *BPI = getBPI())
1214 BPI->eraseBlock(BB);
1215 return true;
1216 }
// No luck with this predecessor; continue one level further up the chain.
1217 CurrentBB = CurrentPred;
1218 CurrentPred = CurrentBB->getSinglePredecessor();
1219 }
1220
1221 return false;
1222}
1223
1224 /// Return true if Op is an instruction defined in the given block.
/// Values that are not instructions always yield false.
// NOTE(review): listing line 1225 (the function signature) is missing from
// this extract; it is called in this file as isOpDefinedInBlock(Value, Block).
1226 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1227 if (OpInst->getParent() == BB)
1228 return true;
1229 return false;
1230}
1231
1232 /// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1233 /// redundant load instruction, eliminate it by replacing it with a PHI node.
1234 /// This is an important optimization that encourages jump threading, and needs
1235 /// to be run interlaced with other jump threading tasks.
///
/// Returns true if LoadI was replaced (by a locally available value or by a
/// new PHI) and erased; returns false if the load was left in place.
// NOTE(review): listing line 1236 (the function signature) is missing from
// this extract.
1237 // Don't hack volatile and ordered loads.
1238 if (!LoadI->isUnordered()) return false;
1239
1240 // If the load is defined in a block with exactly one predecessor, it can't be
1241 // partially redundant.
1242 BasicBlock *LoadBB = LoadI->getParent();
1243 if (LoadBB->getSinglePredecessor())
1244 return false;
1245
1246 // If the load is defined in an EH pad, it can't be partially redundant,
1247 // because the edges between the invoke and the EH pad cannot have other
1248 // instructions between them.
1249 if (LoadBB->isEHPad())
1250 return false;
1251
1252 Value *LoadedPtr = LoadI->getOperand(0);
1253
1254 // If the loaded operand is defined in the LoadBB and it's not a phi,
1255 // it can't be available in predecessors.
1256 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1257 return false;
1258
1259 // Scan a few instructions up from the load, to see if it is obviously live at
1260 // the entry to its block.
1261 BasicBlock::iterator BBIt(LoadI);
1262 bool IsLoadCSE;
1263 BatchAAResults BatchAA(*AA);
1264 // The dominator tree is updated lazily and may not be valid at this point.
1265 BatchAA.disableDominatorTree();
1266 if (Value *AvailableVal = FindAvailableLoadedValue(
1267 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1268 // If the value of the load is locally available within the block, just use
1269 // it. This frequently occurs for reg2mem'd allocas.
1270
1271 if (IsLoadCSE) {
1272 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1273 combineMetadataForCSE(NLoadI, LoadI, false);
1274 LVI->forgetValue(NLoadI);
1275 };
1276
1277 // If the returned value is the load itself, replace with poison. This can
1278 // only happen in dead loops.
1279 if (AvailableVal == LoadI)
1280 AvailableVal = PoisonValue::get(LoadI->getType());
1281 if (AvailableVal->getType() != LoadI->getType())
1282 AvailableVal = CastInst::CreateBitOrPointerCast(
1283 AvailableVal, LoadI->getType(), "", LoadI);
1284 LoadI->replaceAllUsesWith(AvailableVal);
1285 LoadI->eraseFromParent();
1286 return true;
1287 }
1288
1289 // Otherwise, if we scanned the whole block and got to the top of the block,
1290 // we know the block is locally transparent to the load. If not, something
1291 // might clobber its value.
1292 if (BBIt != LoadBB->begin())
1293 return false;
1294
1295 // If all of the loads and stores that feed the value have the same AA tags,
1296 // then we can propagate them onto any newly inserted loads.
1297 AAMDNodes AATags = LoadI->getAAMetadata();
1298
1299 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1300
1301 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1302
1303 AvailablePredsTy AvailablePreds;
1304 BasicBlock *OneUnavailablePred = nullptr;
// NOTE(review): listing line 1305 is missing from this extract; it presumably
// declares CSELoads, which is used below.
1306
1307 // If we got here, the loaded value is transparent through to the start of the
1308 // block. Check to see if it is available in any of the predecessor blocks.
1309 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1310 // If we already scanned this predecessor, skip it.
1311 if (!PredsScanned.insert(PredBB).second)
1312 continue;
1313
1314 BBIt = PredBB->end();
1315 unsigned NumScanedInst = 0;
1316 Value *PredAvailable = nullptr;
1317 // NOTE: We don't CSE load that is volatile or anything stronger than
1318 // unordered, that should have been checked when we entered the function.
1319 assert(LoadI->isUnordered() &&
1320 "Attempting to CSE volatile or atomic loads");
1321 // If this is a load on a phi pointer, phi-translate it and search
1322 // for available load/store to the pointer in predecessors.
1323 Type *AccessTy = LoadI->getType();
1324 const auto &DL = LoadI->getModule()->getDataLayout();
1325 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1326 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1327 AATags);
1328 PredAvailable = findAvailablePtrLoadStore(
1329 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1330 &BatchAA, &IsLoadCSE, &NumScanedInst);
1331
1332 // If PredBB has a single predecessor, continue scanning through the
1333 // single predecessor.
// The walk continues only while the previous scan reached the top of its block
// and the overall DefMaxInstsToScan budget is not yet used up.
1334 BasicBlock *SinglePredBB = PredBB;
1335 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1336 NumScanedInst < DefMaxInstsToScan) {
1337 SinglePredBB = SinglePredBB->getSinglePredecessor();
1338 if (SinglePredBB) {
1339 BBIt = SinglePredBB->end();
1340 PredAvailable = findAvailablePtrLoadStore(
1341 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1342 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1343 &NumScanedInst);
1344 }
1345 }
1346
1347 if (!PredAvailable) {
1348 OneUnavailablePred = PredBB;
1349 continue;
1350 }
1351
1352 if (IsLoadCSE)
1353 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1354
1355 // If so, this load is partially redundant. Remember this info so that we
1356 // can create a PHI node.
1357 AvailablePreds.emplace_back(PredBB, PredAvailable);
1358 }
1359
1360 // If the loaded value isn't available in any predecessor, it isn't partially
1361 // redundant.
1362 if (AvailablePreds.empty()) return false;
1363
1364 // Okay, the loaded value is available in at least one (and maybe all!)
1365 // predecessors. If the value is unavailable in more than one unique
1366 // predecessor, we want to insert a merge block for those common predecessors.
1367 // This ensures that we only have to insert one reload, thus not increasing
1368 // code size.
1369 BasicBlock *UnavailablePred = nullptr;
1370
1371 // If the value is unavailable in one of predecessors, we will end up
1372 // inserting a new instruction into them. It is only valid if all the
1373 // instructions before LoadI are guaranteed to pass execution to its
1374 // successor, or if LoadI is safe to speculate.
1375 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1376 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1377 // It requires dominator tree analysis, so for this simple case it is
1378 // overkill.
// NOTE(review): listing lines 1380 and 1382 (the rest of this condition and
// the per-instruction check inside the loop) are missing from this extract.
1379 if (PredsScanned.size() != AvailablePreds.size() &&
1381 for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1383 return false;
1384
1385 // If there is exactly one predecessor where the value is unavailable, the
1386 // already computed 'OneUnavailablePred' block is it. If it ends in an
1387 // unconditional branch, we know that it isn't a critical edge.
1388 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1389 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1390 UnavailablePred = OneUnavailablePred;
1391 } else if (PredsScanned.size() != AvailablePreds.size()) {
1392 // Otherwise, we had multiple unavailable predecessors or we had a critical
1393 // edge from the one.
1394 SmallVector<BasicBlock*, 8> PredsToSplit;
1395 SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
1396
1397 for (const auto &AvailablePred : AvailablePreds)
1398 AvailablePredSet.insert(AvailablePred.first);
1399
1400 // Add all the unavailable predecessors to the PredsToSplit list.
1401 for (BasicBlock *P : predecessors(LoadBB)) {
1402 // If the predecessor is an indirect goto, we can't split the edge.
1403 if (isa<IndirectBrInst>(P->getTerminator()))
1404 return false;
1405
1406 if (!AvailablePredSet.count(P))
1407 PredsToSplit.push_back(P);
1408 }
1409
1410 // Split them out to their own block.
1411 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1412 }
1413
1414 // If the value isn't available in all predecessors, then there will be
1415 // exactly one where it isn't available. Insert a load on that edge and add
1416 // it to the AvailablePreds list.
1417 if (UnavailablePred) {
1418 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1419 "Can't handle critical edge here!");
// The new load copies LoadI's type, alignment, ordering and sync scope, and is
// placed just before UnavailablePred's terminator.
1420 LoadInst *NewVal = new LoadInst(
1421 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1422 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1423 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1424 UnavailablePred->getTerminator());
1425 NewVal->setDebugLoc(LoadI->getDebugLoc());
1426 if (AATags)
1427 NewVal->setAAMetadata(AATags);
1428
1429 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1430 }
1431
1432 // Now we know that each predecessor of this block has a value in
1433 // AvailablePreds, sort them for efficient access as we're walking the preds.
1434 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1435
1436 // Create a PHI node at the start of the block for the PRE'd load value.
1437 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1438 PN->insertBefore(LoadBB->begin());
1439 PN->takeName(LoadI);
1440 PN->setDebugLoc(LoadI->getDebugLoc());
1441
1442 // Insert new entries into the PHI for each predecessor. A single block may
1443 // have multiple entries here.
1444 for (BasicBlock *P : predecessors(LoadBB)) {
// Binary search for P's entry in the sorted list (the null Value makes the
// probe compare lowest among entries for P).
1445 AvailablePredsTy::iterator I =
1446 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1447
1448 assert(I != AvailablePreds.end() && I->first == P &&
1449 "Didn't find entry for predecessor!");
1450
1451 // If we have an available predecessor but it requires casting, insert the
1452 // cast in the predecessor and use the cast. Note that we have to update the
1453 // AvailablePreds vector as we go so that all of the PHI entries for this
1454 // predecessor use the same bitcast.
1455 Value *&PredV = I->second;
1456 if (PredV->getType() != LoadI->getType())
1457 PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
1458 P->getTerminator());
1459
1460 PN->addIncoming(PredV, I->first);
1461 }
1462
// Merge LoadI's metadata into each predecessor load that now supplies the
// value, and drop any cached LVI information about those loads.
1463 for (LoadInst *PredLoadI : CSELoads) {
1464 combineMetadataForCSE(PredLoadI, LoadI, true);
1465 LVI->forgetValue(PredLoadI);
1466 }
1467
1468 LoadI->replaceAllUsesWith(PN);
1469 LoadI->eraseFromParent();
1470
1471 return true;
1472}
1473
1474 /// findMostPopularDest - The specified list contains multiple possible
1475 /// threadable destinations. Pick the one that occurs the most frequently in
1476 /// the list.
///
/// Returns nullptr when every entry of PredToDestList has a null destination.
/// PredToDestList must not be empty.
// NOTE(review): listing line 1478 (the line carrying the function name and its
// first parameter) is missing from this extract; the call site in this file
// passes (BB, PredToDestList).
1477static BasicBlock *
1479 const SmallVectorImpl<std::pair<BasicBlock *,
1480 BasicBlock *>> &PredToDestList) {
1481 assert(!PredToDestList.empty());
1482
1483 // Determine popularity. If there are multiple possible destinations, we
1484 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1485 // blocks with known and real destinations to threading undef. We'll handle
1486 // them later if interesting.
1487 MapVector<BasicBlock *, unsigned> DestPopularity;
1488
1489 // Populate DestPopularity with the successors in the order they appear in the
1490 // successor list. This way, we ensure determinism by iterating it in the
1491 // same order in std::max_element below. We map nullptr to 0 so that we can
1492 // return nullptr when PredToDestList contains nullptr only.
1493 DestPopularity[nullptr] = 0;
1494 for (auto *SuccBB : successors(BB))
1495 DestPopularity[SuccBB] = 0;
1496
// Count only real destinations; null (undef) entries never gain popularity.
1497 for (const auto &PredToDest : PredToDestList)
1498 if (PredToDest.second)
1499 DestPopularity[PredToDest.second]++;
1500
1501 // Find the most popular dest.
// std::max_element returns the first of several equal maxima, so ties go to
// the entry inserted earliest above.
1502 auto MostPopular = std::max_element(
1503 DestPopularity.begin(), DestPopularity.end(), llvm::less_second());
1504
1505 // Okay, we have finally picked the most popular destination.
1506 return MostPopular->first;
1507}
1508
1509 // Try to evaluate the value of V when the control flows from PredPredBB to
1510 // BB->getSinglePredecessor() and then on to BB.
// BB must have a single predecessor (asserted below). The explicit failure
// paths return nullptr; the LVI path returns whatever getConstantOnEdge yields.
// NOTE(review): listing line 1511 (the line carrying the return type, function
// name and first parameter) is missing from this extract; the recursive calls
// below pass (BB, PredPredBB, V).
1512 BasicBlock *PredPredBB,
1513 Value *V) {
1514 BasicBlock *PredBB = BB->getSinglePredecessor();
1515 assert(PredBB && "Expected a single predecessor");
1516
// A constant evaluates to itself on every edge.
1517 if (Constant *Cst = dyn_cast<Constant>(V)) {
1518 return Cst;
1519 }
1520
1521 // Consult LVI if V is not an instruction in BB or PredBB.
1522 Instruction *I = dyn_cast<Instruction>(V);
1523 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1524 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1525 }
1526
1527 // Look into a PHI argument.
// Only PHIs in PredBB are handled: their incoming value for PredPredBB is used
// if it is a constant. A PHI located in BB is not evaluated.
1528 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1529 if (PHI->getParent() == PredBB)
1530 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1531 return nullptr;
1532 }
1533
1534 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
// Only comparisons located in BB are folded, and only when both operands
// evaluate to constants.
1535 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1536 if (CondCmp->getParent() == BB) {
1537 Constant *Op0 =
1538 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
1539 Constant *Op1 =
1540 evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
1541 if (Op0 && Op1) {
1542 return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
1543 }
1544 }
1545 return nullptr;
1546 }
1547
1548 return nullptr;
1549}
1550
// Uses the values of Cond that are known on BB's incoming edges to simplify
// control flow. If every predecessor leads to the same successor, BB's
// terminator is folded to an unconditional branch; otherwise the predecessors
// heading to the most popular destination are handed to tryThreadEdge.
// Returns false when BB is a loop header or no usable edge is found.
// NOTE(review): listing line 1551 (the line carrying the function name and its
// first parameters) is missing from this extract; callers in this file pass
// (Cond, BB, Preference, CxtI).
1552 ConstantPreference Preference,
1553 Instruction *CxtI) {
1554 // If threading this would thread across a loop header, don't even try to
1555 // thread the edge.
1556 if (LoopHeaders.count(BB))
1557 return false;
1558
1559 PredValueInfoTy PredValues;
1560 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1561 CxtI)) {
1562 // We don't have known values in predecessors. See if we can thread through
1563 // BB and its sole predecessor.
// NOTE(review): listing line 1564 (the statement implementing the fallback
// described above) is missing from this extract.
1565 }
1566
1567 assert(!PredValues.empty() &&
1568 "computeValueKnownInPredecessors returned true with no values");
1569
1570 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1571 for (const auto &PredValue : PredValues) {
1572 dbgs() << " BB '" << BB->getName()
1573 << "': FOUND condition = " << *PredValue.first
1574 << " for pred '" << PredValue.second->getName() << "'.\n";
1575 });
1576
1577 // Decide what we want to thread through. Convert our list of known values to
1578 // a list of known destinations for each pred. This also discards duplicate
1579 // predecessors and keeps track of the undefined inputs (which are represented
1580 // as a null dest in the PredToDestList).
// NOTE(review): listing lines 1581-1582 are missing from this extract; they
// presumably declare SeenPreds and PredToDestList, which are used below.
1583
// OnlyDest/OnlyVal track whether all predecessors agree on one destination or
// value; the all-ones sentinels mark "more than one seen".
1584 BasicBlock *OnlyDest = nullptr;
1585 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1586 Constant *OnlyVal = nullptr;
1587 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1588
1589 for (const auto &PredValue : PredValues) {
1590 BasicBlock *Pred = PredValue.second;
1591 if (!SeenPreds.insert(Pred).second)
1592 continue; // Duplicate predecessor entry.
1593
1594 Constant *Val = PredValue.first;
1595
// Map the known value to the successor that BB's terminator would pick.
1596 BasicBlock *DestBB;
1597 if (isa<UndefValue>(Val))
1598 DestBB = nullptr;
1599 else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1600 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
// isZero() is 1 for a false condition, selecting successor 1 (the false edge).
1601 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1602 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1603 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1604 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1605 } else {
1606 assert(isa<IndirectBrInst>(BB->getTerminator())
1607 && "Unexpected terminator");
1608 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1609 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1610 }
1611
1612 // If we have exactly one destination, remember it for efficiency below.
1613 if (PredToDestList.empty()) {
1614 OnlyDest = DestBB;
1615 OnlyVal = Val;
1616 } else {
1617 if (OnlyDest != DestBB)
1618 OnlyDest = MultipleDestSentinel;
1619 // It is possible to have the same destination but different values, e.g.
1620 // the default case in a switchinst.
1621 if (Val != OnlyVal)
1622 OnlyVal = MultipleVal;
1623 }
1624
1625 // If the predecessor ends with an indirect goto, we can't change its
1626 // destination.
1627 if (isa<IndirectBrInst>(Pred->getTerminator()))
1628 continue;
1629
1630 PredToDestList.emplace_back(Pred, DestBB);
1631 }
1632
1633 // If all edges were unthreadable, we fail.
1634 if (PredToDestList.empty())
1635 return false;
1636
1637 // If all the predecessors go to a single known successor, we want to fold,
1638 // not thread. By doing so, we do not need to duplicate the current block and
1639 // also miss potential opportunities in case we don't/can't duplicate.
1640 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
// Folding is only valid when the list accounts for every predecessor of BB.
1641 if (BB->hasNPredecessors(PredToDestList.size())) {
1642 bool SeenFirstBranchToOnlyDest = false;
1643 std::vector <DominatorTree::UpdateType> Updates;
1644 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1645 for (BasicBlock *SuccBB : successors(BB)) {
1646 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1647 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1648 } else {
1649 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1650 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1651 }
1652 }
1653
1654 // Finally update the terminator.
1655 Instruction *Term = BB->getTerminator();
1656 BranchInst::Create(OnlyDest, Term);
1657 ++NumFolds;
1658 Term->eraseFromParent();
1659 DTU->applyUpdatesPermissive(Updates);
1660 if (auto *BPI = getBPI())
1661 BPI->eraseBlock(BB);
1662
1663 // If the condition is now dead due to the removal of the old terminator,
1664 // erase it.
1665 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1666 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1667 CondInst->eraseFromParent();
1668 // We can safely replace *some* uses of the CondInst if it has
1669 // exactly one value as returned by LVI. RAUW is incorrect in the
1670 // presence of guards and assumes, that have the `Cond` as the use. This
1671 // is because we use the guards/assume to reason about the `Cond` value
1672 // at the end of block, but RAUW unconditionally replaces all uses
1673 // including the guards/assumes themselves and the uses before the
1674 // guard/assume.
1675 else if (OnlyVal && OnlyVal != MultipleVal)
1676 replaceFoldableUses(CondInst, OnlyVal, BB);
1677 }
1678 return true;
1679 }
1680 }
1681
1682 // Determine which is the most common successor. If we have many inputs and
1683 // this block is a switch, we want to start by threading the batch that goes
1684 // to the most popular destination first. If we only know about one
1685 // threadable destination (the common case) we can avoid this.
1686 BasicBlock *MostPopularDest = OnlyDest;
1687
1688 if (MostPopularDest == MultipleDestSentinel) {
1689 // Remove any loop headers from the Dest list, threadEdge conservatively
1690 // won't process them, but we might have other destinations that are
1691 // eligible and we still want to process.
1692 erase_if(PredToDestList,
1693 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1694 return LoopHeaders.contains(PredToDest.second);
1695 });
1696
1697 if (PredToDestList.empty())
1698 return false;
1699
1700 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1701 }
1702
1703 // Now that we know what the most popular destination is, factor all
1704 // predecessors that will jump to it into a single predecessor.
1705 SmallVector<BasicBlock*, 16> PredsToFactor;
1706 for (const auto &PredToDest : PredToDestList)
1707 if (PredToDest.second == MostPopularDest) {
1708 BasicBlock *Pred = PredToDest.first;
1709
1710 // This predecessor may be a switch or something else that has multiple
1711 // edges to the block. Factor each of these edges by listing them
1712 // according to # occurrences in PredsToFactor.
1713 for (BasicBlock *Succ : successors(Pred))
1714 if (Succ == BB)
1715 PredsToFactor.push_back(Pred);
1716 }
1717
1718 // If the threadable edges are branching on an undefined value, we get to pick
1719 // the destination that these predecessors should get to.
1720 if (!MostPopularDest)
1721 MostPopularDest = BB->getTerminator()->
1722 getSuccessor(getBestDestForJumpOnUndef(BB));
1723
1724 // Ok, try to thread it!
1725 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1726}
1727
1728 /// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1729 /// a PHI node (or freeze PHI) in the current block. See if there are any
1730 /// simplifications we can do based on inputs to the phi node.
///
/// Returns true as soon as the branch has been duplicated into one
/// predecessor; returns false if no predecessor qualified or no duplication
/// succeeded.
// NOTE(review): listing line 1731 (the function signature) is missing from
// this extract; it is called in this file as processBranchOnPHI(PN).
1732 BasicBlock *BB = PN->getParent();
1733
1734 // TODO: We could make use of this to do it once for blocks with common PHI
1735 // values.
// NOTE(review): listing line 1736 is missing from this extract; it presumably
// declares PredBBs, which is used below as a one-element list of blocks.
1737 PredBBs.resize(1);
1738
1739 // If any of the predecessor blocks end in an unconditional branch, we can
1740 // *duplicate* the conditional branch into that block in order to further
1741 // encourage jump threading and to eliminate cases where we have branch on a
1742 // phi of an icmp (branch on icmp is much better).
1743 // This is still beneficial when a frozen phi is used as the branch condition
1744 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1745 // to br(icmp(freeze ...)).
1746 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1747 BasicBlock *PredBB = PN->getIncomingBlock(i);
1748 if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1749 if (PredBr->isUnconditional()) {
1750 PredBBs[0] = PredBB;
1751 // Try to duplicate BB into PredBB.
1752 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1753 return true;
1754 }
1755 }
1756
1757 return false;
1758}
1759
1760/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1761/// a xor instruction in the current block. See if there are any
1762/// simplifications we can do based on inputs to the xor.
///
/// Returns true if the xor was simplified in place (every predecessor agreed
/// on the known operand) or if duplicateCondBranchOnPHIIntoPred() succeeded
/// for the predecessors that supply the most popular value; false otherwise.
///
/// NOTE(review): the embedded line numbers skip 1763 (the declaration line of
/// this function) and 1849 (a statement in the all-undef branch below), so
/// neither is part of this listing -- confirm against the upstream file.
1764 BasicBlock *BB = BO->getParent();
1765
1766 // If either the LHS or RHS of the xor is a constant, don't do this
1767 // optimization.
1768 if (isa<ConstantInt>(BO->getOperand(0)) ||
1769 isa<ConstantInt>(BO->getOperand(1)))
1770 return false;
1771
1772 // If the first instruction in BB isn't a phi, we won't be able to infer
1773 // anything special about any particular predecessor.
1774 if (!isa<PHINode>(BB->front()))
1775 return false;
1776
1777 // If this BB is a landing pad, we won't be able to split the edge into it.
1778 if (BB->isEHPad())
1779 return false;
1780
1781 // If we have a xor as the branch input to this block, and we know that the
1782 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1783 // the condition into the predecessor and fix that value to true, saving some
1784 // logical ops on that path and encouraging other paths to simplify.
1785 //
1786 // This copies something like this:
1787 //
1788 // BB:
1789 // %X = phi i1 [1], [%X']
1790 // %Y = icmp eq i32 %A, %B
1791 // %Z = xor i1 %X, %Y
1792 // br i1 %Z, ...
1793 //
1794 // Into:
1795 // BB':
1796 // %Y = icmp ne i32 %A, %B
1797 // br i1 %Y, ...
1798
1799 PredValueInfoTy XorOpValues;
// isLHS records which operand the per-predecessor values were computed for:
// operand 0 is tried first, operand 1 only if that fails.
1800 bool isLHS = true;
1801 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1802 WantInteger, BO)) {
1803 assert(XorOpValues.empty());
1804 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1805 WantInteger, BO))
1806 return false;
1807 isLHS = false;
1808 }
1809
1810 assert(!XorOpValues.empty() &&
1811 "computeValueKnownInPredecessors returned true with no values");
1812
1813 // Scan the information to see which is most popular: true or false. The
1814 // predecessors can be of the set true, false, or undef.
1815 unsigned NumTrue = 0, NumFalse = 0;
1816 for (const auto &XorOpValue : XorOpValues) {
1817 if (isa<UndefValue>(XorOpValue.first))
1818 // Ignore undefs for the count.
1819 continue;
1820 if (cast<ConstantInt>(XorOpValue.first)->isZero())
1821 ++NumFalse;
1822 else
1823 ++NumTrue;
1824 }
1825
1826 // Determine which value to split on, true, false, or undef if neither.
// Ties between true and false resolve to false; SplitVal stays null only when
// every known value was undef.
1827 ConstantInt *SplitVal = nullptr;
1828 if (NumTrue > NumFalse)
1829 SplitVal = ConstantInt::getTrue(BB->getContext());
1830 else if (NumTrue != 0 || NumFalse != 0)
1831 SplitVal = ConstantInt::getFalse(BB->getContext());
1832
1833 // Collect all of the blocks that this can be folded into so that we can
1834 // factor this once and clone it once.
1835 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1836 for (const auto &XorOpValue : XorOpValues) {
1837 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1838 continue;
1839
1840 BlocksToFoldInto.push_back(XorOpValue.second);
1841 }
1842
1843 // If we inferred a value for all of the predecessors, then duplication won't
1844 // help us. However, we can just replace the LHS or RHS with the constant.
1845 if (BlocksToFoldInto.size() ==
1846 cast<PHINode>(BB->front()).getNumIncomingValues()) {
1847 if (!SplitVal) {
1848 // If all preds provide undef, just nuke the xor, because it is undef too.
// NOTE(review): embedded line 1849 is missing from this listing; only the
// erase is visible here. Confirm upstream how BO's users are redirected first.
1850 BO->eraseFromParent();
1851 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
1852 // If all preds provide 0, replace the xor with the other input.
1853 BO->replaceAllUsesWith(BO->getOperand(isLHS));
1854 BO->eraseFromParent();
1855 } else {
1856 // Otherwise (all preds provide 1, or the other input is the xor itself),
// replace the operand whose value was inferred with the constant SplitVal.
1857 BO->setOperand(!isLHS, SplitVal);
1858 }
1859
1860 return true;
1861 }
1862
1863 // If any of predecessors end with an indirect goto, we can't change its
1864 // destination.
1865 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1866 return isa<IndirectBrInst>(Pred->getTerminator());
1867 }))
1868 return false;
1869
1870 // Try to duplicate BB into PredBB.
1871 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1872}
1873
1874/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1875/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1876/// NewPred using the entries from OldPred (suitably mapped).
///
/// For every PHI in PHIBB, the value incoming from OldPred is looked up; if it
/// is an instruction with an entry in ValueMap, the mapped value is used for
/// the new NewPred entry, otherwise the original value is reused unchanged.
///
/// NOTE(review): the embedded line numbers skip 1877, 1880 and 1888, so the
/// first and last lines of the declaration and the declaration of the
/// iterator `I` are not part of this listing -- confirm against upstream.
1878 BasicBlock *OldPred,
1879 BasicBlock *NewPred,
1881 for (PHINode &PN : PHIBB->phis()) {
1882 // Ok, we have a PHI node. Figure out what the incoming value was for the
1883 // DestBlock.
1884 Value *IV = PN.getIncomingValueForBlock(OldPred);
1885
1886 // Remap the value if necessary.
1887 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
1889 if (I != ValueMap.end())
1890 IV = I->second;
1891 }
1892
1893 PN.addIncoming(IV, NewPred);
1894 }
1895}
1896
1897/// Merge basic block BB into its sole predecessor if possible.
///
/// Returns false without changing anything when BB has no single predecessor,
/// when that predecessor's terminator is a special terminator or has more than
/// one successor, when the predecessor is BB itself, or when
/// hasAddressTakenAndUsed(BB) holds. Otherwise performs the merge, keeps
/// LoopHeaders and LVI in sync, and returns true.
///
/// NOTE(review): the embedded line numbers skip 1898 (the declaration line)
/// and 1942 (the line directly before the final LVI->eraseBlock(BB)), so
/// neither is part of this listing -- confirm against the upstream file.
1899 BasicBlock *SinglePred = BB->getSinglePredecessor();
1900 if (!SinglePred)
1901 return false;
1902
1903 const Instruction *TI = SinglePred->getTerminator();
1904 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1905 SinglePred == BB || hasAddressTakenAndUsed(BB))
1906 return false;
1907
1908 // If SinglePred was a loop header, BB becomes one.
1909 if (LoopHeaders.erase(SinglePred))
1910 LoopHeaders.insert(BB);
1911
1912 LVI->eraseBlock(SinglePred);
1913 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1914
1915 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1916 // BB code within one basic block `BB`), we need to invalidate the LVI
1917 // information associated with BB, because the LVI information need not be
1918 // true for all of BB after the merge. For example,
1919 // Before the merge, LVI info and code is as follows:
1920 // SinglePred: <LVI info1 for %p val>
1921 // %y = use of %p
1922 // call @exit() // need not transfer execution to successor.
1923 // assume(%p) // from this point on %p is true
1924 // br label %BB
1925 // BB: <LVI info2 for %p val, i.e. %p is true>
1926 // %x = use of %p
1927 // br label exit
1928 //
1929 // Note that this LVI info for blocks BB and SinglePred is correct for %p
1930 // (info2 and info1 respectively). After the merge and the deletion of
1931 // LVI info1 for SinglePred, we have the following code:
1932 // BB: <LVI info2 for %p val>
1933 // %y = use of %p
1934 // call @exit()
1935 // assume(%p)
1936 // %x = use of %p <-- LVI info2 is correct from here onwards.
1937 // br label exit
1938 // LVI info2 for BB is incorrect at the beginning of BB.
1939
1940 // Invalidate LVI information for BB if the LVI is not provably true for
1941 // all of BB.
// NOTE(review): the condition this comment describes is on the line missing
// from the listing; as shown here the erase looks unconditional.
1943 LVI->eraseBlock(BB);
1944 return true;
1945}
1946
1947/// Update the SSA form. NewBB contains instructions that are copied from BB.
1948/// ValueMapping maps old values in BB to new ones in NewBB.
///
/// For every instruction in BB that has uses (or debug-value users) outside of
/// BB, registers the original value for BB and its ValueMapping counterpart
/// for NewBB with an SSAUpdater and rewrites those outside uses through it.
///
/// NOTE(review): the embedded line numbers skip 1949, 1958 and 1959, so the
/// first line of the declaration and the declarations of DbgValues and
/// DPValues are not part of this listing -- confirm against upstream.
1950 BasicBlock *BB, BasicBlock *NewBB,
1951 DenseMap<Instruction *, Value *> &ValueMapping) {
1952 // If there were values defined in BB that are used outside the block, then we
1953 // now have to update all uses of the value to use either the original value,
1954 // the cloned value, or some PHI derived value. This can require arbitrary
1955 // PHI insertion, which we are prepared to do; clean these up now.
1956 SSAUpdater SSAUpdate;
1957 SmallVector<Use *, 16> UsesToRename;
1960
1961 for (Instruction &I : *BB) {
1962 // Scan all uses of this instruction to see if it is used outside of its
1963 // block, and if so, record them in UsesToRename.
1964 for (Use &U : I.uses()) {
1965 Instruction *User = cast<Instruction>(U.getUser());
// A PHI use counts as local when the value flows in along an edge from BB;
// any other use counts as local when the user itself lives in BB.
1966 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1967 if (UserPN->getIncomingBlock(U) == BB)
1968 continue;
1969 } else if (User->getParent() == BB)
1970 continue;
1971
1972 UsesToRename.push_back(&U);
1973 }
1974
1975 // Find debug values outside of the block
1976 findDbgValues(DbgValues, &I, &DPValues);
1977 llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
1978 return DbgVal->getParent() == BB;
1979 });
1980 llvm::erase_if(DPValues, [&](const DPValue *DPVal) {
1981 return DPVal->getParent() == BB;
1982 });
1983
1984 // If there are no uses outside the block, we're done with this instruction.
1985 if (UsesToRename.empty() && DbgValues.empty() && DPValues.empty())
1986 continue;
1987 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1988
1989 // We found a use of I outside of BB. Rename all uses of I that are outside
1990 // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
1991 // with the two values we know.
1992 SSAUpdate.Initialize(I.getType(), I.getName());
1993 SSAUpdate.AddAvailableValue(BB, &I);
1994 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1995
// Draining with pop_back_val leaves UsesToRename empty for the next
// instruction; the debug-value lists are cleared explicitly below.
1996 while (!UsesToRename.empty())
1997 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1998 if (!DbgValues.empty() || !DPValues.empty()) {
1999 SSAUpdate.UpdateDebugValues(&I, DbgValues);
2000 SSAUpdate.UpdateDebugValues(&I, DPValues);
2001 DbgValues.clear();
2002 DPValues.clear();
2003 }
2004
2005 LLVM_DEBUG(dbgs() << "\n");
2006 }
2007}
2008
2009/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2010/// arguments that come from PredBB. Return the map from the variables in the
2011/// source basic block to the variables in the newly created basic block.
///
/// Debug info is carried over as well: cloned dbg.value intrinsics and cloned
/// DPValues have their location operands redirected to the cloned values when
/// a mapping exists.
///
/// NOTE(review): the embedded line numbers skip 2012-2014, 2019 and 2107, so
/// most of the declaration, the declaration of ValueMapping, and the
/// declaration of the iterator `I` in the operand-remapping loop are not part
/// of this listing -- confirm against the upstream file.
2015 BasicBlock *PredBB) {
2016 // We are going to have to map operands from the source basic block to the new
2017 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2018 // block, evaluate them to account for entry from PredBB.
2020
2021 // Retargets llvm.dbg.value to any renamed variables.
// Returns true whenever NewInst is a dbg.value intrinsic, even if no operand
// needed remapping; the caller uses this to skip generic operand remapping.
2022 auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
2023 auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
2024 if (!DbgInstruction)
2025 return false;
2026
// Collect the replacements first and apply them afterwards, so the operand
// list is not modified while it is being iterated.
2027 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2028 for (auto DbgOperand : DbgInstruction->location_ops()) {
2029 auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
2030 if (!DbgOperandInstruction)
2031 continue;
2032
2033 auto I = ValueMapping.find(DbgOperandInstruction);
2034 if (I != ValueMapping.end()) {
2035 OperandsToRemap.insert(
2036 std::pair<Value *, Value *>(DbgOperand, I->second));
2037 }
2038 }
2039
2040 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2041 DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
2042 return true;
2043 };
2044
2045 // Duplicate implementation of the above dbg.value code, using DPValues
2046 // instead.
2047 auto RetargetDPValueIfPossible = [&](DPValue *DPV) {
2048 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2049 for (auto *Op : DPV->location_ops()) {
2050 Instruction *OpInst = dyn_cast<Instruction>(Op);
2051 if (!OpInst)
2052 continue;
2053
2054 auto I = ValueMapping.find(OpInst);
2055 if (I != ValueMapping.end())
2056 OperandsToRemap.insert({OpInst, I->second});
2057 }
2058
2059 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2060 DPV->replaceVariableLocationOp(OldOp, MappedOp);
2061 };
2062
// Remember the source block now; BI is advanced by the loops below.
2063 BasicBlock *RangeBB = BI->getParent();
2064
2065 // Clone the phi nodes of the source basic block into NewBB. The resulting
2066 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2067 // might need to rewrite the operand of the cloned phi.
2068 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2069 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2070 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2071 ValueMapping[PN] = NewPN;
2072 }
2073
2074 // Clone noalias scope declarations in the threaded block. When threading a
2075 // loop exit, we would otherwise end up with two identical scope declarations
2076 // visible at the same time.
2077 SmallVector<MDNode *> NoAliasScopes;
2078 DenseMap<MDNode *, MDNode *> ClonedScopes;
2079 LLVMContext &Context = PredBB->getContext();
2080 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2081 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2082
// Copies the debug records attached to From onto NewInst and retargets the
// DPValues among them to cloned values.
2083 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
2084 auto DPVRange = NewInst->cloneDebugInfoFrom(From);
2085 for (DPValue &DPV : DPValue::filter(DPVRange))
2086 RetargetDPValueIfPossible(&DPV);
2087 };
2088
2089 // Clone the non-phi instructions of the source basic block into NewBB,
2090 // keeping track of the mapping and using it to remap operands in the cloned
2091 // instructions.
2092 for (; BI != BE; ++BI) {
2093 Instruction *New = BI->clone();
2094 New->setName(BI->getName());
2095 New->insertInto(NewBB, NewBB->end());
2096 ValueMapping[&*BI] = New;
2097 adaptNoAliasScopes(New, ClonedScopes, Context);
2098
2099 CloneAndRemapDbgInfo(New, &*BI);
2100
// dbg.value clones were fully handled by the lambda; skip operand remapping.
2101 if (RetargetDbgValueIfPossible(New))
2102 continue;
2103
2104 // Remap operands to patch up intra-block references.
2105 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2106 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2108 if (I != ValueMapping.end())
2109 New->setOperand(i, I->second);
2110 }
2111 }
2112
2113 // There may be DPValues on the terminator, clone directly from marker
2114 // to marker as there isn't an instruction there.
2115 if (BE != RangeBB->end() && BE->hasDbgValues()) {
2116 // Dump them at the end.
2117 DPMarker *Marker = RangeBB->getMarker(BE);
2118 DPMarker *EndMarker = NewBB->createMarker(NewBB->end());
2119 auto DPVRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
2120 for (DPValue &DPV : DPValue::filter(DPVRange))
2121 RetargetDPValueIfPossible(&DPV);
2122 }
2123
2124 return ValueMapping;
2125}
2126
2127/// Attempt to thread through two successive basic blocks.
///
/// BB must end in a branch and have a single predecessor PredBB that ends in a
/// conditional branch, has several predecessors, has no self edge, and is
/// neither a loop header nor an EH pad. If exactly one predecessor of PredBB
/// makes Cond evaluate to 0 (or, failing that, exactly one makes it evaluate
/// to 1) and the duplication cost is within BBDupThreshold, calls
/// threadThroughTwoBasicBlocks() and returns true; otherwise returns false.
///
/// NOTE(review): the embedded line numbers skip 2128 (first line of the
/// declaration) and 2200 (the expression passed to dyn_cast_or_null in the
/// predecessor loop), so neither is part of this listing -- confirm upstream.
2129 Value *Cond) {
2130 // Consider:
2131 //
2132 // PredBB:
2133 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2134 // %tobool = icmp eq i32 %cond, 0
2135 // br i1 %tobool, label %BB, label ...
2136 //
2137 // BB:
2138 // %cmp = icmp eq i32* %var, null
2139 // br i1 %cmp, label ..., label ...
2140 //
2141 // We don't know the value of %var at BB even if we know which incoming edge
2142 // we take to BB. However, once we duplicate PredBB for each of its incoming
2143 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2144 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2145
2146 // Require that BB end with a Branch for simplicity.
2147 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2148 if (!CondBr)
2149 return false;
2150
2151 // BB must have exactly one predecessor.
2152 BasicBlock *PredBB = BB->getSinglePredecessor();
2153 if (!PredBB)
2154 return false;
2155
2156 // Require that PredBB end with a conditional Branch. If PredBB ends with an
2157 // unconditional branch, we should be merging PredBB and BB instead. For
2158 // simplicity, we don't deal with a switch.
2159 BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2160 if (!PredBBBranch || PredBBBranch->isUnconditional())
2161 return false;
2162
2163 // If PredBB has exactly one incoming edge, we don't gain anything by copying
2164 // PredBB.
2165 if (PredBB->getSinglePredecessor())
2166 return false;
2167
2168 // Don't thread through PredBB if it contains a successor edge to itself, in
2169 // which case we would infinite loop. Suppose we are threading an edge from
2170 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2171 // successor edge to itself. If we allowed jump threading in this case, we
2172 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2173 // PredBB.thread has a successor edge to PredBB, we would immediately come up
2174 // with another jump threading opportunity from PredBB.thread through PredBB
2175 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2176 // would keep peeling one iteration from PredBB.
2177 if (llvm::is_contained(successors(PredBB), PredBB))
2178 return false;
2179
2180 // Don't thread across a loop header.
2181 if (LoopHeaders.count(PredBB))
2182 return false;
2183
2184 // Avoid complication with duplicating EH pads.
2185 if (PredBB->isEHPad())
2186 return false;
2187
2188 // Find a predecessor that we can thread. For simplicity, we only consider a
2189 // successor edge out of BB to which we thread exactly one incoming edge into
2190 // PredBB.
2191 unsigned ZeroCount = 0;
2192 unsigned OneCount = 0;
2193 BasicBlock *ZeroPred = nullptr;
2194 BasicBlock *OnePred = nullptr;
2195 for (BasicBlock *P : predecessors(PredBB)) {
2196 // If PredPred ends with IndirectBrInst, we can't handle it.
2197 if (isa<IndirectBrInst>(P->getTerminator()))
2198 continue;
2199 if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
2201 if (CI->isZero()) {
2202 ZeroCount++;
2203 ZeroPred = P;
2204 } else if (CI->isOne()) {
2205 OneCount++;
2206 OnePred = P;
2207 }
2208 }
2209 }
2210
2211 // Disregard complicated cases where we have to thread multiple edges.
// A unique zero-valued predecessor is preferred over a unique one-valued one.
2212 BasicBlock *PredPredBB;
2213 if (ZeroCount == 1) {
2214 PredPredBB = ZeroPred;
2215 } else if (OneCount == 1) {
2216 PredPredBB = OnePred;
2217 } else {
2218 return false;
2219 }
2220
// The comparison yields successor index 1 when the condition is known to be
// zero on this path and index 0 when it is known to be one.
2221 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2222
2223 // If threading to the same block as we come from, we would infinite loop.
2224 if (SuccBB == BB) {
2225 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2226 << "' - would thread to self!\n");
2227 return false;
2228 }
2229
2230 // If threading this would thread across a loop header, don't thread the edge.
2231 // See the comments above findLoopHeaders for justifications and caveats.
2232 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2233 LLVM_DEBUG({
2234 bool BBIsHeader = LoopHeaders.count(BB);
2235 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2236 dbgs() << " Not threading across "
2237 << (BBIsHeader ? "loop header BB '" : "block BB '")
2238 << BB->getName() << "' to dest "
2239 << (SuccIsHeader ? "loop header BB '" : "block BB '")
2240 << SuccBB->getName()
2241 << "' - it might create an irreducible loop!\n";
2242 });
2243 return false;
2244 }
2245
2246 // Compute the cost of duplicating BB and PredBB.
2247 unsigned BBCost = getJumpThreadDuplicationCost(
2248 TTI, BB, BB->getTerminator(), BBDupThreshold);
2249 unsigned PredBBCost = getJumpThreadDuplicationCost(
2250 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2251
2252 // Give up if costs are too high. We need to check BBCost and PredBBCost
2253 // individually before checking their sum because getJumpThreadDuplicationCost
2254 // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
// NOTE(review): the debug message below prints BBCost directly followed by
// "for BB" with no separating space; worth fixing in a code change.
2255 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2256 BBCost + PredBBCost > BBDupThreshold) {
2257 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2258 << "' - Cost is too high: " << PredBBCost
2259 << " for PredBB, " << BBCost << "for BB\n");
2260 return false;
2261 }
2262
2263 // Now we are ready to duplicate PredBB.
2264 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2265 return true;
2266}
2267
// Duplicates PredBB into a new block "<PredBB>.thread" that is entered only
// from PredPredBB, patches PHIs, dominator-tree updates and SSA form, and then
// threads the edge from the new block across BB to SuccBB via threadEdge().
//
// NOTE(review): the embedded line numbers skip 2268 (first line of the
// declaration) and 2299 (the declaration that receives the result of
// cloneInstructions), so neither is part of this listing -- confirm upstream.
2269 BasicBlock *PredBB,
2270 BasicBlock *BB,
2271 BasicBlock *SuccBB) {
2272 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2273 << BB->getName() << "'\n");
2274
2275 // Build BPI/BFI before any changes are made to IR.
2276 bool HasProfile = doesBlockHaveProfileData(BB);
2277 auto *BFI = getOrCreateBFI(HasProfile);
2278 auto *BPI = getOrCreateBPI(BFI != nullptr);
2279
2280 BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2281 BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2282
2283 BasicBlock *NewBB =
2284 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2285 PredBB->getParent(), PredBB);
2286 NewBB->moveAfter(PredBB);
2287
2288 // Set the block frequency of NewBB.
2289 if (BFI) {
2290 assert(BPI && "It's expected BPI to exist along with BFI");
2291 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2292 BPI->getEdgeProbability(PredPredBB, PredBB);
2293 BFI->setBlockFreq(NewBB, NewBBFreq);
2294 }
2295
2296 // We are going to have to map operands from the original BB block to the new
2297 // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2298 // to account for entry from PredPredBB.
2300 cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
2301
2302 // Copy the edge probabilities from PredBB to NewBB.
2303 if (BPI)
2304 BPI->copyEdgeProbabilities(PredBB, NewBB);
2305
2306 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2307 // This eliminates predecessors from PredPredBB, which requires us to simplify
2308 // any PHI nodes in PredBB.
2309 Instruction *PredPredTerm = PredPredBB->getTerminator();
2310 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2311 if (PredPredTerm->getSuccessor(i) == PredBB) {
2312 PredBB->removePredecessor(PredPredBB, true);
2313 PredPredTerm->setSuccessor(i, NewBB);
2314 }
2315
// NewBB branches to the same two successors as PredBB, so each of them gains
// NewBB as a predecessor and needs matching PHI entries.
2316 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2317 ValueMapping);
2318 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2319 ValueMapping);
2320
2321 DTU->applyUpdatesPermissive(
2322 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2323 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2324 {DominatorTree::Insert, PredPredBB, NewBB},
2325 {DominatorTree::Delete, PredPredBB, PredBB}});
2326
2327 updateSSA(PredBB, NewBB, ValueMapping);
2328
2329 // Clean up things like PHI nodes with single operands, dead instructions,
2330 // etc.
2331 SimplifyInstructionsInBlock(NewBB, TLI);
2332 SimplifyInstructionsInBlock(PredBB, TLI);
2333
// Finish by threading the single new predecessor across BB to SuccBB.
2334 SmallVector<BasicBlock *, 1> PredsToFactor;
2335 PredsToFactor.push_back(NewBB);
2336 threadEdge(BB, PredsToFactor, SuccBB);
2337}
2338
2339/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
///
/// Refuses (returns false) when SuccBB is BB itself, when BB or SuccBB is a
/// recorded loop header, or when the duplication cost of BB exceeds
/// BBDupThreshold. Otherwise calls threadEdge() and returns true.
///
/// NOTE(review): the embedded line numbers skip 2340, so the first line of
/// the declaration is not part of this listing -- confirm against upstream.
2341 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2342 BasicBlock *SuccBB) {
2343 // If threading to the same block as we come from, we would infinite loop.
2344 if (SuccBB == BB) {
2345 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2346 << "' - would thread to self!\n");
2347 return false;
2348 }
2349
2350 // If threading this would thread across a loop header, don't thread the edge.
2351 // See the comments above findLoopHeaders for justifications and caveats.
2352 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2353 LLVM_DEBUG({
2354 bool BBIsHeader = LoopHeaders.count(BB);
2355 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2356 dbgs() << " Not threading across "
2357 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2358 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2359 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2360 });
2361 return false;
2362 }
2363
// Only the cost of BB matters here: BB is the block that gets cloned.
2364 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2365 TTI, BB, BB->getTerminator(), BBDupThreshold);
2366 if (JumpThreadCost > BBDupThreshold) {
2367 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2368 << "' - Cost is too high: " << JumpThreadCost << "\n");
2369 return false;
2370 }
2371
2372 threadEdge(BB, PredBBs, SuccBB);
2373 return true;
2374}
2375
2376/// threadEdge - We have decided that it is safe and profitable to factor the
2377/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2378/// across BB. Transform the IR to reflect this change.
///
/// Creates "<BB>.thread" holding a copy of BB's non-terminator instructions
/// followed by an unconditional branch to SuccBB, redirects the (factored)
/// predecessor to it, and updates LVI, PHIs, the dominator tree, SSA form and
/// block-frequency / branch-probability data. Increments NumThreads.
///
/// NOTE(review): the embedded line numbers skip 2379 (first line of the
/// declaration), 2409 (start of the statement that creates NewBB) and 2423
/// (the declaration that receives the result of cloneInstructions), so none
/// of them is part of this listing -- confirm against the upstream file.
2380 const SmallVectorImpl<BasicBlock *> &PredBBs,
2381 BasicBlock *SuccBB) {
2382 assert(SuccBB != BB && "Don't create an infinite loop");
2383
2384 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2385 "Don't thread across loop headers");
2386
2387 // Build BPI/BFI before any changes are made to IR.
2388 bool HasProfile = doesBlockHaveProfileData(BB);
2389 auto *BFI = getOrCreateBFI(HasProfile);
2390 auto *BPI = getOrCreateBPI(BFI != nullptr);
2391
2392 // And finally, do it! Start by factoring the predecessors if needed.
2393 BasicBlock *PredBB;
2394 if (PredBBs.size() == 1)
2395 PredBB = PredBBs[0];
2396 else {
2397 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2398 << " common predecessors.\n");
2399 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2400 }
2401
2402 // And finally, do it!
2403 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2404 << "' to '" << SuccBB->getName()
2405 << ", across block:\n " << *BB << "\n");
2406
// Let LVI know about the threading before the IR is modified.
2407 LVI->threadEdge(PredBB, BB, SuccBB);
2408
2410 BB->getName()+".thread",
2411 BB->getParent(), BB);
2412 NewBB->moveAfter(PredBB);
2413
2414 // Set the block frequency of NewBB.
2415 if (BFI) {
2416 assert(BPI && "It's expected BPI to exist along with BFI");
2417 auto NewBBFreq =
2418 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2419 BFI->setBlockFreq(NewBB, NewBBFreq);
2420 }
2421
2422 // Copy all the instructions from BB to NewBB except the terminator.
2424 cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
2425
2426 // We didn't copy the terminator from BB over to NewBB, because there is now
2427 // an unconditional jump to SuccBB. Insert the unconditional jump.
2428 BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2429 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2430
2431 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2432 // PHI nodes for NewBB now.
2433 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2434
2435 // Update the terminator of PredBB to jump to NewBB instead of BB. This
2436 // eliminates predecessors from BB, which requires us to simplify any PHI
2437 // nodes in BB.
2438 Instruction *PredTerm = PredBB->getTerminator();
2439 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2440 if (PredTerm->getSuccessor(i) == BB) {
2441 BB->removePredecessor(PredBB, true);
2442 PredTerm->setSuccessor(i, NewBB);
2443 }
2444
2445 // Enqueue required DT updates.
2446 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2447 {DominatorTree::Insert, PredBB, NewBB},
2448 {DominatorTree::Delete, PredBB, BB}});
2449
2450 updateSSA(BB, NewBB, ValueMapping);
2451
2452 // At this point, the IR is fully up to date and consistent. Do a quick scan
2453 // over the new instructions and zap any that are constants or dead. This
2454 // frequently happens because of phi translation.
2455 SimplifyInstructionsInBlock(NewBB, TLI);
2456
2457 // Update the edge weight from BB to SuccBB, which should be less than before.
2458 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2459
2460 // Threaded an edge!
2461 ++NumThreads;
2462}
2463
2464/// Create a new basic block that will be the predecessor of BB and successor of
2465/// all blocks in Preds. When profile data is available, update the frequency of
2466/// this new block.
///
/// Returns the first block produced by the split. For a landing-pad BB two
/// blocks are produced and both receive dominator-tree and frequency updates,
/// but only the first is returned.
///
/// NOTE(review): the embedded line numbers skip 2468 (a line of the parameter
/// list), 2470 and 2474 (the declarations of NewBBs and FreqMap), so none of
/// them is part of this listing -- confirm against the upstream file.
2467BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
2469 const char *Suffix) {
2471
2472 // Collect the frequencies of all predecessors of BB, which will be used to
2473 // update the edge weight of the result of splitting predecessors.
2475 auto *BFI = getBFI();
2476 if (BFI) {
2477 auto *BPI = getOrCreateBPI(true);
2478 for (auto *Pred : Preds)
2479 FreqMap.insert(std::make_pair(
2480 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2481 }
2482
2483 // In the case when BB is a LandingPad block we create 2 new predecessors
2484 // instead of just one.
2485 if (BB->isLandingPad()) {
2486 std::string NewName = std::string(Suffix) + ".split-lp";
2487 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2488 } else {
2489 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2490 }
2491
// Each split predecessor contributes one Delete and one Insert, and each new
// block one Insert, which is what the reservation below accounts for.
2492 std::vector<DominatorTree::UpdateType> Updates;
2493 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2494 for (auto *NewBB : NewBBs) {
2495 BlockFrequency NewBBFreq(0);
2496 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2497 for (auto *Pred : predecessors(NewBB)) {
2498 Updates.push_back({DominatorTree::Delete, Pred, BB});
2499 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2500 if (BFI) // Update frequencies between Pred -> NewBB.
2501 NewBBFreq += FreqMap.lookup(Pred);
2502 }
2503 if (BFI) // Apply the summed frequency to NewBB.
2504 BFI->setBlockFreq(NewBB, NewBBFreq);
2505 }
2506
2507 DTU->applyUpdatesPermissive(Updates);
2508 return NewBBs[0];
2509}
2510
2511bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2512 const Instruction *TI = BB->getTerminator();
2513 if (!TI || TI->getNumSuccessors() < 2)
2514 return false;
2515
2516 return hasValidBranchWeightMD(*TI);
2517}
2518
2519/// Update the block frequency of BB and branch weight and the metadata on the
2520/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2521/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2522void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2523 BasicBlock *BB,
2524 BasicBlock *NewBB,
2525 BasicBlock *SuccBB,
2526 BlockFrequencyInfo *BFI,
2528 bool HasProfile) {
2529 assert(((BFI && BPI) || (!BFI && !BFI)) &&
2530 "Both BFI & BPI should either be set or unset");
2531
2532 if (!BFI) {
2533 assert(!HasProfile &&
2534 "It's expected to have BFI/BPI when profile info exists");
2535 return;
2536 }
2537
2538 // As the edge from PredBB to BB is deleted, we have to update the block
2539 // frequency of BB.
2540 auto BBOrigFreq = BFI->getBlockFreq(BB);
2541 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2542 auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
2543 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2544 BFI->setBlockFreq(BB, BBNewFreq);
2545
2546 // Collect updated outgoing edges' frequencies from BB and use them to update
2547 // edge probabilities.
2548 SmallVector<uint64_t, 4> BBSuccFreq;
2549 for (BasicBlock *Succ : successors(BB)) {
2550 auto SuccFreq = (Succ == SuccBB)
2551 ? BB2SuccBBFreq - NewBBFreq
2552 : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
2553 BBSuccFreq.push_back(SuccFreq.getFrequency());
2554 }
2555
2556 uint64_t MaxBBSuccFreq =
2557 *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
2558
2560 if (MaxBBSuccFreq == 0)
2561 BBSuccProbs.assign(BBSuccFreq.size(),
2562 {1, static_cast<unsigned>(BBSuccFreq.size())});
2563 else {
2564 for (uint64_t Freq : BBSuccFreq)
2565 BBSuccProbs.push_back(
2566 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2567 // Normalize edge probabilities so that they sum up to one.
2569 BBSuccProbs.end());
2570 }
2571
2572 // Update edge probabilities in BPI.
2573 BPI->setEdgeProbability(BB, BBSuccProbs);
2574
2575 // Update the profile metadata as well.
2576 //
2577 // Don't do this if the profile of the transformed blocks was statically
2578 // estimated. (This could occur despite the function having an entry
2579 // frequency in completely cold parts of the CFG.)
2580 //
2581 // In this case we don't want to suggest to subsequent passes that the
2582 // calculated weights are fully consistent. Consider this graph:
2583 //
2584 // check_1
2585 // 50% / |
2586 // eq_1 | 50%
2587 // \ |
2588 // check_2
2589 // 50% / |
2590 // eq_2 | 50%
2591 // \ |
2592 // check_3
2593 // 50% / |
2594 // eq_3 | 50%
2595 // \ |
2596 //
2597 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2598 // the overall probabilities are inconsistent; the total probability that the
2599 // value is either 1, 2 or 3 is 150%.
2600 //
2601 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2602 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2603 // the loop exit edge. Then based solely on static estimation we would assume
2604 // the loop was extremely hot.
2605 //
2606 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2607 // shouldn't make edges extremely likely or unlikely based solely on static
2608 // estimation.
2609 if (BBSuccProbs.size() >= 2 && HasProfile) {
2611 for (auto Prob : BBSuccProbs)
2612 Weights.push_back(Prob.getNumerator());
2613
2614 auto TI = BB->getTerminator();
2615 setBranchWeights(*TI, Weights);
2616 }
2617}
2618
2619 /// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2620 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2621 /// If we can duplicate the contents of BB up into PredBB, do so now; this
2622 /// improves the odds that the branch will be on an analyzable instruction like
2623 /// a compare.
///
/// PredBBs must be non-empty (asserted). When it holds more than one block the
/// predecessors are first factored into a single block with splitBlockPreds.
/// Returns false without changing anything when BB is in LoopHeaders or when
/// the duplication cost exceeds BBDupThreshold; returns true once the
/// duplication has been performed.
// NOTE(review): listing line 2624 (the start of the signature) is missing from
// this extract; only the tail of the parameter list survived.
2625 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2626 assert(!PredBBs.empty() && "Can't handle an empty set");
2627
2628 // If BB is a loop header, then duplicating this block outside the loop would
2629 // cause us to transform this into an irreducible loop, don't do this.
2630 // See the comments above findLoopHeaders for justifications and caveats.
2631 if (LoopHeaders.count(BB)) {
2632 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2633 << "' into predecessor block '" << PredBBs[0]->getName()
2634 << "' - it might create an irreducible loop!\n");
2635 return false;
2636 }
2637
2638 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2639 TTI, BB, BB->getTerminator(), BBDupThreshold);
2640 if (DuplicationCost > BBDupThreshold) {
2641 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2642 << "' - Cost is too high: " << DuplicationCost << "\n");
2643 return false;
2644 }
2645
2646 // And finally, do it! Start by factoring the predecessors if needed.
2647 std::vector<DominatorTree::UpdateType> Updates;
2648 BasicBlock *PredBB;
2649 if (PredBBs.size() == 1)
2650 PredBB = PredBBs[0];
2651 else {
2652 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2653 << " common predecessors.\n");
2654 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2655 }
2656 Updates.push_back({DominatorTree::Delete, PredBB, BB}); // Edge goes away below when the branch to BB is erased.
2657
2658 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2659 // of PredBB.
2660 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2661 << "' into end of '" << PredBB->getName()
2662 << "' to eliminate branch on phi. Cost: "
2663 << DuplicationCost << " block is:" << *BB << "\n");
2664
2665 // Unless PredBB ends with an unconditional branch, split the edge so that we
2666 // can just clone the bits from BB into the end of the new PredBB.
2667 BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2668
2669 if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2670 BasicBlock *OldPredBB = PredBB;
2671 PredBB = SplitEdge(OldPredBB, BB);
// From here on PredBB is the block returned by SplitEdge. The Delete recorded
// above for the old PredBB -> BB edge is repeated as the third update below.
2672 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2673 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2674 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2675 OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2676 }
2677
2678 // We are going to have to map operands from the original BB block into the
2679 // PredBB block. Evaluate PHI nodes in BB.
2680 DenseMap<Instruction*, Value*> ValueMapping;
2681
2682 BasicBlock::iterator BI = BB->begin();
2683 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2684 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); // Each PHI resolves to its value on the PredBB edge.
2685 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2686 // mapping and using it to remap operands in the cloned instructions.
// The loop runs to BB->end(), so BB's terminator is cloned as well.
2687 for (; BI != BB->end(); ++BI) {
2688 Instruction *New = BI->clone();
2689 New->insertInto(PredBB, OldPredBranch->getIterator()); // Placed just before PredBB's old branch.
2690
2691 // Remap operands to patch up intra-block references.
2692 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2693 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2694 DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
2695 if (I != ValueMapping.end())
2696 New->setOperand(i, I->second);
2697 }
2698
2699 // If this instruction can be simplified after the operands are updated,
2700 // just use the simplified value instead. This frequently happens due to
2701 // phi translation.
// NOTE(review): listing line 2702, which opens this `if` and declares IV, is
// missing from this extract.
2703 New,
2704 {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
2705 ValueMapping[&*BI] = IV;
// The clone is only dropped when it has no side effects; otherwise it stays in
// PredBB even though later users are mapped to IV.
2706 if (!New->mayHaveSideEffects()) {
2707 New->eraseFromParent();
2708 New = nullptr;
2709 // Clone debug-info on the elided instruction to the destination
2710 // position.
2711 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2712 }
2713 } else {
2714 ValueMapping[&*BI] = New;
2715 }
2716 if (New) {
2717 // Otherwise, insert the new instruction into the block.
2718 New->setName(BI->getName());
2719 // Clone across any debug-info attached to the old instruction.
2720 New->cloneDebugInfoFrom(&*BI);
2721 // Update Dominance from simplified New instruction operands.
// Any BasicBlock operand of the clone (e.g. of the cloned terminator) becomes
// a new CFG edge leaving PredBB.
2722 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2723 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2724 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2725 }
2726 }
2727
2728 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2729 // add entries to the PHI nodes for branch from PredBB now.
// Both getSuccessor(0) and getSuccessor(1) are read, so BB's terminator is
// expected to be the two-way conditional branch described in the header.
2730 BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2731 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2732 ValueMapping);
2733 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2734 ValueMapping);
2735
2736 updateSSA(BB, PredBB, ValueMapping);
2737
2738 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2739 // that we nuked.
2740 BB->removePredecessor(PredBB, true);
2741
2742 // Remove the unconditional branch at the end of the PredBB block.
2743 OldPredBranch->eraseFromParent();
// PredBB now ends in a copy of BB's terminator, so it takes over BB's outgoing
// edge probabilities when BPI is available.
2744 if (auto *BPI = getBPI())
2745 BPI->copyEdgeProbabilities(BB, PredBB);
2746 DTU->applyUpdatesPermissive(Updates);
2747
2748 ++NumDupes;
2749 return true;
2750 }
2751
2752 // Pred is a predecessor of BB with an unconditional branch to BB. SI is
2753 // a Select instruction in Pred. BB has other predecessors and SI is used in
2754 // a PHI node in BB. SI has no other use.
2755 // A new basic block, NewBB, is created and SI is converted to a conditional
2756 // branch on its condition. SI is erased from parent.
//
// SIUse is the PHI in BB that uses SI, and Idx is the index of SIUse's
// incoming entry that is rewritten (callers pass the entry for Pred).
// NOTE(review): listing line 2757 (the start of the signature) is missing from
// this extract; the name unfoldSelectInstr is taken from the call sites below.
2758 SelectInst *SI, PHINode *SIUse,
2759 unsigned Idx) {
2760 // Expand the select.
2761 //
2762 // Pred --
2763 // | v
2764 // | NewBB
2765 // | |
2766 // |-----
2767 // v
2768 // BB
2769 BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2770 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2771 BB->getParent(), BB);
2772 // Move the unconditional branch to NewBB.
2773 PredTerm->removeFromParent();
2774 PredTerm->insertInto(NewBB, NewBB->end());
2775 // Create a conditional branch and update PHI nodes.
// Condition true -> NewBB (which carries the select's true value into SIUse);
// condition false -> BB directly (entry Idx is rewritten to the false value).
2776 auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2777 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2778 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
2779 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2780 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2781
// Default to 1:1 weights; they are only overwritten when SI carries branch
// weights.
2782 uint64_t TrueWeight = 1;
2783 uint64_t FalseWeight = 1;
2784 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2785 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2786 (TrueWeight + FalseWeight) != 0) {
// NOTE(review): listing lines 2787-2788 and 2790 are missing from this
// extract; presumably they declare BP and open the two calls whose argument
// tails appear below.
2789 TrueWeight, TrueWeight + FalseWeight));
2791 FalseWeight, TrueWeight + FalseWeight));
2792 // Update BPI if exists.
2793 if (auto *BPI = getBPI())
2794 BPI->setEdgeProbability(Pred, BP);
2795 }
2796 // Set the block frequency of NewBB.
2797 if (auto *BFI = getBFI()) {
// Guard against a zero denominator when the extracted weights sum to zero.
2798 if ((TrueWeight + FalseWeight) == 0) {
2799 TrueWeight = 1;
2800 FalseWeight = 1;
2801 }
// NOTE(review): listing line 2802 (the declaration of PredToNewBBProb) is
// missing from this extract.
2803 TrueWeight, TrueWeight + FalseWeight);
2804 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2805 BFI->setBlockFreq(NewBB, NewBBFreq);
2806 }
2807
2808 // The select is now dead.
2809 SI->eraseFromParent();
2810 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2811 {DominatorTree::Insert, Pred, NewBB}});
2812
2813 // Update any other PHI nodes in BB.
// NewBB is a new predecessor of BB; every other PHI sees the same value from
// NewBB as it does from Pred.
2814 for (BasicBlock::iterator BI = BB->begin();
2815 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2816 if (Phi != SIUse)
2817 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2818 }
2819
// NOTE(review): listing line 2820 (the signature) is missing from this
// extract. The body uses a parameter SI that has getCondition() and a block
// BB; presumably this is another overload of tryToUnfoldSelect - confirm
// against the full file.
//
// If SI's condition is a PHI located in BB, scan its incoming values for a
// select that lives in the corresponding predecessor, has a single use, and
// whose predecessor ends in an unconditional branch. The first such select is
// unfolded with unfoldSelectInstr and true is returned; otherwise false.
2821 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2822
2823 if (!CondPHI || CondPHI->getParent() != BB)
2824 return false;
2825
2826 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2827 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2828 SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2829
2830 // The second and third condition can be potentially relaxed. Currently
2831 // the conditions help to simplify the code and allow us to reuse existing
2832 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2833 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2834 continue;
2835
// unfoldSelectInstr casts Pred's terminator to BranchInst and moves it, so
// only predecessors ending in an unconditional branch are handled.
2836 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2837 if (!PredTerm || !PredTerm->isUnconditional())
2838 continue;
2839
2840 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2841 return true; // Only one select is unfolded per call.
2842 }
2843 return false;
2844 }
2845
2846 /// tryToUnfoldSelect - Look for blocks of the form
2847 /// bb1:
2848 /// %a = select
2849 /// br bb2
2850 ///
2851 /// bb2:
2852 /// %p = phi [%a, %bb1] ...
2853 /// %c = icmp %p
2854 /// br i1 %c
2855 ///
2856 /// And expand the select into a branch structure if one of its arms allows %c
2857 /// to be folded. This later enables threading from bb1 over bb2.
///
/// Returns true if a select was unfolded (at most one per call), else false.
// NOTE(review): listing line 2858 (the signature) is missing from this
// extract; the body uses parameters named CondCmp and BB.
2859 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2860 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
// cast<> rather than dyn_cast<>: operand 1 of CondCmp is expected to already
// be a Constant when this is called.
2861 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2862
2863 if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2864 CondLHS->getParent() != BB)
2865 return false;
2866
2867 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2868 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2869 SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2870
2871 // Look if one of the incoming values is a select in the corresponding
2872 // predecessor.
2873 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2874 continue;
2875
2876 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2877 if (!PredTerm || !PredTerm->isUnconditional())
2878 continue;
2879
2880 // Now check if one of the select values would allow us to constant fold the
2881 // terminator in BB. We don't do the transform if both sides fold, those
2882 // cases will be threaded in any case.
// Operands 1 and 2 of the select are queried separately - presumably its true
// and false arms.
2883 LazyValueInfo::Tristate LHSFolds =
2884 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2885 CondRHS, Pred, BB, CondCmp);
2886 LazyValueInfo::Tristate RHSFolds =
2887 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2888 CondRHS, Pred, BB, CondCmp);
// Unfold only when at least one arm has a known result and the two arms do
// not have the same result.
2889 if ((LHSFolds != LazyValueInfo::Unknown ||
2890 RHSFolds != LazyValueInfo::Unknown) &&
2891 LHSFolds != RHSFolds) {
2892 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2893 return true;
2894 }
2895 }
2896 return false;
2897 }
2898
2899 /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2900 /// same BB in the form
2901 /// bb:
2902 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2903 /// %s = select %p, trueval, falseval
2904 ///
2905 /// or
2906 ///
2907 /// bb:
2908 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2909 /// %c = cmp %p, 0
2910 /// %s = select %c, trueval, falseval
2911 ///
2912 /// And expand the select into a branch structure. This later enables
2913 /// jump-threading over bb in this pass.
2914 ///
2915 /// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold
2916 /// the select if the associated PHI has at least one constant. If the unfolded
2917 /// select is not jump-threaded, it will be folded again in the later
2918 /// optimizations.
///
/// Returns true if a select was unfolded (at most one per call), else false.
// NOTE(review): listing line 2919 (the signature) is missing from this
// extract; the body uses a parameter named BB.
2920 // This transform would reduce the quality of msan diagnostics.
2921 // Disable this transform under MemorySanitizer.
2922 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2923 return false;
2924
2925 // If threading this would thread across a loop header, don't thread the edge.
2926 // See the comments above findLoopHeaders for justifications and caveats.
2927 if (LoopHeaders.count(BB))
2928 return false;
2929
2930 for (BasicBlock::iterator BI = BB->begin();
2931 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2932 // Look for a Phi having at least one constant incoming value.
2933 if (llvm::all_of(PN->incoming_values(),
2934 [](Value *V) { return !isa<ConstantInt>(V); }))
2935 continue;
2936
// A select qualifies when it is in BB, its condition is exactly V, that
// condition has type i1, and the select is not a logical and/or pattern.
2937 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2938 using namespace PatternMatch;
2939
2940 // Check if SI is in BB and use V as condition.
2941 if (SI->getParent() != BB)
2942 return false;
2943 Value *Cond = SI->getCondition();
2944 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2945 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2946 };
2947
2948 SelectInst *SI = nullptr;
2949 for (Use &U : PN->uses()) {
2950 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2951 // Look for an ICmp in BB that compares PN with a constant and is the
2952 // condition of a Select.
// `1 - U.getOperandNo()` selects the compare operand that is not PN.
2953 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2954 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2955 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2956 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2957 SI = SelectI;
2958 break;
2959 }
2960 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2961 // Look for a Select in BB that uses PN as condition.
2962 if (isUnfoldCandidate(SelectI, U.get())) {
2963 SI = SelectI;
2964 break;
2965 }
2966 }
2967 }
2968
2969 if (!SI)
2970 continue;
2971 // Expand the select.
2972 Value *Cond = SI->getCondition();
// Branch on a frozen copy of the condition unless it is already known not to
// be undef or poison.
2973 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
2974 Cond = new FreezeInst(Cond, "cond.fr", SI);
2975 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
2976 Instruction *Term =
2977 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
// After the split, SI lives in SplitBB and Term is the terminator of the new
// "then" block NewBB.
2978 BasicBlock *SplitBB = SI->getParent();
2979 BasicBlock *NewBB = Term->getParent();
// Replace the select by a two-input PHI: true value via NewBB, false value
// via the direct edge from BB.
2980 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
2981 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2982 NewPN->addIncoming(SI->getFalseValue(), BB);
2983 SI->replaceAllUsesWith(NewPN);
2984 SI->eraseFromParent();
2985 // NewBB and SplitBB are newly created blocks which require insertion.
2986 std::vector<DominatorTree::UpdateType> Updates;
// Two updates per successor of SplitBB plus the three insertions below.
2987 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2988 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2989 Updates.push_back({DominatorTree::Insert, BB, NewBB});
2990 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2991 // BB's successors were moved to SplitBB, update DTU accordingly.
2992 for (auto *Succ : successors(SplitBB)) {
2993 Updates.push_back({DominatorTree::Delete, BB, Succ});
2994 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2995 }
2996 DTU->applyUpdatesPermissive(Updates);
2997 return true;
2998 }
2999 return false;
3000 }
3001
3002 /// Try to propagate a guard from the current BB into one of its predecessors
3003 /// if another branch of execution implies that the condition of this
3004 /// guard is always true. Currently we only process the simplest case that
3005 /// looks like:
3006 ///
3007 /// Start:
3008 /// %cond = ...
3009 /// br i1 %cond, label %T1, label %F1
3010 /// T1:
3011 /// br label %Merge
3012 /// F1:
3013 /// br label %Merge
3014 /// Merge:
3015 /// %condGuard = ...
3016 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3017 ///
3018 /// And cond either implies condGuard or !condGuard. In this case all the
3019 /// instructions before the guard can be duplicated in both branches, and the
3020 /// guard is then threaded to one of them.
///
/// Returns true as soon as threadGuard succeeds for one guard in BB; returns
/// false if BB does not match the shape above or no guard could be threaded.
// NOTE(review): listing line 3021 (the signature) is missing from this
// extract; the body uses a parameter named BB.
3022 using namespace PatternMatch;
3023
3024 // We only want to deal with two predecessors.
3025 BasicBlock *Pred1, *Pred2;
3026 auto PI = pred_begin(BB), PE = pred_end(BB);
3027 if (PI == PE)
3028 return false;
3029 Pred1 = *PI++;
3030 if (PI == PE)
3031 return false;
3032 Pred2 = *PI++;
3033 if (PI != PE)
3034 return false;
// Reject the case where both predecessor entries name the same block.
3035 if (Pred1 == Pred2)
3036 return false;
3037
3038 // Try to thread one of the guards of the block.
3039 // TODO: Look up deeper than to immediate predecessor?
// Both predecessors must hang off the same single parent block (the diamond).
3040 auto *Parent = Pred1->getSinglePredecessor();
3041 if (!Parent || Parent != Pred2->getSinglePredecessor())
3042 return false;
3043
// Pred1 and Pred2 are distinct successors of Parent, which is consistent with
// the two-successor conditional branch that threadGuard asserts on BI.
3044 if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3045 for (auto &I : *BB)
3046 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3047 return true;
3048
3049 return false;
3050 }
3051
3052 /// Try to propagate the guard from BB, which is the lower block of a diamond,
3053 /// to one of its branches if the diamond's condition implies the guard's
3054 /// condition.
///
/// BI is the conditional branch at the top of the diamond and Guard is the
/// guard call inside BB. Returns false when neither branch direction implies
/// the guard condition or when the duplication cost exceeds BBDupThreshold;
/// returns true after the guard has been threaded.
// NOTE(review): listing line 3055 (the start of the signature) is missing from
// this extract; the name threadGuard is taken from its call site.
3056 BranchInst *BI) {
3057 assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3058 assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3059 Value *GuardCond = Guard->getArgOperand(0);
3060 Value *BranchCond = BI->getCondition();
3061 BasicBlock *TrueDest = BI->getSuccessor(0);
3062 BasicBlock *FalseDest = BI->getSuccessor(1);
3063
3064 auto &DL = BB->getModule()->getDataLayout();
3065 bool TrueDestIsSafe = false;
3066 bool FalseDestIsSafe = false;
3067
3068 // True dest is safe if BranchCond => GuardCond.
3069 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3070 if (Impl && *Impl)
3071 TrueDestIsSafe = true;
3072 else {
3073 // False dest is safe if !BranchCond => GuardCond.
3074 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3075 if (Impl && *Impl)
3076 FalseDestIsSafe = true;
3077 }
3078
3079 if (!TrueDestIsSafe && !FalseDestIsSafe)
3080 return false;
3081
// Exactly one of the two flags is set here (the false side is only tested when
// the true side failed), so these pick opposite successors of BI.
3082 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3083 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3084
3085 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3086 Instruction *AfterGuard = Guard->getNextNode();
// The cost is measured up to AfterGuard, i.e. it includes the guard itself.
3087 unsigned Cost =
3088 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3089 if (Cost > BBDupThreshold)
3090 return false;
3091 // Duplicate all instructions before the guard and the guard itself to the
3092 // branch where implication is not proved.
// NOTE(review): listing line 3093, which declares GuardedBlock and opens this
// call, is missing from this extract.
3094 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3095 assert(GuardedBlock && "Could not create the guarded block?");
3096 // Duplicate all instructions before the guard in the unguarded branch.
3097 // Since we have successfully duplicated the guarded block and this block
3098 // has fewer instructions, we expect it to succeed.
// NOTE(review): listing line 3099, which declares UnguardedBlock and opens
// this call, is missing from this extract.
3100 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3101 assert(UnguardedBlock && "Could not create the unguarded block?");
3102 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3103 << GuardedBlock->getName() << "\n");
3104 // Some instructions before the guard may still have uses. For them, we need
3105 // to create Phi nodes merging their copies in both guarded and unguarded
3106 // branches. Those instructions that have no uses can be just removed.
// NOTE(review): listing line 3107 (the declaration of ToRemove) is missing
// from this extract.
// Collect every non-PHI instruction up to and including the guard.
3108 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3109 if (!isa<PHINode>(&*BI))
3110 ToRemove.push_back(&*BI);
3111
3112 BasicBlock::iterator InsertionPoint = BB->getFirstInsertionPt();
3113 assert(InsertionPoint != BB->end() && "Empty block?");
3114 // Substitute with Phis & remove.
// Walk in reverse program order - presumably so that users within the range
// are erased before the instructions they use.
3115 for (auto *Inst : reverse(ToRemove)) {
3116 if (!Inst->use_empty()) {
3117 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3118 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3119 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3120 NewPN->insertBefore(InsertionPoint);
3121 Inst->replaceAllUsesWith(NewPN);
3122 }
3123 Inst->dropDbgValues();
3124 Inst->eraseFromParent();
3125 }
3126 return true;
3127 }
3128
// Returns the set of analyses this pass reports as preserved.
// NOTE(review): listing lines 3130-3132 are missing from this extract; they
// presumably declare PA and mark the preserved analyses. BPI/BFI are not yet
// included, per the TODOs below.
3129 PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
3133
3134 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3135 // TODO: Would be nice to verify BPI/BFI consistency as well.
3136 return PA;
3137 }
3138
// Runs analysis AnalysisT on F through the FunctionAnalysisManager and returns
// a pointer to its result. If the function changed since the last call, the
// analyses not reported as preserved are invalidated first, pending dominator
// tree updates are flushed, and the cached TTI/TLI/AA pointers are refreshed.
// Requires FAM to be set (asserted).
3139 template <typename AnalysisT>
3140 typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3141 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3142
3143 // If there were no changes since last call to 'runExternalAnalysis' then all
3144 // analysis is either up to date or explicitly invalidated. Just go ahead and
3145 // run the "external" analysis.
3146 if (!ChangedSinceLastAnalysisUpdate) {
3147 assert(!DTU->hasPendingUpdates() &&
3148 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3149 // Run the "external" analysis.
3150 return &FAM->getResult<AnalysisT>(*F);
3151 }
3152 ChangedSinceLastAnalysisUpdate = false;
3153
3154 auto PA = getPreservedAnalysis();
3155 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3156 // as preserved.
3157 PA.preserve<BranchProbabilityAnalysis>();
3158 PA.preserve<BlockFrequencyAnalysis>();
3159 // Report everything except explicitly preserved as invalid.
3160 FAM->invalidate(*F, PA);
3161 // Update DT/PDT.
3162 DTU->flush();
3163 // Make sure DT/PDT are valid before running "external" analysis.
3164 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3165 assert((!DTU->hasPostDomTree() ||
3166 DTU->getPostDomTree().verify(
// NOTE(review): listing line 3167 (the end of this assert) is missing from
// this extract.
3168 // Run the "external" analysis.
3169 auto *Result = &FAM->getResult<AnalysisT>(*F);
3170 // Update analysis JumpThreading depends on and not explicitly preserved.
// These were not in the preserved set passed to invalidate() above, so the
// cached pointers are re-fetched.
3171 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3172 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3173 AA = &FAM->getResult<AAManager>(*F);
3174
3175 return Result;
3176 }
3177
// Returns the BranchProbabilityInfo pointer held in BPI, populating BPI first
// if it has not been set. Requires FAM when BPI is unset (asserted).
3178 BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3179 if (!BPI) {
3180 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
// NOTE(review): listing line 3181 is missing from this extract; presumably it
// assigns BPI from the analysis manager - confirm against the full file.
3182 }
3183 return *BPI;
3184 }
3185
// Returns the BlockFrequencyInfo pointer held in BFI, populating BFI first if
// it has not been set. Requires FAM when BFI is unset (asserted).
3186 BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3187 if (!BFI) {
3188 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
// NOTE(review): listing line 3189 is missing from this extract; presumably it
// assigns BFI from the analysis manager - confirm against the full file.
3190 }
3191 return *BFI;
3192 }
3193
3194 // Important note on validity of BPI/BFI. JumpThreading tries to preserve
3195 // BPI/BFI as it goes. Thus if a cached instance exists it will be updated.
3196 // Otherwise, a new instance of BPI/BFI is created (up to date by definition).
//
// Returns the BPI from getBPI() when that is non-null. Otherwise, if Force is
// set, runs BranchProbabilityAnalysis via runExternalAnalysis and stores the
// result in BPI. When Force is false and nothing is available the stored value
// is returned unchanged (presumably null).
3197 BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3198 auto *Res = getBPI();
3199 if (Res)
3200 return Res;
3201
3202 if (Force)
3203 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3204
3205 return *BPI;
3206 }
3207
// Returns the BFI from getBFI() when that is non-null. Otherwise, if Force is
// set, runs BlockFrequencyAnalysis via runExternalAnalysis and stores the
// result in BFI. When Force is false and nothing is available the stored value
// is returned unchanged (presumably null).
3208 BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3209 auto *Res = getBFI();
3210 if (Res)
3211 return Res;
3212
3213 if (Force)
3214 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3215
3216 return *BFI;
3217 }
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
ReachingDefAnalysis InstSet & ToRemove
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap< Instruction *, Value * > &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:531
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
ppc ctr loops verify
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:519
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
DPMarker * getMarker(InstListType::iterator It)
Return the DPMarker for the position given by It, so that DPValues can be inserted there.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:389
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:639
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:165
const Instruction & front() const
Definition: BasicBlock.h:452
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:266
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:454
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:432
DPMarker * createMarker(Instruction *I)
Attach a DPMarker to the given instruction.
Definition: BasicBlock.cpp:39
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:150
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:652
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:656
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:271
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:489
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition: Constants.h:888
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1846
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1066
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2531
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2328
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:210
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:204
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:863
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition: Constant.h:41
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:722
Per-instruction record of debug-info.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DPMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DPMarkers from From into this marker.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static auto filter(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DPValue types only and downcast.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
const BasicBlock * getParent() const
This represents the llvm.dbg.value instruction.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:275
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
This class represents a freeze function that returns random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition: Function.h:779
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:88
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:452
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:80
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1718
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
const BasicBlock * getParent() const
Definition: Instruction.h:150
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1704
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:250
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:449
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
bool isSpecialTerminator() const
Definition: Instruction.h:260
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case if another branch o...
DenseMap< Instruction *, Value * > cloneInstructions(BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, std::optional< BlockFrequencyInfo * > BFI, std::optional< BranchProbabilityInfo * > BPI)
bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond)
bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, DenseSet< Value * > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
bool processImpliedCondition(BasicBlock *BB)
bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
void updateSSA(BasicBlock *BB, BasicBlock *NewBB, DenseMap< Instruction *, Value * > &ValueMapping)
Update the SSA form.
void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:31
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Tristate
This is used to return true/false/dunno results.
Definition: LazyValueInfo.h:61
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRange constraint that is known to hold for the specified value on the specified edg...
Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
An instruction for reading from memory.
Definition: Instructions.h:184
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:245
bool isUnordered() const
Definition: Instructions.h:274
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:255
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
static LocationSize precise(uint64_t Value)
Metadata node.
Definition: Metadata.h:1067
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
iterator begin()
Definition: MapVector.h:69
Representation for a specific memory location.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:191
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:188
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:53
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
Definition: SSAUpdater.cpp:200
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:70
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
'undef' values are things that do not have specified contents.
Definition: Constants.h:1350
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
See the file comment.
Definition: ValueMap.h:84
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1066
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1013
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:982
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:129
unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:3416
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:582
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:724
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:453
BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1113
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instruction has valid Branch Weight Metadata.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3292
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:764
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1950
Value * simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DPValue * > *DPValues=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:137
void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2031
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1616
void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
unsigned pred_size(const MachineBasicBlock *BB)
void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:34
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1468