//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Peephole optimize the CFG.
//
//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
299 bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
300 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
301
302 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303 IRBuilder<> &Builder);
304 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
305 SelectInst *Select,
306 IRBuilder<> &Builder);
307 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
308 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
309 Instruction *TI, Instruction *I1,
310 SmallVectorImpl<Instruction *> &OtherSuccTIs,
311 ArrayRef<BasicBlock *> UniqueSuccessors);
312 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
313 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
314 BasicBlock *TrueBB, BasicBlock *FalseBB,
315 uint32_t TrueWeight, uint32_t FalseWeight);
316 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
317 const DataLayout &DL);
318 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
319 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
320 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321
322public:
323 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
324 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
325 const SimplifyCFGOptions &Opts)
326 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
327 assert((!DTU || !DTU->hasPostDomTree()) &&
328 "SimplifyCFG is not yet capable of maintaining validity of a "
329 "PostDomTree, so don't ask for it.");
330 }
331
332 bool simplifyOnce(BasicBlock *BB);
333 bool run(BasicBlock *BB);
334
335 // Helper to set Resimplify and return change indication.
336 bool requestResimplify() {
337 Resimplify = true;
338 return true;
339 }
340};
341
342// we synthesize a || b as select a, true, b
343// we synthesize a && b as select a, b, false
344// this function determines if SI is playing one of those roles.
345[[maybe_unused]] bool
346isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
347 return ((isa<ConstantInt>(SI->getTrueValue()) &&
348 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
349 (isa<ConstantInt>(SI->getFalseValue()) &&
350 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
351}
352
353} // end anonymous namespace
354
355/// Return true if all the PHI nodes in the basic block \p BB
356/// receive compatible (identical) incoming values when coming from
357/// all of the predecessor blocks that are specified in \p IncomingBlocks.
358///
359/// Note that if the values aren't exactly identical, but \p EquivalenceSet
360/// is provided, and *both* of the values are present in the set,
361/// then they are considered equal.
363 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
364 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
365 assert(IncomingBlocks.size() == 2 &&
366 "Only for a pair of incoming blocks at the time!");
367
368 // FIXME: it is okay if one of the incoming values is an `undef` value,
369 // iff the other incoming value is guaranteed to be a non-poison value.
370 // FIXME: it is okay if one of the incoming values is a `poison` value.
371 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
372 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
373 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
374 if (IV0 == IV1)
375 return true;
376 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
377 EquivalenceSet->contains(IV1))
378 return true;
379 return false;
380 });
381}
382
383/// Return true if it is safe to merge these two
384/// terminator instructions together.
385static bool
387 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
388 if (SI1 == SI2)
389 return false; // Can't merge with self!
390
391 // It is not safe to merge these two switch instructions if they have a common
392 // successor, and if that successor has a PHI node, and if *that* PHI node has
393 // conflicting incoming values from the two switch blocks.
394 BasicBlock *SI1BB = SI1->getParent();
395 BasicBlock *SI2BB = SI2->getParent();
396
398 bool Fail = false;
399 for (BasicBlock *Succ : successors(SI2BB)) {
400 if (!SI1Succs.count(Succ))
401 continue;
402 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
403 continue;
404 Fail = true;
405 if (FailBlocks)
406 FailBlocks->insert(Succ);
407 else
408 break;
409 }
410
411 return !Fail;
412}
413
414/// Update PHI nodes in Succ to indicate that there will now be entries in it
415/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
416/// will be the same as those coming in from ExistPred, an existing predecessor
417/// of Succ.
418static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
419 BasicBlock *ExistPred,
420 MemorySSAUpdater *MSSAU = nullptr) {
421 for (PHINode &PN : Succ->phis())
422 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
423 if (MSSAU)
424 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
425 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
426}
427
428/// Compute an abstract "cost" of speculating the given instruction,
429/// which is assumed to be safe to speculate. TCC_Free means cheap,
430/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
431/// expensive.
433 const TargetTransformInfo &TTI) {
434 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
435}
436
437/// If we have a merge point of an "if condition" as accepted above,
438/// return true if the specified value dominates the block. We don't handle
439/// the true generality of domination here, just a special case which works
440/// well enough for us.
441///
442/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
443/// see if V (which must be an instruction) and its recursive operands
444/// that do not dominate BB have a combined cost lower than Budget and
445/// are non-trapping. If both are true, the instruction is inserted into the
446/// set and true is returned.
447///
448/// The cost for most non-trapping instructions is defined as 1 except for
449/// Select whose cost is 2.
450///
451/// After this function returns, Cost is increased by the cost of
452/// V plus its non-dominating operands. If that cost is greater than
453/// Budget, false is returned and Cost is undefined.
455 Value *V, BasicBlock *BB, Instruction *InsertPt,
456 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
458 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
459 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
460 // so limit the recursion depth.
461 // TODO: While this recursion limit does prevent pathological behavior, it
462 // would be better to track visited instructions to avoid cycles.
464 return false;
465
467 if (!I) {
468 // Non-instructions dominate all instructions and can be executed
469 // unconditionally.
470 return true;
471 }
472 BasicBlock *PBB = I->getParent();
473
474 // We don't want to allow weird loops that might have the "if condition" in
475 // the bottom of this block.
476 if (PBB == BB)
477 return false;
478
479 // If this instruction is defined in a block that contains an unconditional
480 // branch to BB, then it must be in the 'conditional' part of the "if
481 // statement". If not, it definitely dominates the region.
483 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
484 return true;
485
486 // If we have seen this instruction before, don't count it again.
487 if (AggressiveInsts.count(I))
488 return true;
489
490 // Okay, it looks like the instruction IS in the "condition". Check to
491 // see if it's a cheap instruction to unconditionally compute, and if it
492 // only uses stuff defined outside of the condition. If so, hoist it out.
493 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
494 return false;
495
496 // Overflow arithmetic instruction plus extract value are usually generated
497 // when a division is being replaced. But, in this case, the zero check may
498 // still be kept in the code. In that case it would be worth to hoist these
499 // two instruction out of the basic block. Let's treat this pattern as one
500 // single cheap instruction here!
501 WithOverflowInst *OverflowInst;
502 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
503 ZeroCostInstructions.insert(OverflowInst);
504 Cost += 1;
505 } else if (!ZeroCostInstructions.contains(I))
506 Cost += computeSpeculationCost(I, TTI);
507
508 // Allow exactly one instruction to be speculated regardless of its cost
509 // (as long as it is safe to do so).
510 // This is intended to flatten the CFG even if the instruction is a division
511 // or other expensive operation. The speculation of an expensive instruction
512 // is expected to be undone in CodeGenPrepare if the speculation has not
513 // enabled further IR optimizations.
514 if (Cost > Budget &&
515 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
516 !Cost.isValid()))
517 return false;
518
519 // Okay, we can only really hoist these out if their operands do
520 // not take us over the cost threshold.
521 for (Use &Op : I->operands())
522 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
523 TTI, AC, ZeroCostInstructions, Depth + 1))
524 return false;
525 // Okay, it's safe to do this! Remember this instruction.
526 AggressiveInsts.insert(I);
527 return true;
528}
529
530/// Extract ConstantInt from value, looking through IntToPtr
531/// and PointerNullValue. Return NULL if value is not a constant int.
533 // Normal constant int.
535 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
536 return CI;
537
538 // It is not safe to look through inttoptr or ptrtoint when using unstable
539 // pointer types.
540 if (DL.hasUnstableRepresentation(V->getType()))
541 return nullptr;
542
543 // This is some kind of pointer constant. Turn it into a pointer-sized
544 // ConstantInt if possible.
545 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
546
547 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
549 return ConstantInt::get(IntPtrTy, 0);
550
551 // IntToPtr const int, we can look through this if the semantics of
552 // inttoptr for this address space are a simple (truncating) bitcast.
554 if (CE->getOpcode() == Instruction::IntToPtr)
555 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
556 // The constant is very likely to have the right type already.
557 if (CI->getType() == IntPtrTy)
558 return CI;
559 else
560 return cast<ConstantInt>(
561 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
562 }
563 return nullptr;
564}
565
566namespace {
567
568/// Given a chain of or (||) or and (&&) comparison of a value against a
569/// constant, this will try to recover the information required for a switch
570/// structure.
571/// It will depth-first traverse the chain of comparison, seeking for patterns
572/// like %a == 12 or %a < 4 and combine them to produce a set of integer
573/// representing the different cases for the switch.
574/// Note that if the chain is composed of '||' it will build the set of elements
575/// that matches the comparisons (i.e. any of this value validate the chain)
576/// while for a chain of '&&' it will build the set elements that make the test
577/// fail.
578struct ConstantComparesGatherer {
579 const DataLayout &DL;
580
581 /// Value found for the switch comparison
582 Value *CompValue = nullptr;
583
584 /// Extra clause to be checked before the switch
585 Value *Extra = nullptr;
586
587 /// Set of integers to match in switch
589
590 /// Number of comparisons matched in the and/or chain
591 unsigned UsedICmps = 0;
592
593 /// If the elements in Vals matches the comparisons
594 bool IsEq = false;
595
596 // Used to check if the first matched CompValue shall be the Extra check.
597 bool IgnoreFirstMatch = false;
598 bool MultipleMatches = false;
599
600 /// Construct and compute the result for the comparison instruction Cond
601 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
602 gather(Cond);
603 if (CompValue || !MultipleMatches)
604 return;
605 Extra = nullptr;
606 Vals.clear();
607 UsedICmps = 0;
608 IgnoreFirstMatch = true;
609 gather(Cond);
610 }
611
612 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
613 ConstantComparesGatherer &
614 operator=(const ConstantComparesGatherer &) = delete;
615
616private:
617 /// Try to set the current value used for the comparison, it succeeds only if
618 /// it wasn't set before or if the new value is the same as the old one
619 bool setValueOnce(Value *NewVal) {
620 if (IgnoreFirstMatch) {
621 IgnoreFirstMatch = false;
622 return false;
623 }
624 if (CompValue && CompValue != NewVal) {
625 MultipleMatches = true;
626 return false;
627 }
628 CompValue = NewVal;
629 return true;
630 }
631
632 /// Try to match Instruction "I" as a comparison against a constant and
633 /// populates the array Vals with the set of values that match (or do not
634 /// match depending on isEQ).
635 /// Return false on failure. On success, the Value the comparison matched
636 /// against is placed in CompValue.
637 /// If CompValue is already set, the function is expected to fail if a match
638 /// is found but the value compared to is different.
639 bool matchInstruction(Instruction *I, bool isEQ) {
640 if (match(I, m_Not(m_Instruction(I))))
641 isEQ = !isEQ;
642
643 Value *Val;
644 if (match(I, m_NUWTrunc(m_Value(Val)))) {
645 // If we already have a value for the switch, it has to match!
646 if (!setValueOnce(Val))
647 return false;
648 UsedICmps++;
649 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
650 return true;
651 }
652 // If this is an icmp against a constant, handle this as one of the cases.
653 ICmpInst *ICI;
654 ConstantInt *C;
655 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
656 (C = getConstantInt(I->getOperand(1), DL)))) {
657 return false;
658 }
659
660 Value *RHSVal;
661 const APInt *RHSC;
662
663 // Pattern match a special case
664 // (x & ~2^z) == y --> x == y || x == y|2^z
665 // This undoes a transformation done by instcombine to fuse 2 compares.
666 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
667 // It's a little bit hard to see why the following transformations are
668 // correct. Here is a CVC3 program to verify them for 64-bit values:
669
670 /*
671 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
672 x : BITVECTOR(64);
673 y : BITVECTOR(64);
674 z : BITVECTOR(64);
675 mask : BITVECTOR(64) = BVSHL(ONE, z);
676 QUERY( (y & ~mask = y) =>
677 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
678 );
679 QUERY( (y | mask = y) =>
680 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
681 );
682 */
683
684 // Please note that each pattern must be a dual implication (<--> or
685 // iff). One directional implication can create spurious matches. If the
686 // implication is only one-way, an unsatisfiable condition on the left
687 // side can imply a satisfiable condition on the right side. Dual
688 // implication ensures that satisfiable conditions are transformed to
689 // other satisfiable conditions and unsatisfiable conditions are
690 // transformed to other unsatisfiable conditions.
691
692 // Here is a concrete example of a unsatisfiable condition on the left
693 // implying a satisfiable condition on the right:
694 //
695 // mask = (1 << z)
696 // (x & ~mask) == y --> (x == y || x == (y | mask))
697 //
698 // Substituting y = 3, z = 0 yields:
699 // (x & -2) == 3 --> (x == 3 || x == 2)
700
701 // Pattern match a special case:
702 /*
703 QUERY( (y & ~mask = y) =>
704 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
705 );
706 */
707 if (match(ICI->getOperand(0),
708 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
709 APInt Mask = ~*RHSC;
710 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
711 // If we already have a value for the switch, it has to match!
712 if (!setValueOnce(RHSVal))
713 return false;
714
715 Vals.push_back(C);
716 Vals.push_back(
717 ConstantInt::get(C->getContext(),
718 C->getValue() | Mask));
719 UsedICmps++;
720 return true;
721 }
722 }
723
724 // Pattern match a special case:
725 /*
726 QUERY( (y | mask = y) =>
727 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
728 );
729 */
730 if (match(ICI->getOperand(0),
731 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
732 APInt Mask = *RHSC;
733 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
734 // If we already have a value for the switch, it has to match!
735 if (!setValueOnce(RHSVal))
736 return false;
737
738 Vals.push_back(C);
739 Vals.push_back(ConstantInt::get(C->getContext(),
740 C->getValue() & ~Mask));
741 UsedICmps++;
742 return true;
743 }
744 }
745
746 // If we already have a value for the switch, it has to match!
747 if (!setValueOnce(ICI->getOperand(0)))
748 return false;
749
750 UsedICmps++;
751 Vals.push_back(C);
752 return true;
753 }
754
755 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
756 ConstantRange Span =
758
759 // Shift the range if the compare is fed by an add. This is the range
760 // compare idiom as emitted by instcombine.
761 Value *CandidateVal = I->getOperand(0);
762 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
763 Span = Span.subtract(*RHSC);
764 CandidateVal = RHSVal;
765 }
766
767 // If this is an and/!= check, then we are looking to build the set of
768 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
769 // x != 0 && x != 1.
770 if (!isEQ)
771 Span = Span.inverse();
772
773 // If there are a ton of values, we don't want to make a ginormous switch.
774 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
775 return false;
776 }
777
778 // If we already have a value for the switch, it has to match!
779 if (!setValueOnce(CandidateVal))
780 return false;
781
782 // Add all values from the range to the set
783 APInt Tmp = Span.getLower();
784 do
785 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
786 while (++Tmp != Span.getUpper());
787
788 UsedICmps++;
789 return true;
790 }
791
792 /// Given a potentially 'or'd or 'and'd together collection of icmp
793 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
794 /// the value being compared, and stick the list constants into the Vals
795 /// vector.
796 /// One "Extra" case is allowed to differ from the other.
797 void gather(Value *V) {
798 Value *Op0, *Op1;
799 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
800 IsEq = true;
801 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
802 IsEq = false;
803 else
804 return;
805 // Keep a stack (SmallVector for efficiency) for depth-first traversal
806 SmallVector<Value *, 8> DFT{Op0, Op1};
807 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
808
809 while (!DFT.empty()) {
810 V = DFT.pop_back_val();
811
812 if (Instruction *I = dyn_cast<Instruction>(V)) {
813 // If it is a || (or && depending on isEQ), process the operands.
814 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
815 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
816 if (Visited.insert(Op1).second)
817 DFT.push_back(Op1);
818 if (Visited.insert(Op0).second)
819 DFT.push_back(Op0);
820
821 continue;
822 }
823
824 // Try to match the current instruction
825 if (matchInstruction(I, IsEq))
826 // Match succeed, continue the loop
827 continue;
828 }
829
830 // One element of the sequence of || (or &&) could not be match as a
831 // comparison against the same value as the others.
832 // We allow only one "Extra" case to be checked before the switch
833 if (!Extra) {
834 Extra = V;
835 continue;
836 }
837 // Failed to parse a proper sequence, abort now
838 CompValue = nullptr;
839 break;
840 }
841 }
842};
843
844} // end anonymous namespace
845
847 MemorySSAUpdater *MSSAU = nullptr) {
848 Instruction *Cond = nullptr;
850 Cond = dyn_cast<Instruction>(SI->getCondition());
851 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
852 if (BI->isConditional())
853 Cond = dyn_cast<Instruction>(BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
855 Cond = dyn_cast<Instruction>(IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
861}
862
863/// Return true if the specified terminator checks
864/// to see if a value is equal to constant integer value.
865Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
866 Value *CV = nullptr;
867 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
868 // Do not permit merging of large switch instructions into their
869 // predecessors unless there is only one predecessor.
870 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
871 CV = SI->getCondition();
872 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
873 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
874 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
875 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
876 CV = ICI->getOperand(0);
877 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
878 if (Trunc->hasNoUnsignedWrap())
879 CV = Trunc->getOperand(0);
880 }
881 }
882
883 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
884 if (CV) {
885 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
886 Value *Ptr = PTII->getPointerOperand();
887 if (DL.hasUnstableRepresentation(Ptr->getType()))
888 return CV;
889 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890 CV = Ptr;
891 }
892 }
893 return CV;
894}
895
896/// Given a value comparison instruction,
897/// decode all of the 'cases' that it represents and return the 'default' block.
898BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
901 Cases.reserve(SI->getNumCases());
902 for (auto Case : SI->cases())
903 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
904 Case.getCaseSuccessor()));
905 return SI->getDefaultDest();
906 }
907
908 BranchInst *BI = cast<BranchInst>(TI);
909 Value *Cond = BI->getCondition();
910 ICmpInst::Predicate Pred;
911 ConstantInt *C;
912 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
913 Pred = ICI->getPredicate();
914 C = getConstantInt(ICI->getOperand(1), DL);
915 } else {
916 Pred = ICmpInst::ICMP_NE;
917 auto *Trunc = cast<TruncInst>(Cond);
918 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
919 }
920 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
921 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
922 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
923}
924
925/// Given a vector of bb/value pairs, remove any entries
926/// in the list that match the specified block.
static void
// NOTE(review): the line carrying this function's name and first parameter
// (rendered as a hyperlink) was elided from this view -- presumably
// "eliminateBlockCases(BasicBlock *BB,". The parameter list below continues
// it. Confirm against the original source.
                    std::vector<ValueEqualityComparisonCase> &Cases) {
  // Removes every entry that compares equal to BB, i.e. every case whose
  // destination is the given block (relies on the case type being comparable
  // to a BasicBlock*).
  llvm::erase(Cases, BB);
}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(V1, V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
953 array_pod_sort(V1->begin(), V1->end());
954 array_pod_sort(V2->begin(), V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
967/// If TI is known to be a terminator instruction and its block is known to
968/// only have a single predecessor block, check to see if that predecessor is
969/// also a value comparison with the same value, and if that comparison
970/// determines the outcome of this comparison. If so, simplify TI. This does a
971/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  // Both terminators must compare the exact same value for this limited form
  // of jump threading to apply.
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      // NOTE(review): a statement rendered as a cross-reference was elided
      // from this view here -- presumably the call that erases TI and
      // dead-code-eliminates its condition. Confirm against upstream.

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Walk the cases backwards so removeCase does not disturb the positions
    // of cases not yet visited.
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    // Only successors that lost every one of their case edges need a
    // dominator-tree edge deletion.
    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge is cleared once the
  // kept edge has been seen, so any duplicate edges to TheRealDest still get
  // their extra PHI entries removed.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  // NOTE(review): a statement rendered as a cross-reference was elided from
  // this view here -- presumably the call that erases TI and dead-code-
  // eliminates its condition. Confirm against upstream.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
// NOTE(review): the line carrying this comparator's name and first parameter
// (presumably "static int constantIntSortPredicate(ConstantInt *const *P1,")
// was elided from this rendered view; the parameter below continues it.
                                     ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  // Identical pointers are trivially equal values.
  if (LHS == RHS)
    return 0;
  // Returns 1 when LHS is numerically below RHS (unsigned compare), so a
  // qsort-style sort using this predicate orders values in descending order
  // -- presumably intentional; confirm against callers.
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
// NOTE(review): the line carrying this function's name and first parameter
// (presumably "static void getBranchWeights(Instruction *TI,") was elided
// from this rendered view; the parameter below continues it.
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    // Non-icmp conditions (e.g. a trunc) need no swap: the "!= 0"
    // interpretation already puts the default (false) weight first.
    auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICI)
      return;

    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1165
// NOTE(review): the line carrying this function's name and qualifiers was
// elided from this rendered view (the parameter list below continues it).
// It clones BB's non-terminator "bonus" instructions into PredBlock and
// patches up block-closed-SSA uses -- confirm the exact signature upstream.
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(NewBonusInst, VMap,
    // NOTE(review): the remap-flags argument line was elided from this
    // rendered view (presumably RF_NoModuleLevelChanges |
    // RF_IgnoreMissingLocals). Confirm against upstream.

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
    // NOTE(review): the remap-flags argument line was elided from this
    // rendered view. Confirm against upstream.

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(PTI, VMap);
    }
  }
}
1244
/// Merge the value-equality comparison PTI (terminator of Pred) with the
/// comparison TI (terminator of BB) into a single switch on the shared
/// condition CV placed in Pred, updating PHIs, branch weights, and the
/// dominator tree along the way. Always returns true.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  // NOTE(review): a declaration rendered as a cross-reference was elided from
  // this view here -- presumably the `Updates` list of DominatorTree updates
  // referenced throughout this function. Confirm against upstream.

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    getBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Swap-and-pop keeps the scan O(n); i/e are adjusted to revisit the
        // swapped-in element.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[Case.Value]);
        PredCases.push_back(Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

  // NOTE(review): a statement rendered as a cross-reference was elided from
  // this view here -- presumably the call that erases PTI and dead-code-
  // eliminates its condition. Confirm against upstream.

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1458
1459/// The specified terminator is a value equality comparison instruction
1460/// (either a switch or a branch on "X == c").
1461/// See if any of the predecessors of the terminator block are value comparisons
1462/// on the same value. If so, and if safe to do so, fold them together.
1463bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1464 IRBuilder<> &Builder) {
1465 BasicBlock *BB = TI->getParent();
1466 Value *CV = isValueEqualityComparison(TI); // CondVal
1467 assert(CV && "Not a comparison?");
1468
1469 bool Changed = false;
1470
1471 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1472 while (!Preds.empty()) {
1473 BasicBlock *Pred = Preds.pop_back_val();
1474 Instruction *PTI = Pred->getTerminator();
1475
1476 // Don't try to fold into itself.
1477 if (Pred == BB)
1478 continue;
1479
1480 // See if the predecessor is a comparison with the same value.
1481 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1482 if (PCV != CV)
1483 continue;
1484
1485 SmallSetVector<BasicBlock *, 4> FailBlocks;
1486 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1487 for (auto *Succ : FailBlocks) {
1488 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1489 return false;
1490 }
1491 }
1492
1493 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1494 Changed = true;
1495 }
1496 return Changed;
1497}
1498
1499// If we would need to insert a select that uses the value of this invoke
1500// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1501// need to do this), we can't hoist the invoke, as there is nowhere to put the
1502// select in this case.
// NOTE(review): the line carrying this function's name and leading parameters
// (presumably "static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock
// *BB2,") was elided from this rendered view; the parameters below continue
// it.
                                 Instruction *I1, Instruction *I2) {
  // Scan the PHIs in BB1's successors: if a PHI merges different values for
  // the BB1/BB2 edges and one of those values is the instruction being
  // hoisted, a select would be required -- report unsafe.
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1516
1517// Get interesting characteristics of instructions that
1518// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1519// instructions can be reordered across.
// NOTE(review): several lines were elided from this rendered view here --
// presumably the flag enum (SkipReadMem / SkipSideEffect /
// SkipImplicitControlFlow) and this function's header (likely
// "static unsigned skippedInstrFlags(Instruction *I) {"). Confirm upstream.
  unsigned Flags = 0;
  // Reads matter because a later store must not be reordered above them.
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  // NOTE(review): the guard condition preceding this statement (presumably a
  // check that I is not guaranteed to transfer execution to its successor)
  // was elided from this view; as rendered the flag looks unconditional.
  Flags |= SkipImplicitControlFlow;
  return Flags;
}
1538
1539// Returns true if it is safe to reorder an instruction across preceding
1540// instructions in a basic block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  // NOTE(review): the condition line guarding this return (presumably a
  // SkipImplicitControlFlow check combined with a speculation-safety query)
  // was elided from this rendered view. Confirm against upstream.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1574
1575static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1576
1577/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1578/// instructions \p I1 and \p I2 can and should be hoisted.
// NOTE(review): the line carrying this function's name and leading parameters
// (presumably "static bool shouldHoistCommonInstructions(Instruction *I1,
// Instruction *I2,") was elided from this rendered view; the parameter below
// continues it.
                                         const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // Bail out if the target reports hoisting either instruction as
  // unprofitable.
  if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1607
1608/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1609/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1610/// hoistCommonCodeFromSuccessors. e.g. The input:
1611/// I1 DVRs: { x, z },
1612/// OtherInsts: { I2 DVRs: { x, y, z } }
1613/// would result in hoisting only DbgVariableRecord x.
// NOTE(review): the line carrying this function's name (presumably
// "static void hoistLockstepIdenticalDbgVariableRecords(") was elided from
// this rendered view; the parameter list below continues it.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  // NOTE(review): the declaration of `Itrs` (presumably a
  // SmallVector<CurrentAndEndIt>) was elided from this rendered view here.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
    // NOTE(review): the inner lambda's header line (its capture and
    // parameter) was elided from this rendered view. Confirm upstream.
                    return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  // Bail out unless every instruction carries DbgRecords; otherwise the
  // lock-step walk below could not match anything.
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1664
// NOTE(review): the line carrying this function's name and first parameter
// (presumably "static bool areIdenticalUpToCommutativity(const Instruction
// *I1,") was elided from this rendered view; the parameter below continues
// it.
                                          const Instruction *I2) {
  // Exact structural match (allowing attribute intersection).
  if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
    return true;

  // Two compares match if swapping one's predicate and operands yields the
  // other (e.g. "a < b" vs "b > a").
  if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
    if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
      return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
             Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
             Cmp1->getOperand(1) == Cmp2->getOperand(0);

  // Commutative operations match with their first two operands exchanged,
  // provided any remaining operands are pairwise identical.
  if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
    return I1->getOperand(0) == I2->getOperand(1) &&
           I1->getOperand(1) == I2->getOperand(0) &&
           equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
  }

  return false;
}
1684
1685/// If the target supports conditional faulting,
1686/// we look for the following pattern:
1687/// \code
1688/// BB:
1689/// ...
1690/// %cond = icmp ult %x, %y
1691/// br i1 %cond, label %TrueBB, label %FalseBB
1692/// FalseBB:
1693/// store i32 1, ptr %q, align 4
1694/// ...
1695/// TrueBB:
1696/// %maskedloadstore = load i32, ptr %b, align 4
1697/// store i32 %maskedloadstore, ptr %p, align 4
1698/// ...
1699/// \endcode
1700///
1701/// and transform it into:
1702///
1703/// \code
1704/// BB:
1705/// ...
1706/// %cond = icmp ult %x, %y
1707/// %maskedloadstore = cload i32, ptr %b, %cond
1708/// cstore i32 %maskedloadstore, ptr %p, %cond
1709/// cstore i32 1, ptr %q, ~%cond
1710/// br i1 %cond, label %TrueBB, label %FalseBB
1711/// FalseBB:
1712/// ...
1713/// TrueBB:
1714/// ...
1715/// \endcode
1716///
1717/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1718/// e.g.
1719///
1720/// \code
1721/// %vcond = bitcast i1 %cond to <1 x i1>
1722/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1723/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1724/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1725/// call void @llvm.masked.store.v1i32.p0
1726/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1727/// %cond.not = xor i1 %cond, true
1728/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1729/// call void @llvm.masked.store.v1i32.p0
1730/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1731/// \endcode
1732///
1733/// So we need to turn hoisted load/store into cload/cstore.
1734///
1735/// \param BI The branch instruction.
1736/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1737/// will be speculated.
1738/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1740 BranchInst *BI,
1741 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1742 std::optional<bool> Invert, Instruction *Sel) {
1743 auto &Context = BI->getParent()->getContext();
// The masked load/store intrinsics take a <1 x i1> mask; the scalar branch
// condition is bitcast to this type below.
1744 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1745 auto *Cond = BI->getOperand(0);
1746 // Construct the condition if needed.
1747 BasicBlock *BB = BI->getParent();
1748 Value *Mask = nullptr;
1749 Value *MaskFalse = nullptr;
1750 Value *MaskTrue = nullptr;
// Triangle CFG (Invert has a value): a single mask guards every speculated
// access; the condition is negated first when the false arm is speculated.
1751 if (Invert.has_value()) {
1752 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1753 Mask = Builder.CreateBitCast(
1754 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1755 VCondTy);
1756 } else {
// Otherwise build both polarities up front; each access picks the mask of
// the successor it came from in the loop below.
1757 IRBuilder<> Builder(BI);
1758 MaskFalse = Builder.CreateBitCast(
1759 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1760 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1761 }
// Strip bitcast chains to reach the underlying value.
1762 auto PeekThroughBitcasts = [](Value *V) {
1763 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1764 V = BitCast->getOperand(0);
1765 return V;
1766 };
1767 for (auto *I : SpeculatedConditionalLoadsStores) {
1768 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1769 if (!Invert.has_value())
1770 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1771 // We currently assume conditional faulting load/store is supported for
1772 // scalar types only when creating new instructions. This can be easily
1773 // extended for vector types in the future.
1774 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1775 auto *Op0 = I->getOperand(0);
1776 CallInst *MaskedLoadStore = nullptr;
1777 if (auto *LI = dyn_cast<LoadInst>(I)) {
1778 // Handle Load.
1779 auto *Ty = I->getType();
1780 PHINode *PN = nullptr;
1781 Value *PassThru = nullptr;
// In the triangle case, a PHI user's incoming value for BB (the branch block)
// becomes the pass-through operand of the masked load.
1782 if (Invert.has_value())
1783 for (User *U : I->users()) {
1784 if ((PN = dyn_cast<PHINode>(U))) {
1785 PassThru = Builder.CreateBitCast(
1786 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1787 FixedVectorType::get(Ty, 1));
1788 } else if (auto *Ins = cast<Instruction>(U);
1789 Sel && Ins->getParent() == BB) {
1790 // This happens when store or/and a speculative instruction between
1791 // load and store were hoisted to the BB. Make sure the masked load
1792 // is inserted before its use.
1793 // We assume there is exactly one such use.
1794 Builder.SetInsertPoint(Ins);
1795 }
1796 }
1797 MaskedLoadStore = Builder.CreateMaskedLoad(
1798 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1799 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1800 if (PN)
1801 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1802 I->replaceAllUsesWith(NewLoadStore);
1803 } else {
1804 // Handle Store.
1805 auto *StoredVal = Builder.CreateBitCast(
1806 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1807 MaskedLoadStore = Builder.CreateMaskedStore(
1808 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1809 }
1810 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1811 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1812 //
1813 // !nonnull, !align : Not support pointer type, no need to keep.
1814 // !range: Load type is changed from scalar to vector, but the metadata on
1815 // vector specifies a per-element range, so the semantics stay the
1816 // same. Keep it.
1817 // !annotation: Not impact semantics. Keep it.
1818 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1819 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges))
1820 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1821 // FIXME: DIAssignID is not supported for masked store yet.
1822 // (Verifier::visitDIAssignIDMetadata)
1824 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1825 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1826 });
1827 MaskedLoadStore->copyMetadata(*I);
1828 I->eraseFromParent();
1829 }
1830}
1831
// Decide whether the load/store \p I may be turned into a conditional-faulting
// (masked) access: it must be a simple (non-volatile, non-atomic) access,
// gated by the Hoist{Loads,Stores}WithCondFaulting options and TTI support.
1833 const TargetTransformInfo &TTI) {
1834 // Do not handle volatile or atomic accesses.
1835 bool IsStore = false;
1836 if (auto *L = dyn_cast<LoadInst>(I)) {
1837 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1838 return false;
1839 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1840 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1841 return false;
1842 IsStore = true;
1843 } else
1844 return false;
1845
1846 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1847 // That's why we have the alignment limitation.
1848 // FIXME: Update the prototype of the intrinsics?
1849 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1851}
1852
1853/// Hoist any common code in the successor blocks up into the block. This
1854/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1855/// given, only perform hoisting in case all successor blocks contain matching
1856/// instructions only. In that case, all instructions can be hoisted and the
1857/// original branch will be replaced and selects for PHIs are added.
1858bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1859 bool AllInstsEqOnly) {
1860 // This does very trivial matching, with limited scanning, to find identical
1861 // instructions in the two blocks. In particular, we don't want to get into
1862 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1863 // such, we currently just scan for obviously identical instructions in an
1864 // identical order, possibly separated by the same number of non-identical
1865 // instructions.
1866 BasicBlock *BB = TI->getParent();
1867 unsigned int SuccSize = succ_size(BB);
1868 if (SuccSize < 2)
1869 return false;
1870
1871 // If either of the blocks has its address taken, then we can't do this fold,
1872 // because the code we'd hoist would no longer run when we jump into the block
1873 // by its address.
1874 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1875 for (auto *Succ : UniqueSuccessors) {
1876 if (Succ->hasAddressTaken())
1877 return false;
1878 // Use getUniquePredecessor instead of getSinglePredecessor to support
1879 // multi-cases successors in switch.
1880 if (Succ->getUniquePredecessor())
1881 continue;
1882 // If Succ has >1 predecessors, continue to check if the Succ contains only
1883 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1884 // can relax the condition based on the assumption that the program would
1885 // never enter Succ and trigger such an UB.
1886 if (isa<UnreachableInst>(*Succ->begin()))
1887 continue;
1888 return false;
1889 }
1890 // The second of pair is a SkipFlags bitmask.
1891 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1892 SmallVector<SuccIterPair, 8> SuccIterPairs;
// Hoisting from a successor that starts with a PHI is not handled; bail out.
1893 for (auto *Succ : UniqueSuccessors) {
1894 BasicBlock::iterator SuccItr = Succ->begin();
1895 if (isa<PHINode>(*SuccItr))
1896 return false;
1897 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1898 }
1899
1900 if (AllInstsEqOnly) {
1901 // Check if all instructions in the successor blocks match. This allows
1902 // hoisting all instructions and removing the blocks we are hoisting from,
1903 // so does not add any new instructions.
1904
1905 // Check if sizes and terminators of all successors match.
1906 unsigned Size0 = UniqueSuccessors[0]->size();
1907 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1908 bool AllSame =
1909 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1910 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1911 Succ->size() == Size0;
1912 });
1913 if (!AllSame)
1914 return false;
1915 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1916 while (LRI.isValid()) {
1917 Instruction *I0 = (*LRI)[0];
1918 if (any_of(*LRI, [I0](Instruction *I) {
1919 return !areIdenticalUpToCommutativity(I0, I);
1920 })) {
1921 return false;
1922 }
1923 --LRI;
1924 }
1925 // Now we know that all instructions in all successors can be hoisted. Let
1926 // the loop below handle the hoisting.
1927 }
1928
1929 // Count how many instructions were not hoisted so far. There's a limit on how
1930 // many instructions we skip, serving as a compilation time control as well as
1931 // preventing excessive increase of life ranges.
1932 unsigned NumSkipped = 0;
1933 // If we find an unreachable instruction at the beginning of a basic block, we
1934 // can still hoist instructions from the rest of the basic blocks.
1935 if (SuccIterPairs.size() > 2) {
1936 erase_if(SuccIterPairs,
1937 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1938 if (SuccIterPairs.size() < 2)
1939 return false;
1940 }
1941
1942 bool Changed = false;
1943
// Walk all remaining successors in lockstep, hoisting runs of identical
// instructions until we hit terminators or the skip limit.
1944 for (;;) {
1945 auto *SuccIterPairBegin = SuccIterPairs.begin();
1946 auto &BB1ItrPair = *SuccIterPairBegin++;
1947 auto OtherSuccIterPairRange =
1948 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1949 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1950
1951 Instruction *I1 = &*BB1ItrPair.first;
1952
1953 bool AllInstsAreIdentical = true;
1954 bool HasTerminator = I1->isTerminator();
1955 for (auto &SuccIter : OtherSuccIterRange) {
1956 Instruction *I2 = &*SuccIter;
1957 HasTerminator |= I2->isTerminator();
1958 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1959 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1960 AllInstsAreIdentical = false;
1961 }
1962
1963 SmallVector<Instruction *, 8> OtherInsts;
1964 for (auto &SuccIter : OtherSuccIterRange)
1965 OtherInsts.push_back(&*SuccIter);
1966
1967 // If we are hoisting the terminator instruction, don't move one (making a
1968 // broken BB), instead clone it, and remove BI.
1969 if (HasTerminator) {
1970 // Even if BB, which contains only one unreachable instruction, is ignored
1971 // at the beginning of the loop, we can hoist the terminator instruction.
1972 // If any instructions remain in the block, we cannot hoist terminators.
1973 if (NumSkipped || !AllInstsAreIdentical) {
1974 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1975 return Changed;
1976 }
1977
1978 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1979 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1980 Changed;
1981 }
1982
1983 if (AllInstsAreIdentical) {
1984 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1985 AllInstsAreIdentical =
1986 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1987 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1988 Instruction *I2 = &*Pair.first;
1989 unsigned SkipFlagsBB2 = Pair.second;
1990 // Even if the instructions are identical, it may not
1991 // be safe to hoist them if we have skipped over
1992 // instructions with side effects or their operands
1993 // weren't hoisted.
1994 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1996 });
1997 }
1998
1999 if (AllInstsAreIdentical) {
2000 BB1ItrPair.first++;
2001 // For a normal instruction, we just move one to right before the
2002 // branch, then replace all uses of the other with the first. Finally,
2003 // we remove the now redundant second instruction.
2004 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2005 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2006 // and leave any that were not hoisted behind (by calling moveBefore
2007 // rather than moveBeforePreserving).
2008 I1->moveBefore(TI->getIterator());
2009 for (auto &SuccIter : OtherSuccIterRange) {
2010 Instruction *I2 = &*SuccIter++;
2011 assert(I2 != I1);
2012 if (!I2->use_empty())
2013 I2->replaceAllUsesWith(I1);
2014 I1->andIRFlags(I2);
2015 if (auto *CB = dyn_cast<CallBase>(I1)) {
2016 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2017 assert(Success && "We should not be trying to hoist callbases "
2018 "with non-intersectable attributes");
2019 // For NDEBUG Compile.
2020 (void)Success;
2021 }
2022
2023 combineMetadataForCSE(I1, I2, true);
2024 // I1 and I2 are being combined into a single instruction. Its debug
2025 // location is the merged locations of the original instructions.
2026 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2027 I2->eraseFromParent();
2028 }
2029 if (!Changed)
2030 NumHoistCommonCode += SuccIterPairs.size();
2031 Changed = true;
2032 NumHoistCommonInstrs += SuccIterPairs.size();
2033 } else {
2034 if (NumSkipped >= HoistCommonSkipLimit) {
2035 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2036 return Changed;
2037 }
2038 // We are about to skip over a pair of non-identical instructions. Record
2039 // if any have characteristics that would prevent reordering instructions
2040 // across them.
2041 for (auto &SuccIterPair : SuccIterPairs) {
2042 Instruction *I = &*SuccIterPair.first++;
2043 SuccIterPair.second |= skippedInstrFlags(I);
2044 }
2045 ++NumSkipped;
2046 }
2047 }
2048}
2049
2050bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2051 Instruction *TI, Instruction *I1,
2052 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2053 ArrayRef<BasicBlock *> UniqueSuccessors) {
2054
// BI is non-null only when TI is a branch instruction (the if/then/else
// case); for a switch it stays null and the select-insertion below is skipped.
2055 auto *BI = dyn_cast<BranchInst>(TI);
2056
2057 bool Changed = false;
2058 BasicBlock *TIParent = TI->getParent();
2059 BasicBlock *BB1 = I1->getParent();
2060
2061 // Use only for an if statement.
2062 auto *I2 = *OtherSuccTIs.begin();
2063 auto *BB2 = I2->getParent();
2064 if (BI) {
2065 assert(OtherSuccTIs.size() == 1);
2066 assert(BI->getSuccessor(0) == I1->getParent());
2067 assert(BI->getSuccessor(1) == I2->getParent());
2068 }
2069
2070 // In the case of an if statement, we try to hoist an invoke.
2071 // FIXME: Can we define a safety predicate for CallBr?
2072 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2073 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2074 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2075 return false;
2076
2077 // TODO: callbr hoisting currently disabled pending further study.
2078 if (isa<CallBrInst>(I1))
2079 return false;
2080
2081 for (BasicBlock *Succ : successors(BB1)) {
2082 for (PHINode &PN : Succ->phis()) {
2083 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2084 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2085 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2086 if (BB1V == BB2V)
2087 continue;
2088
2089 // In the case of an if statement, check for
2090 // passingValueIsAlwaysUndefined here because we would rather eliminate
2091 // undefined control flow than converting it to a select.
2092 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2094 return false;
2095 }
2096 }
2097 }
2098
2099 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2100 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2101 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2102 // Clone the terminator and hoist it into the pred, without any debug info.
2103 Instruction *NT = I1->clone();
2104 NT->insertInto(TIParent, TI->getIterator());
2105 if (!NT->getType()->isVoidTy()) {
2106 I1->replaceAllUsesWith(NT);
2107 for (Instruction *OtherSuccTI : OtherSuccTIs)
2108 OtherSuccTI->replaceAllUsesWith(NT);
2109 NT->takeName(I1);
2110 }
2111 Changed = true;
2112 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2113
2114 // Ensure terminator gets a debug location, even an unknown one, in case
2115 // it involves inlinable calls.
2117 Locs.push_back(I1->getDebugLoc());
2118 for (auto *OtherSuccTI : OtherSuccTIs)
2119 Locs.push_back(OtherSuccTI->getDebugLoc());
2120 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2121
2122 // PHIs created below will adopt NT's merged DebugLoc.
2123 IRBuilder<NoFolder> Builder(NT);
2124
2125 // In the case of an if statement, hoisting one of the terminators from our
2126 // successor is a great thing. Unfortunately, the successors of the if/else
2127 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2128 // must agree for all PHI nodes, so we insert select instruction to compute
2129 // the final result.
2130 if (BI) {
// Cache one select per (true-value, false-value) pair so repeated PHI entries
// reuse the same select instruction.
2131 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2132 for (BasicBlock *Succ : successors(BB1)) {
2133 for (PHINode &PN : Succ->phis()) {
2134 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2135 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2136 if (BB1V == BB2V)
2137 continue;
2138
2139 // These values do not agree. Insert a select instruction before NT
2140 // that determines the right value.
2141 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2142 if (!SI) {
2143 // Propagate fast-math-flags from phi node to its replacement select.
2145 BI->getCondition(), BB1V, BB2V,
2146 isa<FPMathOperator>(PN) ? &PN : nullptr,
2147 BB1V->getName() + "." + BB2V->getName(), BI));
2148 }
2149
2150 // Make the PHI node use the select for all incoming values for BB1/BB2
2151 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2152 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2153 PN.setIncomingValue(i, SI);
2154 }
2155 }
2156 }
2157
2159
2160 // Update any PHI nodes in our new successors.
2161 for (BasicBlock *Succ : successors(BB1)) {
2162 addPredecessorToBlock(Succ, TIParent, BB1);
2163 if (DTU)
2164 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2165 }
2166
2167 if (DTU) {
2168 // TI might be a switch with multi-cases destination, so we need to care for
2169 // the duplication of successors.
2170 for (BasicBlock *Succ : UniqueSuccessors)
2171 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2172 }
2173
2175 if (DTU)
2176 DTU->applyUpdates(Updates);
2177 return Changed;
2178}
2179
2180// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2181// into variables.
// Returns true when replacing operand OpIdx of I with a variable (e.g. a PHI)
// is considered cheap; divisors of integer div/rem and intrinsic operands must
// stay constant.
2183 int OpIdx) {
2184 // Divide/Remainder by constant is typically much cheaper than by variable.
2185 if (I->isIntDivRem())
2186 return OpIdx != 1;
2187 return !isa<IntrinsicInst>(I);
2188}
2189
2190// All instructions in Insts belong to different blocks that all unconditionally
2191// branch to a common successor. Analyze each instruction and return true if it
2192// would be possible to sink them into their successor, creating one common
2193// instruction instead. For every value that would be required to be provided by
2194// PHI node (because an operand varies in each input block), add to PHIOperands.
2197 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2198 // Prune out obviously bad instructions to move. Each instruction must have
2199 // the same number of uses, and we check later that the uses are consistent.
2200 std::optional<unsigned> NumUses;
2201 for (auto *I : Insts) {
2202 // These instructions may change or break semantics if moved.
2203 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2204 I->getType()->isTokenTy())
2205 return false;
2206
2207 // Do not try to sink an instruction in an infinite loop - it can cause
2208 // this algorithm to infinite loop.
2209 if (I->getParent()->getSingleSuccessor() == I->getParent())
2210 return false;
2211
2212 // Conservatively return false if I is an inline-asm instruction. Sinking
2213 // and merging inline-asm instructions can potentially create arguments
2214 // that cannot satisfy the inline-asm constraints.
2215 // If the instruction has nomerge or convergent attribute, return false.
2216 if (const auto *C = dyn_cast<CallBase>(I))
2217 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2218 return false;
2219
2220 if (!NumUses)
2221 NumUses = I->getNumUses();
2222 else if (NumUses != I->getNumUses())
2223 return false;
2224 }
2225
// All candidates must perform the same operation (up to attribute
// intersection) and carry matching MMRA metadata.
2226 const Instruction *I0 = Insts.front();
2227 const auto I0MMRA = MMRAMetadata(*I0);
2228 for (auto *I : Insts) {
2229 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2230 return false;
2231
2232 // Treat MMRAs conservatively. This pass can be quite aggressive and
2233 // could drop a lot of MMRAs otherwise.
2234 if (MMRAMetadata(*I) != I0MMRA)
2235 return false;
2236 }
2237
2238 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2239 // then the other phi operands must match the instructions from Insts. This
2240 // also has to hold true for any phi nodes that would be created as a result
2241 // of sinking. Both of these cases are represented by PHIOperands.
2242 for (const Use &U : I0->uses()) {
2243 auto It = PHIOperands.find(&U);
2244 if (It == PHIOperands.end())
2245 // There may be uses in other blocks when sinking into a loop header.
2246 return false;
2247 if (!equal(Insts, It->second))
2248 return false;
2249 }
2250
2251 // For calls to be sinkable, they must all be indirect, or have same callee.
2252 // I.e. if we have two direct calls to different callees, we don't want to
2253 // turn that into an indirect call. Likewise, if we have an indirect call,
2254 // and a direct call, we don't actually want to have a single indirect call.
2255 if (isa<CallBase>(I0)) {
2256 auto IsIndirectCall = [](const Instruction *I) {
2257 return cast<CallBase>(I)->isIndirectCall();
2258 };
2259 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2260 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2261 if (HaveIndirectCalls) {
2262 if (!AllCallsAreIndirect)
2263 return false;
2264 } else {
2265 // All callees must be identical.
2266 Value *Callee = nullptr;
2267 for (const Instruction *I : Insts) {
2268 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2269 if (!Callee)
2270 Callee = CurrCallee;
2271 else if (Callee != CurrCallee)
2272 return false;
2273 }
2274 }
2275 }
2276
// Every operand must either agree across all Insts or be representable by a
// new PHI; operands that differ are recorded in PHIOperands.
2277 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2278 Value *Op = I0->getOperand(OI);
2279 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2280 assert(I->getNumOperands() == I0->getNumOperands());
2281 return I->getOperand(OI) == I0->getOperand(OI);
2282 };
2283 if (!all_of(Insts, SameAsI0)) {
2286 // We can't create a PHI from this GEP.
2287 return false;
2288 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2289 for (auto *I : Insts)
2290 Ops.push_back(I->getOperand(OI));
2291 }
2292 }
2293 return true;
2294}
2295
2296// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2297// instruction of every block in Blocks to their common successor, commoning
2298// into one instruction.
2300 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2301
2302 // canSinkInstructions returning true guarantees that every block has at
2303 // least one non-terminator instruction.
2305 for (auto *BB : Blocks) {
2306 Instruction *I = BB->getTerminator();
2307 I = I->getPrevNode();
2308 Insts.push_back(I);
2309 }
2310
2311 // We don't need to do any more checking here; canSinkInstructions should
2312 // have done it all for us.
2313 SmallVector<Value*, 4> NewOperands;
2314 Instruction *I0 = Insts.front();
2315 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2316 // This check is different to that in canSinkInstructions. There, we
2317 // cared about the global view once simplifycfg (and instcombine) have
2318 // completed - it takes into account PHIs that become trivially
2319 // simplifiable. However here we need a more local view; if an operand
2320 // differs we create a PHI and rely on instcombine to clean up the very
2321 // small mess we may make.
2322 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2323 return I->getOperand(O) != I0->getOperand(O);
2324 });
2325 if (!NeedPHI) {
2326 NewOperands.push_back(I0->getOperand(O));
2327 continue;
2328 }
2329
2330 // Create a new PHI in the successor block and populate it.
2331 auto *Op = I0->getOperand(O);
2332 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2333 auto *PN =
2334 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2335 PN->insertBefore(BBEnd->begin());
2336 for (auto *I : Insts)
2337 PN->addIncoming(I->getOperand(O), I->getParent());
2338 NewOperands.push_back(PN);
2339 }
2340
2341 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2342 // and move it to the start of the successor block.
2343 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2344 I0->getOperandUse(O).set(NewOperands[O]);
2345
2346 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2347
2348 // Update metadata and IR flags, and merge debug locations.
2349 for (auto *I : Insts)
2350 if (I != I0) {
2351 // The debug location for the "common" instruction is the merged locations
2352 // of all the commoned instructions. We start with the original location
2353 // of the "common" instruction and iteratively merge each location in the
2354 // loop below.
2355 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2356 // However, N-way merge for CallInst is rare, so we use the simplified API
2357 // instead of using complex API for N-way merge.
2358 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2359 combineMetadataForCSE(I0, I, true);
2360 I0->andIRFlags(I);
2361 if (auto *CB = dyn_cast<CallBase>(I0)) {
2362 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2363 assert(Success && "We should not be trying to sink callbases "
2364 "with non-intersectable attributes");
2365 // For NDEBUG Compile.
2366 (void)Success;
2367 }
2368 }
2369
2370 for (User *U : make_early_inc_range(I0->users())) {
2371 // canSinkLastInstruction checked that all instructions are only used by
2372 // phi nodes in a way that allows replacing the phi node with the common
2373 // instruction.
2374 auto *PN = cast<PHINode>(U);
2375 PN->replaceAllUsesWith(I0);
2376 PN->eraseFromParent();
2377 }
2378
2379 // Finally nuke all instructions apart from the common instruction.
2380 for (auto *I : Insts) {
2381 if (I == I0)
2382 continue;
2383 // The remaining uses are debug users, replace those with the common inst.
2384 // In most (all?) cases this just introduces a use-before-def.
2385 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2386 I->replaceAllUsesWith(I0);
2387 I->eraseFromParent();
2388 }
2389}
2390
2391/// Check whether BB's predecessors end with unconditional branches. If it is
2392/// true, sink any common code from the predecessors to BB.
2394 DomTreeUpdater *DTU) {
2395 // We support two situations:
2396 // (1) all incoming arcs are unconditional
2397 // (2) there are non-unconditional incoming arcs
2398 //
2399 // (2) is very common in switch defaults and
2400 // else-if patterns;
2401 //
2402 // if (a) f(1);
2403 // else if (b) f(2);
2404 //
2405 // produces:
2406 //
2407 // [if]
2408 // / \
2409 // [f(1)] [if]
2410 // | | \
2411 // | | |
2412 // | [f(2)]|
2413 // \ | /
2414 // [ end ]
2415 //
2416 // [end] has two unconditional predecessor arcs and one conditional. The
2417 // conditional refers to the implicit empty 'else' arc. This conditional
2418 // arc can also be caused by an empty default block in a switch.
2419 //
2420 // In this case, we attempt to sink code from all *unconditional* arcs.
2421 // If we can sink instructions from these arcs (determined during the scan
2422 // phase below) we insert a common successor for all unconditional arcs and
2423 // connect that to [end], to enable sinking:
2424 //
2425 // [if]
2426 // / \
2427 // [x(1)] [if]
2428 // | | \
2429 // | | \
2430 // | [x(2)] |
2431 // \ / |
2432 // [sink.split] |
2433 // \ /
2434 // [ end ]
2435 //
2436 SmallVector<BasicBlock*,4> UnconditionalPreds;
2437 bool HaveNonUnconditionalPredecessors = false;
2438 for (auto *PredBB : predecessors(BB)) {
2439 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2440 if (PredBr && PredBr->isUnconditional())
2441 UnconditionalPreds.push_back(PredBB);
2442 else
2443 HaveNonUnconditionalPredecessors = true;
2444 }
2445 if (UnconditionalPreds.size() < 2)
2446 return false;
2447
2448 // We take a two-step approach to tail sinking. First we scan from the end of
2449 // each block upwards in lockstep. If the n'th instruction from the end of each
2450 // block can be sunk, those instructions are added to ValuesToSink and we
2451 // carry on. If we can sink an instruction but need to PHI-merge some operands
2452 // (because they're not identical in each instruction) we add these to
2453 // PHIOperands.
2454 // We prepopulate PHIOperands with the phis that already exist in BB.
2456 for (PHINode &PN : BB->phis()) {
2458 for (const Use &U : PN.incoming_values())
2459 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2460 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2461 for (BasicBlock *Pred : UnconditionalPreds)
2462 Ops.push_back(*IncomingVals[Pred]);
2463 }
2464
2465 int ScanIdx = 0;
2466 SmallPtrSet<Value*,4> InstructionsToSink;
2467 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2468 while (LRI.isValid() &&
2469 canSinkInstructions(*LRI, PHIOperands)) {
2470 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2471 << "\n");
2472 InstructionsToSink.insert_range(*LRI);
2473 ++ScanIdx;
2474 --LRI;
2475 }
2476
2477 // If no instructions can be sunk, early-return.
2478 if (ScanIdx == 0)
2479 return false;
2480
2481 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2482
2483 if (!followedByDeoptOrUnreachable) {
2484 // Check whether this is the pointer operand of a load/store.
2485 auto IsMemOperand = [](Use &U) {
2486 auto *I = cast<Instruction>(U.getUser());
2487 if (isa<LoadInst>(I))
2488 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2489 if (isa<StoreInst>(I))
2490 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2491 return false;
2492 };
2493
2494 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2495 // actually sink before encountering instruction that is unprofitable to
2496 // sink?
2497 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2498 unsigned NumPHIInsts = 0;
2499 for (Use &U : (*LRI)[0]->operands()) {
2500 auto It = PHIOperands.find(&U);
2501 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2502 return InstructionsToSink.contains(V);
2503 })) {
2504 ++NumPHIInsts;
2505 // Do not separate a load/store from the gep producing the address.
2506 // The gep can likely be folded into the load/store as an addressing
2507 // mode. Additionally, a load of a gep is easier to analyze than a
2508 // load of a phi.
2509 if (IsMemOperand(U) &&
2510 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2511 return false;
2512 // FIXME: this check is overly optimistic. We may end up not sinking
2513 // said instruction, due to the very same profitability check.
2514 // See @creating_too_many_phis in sink-common-code.ll.
2515 }
2516 }
2517 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2518 return NumPHIInsts <= 1;
2519 };
2520
2521 // We've determined that we are going to sink last ScanIdx instructions,
2522 // and recorded them in InstructionsToSink. Now, some instructions may be
2523 // unprofitable to sink. But that determination depends on the instructions
2524 // that we are going to sink.
2525
2526 // First, forward scan: find the first instruction unprofitable to sink,
2527 // recording all the ones that are profitable to sink.
2528 // FIXME: would it be better, after we detect that not all are profitable.
2529 // to either record the profitable ones, or erase the unprofitable ones?
2530 // Maybe we need to choose (at runtime) the one that will touch least
2531 // instrs?
2532 LRI.reset();
2533 int Idx = 0;
2534 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2535 while (Idx < ScanIdx) {
2536 if (!ProfitableToSinkInstruction(LRI)) {
2537 // Too many PHIs would be created.
2538 LLVM_DEBUG(
2539 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2540 break;
2541 }
2542 InstructionsProfitableToSink.insert_range(*LRI);
2543 --LRI;
2544 ++Idx;
2545 }
2546
2547 // If no instructions can be sunk, early-return.
2548 if (Idx == 0)
2549 return false;
2550
2551 // Did we determine that (only) some instructions are unprofitable to sink?
2552 if (Idx < ScanIdx) {
2553 // Okay, some instructions are unprofitable.
2554 ScanIdx = Idx;
2555 InstructionsToSink = InstructionsProfitableToSink;
2556
2557 // But, that may make other instructions unprofitable, too.
2558 // So, do a backward scan, do any earlier instructions become
2559 // unprofitable?
2560 assert(
2561 !ProfitableToSinkInstruction(LRI) &&
2562 "We already know that the last instruction is unprofitable to sink");
2563 ++LRI;
2564 --Idx;
2565 while (Idx >= 0) {
2566 // If we detect that an instruction becomes unprofitable to sink,
2567 // all earlier instructions won't be sunk either,
2568 // so preemptively keep InstructionsProfitableToSink in sync.
2569 // FIXME: is this the most performant approach?
2570 for (auto *I : *LRI)
2571 InstructionsProfitableToSink.erase(I);
2572 if (!ProfitableToSinkInstruction(LRI)) {
2573 // Everything starting with this instruction won't be sunk.
2574 ScanIdx = Idx;
2575 InstructionsToSink = InstructionsProfitableToSink;
2576 }
2577 ++LRI;
2578 --Idx;
2579 }
2580 }
2581
2582 // If no instructions can be sunk, early-return.
2583 if (ScanIdx == 0)
2584 return false;
2585 }
2586
2587 bool Changed = false;
2588
2589 if (HaveNonUnconditionalPredecessors) {
2590 if (!followedByDeoptOrUnreachable) {
2591 // It is always legal to sink common instructions from unconditional
2592 // predecessors. However, if not all predecessors are unconditional,
2593 // this transformation might be pessimizing. So as a rule of thumb,
2594 // don't do it unless we'd sink at least one non-speculatable instruction.
2595 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2596 LRI.reset();
2597 int Idx = 0;
2598 bool Profitable = false;
2599 while (Idx < ScanIdx) {
2600 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2601 Profitable = true;
2602 break;
2603 }
2604 --LRI;
2605 ++Idx;
2606 }
2607 if (!Profitable)
2608 return false;
2609 }
2610
2611 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2612 // We have a conditional edge and we're going to sink some instructions.
2613 // Insert a new block postdominating all blocks we're going to sink from.
2614 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2615 // Edges couldn't be split.
2616 return false;
2617 Changed = true;
2618 }
2619
2620 // Now that we've analyzed all potential sinking candidates, perform the
2621 // actual sink. We iteratively sink the last non-terminator of the source
2622 // blocks into their common successor unless doing so would require too
2623 // many PHI instructions to be generated (currently only one PHI is allowed
2624 // per sunk instruction).
2625 //
2626 // We can use InstructionsToSink to discount values needing PHI-merging that will
2627 // actually be sunk in a later iteration. This allows us to be more
2628 // aggressive in what we sink. This does allow a false positive where we
2629 // sink presuming a later value will also be sunk, but stop half way through
2630 // and never actually sink it which means we produce more PHIs than intended.
2631 // This is unlikely in practice though.
2632 int SinkIdx = 0;
2633 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2634 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2635 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2636 << "\n");
2637
2638 // Because we've sunk every instruction in turn, the current instruction to
2639 // sink is always at index 0.
2640 LRI.reset();
2641
2642 sinkLastInstruction(UnconditionalPreds);
2643 NumSinkCommonInstrs++;
2644 Changed = true;
2645 }
2646 if (SinkIdx != 0)
2647 ++NumSinkCommonCode;
2648 return Changed;
2649}
2650
2651namespace {
2652
2653struct CompatibleSets {
2654 using SetTy = SmallVector<InvokeInst *, 2>;
2655
2657
2658 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2659
2660 SetTy &getCompatibleSet(InvokeInst *II);
2661
2662 void insert(InvokeInst *II);
2663};
2664
2665CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2666 // Perform a linear scan over all the existing sets, see if the new `invoke`
2667 // is compatible with any particular set. Since we know that all the `invokes`
2668 // within a set are compatible, only check the first `invoke` in each set.
2669 // WARNING: at worst, this has quadratic complexity.
2670 for (CompatibleSets::SetTy &Set : Sets) {
2671 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2672 return Set;
2673 }
2674
2675 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2676 return Sets.emplace_back();
2677}
2678
2679void CompatibleSets::insert(InvokeInst *II) {
2680 getCompatibleSet(II).emplace_back(II);
2681}
2682
2683bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2684 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2685
2686 // Can we theoretically merge these `invoke`s?
2687 auto IsIllegalToMerge = [](InvokeInst *II) {
2688 return II->cannotMerge() || II->isInlineAsm();
2689 };
2690 if (any_of(Invokes, IsIllegalToMerge))
2691 return false;
2692
2693 // Either both `invoke`s must be direct,
2694 // or both `invoke`s must be indirect.
2695 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2696 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2697 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2698 if (HaveIndirectCalls) {
2699 if (!AllCallsAreIndirect)
2700 return false;
2701 } else {
2702 // All callees must be identical.
2703 Value *Callee = nullptr;
2704 for (InvokeInst *II : Invokes) {
2705 Value *CurrCallee = II->getCalledOperand();
2706 assert(CurrCallee && "There is always a called operand.");
2707 if (!Callee)
2708 Callee = CurrCallee;
2709 else if (Callee != CurrCallee)
2710 return false;
2711 }
2712 }
2713
2714 // Either both `invoke`s must not have a normal destination,
2715 // or both `invoke`s must have a normal destination,
2716 auto HasNormalDest = [](InvokeInst *II) {
2717 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2718 };
2719 if (any_of(Invokes, HasNormalDest)) {
2720 // Do not merge `invoke` that does not have a normal destination with one
2721 // that does have a normal destination, even though doing so would be legal.
2722 if (!all_of(Invokes, HasNormalDest))
2723 return false;
2724
2725 // All normal destinations must be identical.
2726 BasicBlock *NormalBB = nullptr;
2727 for (InvokeInst *II : Invokes) {
2728 BasicBlock *CurrNormalBB = II->getNormalDest();
2729 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2730 if (!NormalBB)
2731 NormalBB = CurrNormalBB;
2732 else if (NormalBB != CurrNormalBB)
2733 return false;
2734 }
2735
2736 // In the normal destination, the incoming values for these two `invoke`s
2737 // must be compatible.
2738 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2740 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2741 &EquivalenceSet))
2742 return false;
2743 }
2744
2745#ifndef NDEBUG
2746 // All unwind destinations must be identical.
2747 // We know that because we have started from said unwind destination.
2748 BasicBlock *UnwindBB = nullptr;
2749 for (InvokeInst *II : Invokes) {
2750 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2751 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2752 if (!UnwindBB)
2753 UnwindBB = CurrUnwindBB;
2754 else
2755 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2756 }
2757#endif
2758
2759 // In the unwind destination, the incoming values for these two `invoke`s
2760 // must be compatible.
2762 Invokes.front()->getUnwindDest(),
2763 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2764 return false;
2765
2766 // Ignoring arguments, these `invoke`s must be identical,
2767 // including operand bundles.
2768 const InvokeInst *II0 = Invokes.front();
2769 for (auto *II : Invokes.drop_front())
2770 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2771 return false;
2772
2773 // Can we theoretically form the data operands for the merged `invoke`?
2774 auto IsIllegalToMergeArguments = [](auto Ops) {
2775 Use &U0 = std::get<0>(Ops);
2776 Use &U1 = std::get<1>(Ops);
2777 if (U0 == U1)
2778 return false;
2780 U0.getOperandNo());
2781 };
2782 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2783 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2784 IsIllegalToMergeArguments))
2785 return false;
2786
2787 return true;
2788}
2789
2790} // namespace
2791
2792// Merge all invokes in the provided set, all of which are compatible
2793// as per the `CompatibleSets::shouldBelongToSameSet()`.
2795 DomTreeUpdater *DTU) {
2796 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2797
2799 if (DTU)
2800 Updates.reserve(2 + 3 * Invokes.size());
2801
2802 bool HasNormalDest =
2803 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2804
2805 // Clone one of the invokes into a new basic block.
2806 // Since they are all compatible, it doesn't matter which invoke is cloned.
2807 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2808 InvokeInst *II0 = Invokes.front();
2809 BasicBlock *II0BB = II0->getParent();
2810 BasicBlock *InsertBeforeBlock =
2811 II0->getParent()->getIterator()->getNextNode();
2812 Function *Func = II0BB->getParent();
2813 LLVMContext &Ctx = II0->getContext();
2814
2815 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2816 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2817
2818 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2819 // NOTE: all invokes have the same attributes, so no handling needed.
2820 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2821
2822 if (!HasNormalDest) {
2823 // This set does not have a normal destination,
2824 // so just form a new block with unreachable terminator.
2825 BasicBlock *MergedNormalDest = BasicBlock::Create(
2826 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2827 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2828 UI->setDebugLoc(DebugLoc::getTemporary());
2829 MergedInvoke->setNormalDest(MergedNormalDest);
2830 }
2831
2832 // The unwind destination, however, remainds identical for all invokes here.
2833
2834 return MergedInvoke;
2835 }();
2836
2837 if (DTU) {
2838 // Predecessor blocks that contained these invokes will now branch to
2839 // the new block that contains the merged invoke, ...
2840 for (InvokeInst *II : Invokes)
2841 Updates.push_back(
2842 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2843
2844 // ... which has the new `unreachable` block as normal destination,
2845 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2846 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2847 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2848 SuccBBOfMergedInvoke});
2849
2850 // Since predecessor blocks now unconditionally branch to a new block,
2851 // they no longer branch to their original successors.
2852 for (InvokeInst *II : Invokes)
2853 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2854 Updates.push_back(
2855 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2856 }
2857
2858 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2859
2860 // Form the merged operands for the merged invoke.
2861 for (Use &U : MergedInvoke->operands()) {
2862 // Only PHI together the indirect callees and data operands.
2863 if (MergedInvoke->isCallee(&U)) {
2864 if (!IsIndirectCall)
2865 continue;
2866 } else if (!MergedInvoke->isDataOperand(&U))
2867 continue;
2868
2869 // Don't create trivial PHI's with all-identical incoming values.
2870 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2871 return II->getOperand(U.getOperandNo()) != U.get();
2872 });
2873 if (!NeedPHI)
2874 continue;
2875
2876 // Form a PHI out of all the data ops under this index.
2878 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2879 for (InvokeInst *II : Invokes)
2880 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2881
2882 U.set(PN);
2883 }
2884
2885 // We've ensured that each PHI node has compatible (identical) incoming values
2886 // when coming from each of the `invoke`s in the current merge set,
2887 // so update the PHI nodes accordingly.
2888 for (BasicBlock *Succ : successors(MergedInvoke))
2889 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2890 /*ExistPred=*/Invokes.front()->getParent());
2891
2892 // And finally, replace the original `invoke`s with an unconditional branch
2893 // to the block with the merged `invoke`. Also, give that merged `invoke`
2894 // the merged debugloc of all the original `invoke`s.
2895 DILocation *MergedDebugLoc = nullptr;
2896 for (InvokeInst *II : Invokes) {
2897 // Compute the debug location common to all the original `invoke`s.
2898 if (!MergedDebugLoc)
2899 MergedDebugLoc = II->getDebugLoc();
2900 else
2901 MergedDebugLoc =
2902 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2903
2904 // And replace the old `invoke` with an unconditionally branch
2905 // to the block with the merged `invoke`.
2906 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2907 OrigSuccBB->removePredecessor(II->getParent());
2908 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2909 // The unconditional branch is part of the replacement for the original
2910 // invoke, so should use its DebugLoc.
2911 BI->setDebugLoc(II->getDebugLoc());
2912 bool Success = MergedInvoke->tryIntersectAttributes(II);
2913 assert(Success && "Merged invokes with incompatible attributes");
2914 // For NDEBUG Compile
2915 (void)Success;
2916 II->replaceAllUsesWith(MergedInvoke);
2917 II->eraseFromParent();
2918 ++NumInvokesMerged;
2919 }
2920 MergedInvoke->setDebugLoc(MergedDebugLoc);
2921 ++NumInvokeSetsFormed;
2922
2923 if (DTU)
2924 DTU->applyUpdates(Updates);
2925}
2926
2927/// If this block is a `landingpad` exception handling block, categorize all
2928/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2929/// being "mergeable" together, and then merge invokes in each set together.
2930///
2931/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2932/// [...] [...]
2933/// | |
2934/// [invoke0] [invoke1]
2935/// / \ / \
2936/// [cont0] [landingpad] [cont1]
2937/// to:
2938/// [...] [...]
2939/// \ /
2940/// [invoke]
2941/// / \
2942/// [cont] [landingpad]
2943///
2944/// But of course we can only do that if the invokes share the `landingpad`,
2945/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2946/// and the invoked functions are "compatible".
2949 return false;
2950
2951 bool Changed = false;
2952
2953 // FIXME: generalize to all exception handling blocks?
2954 if (!BB->isLandingPad())
2955 return Changed;
2956
2957 CompatibleSets Grouper;
2958
2959 // Record all the predecessors of this `landingpad`. As per verifier,
2960 // the only allowed predecessor is the unwind edge of an `invoke`.
2961 // We want to group "compatible" `invokes` into the same set to be merged.
2962 for (BasicBlock *PredBB : predecessors(BB))
2963 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2964
2965 // And now, merge `invoke`s that were grouped togeter.
2966 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2967 if (Invokes.size() < 2)
2968 continue;
2969 Changed = true;
2970 mergeCompatibleInvokesImpl(Invokes, DTU);
2971 }
2972
2973 return Changed;
2974}
2975
2976namespace {
2977/// Track ephemeral values, which should be ignored for cost-modelling
2978/// purposes. Requires walking instructions in reverse order.
2979class EphemeralValueTracker {
2980 SmallPtrSet<const Instruction *, 32> EphValues;
2981
2982 bool isEphemeral(const Instruction *I) {
2983 if (isa<AssumeInst>(I))
2984 return true;
2985 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2986 all_of(I->users(), [&](const User *U) {
2987 return EphValues.count(cast<Instruction>(U));
2988 });
2989 }
2990
2991public:
2992 bool track(const Instruction *I) {
2993 if (isEphemeral(I)) {
2994 EphValues.insert(I);
2995 return true;
2996 }
2997 return false;
2998 }
2999
3000 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3001};
3002} // namespace
3003
3004/// Determine if we can hoist sink a sole store instruction out of a
3005/// conditional block.
3006///
3007/// We are looking for code like the following:
3008/// BrBB:
3009/// store i32 %add, i32* %arrayidx2
3010/// ... // No other stores or function calls (we could be calling a memory
3011/// ... // function).
3012/// %cmp = icmp ult %x, %y
3013/// br i1 %cmp, label %EndBB, label %ThenBB
3014/// ThenBB:
3015/// store i32 %add5, i32* %arrayidx2
3016/// br label EndBB
3017/// EndBB:
3018/// ...
3019/// We are going to transform this into:
3020/// BrBB:
3021/// store i32 %add, i32* %arrayidx2
3022/// ... //
3023/// %cmp = icmp ult %x, %y
3024/// %add.add5 = select i1 %cmp, i32 %add, %add5
3025/// store i32 %add.add5, i32* %arrayidx2
3026/// ...
3027///
3028/// \return The pointer to the value of the previous store if the store can be
3029/// hoisted into the predecessor block. 0 otherwise.
3031 BasicBlock *StoreBB, BasicBlock *EndBB) {
3032 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3033 if (!StoreToHoist)
3034 return nullptr;
3035
3036 // Volatile or atomic.
3037 if (!StoreToHoist->isSimple())
3038 return nullptr;
3039
3040 Value *StorePtr = StoreToHoist->getPointerOperand();
3041 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3042
3043 // Look for a store to the same pointer in BrBB.
3044 unsigned MaxNumInstToLookAt = 9;
3045 // Skip pseudo probe intrinsic calls which are not really killing any memory
3046 // accesses.
3047 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3048 if (!MaxNumInstToLookAt)
3049 break;
3050 --MaxNumInstToLookAt;
3051
3052 // Could be calling an instruction that affects memory like free().
3053 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3054 return nullptr;
3055
3056 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3057 // Found the previous store to same location and type. Make sure it is
3058 // simple, to avoid introducing a spurious non-atomic write after an
3059 // atomic write.
3060 if (SI->getPointerOperand() == StorePtr &&
3061 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3062 SI->getAlign() >= StoreToHoist->getAlign())
3063 // Found the previous store, return its value operand.
3064 return SI->getValueOperand();
3065 return nullptr; // Unknown store.
3066 }
3067
3068 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3069 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3070 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3071 Value *Obj = getUnderlyingObject(StorePtr);
3072 bool ExplicitlyDereferenceableOnly;
3073 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3075 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3077 (!ExplicitlyDereferenceableOnly ||
3078 isDereferenceablePointer(StorePtr, StoreTy,
3079 LI->getDataLayout()))) {
3080 // Found a previous load, return it.
3081 return LI;
3082 }
3083 }
3084 // The load didn't work out, but we may still find a store.
3085 }
3086 }
3087
3088 return nullptr;
3089}
3090
3091/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3092/// converted to selects.
3094 BasicBlock *EndBB,
3095 unsigned &SpeculatedInstructions,
3096 InstructionCost &Cost,
3097 const TargetTransformInfo &TTI) {
3099 BB->getParent()->hasMinSize()
3102
3103 bool HaveRewritablePHIs = false;
3104 for (PHINode &PN : EndBB->phis()) {
3105 Value *OrigV = PN.getIncomingValueForBlock(BB);
3106 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3107
3108 // FIXME: Try to remove some of the duplication with
3109 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3110 if (ThenV == OrigV)
3111 continue;
3112
3113 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3114 CmpInst::makeCmpResultType(PN.getType()),
3116
3117 // Don't convert to selects if we could remove undefined behavior instead.
3118 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3120 return false;
3121
3122 HaveRewritablePHIs = true;
3123 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3124 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3125 if (!OrigCE && !ThenCE)
3126 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3127
3128 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3129 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3130 InstructionCost MaxCost =
3132 if (OrigCost + ThenCost > MaxCost)
3133 return false;
3134
3135 // Account for the cost of an unfolded ConstantExpr which could end up
3136 // getting expanded into Instructions.
3137 // FIXME: This doesn't account for how many operations are combined in the
3138 // constant expression.
3139 ++SpeculatedInstructions;
3140 if (SpeculatedInstructions > 1)
3141 return false;
3142 }
3143
3144 return HaveRewritablePHIs;
3145}
3146
3148 std::optional<bool> Invert,
3149 const TargetTransformInfo &TTI) {
3150 // If the branch is non-unpredictable, and is predicted to *not* branch to
3151 // the `then` block, then avoid speculating it.
3152 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3153 return true;
3154
3155 uint64_t TWeight, FWeight;
3156 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3157 return true;
3158
3159 if (!Invert.has_value())
3160 return false;
3161
3162 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3163 BranchProbability BIEndProb =
3164 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3165 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3166 return BIEndProb < Likely;
3167}
3168
3169/// Speculate a conditional basic block flattening the CFG.
3170///
3171/// Note that this is a very risky transform currently. Speculating
3172/// instructions like this is most often not desirable. Instead, there is an MI
3173/// pass which can do it with full awareness of the resource constraints.
3174/// However, some cases are "obvious" and we should do directly. An example of
3175/// this is speculating a single, reasonably cheap instruction.
3176///
3177/// There is only one distinct advantage to flattening the CFG at the IR level:
3178/// it makes very common but simplistic optimizations such as are common in
3179/// instcombine and the DAG combiner more powerful by removing CFG edges and
3180/// modeling their effects with easier to reason about SSA value graphs.
3181///
3182///
3183/// An illustration of this transform is turning this IR:
3184/// \code
3185/// BB:
3186/// %cmp = icmp ult %x, %y
3187/// br i1 %cmp, label %EndBB, label %ThenBB
3188/// ThenBB:
3189/// %sub = sub %x, %y
3190/// br label BB2
3191/// EndBB:
3192/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3193/// ...
3194/// \endcode
3195///
3196/// Into this IR:
3197/// \code
3198/// BB:
3199/// %cmp = icmp ult %x, %y
3200/// %sub = sub %x, %y
3201/// %cond = select i1 %cmp, 0, %sub
3202/// ...
3203/// \endcode
3204///
3205/// \returns true if the conditional block is removed.
3206bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3207 BasicBlock *ThenBB) {
3208 if (!Options.SpeculateBlocks)
3209 return false;
3210
3211 // Be conservative for now. FP select instruction can often be expensive.
3212 Value *BrCond = BI->getCondition();
3213 if (isa<FCmpInst>(BrCond))
3214 return false;
3215
3216 BasicBlock *BB = BI->getParent();
3217 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3218 InstructionCost Budget =
3220
3221 // If ThenBB is actually on the false edge of the conditional branch, remember
3222 // to swap the select operands later.
3223 bool Invert = false;
3224 if (ThenBB != BI->getSuccessor(0)) {
3225 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3226 Invert = true;
3227 }
3228 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3229
3230 if (!isProfitableToSpeculate(BI, Invert, TTI))
3231 return false;
3232
3233 // Keep a count of how many times instructions are used within ThenBB when
3234 // they are candidates for sinking into ThenBB. Specifically:
3235 // - They are defined in BB, and
3236 // - They have no side effects, and
3237 // - All of their uses are in ThenBB.
3238 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3239
3240 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3241
3242 unsigned SpeculatedInstructions = 0;
3243 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3244 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3245 Value *SpeculatedStoreValue = nullptr;
3246 StoreInst *SpeculatedStore = nullptr;
3247 EphemeralValueTracker EphTracker;
3248 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3249 // Skip pseudo probes. The consequence is we lose track of the branch
3250 // probability for ThenBB, which is fine since the optimization here takes
3251 // place regardless of the branch probability.
3252 if (isa<PseudoProbeInst>(I)) {
3253 // The probe should be deleted so that it will not be over-counted when
3254 // the samples collected on the non-conditional path are counted towards
3255 // the conditional path. We leave it for the counts inference algorithm to
3256 // figure out a proper count for an unknown probe.
3257 SpeculatedPseudoProbes.push_back(&I);
3258 continue;
3259 }
3260
3261 // Ignore ephemeral values, they will be dropped by the transform.
3262 if (EphTracker.track(&I))
3263 continue;
3264
3265 // Only speculatively execute a single instruction (not counting the
3266 // terminator) for now.
3267 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3269 SpeculatedConditionalLoadsStores.size() <
3271 // Not count load/store into cost if target supports conditional faulting
3272 // b/c it's cheap to speculate it.
3273 if (IsSafeCheapLoadStore)
3274 SpeculatedConditionalLoadsStores.push_back(&I);
3275 else
3276 ++SpeculatedInstructions;
3277
3278 if (SpeculatedInstructions > 1)
3279 return false;
3280
3281 // Don't hoist the instruction if it's unsafe or expensive.
3282 if (!IsSafeCheapLoadStore &&
3284 !(HoistCondStores && !SpeculatedStoreValue &&
3285 (SpeculatedStoreValue =
3286 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3287 return false;
3288 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3291 return false;
3292
3293 // Store the store speculation candidate.
3294 if (!SpeculatedStore && SpeculatedStoreValue)
3295 SpeculatedStore = cast<StoreInst>(&I);
3296
3297 // Do not hoist the instruction if any of its operands are defined but not
3298 // used in BB. The transformation will prevent the operand from
3299 // being sunk into the use block.
3300 for (Use &Op : I.operands()) {
3302 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3303 continue; // Not a candidate for sinking.
3304
3305 ++SinkCandidateUseCounts[OpI];
3306 }
3307 }
3308
3309 // Consider any sink candidates which are only used in ThenBB as costs for
3310 // speculation. Note, while we iterate over a DenseMap here, we are summing
3311 // and so iteration order isn't significant.
3312 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3313 if (Inst->hasNUses(Count)) {
3314 ++SpeculatedInstructions;
3315 if (SpeculatedInstructions > 1)
3316 return false;
3317 }
3318
3319 // Check that we can insert the selects and that it's not too expensive to do
3320 // so.
3321 bool Convert =
3322 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3324 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3325 SpeculatedInstructions, Cost, TTI);
3326 if (!Convert || Cost > Budget)
3327 return false;
3328
3329 // If we get here, we can hoist the instruction and if-convert.
3330 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3331
3332 Instruction *Sel = nullptr;
3333 // Insert a select of the value of the speculated store.
3334 if (SpeculatedStoreValue) {
3335 IRBuilder<NoFolder> Builder(BI);
3336 Value *OrigV = SpeculatedStore->getValueOperand();
3337 Value *TrueV = SpeculatedStore->getValueOperand();
3338 Value *FalseV = SpeculatedStoreValue;
3339 if (Invert)
3340 std::swap(TrueV, FalseV);
3341 Value *S = Builder.CreateSelect(
3342 BrCond, TrueV, FalseV, "spec.store.select", BI);
3343 Sel = cast<Instruction>(S);
3344 SpeculatedStore->setOperand(0, S);
3345 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3346 SpeculatedStore->getDebugLoc());
3347 // The value stored is still conditional, but the store itself is now
3348 // unconditonally executed, so we must be sure that any linked dbg.assign
3349 // intrinsics are tracking the new stored value (the result of the
3350 // select). If we don't, and the store were to be removed by another pass
3351 // (e.g. DSE), then we'd eventually end up emitting a location describing
3352 // the conditional value, unconditionally.
3353 //
3354 // === Before this transformation ===
3355 // pred:
3356 // store %one, %x.dest, !DIAssignID !1
3357 // dbg.assign %one, "x", ..., !1, ...
3358 // br %cond if.then
3359 //
3360 // if.then:
3361 // store %two, %x.dest, !DIAssignID !2
3362 // dbg.assign %two, "x", ..., !2, ...
3363 //
3364 // === After this transformation ===
3365 // pred:
3366 // store %one, %x.dest, !DIAssignID !1
3367 // dbg.assign %one, "x", ..., !1
3368 /// ...
3369 // %merge = select %cond, %two, %one
3370 // store %merge, %x.dest, !DIAssignID !2
3371 // dbg.assign %merge, "x", ..., !2
3372 for (DbgVariableRecord *DbgAssign :
3373 at::getDVRAssignmentMarkers(SpeculatedStore))
3374 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3375 DbgAssign->replaceVariableLocationOp(OrigV, S);
3376 }
3377
3378 // Metadata can be dependent on the condition we are hoisting above.
3379 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3380 // to avoid making it appear as if the condition is a constant, which would
3381 // be misleading while debugging.
3382 // Similarly strip attributes that maybe dependent on condition we are
3383 // hoisting above.
3384 for (auto &I : make_early_inc_range(*ThenBB)) {
3385 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3386 I.dropLocation();
3387 }
3388 I.dropUBImplyingAttrsAndMetadata();
3389
3390 // Drop ephemeral values.
3391 if (EphTracker.contains(&I)) {
3392 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3393 I.eraseFromParent();
3394 }
3395 }
3396
3397 // Hoist the instructions.
3398 // Drop DbgVariableRecords attached to these instructions.
3399 for (auto &It : *ThenBB)
3400 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3401 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3402 // equivalent).
3403 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3404 !DVR || !DVR->isDbgAssign())
3405 It.dropOneDbgRecord(&DR);
3406 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3407 std::prev(ThenBB->end()));
3408
3409 if (!SpeculatedConditionalLoadsStores.empty())
3410 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3411 Sel);
3412
3413 // Insert selects and rewrite the PHI operands.
3414 IRBuilder<NoFolder> Builder(BI);
3415 for (PHINode &PN : EndBB->phis()) {
3416 unsigned OrigI = PN.getBasicBlockIndex(BB);
3417 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3418 Value *OrigV = PN.getIncomingValue(OrigI);
3419 Value *ThenV = PN.getIncomingValue(ThenI);
3420
3421 // Skip PHIs which are trivial.
3422 if (OrigV == ThenV)
3423 continue;
3424
3425 // Create a select whose true value is the speculatively executed value and
3426 // false value is the pre-existing value. Swap them if the branch
3427 // destinations were inverted.
3428 Value *TrueV = ThenV, *FalseV = OrigV;
3429 if (Invert)
3430 std::swap(TrueV, FalseV);
3431 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3432 PN.setIncomingValue(OrigI, V);
3433 PN.setIncomingValue(ThenI, V);
3434 }
3435
3436 // Remove speculated pseudo probes.
3437 for (Instruction *I : SpeculatedPseudoProbes)
3438 I->eraseFromParent();
3439
3440 ++NumSpeculations;
3441 return true;
3442}
3443
3445
3446// Return false if number of blocks searched is too much.
3447static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3448 BlocksSet &ReachesNonLocalUses) {
3449 if (BB == DefBB)
3450 return true;
3451 if (!ReachesNonLocalUses.insert(BB).second)
3452 return true;
3453
3454 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3455 return false;
3456 for (BasicBlock *Pred : predecessors(BB))
3457 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3458 return false;
3459 return true;
3460}
3461
3462/// Return true if we can thread a branch across this block.
3464 BlocksSet &NonLocalUseBlocks) {
3465 int Size = 0;
3466 EphemeralValueTracker EphTracker;
3467
3468 // Walk the loop in reverse so that we can identify ephemeral values properly
3469 // (values only feeding assumes).
3470 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3471 // Can't fold blocks that contain noduplicate or convergent calls.
3472 if (CallInst *CI = dyn_cast<CallInst>(&I))
3473 if (CI->cannotDuplicate() || CI->isConvergent())
3474 return false;
3475
3476 // Ignore ephemeral values which are deleted during codegen.
3477 // We will delete Phis while threading, so Phis should not be accounted in
3478 // block's size.
3479 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3480 if (Size++ > MaxSmallBlockSize)
3481 return false; // Don't clone large BB's.
3482 }
3483
3484 // Record blocks with non-local uses of values defined in the current basic
3485 // block.
3486 for (User *U : I.users()) {
3488 BasicBlock *UsedInBB = UI->getParent();
3489 if (UsedInBB == BB) {
3490 if (isa<PHINode>(UI))
3491 return false;
3492 } else
3493 NonLocalUseBlocks.insert(UsedInBB);
3494 }
3495
3496 // Looks ok, continue checking.
3497 }
3498
3499 return true;
3500}
3501
3503 BasicBlock *To) {
3504 // Don't look past the block defining the value, we might get the value from
3505 // a previous loop iteration.
3506 auto *I = dyn_cast<Instruction>(V);
3507 if (I && I->getParent() == To)
3508 return nullptr;
3509
3510 // We know the value if the From block branches on it.
3511 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3512 if (BI && BI->isConditional() && BI->getCondition() == V &&
3513 BI->getSuccessor(0) != BI->getSuccessor(1))
3514 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3516
3517 return nullptr;
3518}
3519
3520/// If we have a conditional branch on something for which we know the constant
3521/// value in predecessors (e.g. a phi node in the current block), thread edges
3522/// from the predecessor to their ultimate destination.
3523static std::optional<bool>
3525 const DataLayout &DL,
3526 AssumptionCache *AC) {
3528 BasicBlock *BB = BI->getParent();
3529 Value *Cond = BI->getCondition();
3531 if (PN && PN->getParent() == BB) {
3532 // Degenerate case of a single entry PHI.
3533 if (PN->getNumIncomingValues() == 1) {
3535 return true;
3536 }
3537
3538 for (Use &U : PN->incoming_values())
3539 if (auto *CB = dyn_cast<ConstantInt>(U))
3540 KnownValues[CB].insert(PN->getIncomingBlock(U));
3541 } else {
3542 for (BasicBlock *Pred : predecessors(BB)) {
3543 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3544 KnownValues[CB].insert(Pred);
3545 }
3546 }
3547
3548 if (KnownValues.empty())
3549 return false;
3550
3551 // Now we know that this block has multiple preds and two succs.
3552 // Check that the block is small enough and record which non-local blocks use
3553 // values defined in the block.
3554
3555 BlocksSet NonLocalUseBlocks;
3556 BlocksSet ReachesNonLocalUseBlocks;
3557 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3558 return false;
3559
3560 // Jump-threading can only be done to destinations where no values defined
3561 // in BB are live.
3562
3563 // Quickly check if both destinations have uses. If so, jump-threading cannot
3564 // be done.
3565 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3566 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3567 return false;
3568
3569 // Search backward from NonLocalUseBlocks to find which blocks
3570 // reach non-local uses.
3571 for (BasicBlock *UseBB : NonLocalUseBlocks)
3572 // Give up if too many blocks are searched.
3573 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3574 return false;
3575
3576 for (const auto &Pair : KnownValues) {
3577 ConstantInt *CB = Pair.first;
3578 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3579 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3580
3581 // Okay, we now know that all edges from PredBB should be revectored to
3582 // branch to RealDest.
3583 if (RealDest == BB)
3584 continue; // Skip self loops.
3585
3586 // Skip if the predecessor's terminator is an indirect branch.
3587 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3588 return isa<IndirectBrInst>(PredBB->getTerminator());
3589 }))
3590 continue;
3591
3592 // Only revector to RealDest if no values defined in BB are live.
3593 if (ReachesNonLocalUseBlocks.contains(RealDest))
3594 continue;
3595
3596 LLVM_DEBUG({
3597 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3598 << " has value " << *Pair.first << " in predecessors:\n";
3599 for (const BasicBlock *PredBB : Pair.second)
3600 dbgs() << " " << PredBB->getName() << "\n";
3601 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3602 });
3603
3604 // Split the predecessors we are threading into a new edge block. We'll
3605 // clone the instructions into this block, and then redirect it to RealDest.
3606 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3607 if (!EdgeBB)
3608 continue;
3609
3610 // TODO: These just exist to reduce test diff, we can drop them if we like.
3611 EdgeBB->setName(RealDest->getName() + ".critedge");
3612 EdgeBB->moveBefore(RealDest);
3613
3614 // Update PHI nodes.
3615 addPredecessorToBlock(RealDest, EdgeBB, BB);
3616
3617 // BB may have instructions that are being threaded over. Clone these
3618 // instructions into EdgeBB. We know that there will be no uses of the
3619 // cloned instructions outside of EdgeBB.
3620 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3621 ValueToValueMapTy TranslateMap; // Track translated values.
3622 TranslateMap[Cond] = CB;
3623
3624 // RemoveDIs: track instructions that we optimise away while folding, so
3625 // that we can copy DbgVariableRecords from them later.
3626 BasicBlock::iterator SrcDbgCursor = BB->begin();
3627 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3628 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3629 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3630 continue;
3631 }
3632 // Clone the instruction.
3633 Instruction *N = BBI->clone();
3634 // Insert the new instruction into its new home.
3635 N->insertInto(EdgeBB, InsertPt);
3636
3637 if (BBI->hasName())
3638 N->setName(BBI->getName() + ".c");
3639
3640 // Update operands due to translation.
3641 // Key Instructions: Remap all the atom groups.
3642 if (const DebugLoc &DL = BBI->getDebugLoc())
3643 mapAtomInstance(DL, TranslateMap);
3644 RemapInstruction(N, TranslateMap,
3646
3647 // Check for trivial simplification.
3648 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3649 if (!BBI->use_empty())
3650 TranslateMap[&*BBI] = V;
3651 if (!N->mayHaveSideEffects()) {
3652 N->eraseFromParent(); // Instruction folded away, don't need actual
3653 // inst
3654 N = nullptr;
3655 }
3656 } else {
3657 if (!BBI->use_empty())
3658 TranslateMap[&*BBI] = N;
3659 }
3660 if (N) {
3661 // Copy all debug-info attached to instructions from the last we
3662 // successfully clone, up to this instruction (they might have been
3663 // folded away).
3664 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3665 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3666 SrcDbgCursor = std::next(BBI);
3667 // Clone debug-info on this instruction too.
3668 N->cloneDebugInfoFrom(&*BBI);
3669
3670 // Register the new instruction with the assumption cache if necessary.
3671 if (auto *Assume = dyn_cast<AssumeInst>(N))
3672 if (AC)
3673 AC->registerAssumption(Assume);
3674 }
3675 }
3676
3677 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3678 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3679 InsertPt->cloneDebugInfoFrom(BI);
3680
3681 BB->removePredecessor(EdgeBB);
3682 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3683 EdgeBI->setSuccessor(0, RealDest);
3684 EdgeBI->setDebugLoc(BI->getDebugLoc());
3685
3686 if (DTU) {
3688 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3689 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3690 DTU->applyUpdates(Updates);
3691 }
3692
3693 // For simplicity, we created a separate basic block for the edge. Merge
3694 // it back into the predecessor if possible. This not only avoids
3695 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3696 // bypass the check for trivial cycles above.
3697 MergeBlockIntoPredecessor(EdgeBB, DTU);
3698
3699 // Signal repeat, simplifying any other constants.
3700 return std::nullopt;
3701 }
3702
3703 return false;
3704}
3705
3706bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3707 // Note: If BB is a loop header then there is a risk that threading introduces
3708 // a non-canonical loop by moving a back edge. So we avoid this optimization
3709 // for loop headers if NeedCanonicalLoop is set.
3710 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3711 return false;
3712
3713 std::optional<bool> Result;
3714 bool EverChanged = false;
3715 do {
3716 // Note that None means "we changed things, but recurse further."
3717 Result =
3719 EverChanged |= Result == std::nullopt || *Result;
3720 } while (Result == std::nullopt);
3721 return EverChanged;
3722}
3723
3724/// Given a BB that starts with the specified two-entry PHI node,
3725/// see if we can eliminate it.
3728 const DataLayout &DL,
3729 bool SpeculateUnpredictables) {
3730 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3731 // statement", which has a very simple dominance structure. Basically, we
3732 // are trying to find the condition that is being branched on, which
3733 // subsequently causes this merge to happen. We really want control
3734 // dependence information for this check, but simplifycfg can't keep it up
3735 // to date, and this catches most of the cases we care about anyway.
3736 BasicBlock *BB = PN->getParent();
3737
3738 BasicBlock *IfTrue, *IfFalse;
3739 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3740 if (!DomBI)
3741 return false;
3742 Value *IfCond = DomBI->getCondition();
3743 // Don't bother if the branch will be constant folded trivially.
3744 if (isa<ConstantInt>(IfCond))
3745 return false;
3746
3747 BasicBlock *DomBlock = DomBI->getParent();
3750 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3751 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3752 });
3753 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3754 "Will have either one or two blocks to speculate.");
3755
3756 // If the branch is non-unpredictable, see if we either predictably jump to
3757 // the merge bb (if we have only a single 'then' block), or if we predictably
3758 // jump to one specific 'then' block (if we have two of them).
3759 // It isn't beneficial to speculatively execute the code
3760 // from the block that we know is predictably not entered.
3761 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3762 if (!IsUnpredictable) {
3763 uint64_t TWeight, FWeight;
3764 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3765 (TWeight + FWeight) != 0) {
3766 BranchProbability BITrueProb =
3767 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3768 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3769 BranchProbability BIFalseProb = BITrueProb.getCompl();
3770 if (IfBlocks.size() == 1) {
3771 BranchProbability BIBBProb =
3772 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3773 if (BIBBProb >= Likely)
3774 return false;
3775 } else {
3776 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3777 return false;
3778 }
3779 }
3780 }
3781
3782 // Don't try to fold an unreachable block. For example, the phi node itself
3783 // can't be the candidate if-condition for a select that we want to form.
3784 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3785 if (IfCondPhiInst->getParent() == BB)
3786 return false;
3787
3788 // Okay, we found that we can merge this two-entry phi node into a select.
3789 // Doing so would require us to fold *all* two entry phi nodes in this block.
3790 // At some point this becomes non-profitable (particularly if the target
3791 // doesn't support cmov's). Only do this transformation if there are two or
3792 // fewer PHI nodes in this block.
3793 unsigned NumPhis = 0;
3794 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3795 if (NumPhis > 2)
3796 return false;
3797
3798 // Loop over the PHI's seeing if we can promote them all to select
3799 // instructions. While we are at it, keep track of the instructions
3800 // that need to be moved to the dominating block.
3801 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3802 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3803 InstructionCost Cost = 0;
3804 InstructionCost Budget =
3806 if (SpeculateUnpredictables && IsUnpredictable)
3807 Budget += TTI.getBranchMispredictPenalty();
3808
3809 bool Changed = false;
3810 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3811 PHINode *PN = cast<PHINode>(II++);
3812 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3813 PN->replaceAllUsesWith(V);
3814 PN->eraseFromParent();
3815 Changed = true;
3816 continue;
3817 }
3818
3819 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3820 AggressiveInsts, Cost, Budget, TTI, AC,
3821 ZeroCostInstructions) ||
3822 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3823 AggressiveInsts, Cost, Budget, TTI, AC,
3824 ZeroCostInstructions))
3825 return Changed;
3826 }
3827
3828 // If we folded the first phi, PN dangles at this point. Refresh it. If
3829 // we ran out of PHIs then we simplified them all.
3830 PN = dyn_cast<PHINode>(BB->begin());
3831 if (!PN)
3832 return true;
3833
3834 // Return true if at least one of these is a 'not', and another is either
3835 // a 'not' too, or a constant.
3836 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3837 if (!match(V0, m_Not(m_Value())))
3838 std::swap(V0, V1);
3839 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3840 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3841 };
3842
3843 // Don't fold i1 branches on PHIs which contain binary operators or
3844 // (possibly inverted) select form of or/ands, unless one of
3845 // the incoming values is an 'not' and another one is freely invertible.
3846 // These can often be turned into switches and other things.
3847 auto IsBinOpOrAnd = [](Value *V) {
3848 return match(
3850 };
3851 if (PN->getType()->isIntegerTy(1) &&
3852 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3853 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3854 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3855 PN->getIncomingValue(1)))
3856 return Changed;
3857
3858 // If all PHI nodes are promotable, check to make sure that all instructions
3859 // in the predecessor blocks can be promoted as well. If not, we won't be able
3860 // to get rid of the control flow, so it's not worth promoting to select
3861 // instructions.
3862 for (BasicBlock *IfBlock : IfBlocks)
3863 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3864 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3865 // This is not an aggressive instruction that we can promote.
3866 // Because of this, we won't be able to get rid of the control flow, so
3867 // the xform is not worth it.
3868 return Changed;
3869 }
3870
3871 // If either of the blocks has it's address taken, we can't do this fold.
3872 if (any_of(IfBlocks,
3873 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3874 return Changed;
3875
3876 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3877 if (IsUnpredictable) dbgs() << " (unpredictable)";
3878 dbgs() << " T: " << IfTrue->getName()
3879 << " F: " << IfFalse->getName() << "\n");
3880
3881 // If we can still promote the PHI nodes after this gauntlet of tests,
3882 // do all of the PHI's now.
3883
3884 // Move all 'aggressive' instructions, which are defined in the
3885 // conditional parts of the if's up to the dominating block.
3886 for (BasicBlock *IfBlock : IfBlocks)
3887 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3888
3889 IRBuilder<NoFolder> Builder(DomBI);
3890 // Propagate fast-math-flags from phi nodes to replacement selects.
3891 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3892 // Change the PHI node into a select instruction.
3893 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3894 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3895
3896 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3897 isa<FPMathOperator>(PN) ? PN : nullptr,
3898 "", DomBI);
3899 PN->replaceAllUsesWith(Sel);
3900 Sel->takeName(PN);
3901 PN->eraseFromParent();
3902 }
3903
3904 // At this point, all IfBlocks are empty, so our if statement
3905 // has been flattened. Change DomBlock to jump directly to our new block to
3906 // avoid other simplifycfg's kicking in on the diamond.
3907 Builder.CreateBr(BB);
3908
3910 if (DTU) {
3911 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3912 for (auto *Successor : successors(DomBlock))
3913 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3914 }
3915
3916 DomBI->eraseFromParent();
3917 if (DTU)
3918 DTU->applyUpdates(Updates);
3919
3920 return true;
3921}
3922
3925 Value *RHS, const Twine &Name = "") {
3926 // Try to relax logical op to binary op.
3927 if (impliesPoison(RHS, LHS))
3928 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3929 if (Opc == Instruction::And)
3930 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3931 if (Opc == Instruction::Or)
3932 return Builder.CreateLogicalOr(LHS, RHS, Name);
3933 llvm_unreachable("Invalid logical opcode");
3934}
3935
3936/// Return true if either PBI or BI has branch weight available, and store
3937/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3938/// not have branch weight, use 1:1 as its weight.
3940 uint64_t &PredTrueWeight,
3941 uint64_t &PredFalseWeight,
3942 uint64_t &SuccTrueWeight,
3943 uint64_t &SuccFalseWeight) {
3944 bool PredHasWeights =
3945 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3946 bool SuccHasWeights =
3947 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3948 if (PredHasWeights || SuccHasWeights) {
3949 if (!PredHasWeights)
3950 PredTrueWeight = PredFalseWeight = 1;
3951 if (!SuccHasWeights)
3952 SuccTrueWeight = SuccFalseWeight = 1;
3953 return true;
3954 } else {
3955 return false;
3956 }
3957}
3958
3959/// Determine if the two branches share a common destination and deduce a glue
3960/// that joins the branches' conditions to arrive at the common destination if
3961/// that would be profitable.
3962static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3964 const TargetTransformInfo *TTI) {
3965 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3966 "Both blocks must end with a conditional branches.");
3968 "PredBB must be a predecessor of BB.");
3969
3970 // We have the potential to fold the conditions together, but if the
3971 // predecessor branch is predictable, we may not want to merge them.
3972 uint64_t PTWeight, PFWeight;
3973 BranchProbability PBITrueProb, Likely;
3974 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3975 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3976 (PTWeight + PFWeight) != 0) {
3977 PBITrueProb =
3978 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3979 Likely = TTI->getPredictableBranchThreshold();
3980 }
3981
3982 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3983 // Speculate the 2nd condition unless the 1st is probably true.
3984 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3985 return {{BI->getSuccessor(0), Instruction::Or, false}};
3986 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3987 // Speculate the 2nd condition unless the 1st is probably false.
3988 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3989 return {{BI->getSuccessor(1), Instruction::And, false}};
3990 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3991 // Speculate the 2nd condition unless the 1st is probably true.
3992 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3993 return {{BI->getSuccessor(1), Instruction::And, true}};
3994 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3995 // Speculate the 2nd condition unless the 1st is probably false.
3996 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3997 return {{BI->getSuccessor(0), Instruction::Or, true}};
3998 }
3999 return std::nullopt;
4000}
4001
4003 DomTreeUpdater *DTU,
4004 MemorySSAUpdater *MSSAU,
4005 const TargetTransformInfo *TTI) {
4006 BasicBlock *BB = BI->getParent();
4007 BasicBlock *PredBlock = PBI->getParent();
4008
4009 // Determine if the two branches share a common destination.
4010 BasicBlock *CommonSucc;
4012 bool InvertPredCond;
4013 std::tie(CommonSucc, Opc, InvertPredCond) =
4015
4016 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4017
4018 IRBuilder<> Builder(PBI);
4019 // The builder is used to create instructions to eliminate the branch in BB.
4020 // If BB's terminator has !annotation metadata, add it to the new
4021 // instructions.
4022 Builder.CollectMetadataToCopy(BB->getTerminator(),
4023 {LLVMContext::MD_annotation});
4024
4025 // If we need to invert the condition in the pred block to match, do so now.
4026 if (InvertPredCond) {
4027 InvertBranch(PBI, Builder);
4028 }
4029
4030 BasicBlock *UniqueSucc =
4031 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4032
4033 // Before cloning instructions, notify the successor basic block that it
4034 // is about to have a new predecessor. This will update PHI nodes,
4035 // which will allow us to update live-out uses of bonus instructions.
4036 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4037
4038 // Try to update branch weights.
4039 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4040 SmallVector<uint64_t, 2> MDWeights;
4041 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4042 SuccTrueWeight, SuccFalseWeight)) {
4043
4044 if (PBI->getSuccessor(0) == BB) {
4045 // PBI: br i1 %x, BB, FalseDest
4046 // BI: br i1 %y, UniqueSucc, FalseDest
4047 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4048 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4049 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4050 // TrueWeight for PBI * FalseWeight for BI.
4051 // We assume that total weights of a BranchInst can fit into 32 bits.
4052 // Therefore, we will not have overflow using 64-bit arithmetic.
4053 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4054 PredTrueWeight * SuccFalseWeight);
4055 } else {
4056 // PBI: br i1 %x, TrueDest, BB
4057 // BI: br i1 %y, TrueDest, UniqueSucc
4058 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4059 // FalseWeight for PBI * TrueWeight for BI.
4060 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4061 PredFalseWeight * SuccTrueWeight);
4062 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4063 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4064 }
4065
4066 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4067 /*ElideAllZero=*/true);
4068
4069 // TODO: If BB is reachable from all paths through PredBlock, then we
4070 // could replace PBI's branch probabilities with BI's.
4071 } else
4072 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4073
4074 // Now, update the CFG.
4075 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4076
4077 if (DTU)
4078 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4079 {DominatorTree::Delete, PredBlock, BB}});
4080
4081 // If BI was a loop latch, it may have had associated loop metadata.
4082 // We need to copy it to the new latch, that is, PBI.
4083 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4084 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4085
4086 ValueToValueMapTy VMap; // maps original values to cloned values
4088
4089 Module *M = BB->getModule();
4090
4091 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4092 for (DbgVariableRecord &DVR :
4094 RemapDbgRecord(M, &DVR, VMap,
4096 }
4097
4098 // Now that the Cond was cloned into the predecessor basic block,
4099 // or/and the two conditions together.
4100 Value *BICond = VMap[BI->getCondition()];
4101 PBI->setCondition(
4102 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4104 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4105 if (!MDWeights.empty()) {
4106 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4107 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4108 /*IsExpected=*/false, /*ElideAllZero=*/true);
4109 }
4110
4111 ++NumFoldBranchToCommonDest;
4112 return true;
4113}
4114
4115/// Return if an instruction's type or any of its operands' types are a vector
4116/// type.
4117static bool isVectorOp(Instruction &I) {
4118 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4119 return U->getType()->isVectorTy();
4120 });
4121}
4122
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
///
/// At most one predecessor is folded per invocation (see the final loop); the
/// caller iterates to convergence.
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  // If this block ends with an unconditional branch,
  // let speculativelyExecuteBB() deal with it.
  if (!BI->isConditional())
    return false;

  BasicBlock *BB = BI->getParent();


      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(successors(BB), BB))
    return false;


  // Collect the predecessors whose conditional branch can absorb BB's branch.
  for (BasicBlock *PredBlock : predecessors(BB)) {
    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());

    // Check that we have two conditional branches. If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
      // Inverting a multi-use or non-cmp condition needs an extra 'xor'.
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);

        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore the terminator.
    if (isa<BranchInst>(I))
      continue;
    // I must be safe to execute unconditionally.
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
      NumBonusInsts += PredCount;

      // Early exits once we reach the limit.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    // A use is "BCSSA" if it is a PHI incoming from BB or a later
    // instruction inside BB itself.
    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(U.getUser());
      if (auto *PN = dyn_cast<PHINode>(UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(I.uses(), IsBCSSAUse))
      return false;
  }
  // Vector bonus instructions get a stricter (multiplied) threshold.
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
    // Fold only the first recorded predecessor; callers re-run this routine
    // until no further folding happens.
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}
4248
4249// If there is only one store in BB1 and BB2, return it, otherwise return
4250// nullptr.
4252 StoreInst *S = nullptr;
4253 for (auto *BB : {BB1, BB2}) {
4254 if (!BB)
4255 continue;
4256 for (auto &I : *BB)
4257 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4258 if (S)
4259 // Multiple stores seen.
4260 return nullptr;
4261 else
4262 S = SI;
4263 }
4264 }
4265 return S;
4266}
4267
4269 Value *AlternativeV = nullptr) {
4270 // PHI is going to be a PHI node that allows the value V that is defined in
4271 // BB to be referenced in BB's only successor.
4272 //
4273 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4274 // doesn't matter to us what the other operand is (it'll never get used). We
4275 // could just create a new PHI with an undef incoming value, but that could
4276 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4277 // other PHI. So here we directly look for some PHI in BB's successor with V
4278 // as an incoming operand. If we find one, we use it, else we create a new
4279 // one.
4280 //
4281 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4282 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4283 // where OtherBB is the single other predecessor of BB's only successor.
4284 PHINode *PHI = nullptr;
4285 BasicBlock *Succ = BB->getSingleSuccessor();
4286
4287 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4288 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4289 PHI = cast<PHINode>(I);
4290 if (!AlternativeV)
4291 break;
4292
4293 assert(Succ->hasNPredecessors(2));
4294 auto PredI = pred_begin(Succ);
4295 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4296 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4297 break;
4298 PHI = nullptr;
4299 }
4300 if (PHI)
4301 return PHI;
4302
4303 // If V is not an instruction defined in BB, just return it.
4304 if (!AlternativeV &&
4305 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4306 return V;
4307
4308 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4309 PHI->insertBefore(Succ->begin());
4310 PHI->addIncoming(V, BB);
4311 for (BasicBlock *PredBB : predecessors(Succ))
4312 if (PredBB != BB)
4313 PHI->addIncoming(
4314 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4315 return PHI;
4316}
4317
// NOTE(review): body of mergeConditionalStoreToAddress (the signature line is
// elided in this view). It sinks the unique store found in {PTB,PFB} and the
// unique store found in {QTB,QFB} — both to `Address` — into PostBB as a
// single store predicated on the disjunction of the two branch conditions.
// Returns true iff the IR was changed.
4319     BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4320     BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4321     DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4322   // For every pointer, there must be exactly two stores, one coming from
4323   // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4324   // store (to any address) in PTB,PFB or QTB,QFB.
4325   // FIXME: We could relax this restriction with a bit more work and performance
4326   // testing.
4327   StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4328   StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4329   if (!PStore || !QStore)
4330     return false;
4331
4332   // Now check the stores are compatible.
  // Atomic/volatile stores cannot be speculated, and the stored value types
  // must match so that one PHI can feed the merged store.
4333   if (!QStore->isUnordered() || !PStore->isUnordered() ||
4334       PStore->getValueOperand()->getType() !=
4335           QStore->getValueOperand()->getType())
4336     return false;
4337
4338   // Check that sinking the store won't cause program behavior changes. Sinking
4339   // the store out of the Q blocks won't change any behavior as we're sinking
4340   // from a block to its unconditional successor. But we're moving a store from
4341   // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4342   // So we need to check that there are no aliasing loads or stores in
4343   // QBI, QTB and QFB. We also need to check there are no conflicting memory
4344   // operations between PStore and the end of its parent block.
4345   //
4346   // The ideal way to do this is to query AliasAnalysis, but we don't
4347   // preserve AA currently so that is dangerous. Be super safe and just
4348   // check there are no other memory operations at all.
4349   for (auto &I : *QFB->getSinglePredecessor())
4350     if (I.mayReadOrWriteMemory())
4351       return false;
4352   for (auto &I : *QFB)
4353     if (&I != QStore && I.mayReadOrWriteMemory())
4354       return false;
4355   if (QTB)
4356     for (auto &I : *QTB)
4357       if (&I != QStore && I.mayReadOrWriteMemory())
4358         return false;
4359   for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4360        I != E; ++I)
4361     if (&*I != PStore && I->mayReadOrWriteMemory())
4362       return false;
4363
4364   // If we're not in aggressive mode, we only optimize if we have some
4365   // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4366   auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4367     if (!BB)
4368       return true;
4369     // Heuristic: if the block can be if-converted/phi-folded and the
4370     // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4371     // thread this store.
4372     InstructionCost Cost = 0;
4373     InstructionCost Budget =
4375     for (auto &I : BB->instructionsWithoutDebug(false)) {
4376       // Consider terminator instruction to be free.
4377       if (I.isTerminator())
4378         continue;
4379       // If this is one of the stores that we want to speculate out of this BB,
4380       // then don't count its cost, consider it to be free.
4381       if (auto *S = dyn_cast<StoreInst>(&I))
4382         if (llvm::find(FreeStores, S))
4383           continue;
4384       // Else, we have a white-list of instructions that we are okay speculating.
4386         return false; // Not in white-list - not worthwhile folding.
4387       // And finally, if this is a non-free instruction that we are okay
4388       // speculating, ensure that we consider the speculation budget.
4389       Cost +=
4390           TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4391       if (Cost > Budget)
4392         return false; // Eagerly refuse to fold as soon as we're out of budget.
4393     }
4394     assert(Cost <= Budget &&
4395            "When we run out of budget we will eagerly return from within the "
4396            "per-instruction loop.");
4397     return true;
4398   };
4399
4400   const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4402       (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4403        !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4404     return false;
4405
4406   // If PostBB has more than two predecessors, we need to split it so we can
4407   // sink the store.
4408   if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4409     // We know that QFB's only successor is PostBB. And QFB has a single
4410     // predecessor. If QTB exists, then its only successor is also PostBB.
4411     // If QTB does not exist, then QFB's only predecessor has a conditional
4412     // branch to QFB and PostBB.
4413     BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4414     BasicBlock *NewBB =
4415         SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4416     if (!NewBB)
4417       return false;
4418     PostBB = NewBB;
4419   }
4420
4421   // OK, we're going to sink the stores to PostBB. The store has to be
4422   // conditional though, so first create the predicate.
4423   BranchInst *PBranch =
4425   BranchInst *QBranch =
4427   Value *PCond = PBranch->getCondition();
4428   Value *QCond = QBranch->getCondition();
4429
4431                                                 PStore->getParent());
4433                                                 QStore->getParent(), PPHI);
4434
4435   BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4436   IRBuilder<> QB(PostBB, PostBBFirst);
4437   QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4438
  // A store living in the *false* block means "store executes" corresponds to
  // the negated branch condition, so flip the requested inversion.
4439   InvertPCond ^= (PStore->getParent() != PTB);
4440   InvertQCond ^= (QStore->getParent() != QTB);
4441   Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4442   Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4443
4444   Value *CombinedPred = QB.CreateOr(PPred, QPred);
4445
4446   BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4447   auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4448                                       /*Unreachable=*/false,
4449                                       /*BranchWeights=*/nullptr, DTU);
  // Transfer profile data: combine the two branches' weights into weights for
  // the new guard branch, accounting for any condition inversions above.
4450   if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4452     SmallVector<uint32_t, 2> PWeights, QWeights;
4453     extractBranchWeights(*PBranch, PWeights);
4454     extractBranchWeights(*QBranch, QWeights);
4455     if (InvertPCond)
4456       std::swap(PWeights[0], PWeights[1]);
4457     if (InvertQCond)
4458       std::swap(QWeights[0], QWeights[1]);
4459     auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4461                       {CombinedWeights[0], CombinedWeights[1]},
4462                       /*IsExpected=*/false, /*ElideAllZero=*/true);
4463   }
4464
4465   QB.SetInsertPoint(T);
4466   StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4467   SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4468   // Choose the minimum alignment. If we could prove both stores execute, we
4469   // could use biggest one. In this case, though, we only know that one of the
4470   // stores executes. And we don't know it's safe to take the alignment from a
4471   // store that doesn't execute.
4472   SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4473
4474   QStore->eraseFromParent();
4475   PStore->eraseFromParent();
4476
4477   return true;
4478 }
4479
// NOTE(review): body of mergeConditionalStores (the signature line is elided
// in this view). Driver for the conditional-store merging transform: PBI/QBI
// are two chained conditional branches forming back-to-back diamonds or
// triangles; every address stored to on both sides is handed to
// mergeConditionalStoreToAddress. Returns true iff any merge succeeded.
4481                                    DomTreeUpdater *DTU, const DataLayout &DL,
4482                                    const TargetTransformInfo &TTI) {
4483   // The intention here is to find diamonds or triangles (see below) where each
4484   // conditional block contains a store to the same address. Both of these
4485   // stores are conditional, so they can't be unconditionally sunk. But it may
4486   // be profitable to speculatively sink the stores into one merged store at the
4487   // end, and predicate the merged store on the union of the two conditions of
4488   // PBI and QBI.
4489   //
4490   // This can reduce the number of stores executed if both of the conditions are
4491   // true, and can allow the blocks to become small enough to be if-converted.
4492   // This optimization will also chain, so that ladders of test-and-set
4493   // sequences can be if-converted away.
4494   //
4495   // We only deal with simple diamonds or triangles:
4496   //
4497   //     PBI       or   PBI        or a combination of the two
4498   //    /   \           | \
4499   //   PTB  PFB         |  PFB
4500   //    \   /           | /
4501   //     QBI                QBI
4502   //    /  \            | \
4503   //   QTB  QFB         |  QFB
4504   //    \  /            | /
4505   //    PostBB            PostBB
4506   //
4507   // We model triangles as a type of diamond with a nullptr "true" block.
4508   // Triangles are canonicalized so that the fallthrough edge is represented by
4509   // a true condition, as in the diagram above.
4510   BasicBlock *PTB = PBI->getSuccessor(0);
4511   BasicBlock *PFB = PBI->getSuccessor(1);
4512   BasicBlock *QTB = QBI->getSuccessor(0);
4513   BasicBlock *QFB = QBI->getSuccessor(1);
4514   BasicBlock *PostBB = QFB->getSingleSuccessor();
4515
4516   // Make sure we have a good guess for PostBB. If QTB's only successor is
4517   // QFB, then QFB is a better PostBB.
4518   if (QTB->getSingleSuccessor() == QFB)
4519     PostBB = QFB;
4520
4521   // If we couldn't find a good PostBB, stop.
4522   if (!PostBB)
4523     return false;
4524
4525   bool InvertPCond = false, InvertQCond = false;
4526   // Canonicalize fallthroughs to the true branches.
4527   if (PFB == QBI->getParent()) {
4528     std::swap(PFB, PTB);
4529     InvertPCond = true;
4530   }
4531   if (QFB == PostBB) {
4532     std::swap(QFB, QTB);
4533     InvertQCond = true;
4534   }
4535
4536   // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4537   // and QFB may not. Model fallthroughs as a nullptr block.
4538   if (PTB == QBI->getParent())
4539     PTB = nullptr;
4540   if (QTB == PostBB)
4541     QTB = nullptr;
4542
4543   // Legality bailouts. We must have at least the non-fallthrough blocks and
4544   // the post-dominating block, and the non-fallthroughs must only have one
4545   // predecessor.
4546   auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4547     return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4548   };
4549   if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4550       !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4551     return false;
4552   if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4553       (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4554     return false;
  // NOTE(review): QBI's block must have exactly two uses — presumably the two
  // edges out of the P diamond — so no other path can reach the Q diamond;
  // verify against the callers' invariants.
4555   if (!QBI->getParent()->hasNUses(2))
4556     return false;
4557
4558   // OK, this is a sequence of two diamonds or triangles.
4559   // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4560   SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4561   for (auto *BB : {PTB, PFB}) {
4562     if (!BB)
4563       continue;
4564     for (auto &I : *BB)
4566         PStoreAddresses.insert(SI->getPointerOperand());
4567   }
4568   for (auto *BB : {QTB, QFB}) {
4569     if (!BB)
4570       continue;
4571     for (auto &I : *BB)
4573         QStoreAddresses.insert(SI->getPointerOperand());
4574   }
4575
4576   set_intersect(PStoreAddresses, QStoreAddresses);
4577   // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4578   // clear what it contains.
4579   auto &CommonAddresses = PStoreAddresses;
4580
4581   bool Changed = false;
4582   for (auto *Address : CommonAddresses)
4583     Changed |=
4584         mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4585                                        InvertPCond, InvertQCond, DTU, DL, TTI);
4586   return Changed;
4587 }
4588
4589 /// If the previous block ended with a widenable branch, determine if reusing
4590 /// the target block is profitable and legal. This will have the effect of
4591 /// "widening" PBI, but doesn't require us to reason about hoisting safety.
/// \returns true if one of BI's deoptimizing successors was replaced with
/// PBI's false (widenable-fail) destination.
4593                                        DomTreeUpdater *DTU) {
4594   // TODO: This can be generalized in two important ways:
4595   // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4596   //    values from the PBI edge.
4597   // 2) We can sink side effecting instructions into BI's fallthrough
4598   //    successor provided they don't contribute to computation of
4599   //    BI's condition.
4600   BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4601   BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4602   if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4603       !BI->getParent()->getSinglePredecessor())
4604     return false;
4605   if (!IfFalseBB->phis().empty())
4606     return false; // TODO
4607   // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4608   // may undo the transform done here.
4609   // TODO: There might be a more fine-grained solution to this.
4610   if (!llvm::succ_empty(IfFalseBB))
4611     return false;
4612   // Use lambda to lazily compute expensive condition after cheap ones.
4613   auto NoSideEffects = [](BasicBlock &BB) {
4614     return llvm::none_of(BB, [](const Instruction &I) {
4615       return I.mayWriteToMemory() || I.mayHaveSideEffects();
4616     });
4617   };
  // Whichever successor of BI terminates in a deoptimize call can instead be
  // pointed at the widenable branch's false destination. Try successor 1
  // first, then successor 0 (the two cases below are symmetric).
4618   if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4619       BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4620       NoSideEffects(*BI->getParent())) {
4621     auto *OldSuccessor = BI->getSuccessor(1);
4622     OldSuccessor->removePredecessor(BI->getParent());
4623     BI->setSuccessor(1, IfFalseBB);
4624     if (DTU)
4625       DTU->applyUpdates(
4626           {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4627            {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4628     return true;
4629   }
4630   if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4631       BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4632       NoSideEffects(*BI->getParent())) {
4633     auto *OldSuccessor = BI->getSuccessor(0);
4634     OldSuccessor->removePredecessor(BI->getParent());
4635     BI->setSuccessor(0, IfFalseBB);
4636     if (DTU)
4637       DTU->applyUpdates(
4638           {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4639            {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4640     return true;
4641   }
4642   return false;
4643 }
4644
4645 /// If we have a conditional branch as a predecessor of another block,
4646 /// this function tries to simplify it. We know
4647 /// that PBI and BI are both conditional branches, and BI is in one of the
4648 /// successor blocks of PBI - PBI branches to BI.
/// \returns true if the CFG was changed (the signature line itself is elided
/// in this view).
4650                                            DomTreeUpdater *DTU,
4651                                            const DataLayout &DL,
4652                                            const TargetTransformInfo &TTI) {
4653   assert(PBI->isConditional() && BI->isConditional());
4654   BasicBlock *BB = BI->getParent();
4655
4656   // If this block ends with a branch instruction, and if there is a
4657   // predecessor that ends on a branch of the same condition, make
4658   // this conditional branch redundant.
4659   if (PBI->getCondition() == BI->getCondition() &&
4660       PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4661     // Okay, the outcome of this conditional branch is statically
4662     // knowable. If this block had a single pred, handle specially, otherwise
4663     // foldCondBranchOnValueKnownInPredecessor() will handle it.
4664     if (BB->getSinglePredecessor()) {
4665       // Turn this into a branch on constant.
4666       bool CondIsTrue = PBI->getSuccessor(0) == BB;
4667       BI->setCondition(
4668           ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4669       return true; // Nuke the branch on constant.
4670     }
4671   }
4672
4673   // If the previous block ended with a widenable branch, determine if reusing
4674   // the target block is profitable and legal. This will have the effect of
4675   // "widening" PBI, but doesn't require us to reason about hoisting safety.
4676   if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4677     return true;
4678
4679   // If both branches are conditional and both contain stores to the same
4680   // address, remove the stores from the conditionals and create a conditional
4681   // merged store at the end.
4682   if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4683     return true;
4684
4685   // If this is a conditional branch in an empty block, and if any
4686   // predecessors are a conditional branch to one of our destinations,
4687   // fold the conditions into logical ops and one cond br.
4688
4689   // Ignore dbg intrinsics.
4690   if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4691     return false;
4692
  // Identify the shared destination: PBIOp/BIOp index which successor of each
  // branch (0 = taken-on-true, 1 = taken-on-false) leads to the common block.
4693   int PBIOp, BIOp;
4694   if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4695     PBIOp = 0;
4696     BIOp = 0;
4697   } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4698     PBIOp = 0;
4699     BIOp = 1;
4700   } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4701     PBIOp = 1;
4702     BIOp = 0;
4703   } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4704     PBIOp = 1;
4705     BIOp = 1;
4706   } else {
4707     return false;
4708   }
4709
4710   // Check to make sure that the other destination of this branch
4711   // isn't BB itself. If so, this is an infinite loop that will
4712   // keep getting unwound.
4713   if (PBI->getSuccessor(PBIOp) == BB)
4714     return false;
4715
4716   // If predecessor's branch probability to BB is too low don't merge branches.
4717   SmallVector<uint32_t, 2> PredWeights;
4718   if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4719       extractBranchWeights(*PBI, PredWeights) &&
4720       (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4721
4723         PredWeights[PBIOp],
4724         static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4725
4726     BranchProbability Likely = TTI.getPredictableBranchThreshold();
4727     if (CommonDestProb >= Likely)
4728       return false;
4729   }
4730
4731   // Do not perform this transformation if it would require
4732   // insertion of a large number of select instructions. For targets
4733   // without predication/cmovs, this is a big pessimization.
4734
4735   BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4736   BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4737   unsigned NumPhis = 0;
4738   for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4739        ++II, ++NumPhis) {
4740     if (NumPhis > 2) // Disable this xform.
4741       return false;
4742   }
4743
4744   // Finally, if everything is ok, fold the branches to logical ops.
4745   BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4746
4747   LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4748                     << "AND: " << *BI->getParent());
4749
4751
4752   // If OtherDest *is* BB, then BB is a basic block with a single conditional
4753   // branch in it, where one edge (OtherDest) goes back to itself but the other
4754   // exits. We don't *know* that the program avoids the infinite loop
4755   // (even though that seems likely). If we do this xform naively, we'll end up
4756   // recursively unpeeling the loop. Since we know that (after the xform is
4757   // done) that the block *is* infinite if reached, we just make it an obviously
4758   // infinite loop with no cond branch.
4759   if (OtherDest == BB) {
4760     // Insert it at the end of the function, because it's either code,
4761     // or it won't matter if it's hot. :)
4762     BasicBlock *InfLoopBlock =
4763         BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4764     BranchInst::Create(InfLoopBlock, InfLoopBlock);
4765     if (DTU)
4766       Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4767     OtherDest = InfLoopBlock;
4768   }
4769
4770   LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4771
4772   // BI may have other predecessors. Because of this, we leave
4773   // it alone, but modify PBI.
4774
4775   // Make sure we get to CommonDest on True&True directions.
4776   Value *PBICond = PBI->getCondition();
4777   IRBuilder<NoFolder> Builder(PBI);
4778   if (PBIOp)
4779     PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4780
4781   Value *BICond = BI->getCondition();
4782   if (BIOp)
4783     BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4784
4785   // Merge the conditions.
4786   Value *Cond =
4787       createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4788
4789   // Modify PBI to branch on the new condition to the new dests.
4790   PBI->setCondition(Cond);
4791   PBI->setSuccessor(0, CommonDest);
4792   PBI->setSuccessor(1, OtherDest);
4793
4794   if (DTU) {
4795     Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4796     Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4797
4798     DTU->applyUpdates(Updates);
4799   }
4800
4801   // Update branch weight for PBI.
4802   uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4803   uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4804   bool HasWeights =
4805       extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4806                              SuccTrueWeight, SuccFalseWeight);
4807   if (HasWeights) {
4808     PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4809     PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4810     SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4811     SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4812     // The weight to CommonDest should be PredCommon * SuccTotal +
4813     //                                    PredOther * SuccCommon.
4814     // The weight to OtherDest should be PredOther * SuccOther.
4815     uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4816                                   PredOther * SuccCommon,
4817                               PredOther * SuccOther};
4818
4819     setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4820                            /*ElideAllZero=*/true);
4821     // Cond may be a select instruction with the first operand set to "true", or
4822     // the second to "false" (see how createLogicalOp works for `and` and `or`)
4824     if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4825       assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4826       // The select is predicated on PBICond
4828       // The corresponding probabilities are what was referred to above as
4829       // PredCommon and PredOther.
4830       setFittedBranchWeights(*SI, {PredCommon, PredOther},
4831                              /*IsExpected=*/false, /*ElideAllZero=*/true);
4832     }
4833   }
4834
4835   // OtherDest may have phi nodes. If so, add an entry from PBI's
4836   // block that are identical to the entries for BI's block.
4837   addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4838
4839   // We know that the CommonDest already had an edge from PBI to
4840   // it. If it has PHIs though, the PHIs may have different
4841   // entries for BB and PBI's BB. If so, insert a select to make
4842   // them agree.
4843   for (PHINode &PN : CommonDest->phis()) {
4844     Value *BIV = PN.getIncomingValueForBlock(BB);
4845     unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4846     Value *PBIV = PN.getIncomingValue(PBBIdx);
4847     if (BIV != PBIV) {
4848       // Insert a select in PBI to pick the right value.
4850           Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4851       PN.setIncomingValue(PBBIdx, NV);
4852       // The select has the same condition as PBI, in the same BB. The
4853       // probabilities don't change.
4854       if (HasWeights) {
4855         uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4856         uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4857         setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4858                                /*IsExpected=*/false, /*ElideAllZero=*/true);
4859       }
4860     }
4861   }
4862
4863   LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4864   LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4865
4866   // This basic block is probably dead. We know it has at least
4867   // one fewer predecessor.
4868   return true;
4869 }
4870
4871 // Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4872 // true or to FalseBB if Cond is false.
4873 // Takes care of updating the successors and removing the old terminator.
4874 // Also makes sure not to introduce new successors by assuming that edges to
4875 // non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight/FalseWeight become the branch-weight metadata of the new
// conditional branch when one is created (all-zero weights are elided).
4876 bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4877                                                 Value *Cond, BasicBlock *TrueBB,
4878                                                 BasicBlock *FalseBB,
4879                                                 uint32_t TrueWeight,
4880                                                 uint32_t FalseWeight) {
4881   auto *BB = OldTerm->getParent();
4882   // Remove any superfluous successor edges from the CFG.
4883   // First, figure out which successors to preserve.
4884   // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4885   // successor.
4886   BasicBlock *KeepEdge1 = TrueBB;
4887   BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4888
4889   SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4890
4891   // Then remove the rest.
4892   for (BasicBlock *Succ : successors(OldTerm)) {
4893     // Make sure only to keep exactly one copy of each edge.
4894     if (Succ == KeepEdge1)
4895       KeepEdge1 = nullptr;
4896     else if (Succ == KeepEdge2)
4897       KeepEdge2 = nullptr;
4898     else {
4899       Succ->removePredecessor(BB,
4900                               /*KeepOneInputPHIs=*/true);
4901
4902       if (Succ != TrueBB && Succ != FalseBB)
4903         RemovedSuccessors.insert(Succ);
4904     }
4905   }
4906
4907   IRBuilder<> Builder(OldTerm);
4908   Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4909
4910   // Insert an appropriate new terminator.
  // KeepEdge1/KeepEdge2 were nulled out above when the corresponding block was
  // found among the old successors, so non-null here means "was NOT a
  // successor".
4911   if (!KeepEdge1 && !KeepEdge2) {
4912     if (TrueBB == FalseBB) {
4913       // We were only looking for one successor, and it was present.
4914       // Create an unconditional branch to it.
4915       Builder.CreateBr(TrueBB);
4916     } else {
4917       // We found both of the successors we were looking for.
4918       // Create a conditional branch sharing the condition of the select.
4919       BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4920       setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4921                        /*IsExpected=*/false, /*ElideAllZero=*/true);
4922     }
4923   } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4924     // Neither of the selected blocks were successors, so this
4925     // terminator must be unreachable.
4926     new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4927   } else {
4928     // One of the selected values was a successor, but the other wasn't.
4929     // Insert an unconditional branch to the one that was found;
4930     // the edge to the one that wasn't must be unreachable.
4931     if (!KeepEdge1) {
4932       // Only TrueBB was found.
4933       Builder.CreateBr(TrueBB);
4934     } else {
4935       // Only FalseBB was found.
4936       Builder.CreateBr(FalseBB);
4937     }
4938   }
4939
4941
4942   if (DTU) {
4943     SmallVector<DominatorTree::UpdateType, 2> Updates;
4944     Updates.reserve(RemovedSuccessors.size());
4945     for (auto *RemovedSuccessor : RemovedSuccessors)
4946       Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4947     DTU->applyUpdates(Updates);
4948   }
4949
4950   return true;
4951 }
4952
4953// Replaces
4954// (switch (select cond, X, Y)) on constant X, Y
4955// with a branch - conditional if X and Y lead to distinct BBs,
4956// unconditional otherwise.
4957bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4958 SelectInst *Select) {
4959 // Check for constant integer values in the select.
4960 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4961 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4962 if (!TrueVal || !FalseVal)
4963 return false;
4964
4965 // Find the relevant condition and destinations.
4966 Value *Condition = Select->getCondition();
4967 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4968 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4969
4970 // Get weight for TrueBB and FalseBB.
4971 uint32_t TrueWeight = 0, FalseWeight = 0;
4972 SmallVector<uint64_t, 8> Weights;
4973 bool HasWeights = hasBranchWeightMD(*SI);
4974 if (HasWeights) {
4975 getBranchWeights(SI, Weights);
4976 if (Weights.size() == 1 + SI->getNumCases()) {
4977 TrueWeight =
4978 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4979 FalseWeight =
4980 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4981 }
4982 }
4983
4984 // Perform the actual simplification.
4985 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4986 FalseWeight);
4987}
4988
4989 // Replaces
4990 //   (indirectbr (select cond, blockaddress(@fn, BlockA),
4991 //                             blockaddress(@fn, BlockB)))
4992 // with
4993 //   (br cond, BlockA, BlockB).
// Returns true iff the indirect branch was rewritten.
4994 bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4995                                                 SelectInst *SI) {
4996   // Check that both operands of the select are block addresses.
4997   BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4998   BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4999   if (!TBA || !FBA)
5000     return false;
5001
5002   // Extract the actual blocks.
5003   BasicBlock *TrueBB = TBA->getBasicBlock();
5004   BasicBlock *FalseBB = FBA->getBasicBlock();
5005
5006   // The select's profile becomes the profile of the conditional branch that
5007   // replaces the indirect branch.
  // The weights default to zero when no profile is extracted below;
  // simplifyTerminatorOnSelect elides all-zero weights rather than attaching
  // bogus metadata.
5008   SmallVector<uint32_t> SelectBranchWeights(2);
5010     extractBranchWeights(*SI, SelectBranchWeights);
5011   // Perform the actual simplification.
5012   return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
5013                                     SelectBranchWeights[0],
5014                                     SelectBranchWeights[1]);
5015 }
5016
5017/// This is called when we find an icmp instruction
5018/// (a seteq/setne with a constant) as the only instruction in a
5019/// block that ends with an uncond branch. We are looking for a very specific
5020/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5021/// this case, we merge the first two "or's of icmp" into a switch, but then the
5022/// default value goes to an uncond block with a seteq in it, we get something
5023/// like:
5024///
5025/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5026/// DEFAULT:
5027/// %tmp = icmp eq i8 %A, 92
5028/// br label %end
5029/// end:
5030/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5031///
5032/// We prefer to split the edge to 'end' so that there is a true/false entry to
5033/// the PHI, merging the third icmp into the switch.
5034bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5035 ICmpInst *ICI, IRBuilder<> &Builder) {
5036 // Select == nullptr means we assume that there is a hidden no-op select
5037 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5038 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5039}
5040
5041/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5042/// case. This is called when we find an icmp instruction (a seteq/setne with a
5043/// constant) and its following select instruction as the only TWO instructions
5044/// in a block that ends with an uncond branch. We are looking for a very
5045/// specific pattern that occurs when "
5046/// if (A == 1) return C1;
5047/// if (A == 2) return C2;
5048/// if (A < 3) return C3;
5049/// return C4;
5050/// " gets simplified. In this case, we merge the first two "branches of icmp"
5051/// into a switch, but then the default value goes to an uncond block with a lt
5052/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5053/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5054/// get something like:
5055///
5056/// case1:
5057/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5058/// case2:
5059/// br label %end
5060/// DEFAULT:
5061/// %tmp = icmp eq i8 %A, 2
5062/// %val = select i1 %tmp, i8 C3, i8 C4
5063/// br label %end
5064/// end:
5065/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5066///
5067/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5068/// to the PHI, merging the icmp & select into the switch, as follows:
5069///
5070/// case1:
5071/// switch i8 %A, label %DEFAULT [
5072/// i8 0, label %end
5073/// i8 1, label %case2
5074/// i8 2, label %case3
5075/// ]
5076/// case2:
5077/// br label %end
5078/// case3:
5079/// br label %end
5080/// DEFAULT:
5081/// br label %end
5082/// end:
5083/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
  // too complex.
  /// TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(ICI,
             m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
    return false;

  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  // NOTE(review): 'User' is assigned below but its declaration (presumably an
  // Instruction*) is not visible in this view -- confirm against upstream.
  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after icmp
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  // The switch and the icmp must test the same value for the fold to apply.
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
    Value *V;
    // NOTE(review): the assignments to 'V' for the EQ and NE branches are not
    // visible in this view -- confirm against upstream.
    if (Predicate == ICmpInst::ICMP_EQ)
    else

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(User);
  // NOTE(review): the continuation of this condition is not visible here.
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(DefaultCst, NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    // Split the default-destination weight: the new case edge takes roughly
    // half of what used to flow to the default.
    // NOTE(review): the declaration of 'NewW' is not visible in this view.
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(NewCaseVal, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5222
5223/// The specified branch is a conditional branch.
5224/// Check to see if it is branching on an or/and chain of icmp instructions, and
5225/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  // NOTE(review): the initialization of 'Cond' is not visible in this view --
  // presumably the branch condition cast to an Instruction.
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual) {
    // For an and-of-setne chain the "equal" values fall through to the false
    // successor, so swap the roles (and the profile weights to match).
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
    // NOTE(review): a use of 'Br' (likely profile-metadata handling) is not
    // visible in this view.

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  // NOTE(review): the erase call itself is not visible in this view.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5390
5391bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5392 if (isa<PHINode>(RI->getValue()))
5393 return simplifyCommonResume(RI);
5394 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5395 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5396 // The resume must unwind the exception that caused control to branch here.
5397 return simplifySingleResume(RI);
5398
5399 return false;
5400}
5401
// Check if cleanup block is empty: a range of instructions is considered an
// "empty" cleanup when it contains only debug-info intrinsics and lifetime.end
// markers, none of which have runtime semantics that a cleanup must execute.
// NOTE(review): the function signature line is not visible in this view; 'R'
// is presumed to be an iterator_range over a block's instructions.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    // Any non-intrinsic instruction makes the cleanup non-trivial.
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      // Benign: debug records and lifetime ends can be dropped with the block.
      break;
    default:
      return false;
    }
  }
  return true;
}
5422
// Simplify resume that is shared by several landing pads (phi of landing pad).
// Trivial incoming blocks (a lone landingpad feeding this resume) are cut off:
// their invokes are turned into calls, and the resume block is deleted once it
// has no remaining predecessors. Returns true if anything changed.
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    // NOTE(review): the wrapping call (presumably isCleanupBlockEmpty) on this
    // range is not visible in this view.
        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    // NOTE(review): the range expression of this loop (presumably the
    // predecessors of TrivialBB) is not visible in this view.
    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  return !TrivialUnwindBlocks.empty();
}
5492
// Simplify resume that is only used by a single (non-phi) landing pad: turn
// every invoke unwinding to this block into a call and delete the block.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
  // NOTE(review): the wrapping call (presumably isCleanupBlockEmpty) on this
  // range is not visible in this view.
          make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5515
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  // NOTE(review): the enclosing static function signature (taking the
  // CleanupReturnInst and a DomTreeUpdater) is not visible in this view.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  // NOTE(review): the wrapping call (presumably isCleanupBlockEmpty) on this
  // range is not visible in this view.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  // NOTE(review): the loop header iterating over BB's predecessors is not
  // visible in this view.
    if (UnwindDest == nullptr) {
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5631
// Try to merge two cleanuppads together: when a cleanupret is the sole
// predecessor of another cleanuppad, the successor pad can be folded into the
// predecessor pad and the cleanupret replaced by a plain branch.
// NOTE(review): the enclosing static function signature (taking the
// CleanupReturnInst) is not visible in this view.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5664
5665bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5666 // It is possible to transiantly have an undef cleanuppad operand because we
5667 // have deleted some, but not all, dead blocks.
5668 // Eventually, this block will be deleted.
5669 if (isa<UndefValue>(RI->getOperand(0)))
5670 return false;
5671
5672 if (mergeCleanupPad(RI))
5673 return true;
5674
5675 if (removeEmptyCleanup(RI, DTU))
5676 return true;
5677
5678 return false;
5679}
5680
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Simplify around an 'unreachable' terminator: delete droppable instructions
// preceding it, and if the block reduces to a lone 'unreachable', rewrite each
// predecessor's terminator so it no longer targets this block.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  // NOTE(review): the statement performing that move is not visible here.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    // NOTE(review): the declaration of 'BBI' and the guard condition before
    // the 'break' below are not visible in this view.
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // Record the fact that the surviving edge's condition must hold,
        // since the other edge is now known unreachable.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
        // NOTE(review): the removal of the old conditional branch is not
        // visible in this view.

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      // Drop every case that targets the unreachable block.
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The unwind edge is dead, so the invoke cannot actually throw.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // Remove every handler that targets the unreachable block.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5864
5873
// Determine whether one of the two case-value sets of a two-destination
// switch forms a contiguous (possibly wrapping) range, and if so describe it.
// NOTE(review): the first lines of the parameter list (the switch condition
// and the two case-value vectors) are not visible in this view.
static std::optional<ContiguousCasesResult>
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  // Cases are sorted in descending order: largest value at the front,
  // smallest at the back.
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  // If Max - Min equals #cases - 1, the (distinct) values cover [Min, Max].
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Locate the single gap in the descending sequence of case values.
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    // The two runs on either side of the gap must account for all cases.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
          // NOTE(review): a wrapping cast line appears elided here.
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5926
// Replace a switch's dead default destination with a fresh block containing
// only 'unreachable', optionally detaching the original default block.
// NOTE(review): the first line of this static helper's signature (taking the
// SwitchInst) is not visible in this view.
                                    DomTreeUpdater *DTU,
                                    bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  // Create a new block holding only 'unreachable' and make it the default.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  // NOTE(review): a statement using 'UI' (e.g. debug-location handling)
  // appears elided here.
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    // NOTE(review): the declaration of 'Updates' is not visible in this view.
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5950
5951 /// Turn a switch into an integer range comparison and branch.
5952 /// Switches with more than 2 destinations are ignored.
5953 /// Switches with 1 destination are also ignored.
// Returns true if the switch was replaced (by a conditional branch on an
// unsigned range compare, or by an unconditional branch when the contiguous
// range covers every possible condition value).
5954 bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5955 IRBuilder<> &Builder) {
5956 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5957
// A reachable default destination counts as one of the two destinations.
5958 bool HasDefault = !SI->defaultDestUnreachable();
5959
5960 auto *BB = SI->getParent();
5961 // Partition the cases into two sets with different destinations.
5962 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5963 BasicBlock *DestB = nullptr;
// NOTE(review): the declarations of the CasesA/CasesB case-value vectors
// (original source lines 5964-5965) are elided in this view.
5966
5967 for (auto Case : SI->cases()) {
5968 BasicBlock *Dest = Case.getCaseSuccessor();
5969 if (!DestA)
5970 DestA = Dest;
5971 if (Dest == DestA) {
5972 CasesA.push_back(Case.getCaseValue());
5973 continue;
5974 }
5975 if (!DestB)
5976 DestB = Dest;
5977 if (Dest == DestB) {
5978 CasesB.push_back(Case.getCaseValue());
5979 continue;
5980 }
5981 return false; // More than two destinations.
5982 }
5983 if (!DestB)
5984 return false; // All destinations are the same and the default is unreachable
5985
5986 assert(DestA && DestB &&
5987 "Single-destination switch should have been folded.");
5988 assert(DestA != DestB);
5989 assert(DestB != SI->getDefaultDest());
5990 assert(!CasesB.empty() && "There must be non-default cases.");
5991 assert(!CasesA.empty() || HasDefault);
5992
5993 // Figure out if one of the sets of cases form a contiguous range.
5994 std::optional<ContiguousCasesResult> ContiguousCases;
5995
5996 // Only one icmp is needed when there is only one case.
5997 if (!HasDefault && CasesA.size() == 1)
5998 ContiguousCases = ContiguousCasesResult{
5999 /*Min=*/CasesA[0],
6000 /*Max=*/CasesA[0],
6001 /*Dest=*/DestA,
6002 /*OtherDest=*/DestB,
6003 /*Cases=*/&CasesA,
6004 /*OtherCases=*/&CasesB,
6005 };
6006 else if (CasesB.size() == 1)
6007 ContiguousCases = ContiguousCasesResult{
6008 /*Min=*/CasesB[0],
6009 /*Max=*/CasesB[0],
6010 /*Dest=*/DestB,
6011 /*OtherDest=*/DestA,
6012 /*Cases=*/&CasesB,
6013 /*OtherCases=*/&CasesA,
6014 };
6015 // Correctness: Cases to the default destination cannot be contiguous cases.
6016 else if (!HasDefault)
6017 ContiguousCases =
6018 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6019
// Try the other orientation: a contiguous range branching to DestB.
6020 if (!ContiguousCases)
6021 ContiguousCases =
6022 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6023
6024 if (!ContiguousCases)
6025 return false;
6026
6027 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6028
6029 // Start building the compare and branch.
// NOTE(review): the declaration of Offset (original source line 6031,
// presumably -Min so the range is rebased to start at zero) is elided here.
6030
6032 Constant *NumCases = ConstantInt::get(Offset->getType(),
6033 Max->getValue() - Min->getValue() + 1);
6034 BranchInst *NewBI;
6035 if (NumCases->isOneValue()) {
// Single-value range: a plain equality compare suffices.
6036 assert(Max->getValue() == Min->getValue());
6037 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6038 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6039 }
6040 // If NumCases overflowed, then all possible values jump to the successor.
6041 else if (NumCases->isNullValue() && !Cases->empty()) {
6042 NewBI = Builder.CreateBr(Dest);
6043 } else {
// General case: rebase the condition by Offset, then one unsigned
// range check `(cond + Offset) u< NumCases`.
6044 Value *Sub = SI->getCondition();
6045 if (!Offset->isNullValue())
6046 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6047 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6048 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6049 }
6050
6051 // Update weight for the newly-created conditional branch.
6052 if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
6053 SmallVector<uint64_t, 8> Weights;
6054 getBranchWeights(SI, Weights);
6055 if (Weights.size() == 1 + SI->getNumCases()) {
// Sum the per-successor weights into the two sides of the new branch.
6056 uint64_t TrueWeight = 0;
6057 uint64_t FalseWeight = 0;
6058 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6059 if (SI->getSuccessor(I) == Dest)
6060 TrueWeight += Weights[I];
6061 else
6062 FalseWeight += Weights[I];
6063 }
// Scale both sides down together so each fits in 32 bits; the ratio,
// which is all branch_weights encodes, is preserved.
6064 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6065 TrueWeight /= 2;
6066 FalseWeight /= 2;
6067 }
6068 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6069 /*IsExpected=*/false, /*ElideAllZero=*/true);
6070 }
6071 }
6072
6073 // Prune obsolete incoming values off the successors' PHI nodes.
// The switch contributed one PHI entry per edge into each destination; the
// new branch keeps exactly one edge, so remove PreviousEdges - 1 entries.
6074 for (auto &PHI : make_early_inc_range(Dest->phis())) {
6075 unsigned PreviousEdges = Cases->size();
6076 if (Dest == SI->getDefaultDest())
6077 ++PreviousEdges;
6078 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6079 PHI.removeIncomingValue(SI->getParent());
6080 }
6081 for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6082 unsigned PreviousEdges = OtherCases->size();
6083 if (OtherDest == SI->getDefaultDest())
6084 ++PreviousEdges;
6085 unsigned E = PreviousEdges - 1;
6086 // Remove all incoming values from OtherDest if OtherDest is unreachable.
6087 if (NewBI->isUnconditional())
6088 ++E;
6089 for (unsigned I = 0; I != E; ++I)
6090 PHI.removeIncomingValue(SI->getParent());
6091 }
6092
6093 // Clean up the default block - it may have phis or other instructions before
6094 // the unreachable terminator.
// NOTE(review): the cleanup call guarded by this condition (original source
// line 6096) is elided in this view.
6095 if (!HasDefault)
6097
6098 auto *UnreachableDefault = SI->getDefaultDest();
6099
6100 // Drop the switch.
6101 SI->eraseFromParent();
6102
// If the default block was unreachable, its edge from BB is gone now.
6103 if (!HasDefault && DTU)
6104 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6105
6106 return true;
6107}
6108
6109 /// Compute masked bits for the condition of a switch
6110 /// and use it to remove dead cases.
// NOTE(review): the first line of the signature (original source line 6111,
// presumably `static bool eliminateDeadSwitchCases(SwitchInst *SI, ...`) is
// elided in this view. Returns true if the switch was changed.
6112 AssumptionCache *AC,
6113 const DataLayout &DL) {
6114 Value *Cond = SI->getCondition();
6115 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
// Try to enumerate the full set of values Cond can take (up to 4).
// NOTE(review): the declaration of KnownValues (line 6116) is elided here.
6117 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6118
6119 // We can also eliminate cases by determining that their values are outside of
6120 // the limited range of the condition based on how many significant (non-sign)
6121 // bits are in the condition value.
6122 unsigned MaxSignificantBitsInCond =
// NOTE(review): the initializer (line 6123) is elided in this view.
6124
6125 // Gather dead cases.
// NumPerSuccessorCases tracks, per successor, how many case edges remain so
// we know which DomTree edges to delete once dead cases are removed.
6127 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6128 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6129 for (const auto &Case : SI->cases()) {
6130 auto *Successor = Case.getCaseSuccessor();
6131 if (DTU) {
6132 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6133 if (Inserted)
6134 UniqueSuccessors.push_back(Successor);
6135 ++It->second;
6136 }
// A case is dead if it contradicts a known-zero/known-one bit, exceeds the
// significant-bit bound, or is absent from the enumerated value set.
6137 ConstantInt *CaseC = Case.getCaseValue();
6138 const APInt &CaseVal = CaseC->getValue();
6139 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6140 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6141 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6142 DeadCases.push_back(CaseC);
6143 if (DTU)
6144 --NumPerSuccessorCases[Successor];
6145 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6146 << " is dead.\n");
6147 } else if (IsKnownValuesValid)
6148 KnownValues.erase(CaseC);
6149 }
6150
6151 // If we can prove that the cases must cover all possible values, the
6152 // default destination becomes dead and we can remove it. If we know some
6153 // of the bits in the value, we can use that to more precisely compute the
6154 // number of possible unique case values.
6155 bool HasDefault = !SI->defaultDestUnreachable();
6156 const unsigned NumUnknownBits =
6157 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6158 assert(NumUnknownBits <= Known.getBitWidth());
6159 if (HasDefault && DeadCases.empty()) {
// Every remaining possible value is undef: the default can never be
// reached with a meaningful value.
// NOTE(review): the call on the elided line 6161 presumably makes the
// default unreachable — confirm against the full source.
6160 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
6162 return true;
6163 }
6164
6165 if (NumUnknownBits < 64 /* avoid overflow */) {
6166 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6167 if (SI->getNumCases() == AllNumCases) {
// Cases cover all 2^NumUnknownBits possible values; elided line 6168
// presumably removes the default destination.
6169 return true;
6170 }
6171 // When only one case value is missing, replace default with that case.
6172 // Eliminating the default branch will provide more opportunities for
6173 // optimization, such as lookup tables.
6174 if (SI->getNumCases() == AllNumCases - 1) {
6175 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6176 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6177 if (CondTy->getIntegerBitWidth() > 64 ||
6178 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6179 return false;
6180
// XOR trick: with k = NumUnknownBits > 1, XOR-ing all 2^k possible
// values yields 0 (each bit pattern appears an even number of times),
// so XOR-ing the 2^k - 1 present case values leaves exactly the
// missing value.
6181 uint64_t MissingCaseVal = 0;
6182 for (const auto &Case : SI->cases())
6183 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6184 auto *MissingCase = cast<ConstantInt>(
6185 ConstantInt::get(Cond->getType(), MissingCaseVal));
// Add the missing case targeting the old default, then retarget the
// default itself at a fresh unreachable block (weight moved to 0).
6187 SIW.addCase(MissingCase, SI->getDefaultDest(),
6188 SIW.getSuccessorWeight(0));
6190 /*RemoveOrigDefaultBlock*/ false);
6191 SIW.setSuccessorWeight(0, 0);
6192 return true;
6193 }
6194 }
6195 }
6196
6197 if (DeadCases.empty())
6198 return false;
6199
// Remove each dead case, pruning the corresponding PHI entries first.
6201 for (ConstantInt *DeadCase : DeadCases) {
6202 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6203 assert(CaseI != SI->case_default() &&
6204 "Case was not found. Probably mistake in DeadCases forming.");
6205 // Prune unused values from PHI nodes.
6206 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6207 SIW.removeCase(CaseI);
6208 }
6209
// Delete DomTree edges to successors that lost all their case edges.
6210 if (DTU) {
6211 std::vector<DominatorTree::UpdateType> Updates;
6212 for (auto *Successor : UniqueSuccessors)
6213 if (NumPerSuccessorCases[Successor] == 0)
6214 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6215 DTU->applyUpdates(Updates);
6216 }
6217
6218 return true;
6219}
6220
6221 /// If BB would be eligible for simplification by
6222 /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6223 /// by an unconditional branch), look at the phi node for BB in the successor
6224 /// block and see if the incoming value is equal to CaseValue. If so, return
6225 /// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): the first line of the signature (original source line 6226,
// taking the ConstantInt *CaseValue parameter) is elided in this view.
6227 BasicBlock *BB, int *PhiIndex) {
6228 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6229 return nullptr; // BB must be empty to be a candidate for simplification.
6230 if (!BB->getSinglePredecessor())
6231 return nullptr; // BB must be dominated by the switch.
6232
// NOTE(review): the declaration of Branch (elided line 6233) presumably
// dyn_casts BB's terminator to BranchInst.
6234 if (!Branch || !Branch->isUnconditional())
6235 return nullptr; // Terminator must be unconditional branch.
6236
6237 BasicBlock *Succ = Branch->getSuccessor(0);
6238
// Scan the successor's PHIs for one whose incoming value along the BB edge
// is exactly the case constant; that entry can be forwarded later.
6239 for (PHINode &PHI : Succ->phis()) {
6240 int Idx = PHI.getBasicBlockIndex(BB);
6241 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6242
6243 Value *InValue = PHI.getIncomingValue(Idx);
6244 if (InValue != CaseValue)
6245 continue;
6246
6247 *PhiIndex = Idx;
6248 return &PHI;
6249 }
6250
6251 return nullptr;
6252}
6253
6254 /// Try to forward the condition of a switch instruction to a phi node
6255 /// dominated by the switch, if that would mean that some of the destination
6256 /// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): the signature line (original source line 6257, presumably
// `static bool forwardSwitchConditionToPHI(SwitchInst *SI) {`) is elided.
6258 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6259
6260 ForwardingNodesMap ForwardingNodes;
6261 BasicBlock *SwitchBlock = SI->getParent();
6262 bool Changed = false;
6263 for (const auto &Case : SI->cases()) {
6264 ConstantInt *CaseValue = Case.getCaseValue();
6265 BasicBlock *CaseDest = Case.getCaseSuccessor();
6266
6267 // Replace phi operands in successor blocks that are using the constant case
6268 // value rather than the switch condition variable:
6269 // switchbb:
6270 // switch i32 %x, label %default [
6271 // i32 17, label %succ
6272 // ...
6273 // succ:
6274 // %r = phi i32 ... [ 17, %switchbb ] ...
6275 // -->
6276 // %r = phi i32 ... [ %x, %switchbb ] ...
6277
6278 for (PHINode &Phi : CaseDest->phis()) {
6279 // This only works if there is exactly 1 incoming edge from the switch to
6280 // a phi. If there is >1, that means multiple cases of the switch map to 1
6281 // value in the phi, and that phi value is not the switch condition. Thus,
6282 // this transform would not make sense (the phi would be invalid because
6283 // a phi can't have different incoming values from the same block).
6284 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6285 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6286 count(Phi.blocks(), SwitchBlock) == 1) {
6287 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6288 Changed = true;
6289 }
6290 }
6291
6292 // Collect phi nodes that are indirectly using this switch's case constants.
6293 int PhiIdx;
6294 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6295 ForwardingNodes[Phi].push_back(PhiIdx);
6296 }
6297
6298 for (auto &ForwardingNode : ForwardingNodes) {
6299 PHINode *Phi = ForwardingNode.first;
6300 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6301 // Check if it helps to fold PHI.
// Forwarding a single entry only pays off if the PHI already has the
// condition as another incoming value (entries then become mergeable).
6302 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6303 continue;
6304
6305 for (int Index : Indexes)
6306 Phi->setIncomingValue(Index, SI->getCondition());
6307 Changed = true;
6308 }
6309
6310 return Changed;
6311}
6312
6313 /// Return true if the backend will be able to handle
6314 /// initializing an array of constants like C.
// NOTE(review): the signature line (original source line 6315, presumably
// `static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)`)
// is elided in this view.
6316 if (C->isThreadDependent())
6317 return false;
6318 if (C->isDLLImportDependent())
6319 return false;
6320
// NOTE(review): the condition on elided lines 6321-6322 presumably rejects
// constants whose kind cannot appear in a global initializer; only its
// `return false` is visible here.
6323 return false;
6324
6325 // Globals cannot contain scalable types.
6326 if (C->getType()->isScalableTy())
6327 return false;
6328
// NOTE(review): elided line 6329 presumably opens
// `if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {`.
6330 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6331 // materializing the array of constants.
6332 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6333 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6334 return false;
6335 }
6336
// Finally defer to the target: some constants are legal but not worth a
// lookup table on this target.
6337 if (!TTI.shouldBuildLookupTablesForConstant(C))
6338 return false;
6339
6340 return true;
6341}
6342
6343 /// If V is a Constant, return it. Otherwise, try to look up
6344 /// its constant value in ConstantPool, returning 0 if it's not there.
6345 static Constant *
// NOTE(review): the parameter list (original source lines 6346-6347, taking
// Value *V and the Value->Constant pool map) is elided in this view.
6348 if (Constant *C = dyn_cast<Constant>(V))
6349 return C;
// Not a literal constant: fall back to values proven constant earlier
// during this walk (nullptr if absent).
6350 return ConstantPool.lookup(V);
6351}
6352
6353 /// Try to fold instruction I into a constant. This works for
6354 /// simple instructions such as binary operations where both operands are
6355 /// constant or can be replaced by constants from the ConstantPool. Returns the
6356 /// resulting constant on success, 0 otherwise.
6357 static Constant *
// NOTE(review): the parameter list and the `dyn_cast<SelectInst>` guard
// opening this branch (original source lines 6358-6360) are elided here.
6361 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6362 if (!A)
6363 return nullptr;
// Selects fold by picking the matching arm when the condition is a
// known all-ones / all-zeros constant.
6364 if (A->isAllOnesValue())
6365 return lookupConstant(Select->getTrueValue(), ConstantPool);
6366 if (A->isNullValue())
6367 return lookupConstant(Select->getFalseValue(), ConstantPool);
6368 return nullptr;
6369 }
6370
// Generic path: gather a constant for every operand, then constant-fold.
// NOTE(review): the declaration of COps (elided line 6371) is presumably a
// SmallVector<Constant *> operand buffer.
6372 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6373 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6374 COps.push_back(A);
6375 else
6376 return nullptr;
6377 }
6378
6379 return ConstantFoldInstOperands(I, COps, DL);
6380}
6381
6382 /// Try to determine the resulting constant values in phi nodes
6383 /// at the common destination basic block, *CommonDest, for one of the case
6384 /// destinations CaseDest corresponding to value CaseVal (nullptr for the
6385 /// default case), of a switch instruction SI.
6386 static bool
// NOTE(review): the first line of the parameter list (original source line
// 6387: SI, CaseVal, CaseDest) is elided in this view.
6388 BasicBlock **CommonDest,
6389 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6390 const DataLayout &DL, const TargetTransformInfo &TTI) {
6391 // The block from which we enter the common destination.
6392 BasicBlock *Pred = SI->getParent();
6393
6394 // If CaseDest is empty except for some side-effect free instructions through
6395 // which we can constant-propagate the CaseVal, continue to its successor.
// Seed the pool with "condition == CaseVal" so uses of the condition in the
// case block fold. NOTE(review): the ConstantPool declaration (elided line
// 6396) is presumably a SmallDenseMap<Value *, Constant *>.
6397 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6398 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6399 if (I.isTerminator()) {
6400 // If the terminator is a simple branch, continue to the next block.
6401 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6402 return false;
6403 Pred = CaseDest;
6404 CaseDest = I.getSuccessor(0);
6405 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6406 // Instruction is side-effect free and constant.
6407
6408 // If the instruction has uses outside this block or a phi node slot for
6409 // the block, it is not safe to bypass the instruction since it would then
6410 // no longer dominate all its uses.
6411 for (auto &Use : I.uses()) {
6412 User *User = Use.getUser();
// NOTE(review): elided line 6413 presumably re-binds `I` to the user
// cast to Instruction for the parent check below.
6414 if (I->getParent() == CaseDest)
6415 continue;
6416 if (PHINode *Phi = dyn_cast<PHINode>(User))
6417 if (Phi->getIncomingBlock(Use) == CaseDest)
6418 continue;
6419 return false;
6420 }
6421
6422 ConstantPool.insert(std::make_pair(&I, C));
6423 } else {
// Neither a simple terminator nor foldable: stop the forward walk here.
6424 break;
6425 }
6426 }
6427
6428 // If we did not have a CommonDest before, use the current one.
6429 if (!*CommonDest)
6430 *CommonDest = CaseDest;
6431 // If the destination isn't the common one, abort.
6432 if (CaseDest != *CommonDest)
6433 return false;
6434
6435 // Get the values for this case from phi nodes in the destination block.
6436 for (PHINode &PHI : (*CommonDest)->phis()) {
6437 int Idx = PHI.getBasicBlockIndex(Pred);
6438 if (Idx == -1)
6439 continue;
6440
6441 Constant *ConstVal =
6442 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6443 if (!ConstVal)
6444 return false;
6445
6446 // Be conservative about which kinds of constants we support.
6447 if (!validLookupTableConstant(ConstVal, TTI))
6448 return false;
6449
6450 Res.push_back(std::make_pair(&PHI, ConstVal));
6451 }
6452
// Success only if at least one PHI produced a constant for this case.
6453 return Res.size() > 0;
6454}
6455
6456// Helper function used to add CaseVal to the list of cases that generate
6457// Result. Returns the updated number of cases that generate this result.
6458static size_t mapCaseToResult(ConstantInt *CaseVal,
6459 SwitchCaseResultVectorTy &UniqueResults,
6460 Constant *Result) {
6461 for (auto &I : UniqueResults) {
6462 if (I.first == Result) {
6463 I.second.push_back(CaseVal);
6464 return I.second.size();
6465 }
6466 }
6467 UniqueResults.push_back(
6468 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6469 return 1;
6470}
6471
6472 // Helper function that initializes a map containing
6473 // results for the PHI node of the common destination block for a switch
6474 // instruction. Returns false if multiple PHI nodes have been found or if
6475 // there is not a common destination block for the switch.
// NOTE(review): the first line of the signature (original source line 6476,
// presumably `static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,`)
// is elided in this view.
6477 BasicBlock *&CommonDest,
6478 SwitchCaseResultVectorTy &UniqueResults,
6479 Constant *&DefaultResult,
6480 const DataLayout &DL,
6481 const TargetTransformInfo &TTI,
6482 uintptr_t MaxUniqueResults) {
6483 for (const auto &I : SI->cases()) {
6484 ConstantInt *CaseVal = I.getCaseValue();
6485
6486 // Resulting value at phi nodes for this case value.
6487 SwitchCaseResultsTy Results;
6488 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6489 DL, TTI))
6490 return false;
6491
6492 // Only one value per case is permitted.
6493 if (Results.size() > 1)
6494 return false;
6495
6496 // Add the case->result mapping to UniqueResults.
6497 const size_t NumCasesForResult =
6498 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6499
6500 // Early out if there are too many cases for this result.
6501 if (NumCasesForResult > MaxSwitchCasesPerResult)
6502 return false;
6503
6504 // Early out if there are too many unique results.
6505 if (UniqueResults.size() > MaxUniqueResults)
6506 return false;
6507
6508 // Check the PHI consistency.
// All cases must feed the same single PHI in the common destination.
6509 if (!PHI)
6510 PHI = Results[0].first;
6511 else if (PHI != Results[0].first)
6512 return false;
6513 }
6514 // Find the default result value.
// NOTE(review): the declaration of DefaultResults (elided line 6515) is
// presumably a SwitchCaseResultsTy; CaseVal==nullptr selects the default.
6516 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6517 DL, TTI);
6518 // If the default value is not found abort unless the default destination
6519 // is unreachable.
6520 DefaultResult =
6521 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6522
6523 return DefaultResult || SI->defaultDestUnreachable();
6524}
6525
6526 // Helper function that checks if it is possible to transform a switch with only
6527 // two cases (or two cases + default) that produces a result into a select.
6528 // TODO: Handle switches with more than 2 cases that map to the same result.
6529 // The branch weights correspond to the provided Condition (i.e. if Condition is
6530 // modified from the original SwitchInst, the caller must adjust the weights)
// Returns the select chain's final value on success, nullptr otherwise.
6531 static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6532 Constant *DefaultResult, Value *Condition,
6533 IRBuilder<> &Builder, const DataLayout &DL,
6534 ArrayRef<uint32_t> BranchWeights) {
6535 // If we are selecting between only two cases transform into a simple
6536 // select or a two-way select if default is possible.
6537 // Example:
6538 // switch (a) { %0 = icmp eq i32 %a, 10
6539 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6540 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6541 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6542 // }
6543
6544 const bool HasBranchWeights =
6545 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6546
6547 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6548 ResultVector[1].second.size() == 1) {
6549 ConstantInt *FirstCase = ResultVector[0].second[0];
6550 ConstantInt *SecondCase = ResultVector[1].second[0];
6551 Value *SelectValue = ResultVector[1].first;
6552 if (DefaultResult) {
// Inner select chooses between the second case's result and the default.
6553 Value *ValueCompare =
6554 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6555 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6556 DefaultResult, "switch.select");
6557 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6558 SI && HasBranchWeights) {
6559 // We start with 3 probabilities, where the numerator is the
6560 // corresponding BranchWeights[i], and the denominator is the sum over
6561 // BranchWeights. We want the probability and negative probability of
6562 // Condition == SecondCase.
6563 assert(BranchWeights.size() == 3);
// NOTE(review): the call opening (elided line 6564, presumably
// setFittedBranchWeights() applied to SI) is missing in this view.
6565 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6566 /*IsExpected=*/false, /*ElideAllZero=*/true);
6567 }
6568 }
// Outer select chooses between the first case's result and the rest.
6569 Value *ValueCompare =
6570 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6571 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6572 SelectValue, "switch.select");
6573 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6574 // We may have had a DefaultResult. Base the position of the first and
6575 // second's branch weights accordingly. Also the proability that Condition
6576 // != FirstCase needs to take that into account.
6577 assert(BranchWeights.size() >= 2);
6578 size_t FirstCasePos = (Condition != nullptr);
6579 size_t SecondCasePos = FirstCasePos + 1;
6580 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
// NOTE(review): the call opening (elided line 6581) is missing here.
6582 {BranchWeights[FirstCasePos],
6583 DefaultCase + BranchWeights[SecondCasePos]},
6584 /*IsExpected=*/false, /*ElideAllZero=*/true);
6585 }
6586 return Ret;
6587 }
6588
6589 // Handle the degenerate case where two cases have the same result value.
6590 if (ResultVector.size() == 1 && DefaultResult) {
6591 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6592 unsigned CaseCount = CaseValues.size();
6593 // n bits group cases map to the same result:
6594 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6595 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6596 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6597 if (isPowerOf2_32(CaseCount)) {
6598 ConstantInt *MinCaseVal = CaseValues[0];
6599 // If there are bits that are set exclusively by CaseValues, we
6600 // can transform the switch into a select if the conjunction of
6601 // all the values uniquely identify CaseValues.
6602 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6603
6604 // Find the minimum value and compute the and of all the case values.
6605 for (auto *Case : CaseValues) {
6606 if (Case->getValue().slt(MinCaseVal->getValue()))
6607 MinCaseVal = Case;
6608 AndMask &= Case->getValue();
6609 }
6610 KnownBits Known = computeKnownBits(Condition, DL);
6611
6612 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6613 // Compute the number of bits that are free to vary.
6614 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6615
6616 // Check if the number of values covered by the mask is equal
6617 // to the number of cases.
6618 if (FreeBits == Log2_32(CaseCount)) {
6619 Value *And = Builder.CreateAnd(Condition, AndMask);
6620 Value *Cmp = Builder.CreateICmpEQ(
6621 And, Constant::getIntegerValue(And->getType(), AndMask));
6622 Value *Ret =
6623 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6624 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6625 // We know there's a Default case. We base the resulting branch
6626 // weights off its probability.
6627 assert(BranchWeights.size() >= 2);
// NOTE(review): the call opening (elided line 6628) is missing here.
6629 *SI,
6630 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6631 /*IsExpected=*/false, /*ElideAllZero=*/true);
6632 }
6633 return Ret;
6634 }
6635 }
6636
6637 // Mark the bits case number touched.
6638 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6639 for (auto *Case : CaseValues)
6640 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6641
6642 // Check if cases with the same result can cover all number
6643 // in touched bits.
6644 if (BitMask.popcount() == Log2_32(CaseCount)) {
// Rebase to MinCaseVal, then require all bits outside BitMask be zero.
6645 if (!MinCaseVal->isNullValue())
6646 Condition = Builder.CreateSub(Condition, MinCaseVal);
6647 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6648 Value *Cmp = Builder.CreateICmpEQ(
6649 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6650 Value *Ret =
6651 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6652 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6653 assert(BranchWeights.size() >= 2);
// NOTE(review): the call opening (elided line 6654) is missing here.
6655 *SI,
6656 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6657 /*IsExpected=*/false, /*ElideAllZero=*/true);
6658 }
6659 return Ret;
6660 }
6661 }
6662
6663 // Handle the degenerate case where two cases have the same value.
6664 if (CaseValues.size() == 2) {
6665 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6666 "switch.selectcmp.case1");
6667 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6668 "switch.selectcmp.case2");
6669 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6670 Value *Ret =
6671 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6672 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6673 assert(BranchWeights.size() >= 2);
// NOTE(review): the call opening (elided line 6674) is missing here.
6675 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6676 /*IsExpected=*/false, /*ElideAllZero=*/true);
6677 }
6678 return Ret;
6679 }
6680 }
6681
6682 return nullptr;
6683}
6684
6685 // Helper function to cleanup a switch instruction that has been converted into
6686 // a select, fixing up PHI nodes and basic blocks.
// NOTE(review): the first line of the signature (original source line 6687,
// presumably `static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,`)
// is elided in this view.
6688 Value *SelectValue,
6689 IRBuilder<> &Builder,
6690 DomTreeUpdater *DTU) {
6691 std::vector<DominatorTree::UpdateType> Updates;
6692
6693 BasicBlock *SelectBB = SI->getParent();
6694 BasicBlock *DestBB = PHI->getParent();
6695
// Add the direct edge SelectBB -> DestBB (unless it already exists) and
// replace the switch's terminator role with an unconditional branch.
6696 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6697 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6698 Builder.CreateBr(DestBB);
6699
6700 // Remove the switch.
6701
// Collapse all of SelectBB's incoming PHI entries into one carrying the
// computed select value.
6702 PHI->removeIncomingValueIf(
6703 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6704 PHI->addIncoming(SelectValue, SelectBB);
6705
6706 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6707 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6708 BasicBlock *Succ = SI->getSuccessor(i);
6709
6710 if (Succ == DestBB)
6711 continue;
6712 Succ->removePredecessor(SelectBB);
// Deduplicate: a successor reached by several cases yields one DT edge.
6713 if (DTU && RemovedSuccessors.insert(Succ).second)
6714 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6715 }
6716 SI->eraseFromParent();
6717 if (DTU)
6718 DTU->applyUpdates(Updates);
6719}
6720
6721 /// If a switch is only used to initialize one or more phi nodes in a common
6722 /// successor block with only two different constant values, try to replace the
6723 /// switch with a select. Returns true if the fold was made.
// NOTE(review): the signature line (original source line 6724, presumably
// `static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,`) is
// elided in this view.
6725 DomTreeUpdater *DTU, const DataLayout &DL,
6726 const TargetTransformInfo &TTI) {
6727 Value *const Cond = SI->getCondition();
6728 PHINode *PHI = nullptr;
6729 BasicBlock *CommonDest = nullptr;
6730 Constant *DefaultResult;
6731 SwitchCaseResultVectorTy UniqueResults;
6732 // Collect all the cases that will deliver the same value from the switch.
6733 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6734 DL, TTI, /*MaxUniqueResults*/ 2))
6735 return false;
6736
6737 assert(PHI != nullptr && "PHI for value select not found");
6738 Builder.SetInsertPoint(SI);
6739 SmallVector<uint32_t, 4> BranchWeights;
// NOTE(review): the guard opening this scope and the profile-weight
// extraction call (elided lines 6740 and 6742) are missing in this view.
6741 [[maybe_unused]] auto HasWeights =
6743 assert(!HasWeights == (BranchWeights.empty()));
6744 }
// Weights, when present, cover the default slot plus each unique result.
6745 assert(BranchWeights.empty() ||
6746 (BranchWeights.size() >=
6747 UniqueResults.size() + (DefaultResult != nullptr)));
6748
6749 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6750 Builder, DL, BranchWeights);
6751 if (!SelectValue)
6752 return false;
6753
// Fold succeeded: rewire the PHI/CFG and delete the switch.
6754 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6755 return true;
6756}
6757
6758 namespace {
6759
6760 /// This class finds alternatives for switches to ultimately
6761 /// replace the switch.
6762 class SwitchReplacement {
6763 public:
6764 /// Create a helper for optimizations to use as a switch replacement.
6765 /// Find a better representation for the content of Values,
6766 /// using DefaultValue to fill any holes in the table.
6767 SwitchReplacement(
6768 Module &M, uint64_t TableSize, ConstantInt *Offset,
6769 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6770 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6771
6772 /// Build instructions with Builder to retrieve values using Index
6773 /// and replace the switch.
6774 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6775 Function *Func);
6776
6777 /// Return true if a table with TableSize elements of
6778 /// type ElementType would fit in a target-legal register.
6779 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6780 Type *ElementType);
6781
6782 /// Return the default value of the switch.
6783 Constant *getDefaultValue();
6784
6785 /// Return true if the replacement is a lookup table.
6786 bool isLookupTable();
6787
6788 /// Return true if the replacement is a bit map.
6789 bool isBitMap();
6790
6791 private:
6792 // Depending on the switch, there are different alternatives.
// The constructor picks exactly one of these strategies, cheapest first.
6793 enum {
6794 // For switches where each case contains the same value, we just have to
6795 // store that single value and return it for each lookup.
6796 SingleValueKind,
6797
6798 // For switches where there is a linear relationship between table index
6799 // and values. We calculate the result with a simple multiplication
6800 // and addition instead of a table lookup.
6801 LinearMapKind,
6802
6803 // For small tables with integer elements, we can pack them into a bitmap
6804 // that fits into a target-legal register. Values are retrieved by
6805 // shift and mask operations.
6806 BitMapKind,
6807
6808 // The table is stored as an array of values. Values are retrieved by load
6809 // instructions from the table.
6810 LookupTableKind
6811 } Kind;
6812
6813 // The default value of the switch.
6814 Constant *DefaultValue;
6815
6816 // The type of the output values.
6817 Type *ValueType;
6818
6819 // For SingleValueKind, this is the single value.
6820 Constant *SingleValue = nullptr;
6821
6822 // For BitMapKind, this is the bitmap.
6823 ConstantInt *BitMap = nullptr;
6824 IntegerType *BitMapElementTy = nullptr;
6825
6826 // For LinearMapKind, these are the constants used to derive the value.
// Result = LinearMultiplier * Index + LinearOffset; LinearMapValWrapped
// records whether the mapping may wrap (affects nsw flags on the emitted
// arithmetic — see the constructor).
6827 ConstantInt *LinearOffset = nullptr;
6828 ConstantInt *LinearMultiplier = nullptr;
6829 bool LinearMapValWrapped = false;
6830
6831 // For LookupTableKind, this is the table.
6832 Constant *Initializer = nullptr;
6833 };
6834
6835 } // end anonymous namespace
6836
// Analyze the per-case results in \p Values (plus \p DefaultValue for any
// holes) and pick the cheapest representation of the switch's results: a
// single constant, a linear function of the table index, a register-wide
// bitmap, or an array initializer for an in-memory lookup table. The chosen
// strategy is recorded in Kind.
// NOTE(review): \p FuncName is unused in the visible body — possibly consumed
// by code lost in extraction; confirm against upstream.
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Slots are indexed relative to Offset.
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // A poison result doesn't disqualify the single-value representation; any
    // other distinct result does.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  // NOTE(review): a line (original 6888) was dropped during extraction here;
  // the extra closing brace after the LinearMapKind return suggests it opened
  // a scope, likely a check that ValueType is an integer type — TODO restore
  // from upstream.
  bool LinearMappingPossible = true;
  APInt PrevVal;
  APInt DistToPrev;
  // When linear map is monotonic and signed overflow doesn't happen on
  // maximum index, we can attach nsw on Add and Mul.
  bool NonMonotonic = false;
  assert(TableSize >= 2 && "Should be a SingleValue table.");
  // Check if there is the same distance between two consecutive values.
  for (uint64_t I = 0; I < TableSize; ++I) {
    ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

    if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
      // This is an poison, so it's (probably) a lookup table hole.
      // To prevent any regressions from before we switched to using poison as
      // the default value, holes will fall back to using the first value.
      // This can be removed once we add proper handling for poisons in lookup
      // tables.
      ConstVal = dyn_cast<ConstantInt>(Values[0].second);
    }

    if (!ConstVal) {
      // This is an undef. We could deal with it, but undefs in lookup tables
      // are very seldom. It's probably not worth the additional complexity.
      LinearMappingPossible = false;
      break;
    }
    const APInt &Val = ConstVal->getValue();
    if (I != 0) {
      APInt Dist = Val - PrevVal;
      if (I == 1) {
        DistToPrev = Dist;
      } else if (Dist != DistToPrev) {
        LinearMappingPossible = false;
        break;
      }
      NonMonotonic |=
          Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
    }
    PrevVal = Val;
  }
  if (LinearMappingPossible) {
    LinearOffset = cast<ConstantInt>(TableContents[0]);
    LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
    // Note: this local M shadows the Module parameter within this scope.
    APInt M = LinearMultiplier->getValue();
    bool MayWrap = true;
    if (isIntN(M.getBitWidth(), TableSize - 1))
      (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
    LinearMapValWrapped = NonMonotonic || MayWrap;
    Kind = LinearMapKind;
    return;
  }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    // NOTE(review): a declaration (original line 6944) was dropped during
    // extraction here — presumably the IntegerType *IT used below; TODO
    // restore from upstream.
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Pack elements back-to-front so element 0 ends up in the low bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6966
6967Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6968 const DataLayout &DL, Function *Func) {
6969 switch (Kind) {
6970 case SingleValueKind:
6971 return SingleValue;
6972 case LinearMapKind: {
6973 ++NumLinearMaps;
6974 // Derive the result value from the input value.
6975 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6976 false, "switch.idx.cast");
6977 if (!LinearMultiplier->isOne())
6978 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6979 /*HasNUW = */ false,
6980 /*HasNSW = */ !LinearMapValWrapped);
6981
6982 if (!LinearOffset->isZero())
6983 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6984 /*HasNUW = */ false,
6985 /*HasNSW = */ !LinearMapValWrapped);
6986 return Result;
6987 }
6988 case BitMapKind: {
6989 ++NumBitMaps;
6990 // Type of the bitmap (e.g. i59).
6991 IntegerType *MapTy = BitMap->getIntegerType();
6992
6993 // Cast Index to the same type as the bitmap.
6994 // Note: The Index is <= the number of elements in the table, so
6995 // truncating it to the width of the bitmask is safe.
6996 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6997
6998 // Multiply the shift amount by the element width. NUW/NSW can always be
6999 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
7000 // BitMap's bit width.
7001 ShiftAmt = Builder.CreateMul(
7002 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
7003 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7004
7005 // Shift down.
7006 Value *DownShifted =
7007 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7008 // Mask off.
7009 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7010 }
7011 case LookupTableKind: {
7012 ++NumLookupTables;
7013 auto *Table =
7014 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7015 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7016 Initializer, "switch.table." + Func->getName());
7017 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7018 // Set the alignment to that of an array items. We will be only loading one
7019 // value out of it.
7020 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7021 Type *IndexTy = DL.getIndexType(Table->getType());
7022 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7023
7024 if (Index->getType() != IndexTy) {
7025 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7026 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7027 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7028 Zext->setNonNeg(
7029 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7030 }
7031
7032 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7033 Value *GEP =
7034 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7035 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7036 }
7037 }
7038 llvm_unreachable("Unknown helper kind!");
7039}
7040
7041bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7042 uint64_t TableSize,
7043 Type *ElementType) {
7044 auto *IT = dyn_cast<IntegerType>(ElementType);
7045 if (!IT)
7046 return false;
7047 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7048 // are <= 15, we could try to narrow the type.
7049
7050 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7051 if (TableSize >= UINT_MAX / IT->getBitWidth())
7052 return false;
7053 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7054}
7055
7057 const DataLayout &DL) {
7058 // Allow any legal type.
7059 if (TTI.isTypeLegal(Ty))
7060 return true;
7061
7062 auto *IT = dyn_cast<IntegerType>(Ty);
7063 if (!IT)
7064 return false;
7065
7066 // Also allow power of 2 integer types that have at least 8 bits and fit in
7067 // a register. These types are common in frontend languages and targets
7068 // usually support loads of these types.
7069 // TODO: We could relax this to any integer that fits in a register and rely
7070 // on ABI alignment and padding in the table to allow the load to be widened.
7071 // Or we could widen the constants and truncate the load.
7072 unsigned BitWidth = IT->getBitWidth();
7073 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7074 DL.fitsInLegalInteger(IT->getBitWidth());
7075}
7076
7077Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7078
7079bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7080
7081bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7082
/// Returns true if \p NumCases cases spread over \p CaseRange consecutive
/// values are dense enough to be worth a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Refuse ranges large enough to make either product below wrap.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Dense iff NumCases / CaseRange >= MinDensity percent.
  return NumCases * 100 >= CaseRange * MinDensity;
}
7094
7096 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7097 uint64_t Range = Diff + 1;
7098 if (Range < Diff)
7099 return false; // Overflow.
7100
7101 return isSwitchDense(Values.size(), Range);
7102}
7103
7104/// Determine whether a lookup table should be built for this switch, based on
7105/// the number of cases, size of the table, and the types of the results.
7106// TODO: We could support larger than legal types by limiting based on the
7107// number of loads required and/or table size. If the constants are small we
7108// could use smaller table entries and extend after the load.
7110 const TargetTransformInfo &TTI,
7111 const DataLayout &DL,
7112 const SmallVector<Type *> &ResultTypes) {
7113 if (SI->getNumCases() > TableSize)
7114 return false; // TableSize overflowed.
7115
7116 bool AllTablesFitInRegister = true;
7117 bool HasIllegalType = false;
7118 for (const auto &Ty : ResultTypes) {
7119 // Saturate this flag to true.
7120 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7121
7122 // Saturate this flag to false.
7123 AllTablesFitInRegister =
7124 AllTablesFitInRegister &&
7125 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7126
7127 // If both flags saturate, we're done. NOTE: This *only* works with
7128 // saturating flags, and all flags have to saturate first due to the
7129 // non-deterministic behavior of iterating over a dense map.
7130 if (HasIllegalType && !AllTablesFitInRegister)
7131 break;
7132 }
7133
7134 // If each table would fit in a register, we should build it anyway.
7135 if (AllTablesFitInRegister)
7136 return true;
7137
7138 // Don't build a table that doesn't fit in-register if it has illegal types.
7139 if (HasIllegalType)
7140 return false;
7141
7142 return isSwitchDense(SI->getNumCases(), TableSize);
7143}
7144
7146 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7147 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7148 const DataLayout &DL, const TargetTransformInfo &TTI) {
7149 if (MinCaseVal.isNullValue())
7150 return true;
7151 if (MinCaseVal.isNegative() ||
7152 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7153 !HasDefaultResults)
7154 return false;
7155 return all_of(ResultTypes, [&](const auto &ResultType) {
7156 return SwitchReplacement::wouldFitInRegister(
7157 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7158 });
7159}
7160
7161/// Try to reuse the switch table index compare. Following pattern:
7162/// \code
7163/// if (idx < tablesize)
7164/// r = table[idx]; // table does not contain default_value
7165/// else
7166/// r = default_value;
7167/// if (r != default_value)
7168/// ...
7169/// \endcode
7170/// Is optimized to:
7171/// \code
7172/// cond = idx < tablesize;
7173/// if (cond)
7174/// r = table[idx];
7175/// else
7176/// r = default_value;
7177/// if (cond)
7178/// ...
7179/// \endcode
7180/// Jump threading will then eliminate the second if(cond).
7182 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
7183 Constant *DefaultValue,
7184 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7186 if (!CmpInst)
7187 return;
7188
7189 // We require that the compare is in the same block as the phi so that jump
7190 // threading can do its work afterwards.
7191 if (CmpInst->getParent() != PhiBlock)
7192 return;
7193
7195 if (!CmpOp1)
7196 return;
7197
7198 Value *RangeCmp = RangeCheckBranch->getCondition();
7199 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7200 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7201
7202 // Check if the compare with the default value is constant true or false.
7203 const DataLayout &DL = PhiBlock->getDataLayout();
7205 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7206 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7207 return;
7208
7209 // Check if the compare with the case values is distinct from the default
7210 // compare result.
7211 for (auto ValuePair : Values) {
7213 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7214 if (!CaseConst || CaseConst == DefaultConst ||
7215 (CaseConst != TrueConst && CaseConst != FalseConst))
7216 return;
7217 }
7218
7219 // Check if the branch instruction dominates the phi node. It's a simple
7220 // dominance check, but sufficient for our needs.
7221 // Although this check is invariant in the calling loops, it's better to do it
7222 // at this late stage. Practically we do it at most once for a switch.
7223 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7224 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7225 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7226 return;
7227 }
7228
7229 if (DefaultConst == FalseConst) {
7230 // The compare yields the same result. We can replace it.
7231 CmpInst->replaceAllUsesWith(RangeCmp);
7232 ++NumTableCmpReuses;
7233 } else {
7234 // The compare yields the same result, just inverted. We can replace it.
7235 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7236 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7237 RangeCheckBranch->getIterator());
7238 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7239 ++NumTableCmpReuses;
7240 }
7241}
7242
7243/// If the switch is only used to initialize one or more phi nodes in a common
7244/// successor block with different constant values, replace the switch with
7245/// lookup tables.
// NOTE(review): the opening line of this function's signature (original line
// 7246) was lost during doc extraction; the body below begins mid-signature.
// The function name is not visible anywhere in this chunk — restore from
// upstream.
    DomTreeUpdater *DTU, const DataLayout &DL,
    const TargetTransformInfo &TTI,
    bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  // NOTE(review): the declaration of the per-PHI result-list map (original
  // line 7277) was dropped during extraction — presumably the ResultLists
  // container used below; TODO restore from upstream.

  SmallVector<Type *> ResultTypes;
  // NOTE(review): the declarations of the default-result map and the PHI list
  // (original lines 7279 and 7281) were dropped during extraction —
  // presumably the DefaultResults and PHIs containers used below; TODO
  // restore from upstream.

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    // NOTE(review): the ResultsTy alias declaration (original line 7291) was
    // dropped during extraction — TODO restore from upstream.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // Table is indexed from zero by the raw condition value.
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    // Table is rebased so MinCaseVal maps to slot zero.
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exist.
      ConstantRange CR =
          computeConstantRange(TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  // NOTE(review): the declaration of PhiToReplacementMap (original line 7398)
  // was dropped during extraction — TODO restore from upstream.
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
    Constant *DefaultVal =
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  BranchInst *RangeCheckBranch = nullptr;
  BranchInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removeProdecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7594
7595/// Try to transform a switch that has "holes" in it to a contiguous sequence
7596/// of cases.
7597///
7598/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7599/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7600///
7601/// This converts a sparse switch into a dense switch which allows better
7602/// lowering and could also allow transforming into a lookup table.
// NOTE(review): the opening line of this function's signature (original line
// 7603) was lost during doc extraction; the body below begins mid-signature.
// The function name is not visible in this chunk — restore from upstream.
    const DataLayout &DL,
    const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  // Only conditions that fit a legal integer of at most 64 bits are handled.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  // NOTE(review): the declaration of the Values vector (original line 7619,
  // presumably a SmallVector<int64_t>) was dropped during extraction — TODO
  // restore from upstream.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::getSigned(Ty, Base));
  // fshl(x, x, W - Shift) is ROTR(x, Shift).
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Rewrite every case value through the same rebase-and-shift mapping.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7681
/// Tries to transform the switch when the condition is umin with a constant.
/// In that case, the default branch can be replaced by the constant's branch.
/// This method also removes dead cases when the simplification cannot replace
/// the default branch.
///
/// For example:
/// switch(umin(a, 3)) {
/// case 0:
/// case 1:
/// case 2:
/// case 3:
/// case 4:
///   // ...
/// default:
///   unreachable
/// }
///
/// Transforms into:
///
/// switch(a) {
/// case 0:
/// case 1:
/// case 2:
/// default:
///   // This is case 3
/// }
  Value *A;

  // Only handle conditions of the exact form umin(A, <constant>).
  if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
    return false;

  BasicBlock *BB = SIW->getParent();

  // Dead cases are removed even when the simplification fails.
  // A case is dead when its value is higher than the Constant, because
  // umin(A, Constant) can never exceed Constant.
  for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
    if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
      ++I;
      continue;
    }
    BasicBlock *DeadCaseBB = I->getCaseSuccessor();
    DeadCaseBB->removePredecessor(BB);
    Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
    I = SIW.removeCase(I);
    // removeCase invalidates the end iterator; refresh it.
    E = SIW->case_end();
  }

  auto Case = SI->findCaseValue(Constant);
  // If the case value is not found, `findCaseValue` returns the default case.
  // In this scenario, since there is no explicit `case 3:`, the simplification
  // fails. The simplification also fails when the switch's default destination
  // is reachable.
  if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
    if (DTU)
      DTU->applyUpdates(Updates);
    // Still report a change if any dead cases were deleted above.
    return !Updates.empty();
  }

  // Every value of A >= Constant is clamped onto `case Constant:`, so that
  // case becomes the new default and the switch can test A directly.
  BasicBlock *Unreachable = SI->getDefaultDest();
  SIW.replaceDefaultDest(Case);
  SIW.removeCase(Case);
  SIW->setCondition(A);

  Updates.push_back({DominatorTree::Delete, BB, Unreachable});

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
7756
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
                                        DomTreeUpdater *DTU,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Condition->getType());

  // Bail out on conditions wider than 64 bits or not legal for the target.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(CaseValue))
      Values.push_back(CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(Values);
  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
                                        llvm::countr_zero(Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable: guard the switch behind a `ctpop(C) == 1` test.
    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));

    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    SmallVector<uint32_t> Weights;
    auto HasWeights =
    auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
    if (HasWeights && any_of(Weights, not_equal_to(0))) {
      // IsPow2 covers a subset of the cases in which we'd go to the default
      // label. The other is those powers of 2 that don't appear in the case
      // statement. We don't know the distribution of the values coming in, so
      // the safest is to split 50-50 the original probability to `default`.
      uint64_t OrigDenominator =
      SmallVector<uint64_t> NewWeights(2);
      NewWeights[1] = Weights[0] / 2;
      NewWeights[0] = OrigDenominator - NewWeights[1];
      setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
      // The probability of executing the default block stays constant. It was
      // p_d = Weights[0] / OrigDenominator
      // we rewrite as W/D
      // We want to find the probability of the default branch of the switch
      // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
      // i.e. the original probability is the probability we go to the default
      // branch from the BI branch, or we take the default branch on the SI.
      // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
      // This matches using W/2 for the default branch probability numerator and
      // D-W/2 as the denominator.
      Weights[0] = NewWeights[1];
      uint64_t CasesDenominator = OrigDenominator - Weights[0];
      for (auto &W : drop_begin(Weights))
        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;

      setBranchWeights(*SI, Weights, /*IsExpected=*/false);
    }
    // BI is handling the default case for SI, and so should share its DebugLoc.
    BI->setDebugLoc(SI->getDebugLoc());
    It->eraseFromParent();

    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                   OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7876
/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
/// the same destination.
                                         DomTreeUpdater *DTU) {
  auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
  if (!Cmp || !Cmp->hasOneUse())
    return false;

  bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
  if (!HasWeights)
    Weights.resize(4); // Avoid checking HasWeights everywhere.

  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
  int64_t Res;
  BasicBlock *Succ, *OtherSucc;
  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
  BasicBlock *Unreachable = nullptr;

  if (SI->getNumCases() == 2) {
    // Find which of 1, 0 or -1 is missing (handled by default dest).
    SmallSet<int64_t, 3> Missing;
    Missing.insert(1);
    Missing.insert(0);
    Missing.insert(-1);

    Succ = SI->getDefaultDest();
    SuccWeight = Weights[0];
    OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val)
        return false;
      if (!Missing.erase(*Val))
        return false;
      // Both explicit cases must share a single destination.
      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
        return false;
      OtherSucc = Case.getCaseSuccessor();
      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
    }

    assert(Missing.size() == 1 && "Should have one case left");
    Res = *Missing.begin();
  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
    // Normalize so that Succ is taken once and OtherSucc twice.
    Unreachable = SI->getDefaultDest();
    Succ = OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      BasicBlock *NewSucc = Case.getCaseSuccessor();
      uint32_t Weight = Weights[Case.getSuccessorIndex()];
      if (!OtherSucc || OtherSucc == NewSucc) {
        OtherSucc = NewSucc;
        OtherSuccWeight += Weight;
      } else if (!Succ) {
        Succ = NewSucc;
        SuccWeight = Weight;
      } else if (Succ == NewSucc) {
        // The third case joined Succ; swap so the twice-taken destination
        // ends up in OtherSucc.
        std::swap(Succ, OtherSucc);
        std::swap(SuccWeight, OtherSuccWeight);
      } else
        return false;
    }
    // Verify the case values are exactly {-1, 0, 1} and record which value
    // selects the once-taken successor.
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val || (Val != 1 && Val != 0 && Val != -1))
        return false;
      if (Case.getCaseSuccessor() == Succ) {
        Res = *Val;
        break;
      }
    }
  } else {
    return false;
  }

  // Determine predicate for the missing case.
  switch (Res) {
  case 1:
    Pred = ICmpInst::ICMP_UGT;
    break;
  case 0:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case -1:
    Pred = ICmpInst::ICMP_ULT;
    break;
  }
  if (Cmp->isSigned())
    Pred = ICmpInst::getSignedPredicate(Pred);

  MDNode *NewWeights = nullptr;
  if (HasWeights)
    NewWeights = MDBuilder(SI->getContext())
                     .createBranchWeights(SuccWeight, OtherSuccWeight);

  // Replace the switch with `br (icmp pred LHS, RHS), Succ, OtherSucc`.
  BasicBlock *BB = SI->getParent();
  Builder.SetInsertPoint(SI->getIterator());
  Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
  Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
                       SI->getMetadata(LLVMContext::MD_unpredictable));
  OtherSucc->removePredecessor(BB);
  if (Unreachable)
    Unreachable->removePredecessor(BB);
  SI->eraseFromParent();
  Cmp->eraseFromParent();
  if (DTU && Unreachable)
    DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
  return true;
}
7989
/// Checking whether two cases of SI are equal depends on the contents of the
/// BasicBlock and the incoming values of their successor PHINodes.
/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
/// calling this function on each BasicBlock every time isEqual is called,
/// especially since the same BasicBlock may be passed as an argument multiple
/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
/// of the incoming values.

  // NOTE(review): the enclosing struct header and the empty/tombstone key
  // factory signatures are not visible in this view; the two casts below
  // appear to wrap DenseMap sentinel pointers — confirm against full source.
    return static_cast<SwitchSuccWrapper *>(
  }
    return static_cast<SwitchSuccWrapper *>(
  }
  static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
    BasicBlock *Succ = SSW->Dest;
    // BI is presumably Succ's terminator (its declaration is elided here).
    assert(BI->isUnconditional() &&
           "Only supporting unconditional branches for now");
    assert(BI->getNumSuccessors() == 1 &&
           "Expected unconditional branches to have one successor");
    assert(Succ->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single BranchInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in SwitchSuccWrapper, but this slowed down the
    // average compile time without having any impact on the worst case compile
    // time.
    BasicBlock *BB = BI->getSuccessor(0);
    SmallVector<Value *> PhiValsForBB;
    for (PHINode &Phi : BB->phis())
      PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);

    return hash_combine(BB, hash_combine_range(PhiValsForBB));
  }
  static bool isEqual(const SwitchSuccWrapper *LHS,
                      const SwitchSuccWrapper *RHS) {
    // Sentinel (empty/tombstone) keys compare equal only to themselves.
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->Dest;
    BasicBlock *B = RHS->Dest;

    // FIXME: we checked that the size of A and B are both 1 in
    // simplifyDuplicateSwitchArms to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    BranchInst *ABI = cast<BranchInst>(A->getTerminator());
    BranchInst *BBI = cast<BranchInst>(B->getTerminator());
    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
           "Only supporting unconditional branches for now");
    if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
      return false;

    // Need to check that PHIs in successor have matching values
    BasicBlock *Succ = ABI->getSuccessor(0);
    for (PHINode &Phi : Succ->phis()) {
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      if (PredIVs[A] != PredIVs[B])
        return false;
    }

    return true;
  }
};
8071
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
                                                 DomTreeUpdater *DTU) {
  // Build Cases. Skip BBs that are not candidates for simplification. Mark
  // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, opposed to calling
  // getIncomingValueForBlock inside this loop, since each call to
  // getIncomingValueForBlock is O(|Preds|).
  Cases.reserve(SI->getNumSuccessors());

  for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
    BasicBlock *BB = SI->getSuccessor(I);

    // FIXME: Support more than just a single BranchInst. One way we could do
    // this is by taking a hashing approach of all insts in BB.
    if (BB->size() != 1)
      continue;

    // FIXME: Relax that the terminator is a BranchInst by checking for equality
    // on other kinds of terminators. We decide to only support unconditional
    // branches for now for compile time reasons.
    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || BI->isConditional())
      continue;

    // Already-seen arm: just record this extra successor index for it.
    if (!Seen.insert(BB).second) {
      auto It = BBToSuccessorIndexes.find(BB);
      if (It != BBToSuccessorIndexes.end())
        It->second.emplace_back(I);
      continue;
    }

    // FIXME: This case needs some extra care because the terminators other than
    // SI need to be updated. For now, consider only backedges to the SI.
    if (BB->getUniquePredecessor() != SI->getParent())
      continue;

    // Keep track of which PHIs we need as keys in PhiPredIVs below.
    for (BasicBlock *Succ : BI->successors())

    // Add the successor only if not previously visited.
    Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
    BBToSuccessorIndexes[BB].emplace_back(I);
  }

  // Precompute a data structure to improve performance of isEqual for
  // SwitchSuccWrapper.
  PhiPredIVs.reserve(Phis.size());
  for (PHINode *Phi : Phis) {
    auto &IVs =
        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
    for (auto &IV : Phi->incoming_values())
      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
  }

  // Build a set such that if the SwitchSuccWrapper exists in the set and
  // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
  // which is not in the set should be replaced with the one in the set. If the
  // SwitchSuccWrapper is not in the set, then it should be added to the set so
  // other SwitchSuccWrappers can check against it in the same manner. We use
  // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
  // around information to isEquality, getHashValue, and when doing the
  // replacement with better performance.
  DenseSet<const SwitchSuccWrapper *> ReplaceWith;
  ReplaceWith.reserve(Cases.size());

  // NOTE(review): ReplaceWith is still empty here, so this reserves zero
  // capacity; presumably Cases.size() was intended — confirm upstream.
  Updates.reserve(ReplaceWith.size());
  bool MadeChange = false;
  for (auto &SSW : Cases) {
    // SSW is a candidate for simplification. If we find a duplicate BB,
    // replace it.
    const auto [It, Inserted] = ReplaceWith.insert(&SSW);
    if (!Inserted) {
      // We know that SI's parent BB no longer dominates the old case successor
      // since we are making it dead.
      Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
      const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
      for (unsigned Idx : Successors)
        SI->setSuccessor(Idx, (*It)->Dest);
      MadeChange = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  return MadeChange;
}
8166
8167bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8168 BasicBlock *BB = SI->getParent();
8169
8170 if (isValueEqualityComparison(SI)) {
8171 // If we only have one predecessor, and if it is a branch on this value,
8172 // see if that predecessor totally determines the outcome of this switch.
8173 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8174 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8175 return requestResimplify();
8176
8177 Value *Cond = SI->getCondition();
8178 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8179 if (simplifySwitchOnSelect(SI, Select))
8180 return requestResimplify();
8181
8182 // If the block only contains the switch, see if we can fold the block
8183 // away into any preds.
8184 if (SI == &*BB->instructionsWithoutDebug(false).begin())
8185 if (foldValueComparisonIntoPredecessors(SI, Builder))
8186 return requestResimplify();
8187 }
8188
8189 // Try to transform the switch into an icmp and a branch.
8190 // The conversion from switch to comparison may lose information on
8191 // impossible switch values, so disable it early in the pipeline.
8192 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8193 return requestResimplify();
8194
8195 // Remove unreachable cases.
8196 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8197 return requestResimplify();
8198
8199 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8200 return requestResimplify();
8201
8202 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8203 return requestResimplify();
8204
8205 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8206 return requestResimplify();
8207
8208 // The conversion of switches to arithmetic or lookup table is disabled in
8209 // the early optimization pipeline, as it may lose information or make the
8210 // resulting code harder to analyze.
8211 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8212 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8213 Options.ConvertSwitchToLookupTable))
8214 return requestResimplify();
8215
8216 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8217 return requestResimplify();
8218
8219 if (reduceSwitchRange(SI, Builder, DL, TTI))
8220 return requestResimplify();
8221
8222 if (HoistCommon &&
8223 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8224 return requestResimplify();
8225
8226 if (simplifyDuplicateSwitchArms(SI, DTU))
8227 return requestResimplify();
8228
8229 if (simplifySwitchWhenUMin(SI, DTU))
8230 return requestResimplify();
8231
8232 return false;
8233}
8234
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;
  SmallVector<uint32_t> BranchWeights;
  // Profile metadata is only consulted when profcheck fixes are enabled.
  const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*IBI, BranchWeights);

  // Accumulate weight per target block, since the same block may appear as
  // several destinations of the indirectbr.
  DenseMap<const BasicBlock *, uint64_t> TargetWeight;
  if (HasBranchWeights)
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      TargetWeight[IBI->getDestination(I)] += BranchWeights[I];

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
    BasicBlock *Dest = IBI->getDestination(I);
    // Drop a destination when its address is never taken (it cannot be
    // jumped to) or when it duplicates an earlier destination.
    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(Dest);
      Dest->removePredecessor(BB);
      IBI->removeDestination(I);
      --I;
      --E;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    return true;
  }
  // Re-emit branch weights for the surviving destinations.
  if (HasBranchWeights) {
    SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
    setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
  }
  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
    if (simplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
8296
/// Given a block with only a single landing pad and an unconditional branch
/// try to find another basic block which this one can be merged with. This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception dense code. (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization. We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(*Succ->begin()))
    return false;

  // Look for another predecessor of Succ that is an identical
  // landingpad-plus-branch block.
  for (BasicBlock *OtherPred : predecessors(Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
      continue;
    ++I;
    if (!BI2 || !BI2->isIdenticalTo(BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    for (BasicBlock *Pred : UniquePreds) {
      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back({DominatorTree::Delete, Pred, BB});
      }
    }

    // BB is now dead: detach it from its successors and terminate it with
    // unreachable.
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(BB);
      if (DTU)
        Updates.push_back({DominatorTree::Delete, BB, Succ});
    }

    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
8371
8372bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
8373 IRBuilder<> &Builder) {
8374 BasicBlock *BB = BI->getParent();
8375 BasicBlock *Succ = BI->getSuccessor(0);
8376
8377 // If the Terminator is the only non-phi instruction, simplify the block.
8378 // If LoopHeader is provided, check if the block or its successor is a loop
8379 // header. (This is for early invocations before loop simplify and
8380 // vectorization to keep canonical loop forms for nested loops. These blocks
8381 // can be eliminated when the pass is invoked later in the back-end.)
8382 // Note that if BB has only one predecessor then we do not introduce new
8383 // backedge, so we can eliminate BB.
8384 bool NeedCanonicalLoop =
8385 Options.NeedCanonicalLoop &&
8386 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8387 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
8389 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8390 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8391 return true;
8392
8393 // If the only instruction in the block is a seteq/setne comparison against a
8394 // constant, try to simplify the block.
8395 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8396 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8397 ++I;
8398 if (I->isTerminator() &&
8399 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8400 return true;
8401 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8402 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8403 Builder))
8404 return true;
8405 }
8406 }
8407
8408 // See if we can merge an empty landing pad block with another which is
8409 // equivalent.
8410 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8411 ++I;
8412 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8413 return true;
8414 }
8415
8416 // If this basic block is ONLY a compare and a branch, and if a predecessor
8417 // branches to us and our successor, fold the comparison into the
8418 // predecessor and use logical operations to update the incoming value
8419 // for PHI nodes in common successor.
8420 if (Options.SpeculateBlocks &&
8421 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8422 Options.BonusInstThreshold))
8423 return requestResimplify();
8424 return false;
8425}
8426
  // Walk all predecessors of BB; each must itself have exactly one
  // predecessor, and all of those must be the same block. Returns that common
  // grand-predecessor, or null if it does not exist (including when BB has no
  // predecessors at all).
  BasicBlock *PredPred = nullptr;
  for (auto *P : predecessors(BB)) {
    BasicBlock *PPred = P->getSinglePredecessor();
    if (!PPred || (PredPred && PredPred != PPred))
      return nullptr;
    PredPred = PPred;
  }
  return PredPred;
}
8437
/// Fold the following pattern:
/// bb0:
///   br i1 %cond1, label %bb1, label %bb2
/// bb1:
///   br i1 %cond2, label %bb3, label %bb4
/// bb2:
///   br i1 %cond2, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// into
/// bb0:
///   %cond = xor i1 %cond1, %cond2
///   br i1 %cond, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// NOTE: %cond2 always dominates the terminator of bb0.
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(0);
  BasicBlock *BB2 = BI->getSuccessor(1);
  // A "simple" successor: distinct from BB, contains only a conditional
  // branch, and neither of its targets is itself or BB, nor starts with a phi
  // (so no phi rewiring is needed when BB branches to them directly).
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
    if (Succ == BB)
      return false;
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
    if (!SuccBI || !SuccBI->isConditional())
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
  };
  BranchInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  // Both inner branches must test the same condition with swapped targets.
  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
      BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
    return false;

  BasicBlock *BB3 = BB1BI->getSuccessor(0);
  BasicBlock *BB4 = BB1BI->getSuccessor(1);
  IRBuilder<> Builder(BI);
  // cond1 ^ cond2 selects bb4 when true and bb3 when false, which matches
  // both original two-step paths.
  BI->setCondition(
      Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
  BB1->removePredecessor(BB);
  BI->setSuccessor(0, BB4);
  BB2->removePredecessor(BB);
  BI->setSuccessor(1, BB3);
  if (DTU) {
    Updates.push_back({DominatorTree::Delete, BB, BB1});
    Updates.push_back({DominatorTree::Insert, BB, BB4});
    Updates.push_back({DominatorTree::Delete, BB, BB2});
    Updates.push_back({DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  // Merge branch weights: any branch without metadata contributes 1:1, and
  // new weights are only emitted when at least one branch had metadata.
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
  if (HasWeight) {
    // P(bb4) = P(bb1)*P(bb1->bb4) + P(bb2)*P(bb2->bb4); likewise for bb3.
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
  }
  return true;
}
8526
8527bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8528 assert(
8530 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8531 "Tautological conditional branch should have been eliminated already.");
8532
8533 BasicBlock *BB = BI->getParent();
8534 if (!Options.SimplifyCondBranch ||
8535 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8536 return false;
8537
8538 // Conditional branch
8539 if (isValueEqualityComparison(BI)) {
8540 // If we only have one predecessor, and if it is a branch on this value,
8541 // see if that predecessor totally determines the outcome of this
8542 // switch.
8543 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8544 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8545 return requestResimplify();
8546
8547 // This block must be empty, except for the setcond inst, if it exists.
8548 // Ignore dbg and pseudo intrinsics.
8549 auto I = BB->instructionsWithoutDebug(true).begin();
8550 if (&*I == BI) {
8551 if (foldValueComparisonIntoPredecessors(BI, Builder))
8552 return requestResimplify();
8553 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8554 ++I;
8555 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8556 return requestResimplify();
8557 }
8558 }
8559
8560 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8561 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8562 return true;
8563
8564 // If this basic block has dominating predecessor blocks and the dominating
8565 // blocks' conditions imply BI's condition, we know the direction of BI.
8566 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8567 if (Imp) {
8568 // Turn this into a branch on constant.
8569 auto *OldCond = BI->getCondition();
8570 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8571 : ConstantInt::getFalse(BB->getContext());
8572 BI->setCondition(TorF);
8574 return requestResimplify();
8575 }
8576
8577 // If this basic block is ONLY a compare and a branch, and if a predecessor
8578 // branches to us and one of our successors, fold the comparison into the
8579 // predecessor and use logical operations to pick the right destination.
8580 if (Options.SpeculateBlocks &&
8581 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8582 Options.BonusInstThreshold))
8583 return requestResimplify();
8584
8585 // We have a conditional branch to two blocks that are only reachable
8586 // from BI. We know that the condbr dominates the two blocks, so see if
8587 // there is any identical code in the "then" and "else" blocks. If so, we
8588 // can hoist it up to the branching block.
8589 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8590 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8591 if (HoistCommon &&
8592 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8593 return requestResimplify();
8594
8595 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8596 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8597 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8598 auto CanSpeculateConditionalLoadsStores = [&]() {
8599 for (auto *Succ : successors(BB)) {
8600 for (Instruction &I : *Succ) {
8601 if (I.isTerminator()) {
8602 if (I.getNumSuccessors() > 1)
8603 return false;
8604 continue;
8605 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8606 SpeculatedConditionalLoadsStores.size() ==
8608 return false;
8609 }
8610 SpeculatedConditionalLoadsStores.push_back(&I);
8611 }
8612 }
8613 return !SpeculatedConditionalLoadsStores.empty();
8614 };
8615
8616 if (CanSpeculateConditionalLoadsStores()) {
8617 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8618 std::nullopt, nullptr);
8619 return requestResimplify();
8620 }
8621 }
8622 } else {
8623 // If Successor #1 has multiple preds, we may be able to conditionally
8624 // execute Successor #0 if it branches to Successor #1.
8625 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8626 if (Succ0TI->getNumSuccessors() == 1 &&
8627 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8628 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8629 return requestResimplify();
8630 }
8631 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8632 // If Successor #0 has multiple preds, we may be able to conditionally
8633 // execute Successor #1 if it branches to Successor #0.
8634 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8635 if (Succ1TI->getNumSuccessors() == 1 &&
8636 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8637 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8638 return requestResimplify();
8639 }
8640
8641 // If this is a branch on something for which we know the constant value in
8642 // predecessors (e.g. a phi node in the current block), thread control
8643 // through this block.
8644 if (foldCondBranchOnValueKnownInPredecessor(BI))
8645 return requestResimplify();
8646
8647 // Scan predecessor blocks for conditional branches.
8648 for (BasicBlock *Pred : predecessors(BB))
8649 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8650 if (PBI != BI && PBI->isConditional())
8651 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8652 return requestResimplify();
8653
8654 // Look for diamond patterns.
8655 if (MergeCondStores)
8656 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8657 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8658 if (PBI != BI && PBI->isConditional())
8659 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8660 return requestResimplify();
8661
8662 // Look for nested conditional branches.
8663 if (mergeNestedCondBranch(BI, DTU))
8664 return requestResimplify();
8665
8666 return false;
8667}
8668
8669/// Check if passing a value to an instruction will cause undefined behavior.
8670static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8671 assert(V->getType() == I->getType() && "Mismatched types");
8673 if (!C)
8674 return false;
8675
8676 if (I->use_empty())
8677 return false;
8678
8679 if (C->isNullValue() || isa<UndefValue>(C)) {
8680 // Only look at the first use we can handle, avoid hurting compile time with
8681 // long uselists
8682 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8683 auto *Use = cast<Instruction>(U.getUser());
8684 // Change this list when we want to add new instructions.
8685 switch (Use->getOpcode()) {
8686 default:
8687 return false;
8688 case Instruction::GetElementPtr:
8689 case Instruction::Ret:
8690 case Instruction::BitCast:
8691 case Instruction::Load:
8692 case Instruction::Store:
8693 case Instruction::Call:
8694 case Instruction::CallBr:
8695 case Instruction::Invoke:
8696 case Instruction::UDiv:
8697 case Instruction::URem:
8698 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8699 // implemented to avoid code complexity as it is unclear how useful such
8700 // logic is.
8701 case Instruction::SDiv:
8702 case Instruction::SRem:
8703 return true;
8704 }
8705 });
8706 if (FindUse == I->use_end())
8707 return false;
8708 auto &Use = *FindUse;
8709 auto *User = cast<Instruction>(Use.getUser());
8710 // Bail out if User is not in the same BB as I or User == I or User comes
8711 // before I in the block. The latter two can be the case if User is a
8712 // PHI node.
8713 if (User->getParent() != I->getParent() || User == I ||
8714 User->comesBefore(I))
8715 return false;
8716
8717 // Now make sure that there are no instructions in between that can alter
8718 // control flow (eg. calls)
8719 auto InstrRange =
8720 make_range(std::next(I->getIterator()), User->getIterator());
8721 if (any_of(InstrRange, [](Instruction &I) {
8723 }))
8724 return false;
8725
8726 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8728 if (GEP->getPointerOperand() == I) {
8729 // The type of GEP may differ from the type of base pointer.
8730 // Bail out on vector GEPs, as they are not handled by other checks.
8731 if (GEP->getType()->isVectorTy())
8732 return false;
8733 // The current base address is null, there are four cases to consider:
8734 // getelementptr (TY, null, 0) -> null
8735 // getelementptr (TY, null, not zero) -> may be modified
8736 // getelementptr inbounds (TY, null, 0) -> null
8737 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8738 // undefined?
8739 if (!GEP->hasAllZeroIndices() &&
8740 (!GEP->isInBounds() ||
8741 NullPointerIsDefined(GEP->getFunction(),
8742 GEP->getPointerAddressSpace())))
8743 PtrValueMayBeModified = true;
8744 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8745 }
8746
8747 // Look through return.
8748 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8749 bool HasNoUndefAttr =
8750 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8751 // Return undefined to a noundef return value is undefined.
8752 if (isa<UndefValue>(C) && HasNoUndefAttr)
8753 return true;
8754 // Return null to a nonnull+noundef return value is undefined.
8755 if (C->isNullValue() && HasNoUndefAttr &&
8756 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8757 return !PtrValueMayBeModified;
8758 }
8759 }
8760
8761 // Load from null is undefined.
8762 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8763 if (!LI->isVolatile())
8764 return !NullPointerIsDefined(LI->getFunction(),
8765 LI->getPointerAddressSpace());
8766
8767 // Store to null is undefined.
8769 if (!SI->isVolatile())
8770 return (!NullPointerIsDefined(SI->getFunction(),
8771 SI->getPointerAddressSpace())) &&
8772 SI->getPointerOperand() == I;
8773
8774 // llvm.assume(false/undef) always triggers immediate UB.
8775 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8776 // Ignore assume operand bundles.
8777 if (I == Assume->getArgOperand(0))
8778 return true;
8779 }
8780
8781 if (auto *CB = dyn_cast<CallBase>(User)) {
8782 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8783 return false;
8784 // A call to null is undefined.
8785 if (CB->getCalledOperand() == I)
8786 return true;
8787
8788 if (CB->isArgOperand(&Use)) {
8789 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8790 // Passing null to a nonnnull+noundef argument is undefined.
8792 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8793 return !PtrValueMayBeModified;
8794 // Passing undef to a noundef argument is undefined.
8795 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8796 return true;
8797 }
8798 }
8799 // Div/Rem by zero is immediate UB
8800 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8801 return true;
8802 }
8803 return false;
8804}
8805
8806/// If BB has an incoming value that will always trigger undefined behavior
8807/// (eg. null pointer dereference), remove the branch leading here.
8809 DomTreeUpdater *DTU,
8810 AssumptionCache *AC) {
8811 for (PHINode &PHI : BB->phis())
8812 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8813 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8814 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8815 Instruction *T = Predecessor->getTerminator();
8816 IRBuilder<> Builder(T);
8817 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8818 BB->removePredecessor(Predecessor);
8819 // Turn unconditional branches into unreachables and remove the dead
8820 // destination from conditional branches.
8821 if (BI->isUnconditional())
8822 Builder.CreateUnreachable();
8823 else {
8824 // Preserve guarding condition in assume, because it might not be
8825 // inferrable from any dominating condition.
8826 Value *Cond = BI->getCondition();
8827 CallInst *Assumption;
8828 if (BI->getSuccessor(0) == BB)
8829 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8830 else
8831 Assumption = Builder.CreateAssumption(Cond);
8832 if (AC)
8833 AC->registerAssumption(cast<AssumeInst>(Assumption));
8834 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8835 : BI->getSuccessor(0));
8836 }
8837 BI->eraseFromParent();
8838 if (DTU)
8839 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8840 return true;
8841 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8842 // Redirect all branches leading to UB into
8843 // a newly created unreachable block.
8844 BasicBlock *Unreachable = BasicBlock::Create(
8845 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8846 Builder.SetInsertPoint(Unreachable);
8847 // The new block contains only one instruction: Unreachable
8848 Builder.CreateUnreachable();
8849 for (const auto &Case : SI->cases())
8850 if (Case.getCaseSuccessor() == BB) {
8851 BB->removePredecessor(Predecessor);
8852 Case.setSuccessor(Unreachable);
8853 }
8854 if (SI->getDefaultDest() == BB) {
8855 BB->removePredecessor(Predecessor);
8856 SI->setDefaultDest(Unreachable);
8857 }
8858
8859 if (DTU)
8860 DTU->applyUpdates(
8861 { { DominatorTree::Insert, Predecessor, Unreachable },
8862 { DominatorTree::Delete, Predecessor, BB } });
8863 return true;
8864 }
8865 }
8866
8867 return false;
8868}
8869
8870bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8871 bool Changed = false;
8872
8873 assert(BB && BB->getParent() && "Block not embedded in function!");
8874 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8875
8876 // Remove basic blocks that have no predecessors (except the entry block)...
8877 // or that just have themself as a predecessor. These are unreachable.
8878 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8879 BB->getSinglePredecessor() == BB) {
8880 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8881 DeleteDeadBlock(BB, DTU);
8882 return true;
8883 }
8884
8885 // Check to see if we can constant propagate this terminator instruction
8886 // away...
8887 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8888 /*TLI=*/nullptr, DTU);
8889
8890 // Check for and eliminate duplicate PHI nodes in this block.
8892
8893 // Check for and remove branches that will always cause undefined behavior.
8895 return requestResimplify();
8896
8897 // Merge basic blocks into their predecessor if there is only one distinct
8898 // pred, and if there is only one distinct successor of the predecessor, and
8899 // if there are no PHI nodes.
8900 if (MergeBlockIntoPredecessor(BB, DTU))
8901 return true;
8902
8903 if (SinkCommon && Options.SinkCommonInsts)
8904 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8905 mergeCompatibleInvokes(BB, DTU)) {
8906 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8907 // so we may now how duplicate PHI's.
8908 // Let's rerun EliminateDuplicatePHINodes() first,
8909 // before foldTwoEntryPHINode() potentially converts them into select's,
8910 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8911 return true;
8912 }
8913
8914 IRBuilder<> Builder(BB);
8915
8916 if (Options.SpeculateBlocks &&
8917 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8918 // If there is a trivial two-entry PHI node in this basic block, and we can
8919 // eliminate it, do so now.
8920 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8921 if (PN->getNumIncomingValues() == 2)
8922 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8923 Options.SpeculateUnpredictables))
8924 return true;
8925 }
8926
8928 Builder.SetInsertPoint(Terminator);
8929 switch (Terminator->getOpcode()) {
8930 case Instruction::UncondBr:
8931 Changed |= simplifyUncondBranch(cast<UncondBrInst>(Terminator), Builder);
8932 break;
8933 case Instruction::CondBr:
8934 Changed |= simplifyCondBranch(cast<CondBrInst>(Terminator), Builder);
8935 break;
8936 case Instruction::Resume:
8937 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8938 break;
8939 case Instruction::CleanupRet:
8940 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8941 break;
8942 case Instruction::Switch:
8943 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8944 break;
8945 case Instruction::Unreachable:
8946 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8947 break;
8948 case Instruction::IndirectBr:
8949 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8950 break;
8951 }
8952
8953 return Changed;
8954}
8955
8956bool SimplifyCFGOpt::run(BasicBlock *BB) {
8957 bool Changed = false;
8958
8959 // Repeated simplify BB as long as resimplification is requested.
8960 do {
8961 Resimplify = false;
8962
8963 // Perform one round of simplifcation. Resimplify flag will be set if
8964 // another iteration is requested.
8965 Changed |= simplifyOnce(BB);
8966 } while (Resimplify);
8967
8968 return Changed;
8969}
8970
8973 ArrayRef<WeakVH> LoopHeaders) {
8974 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8975 Options)
8976 .run(BB);
8977}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1589
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1952
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
back - Get the last element.
Definition ArrayRef.h:151
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:483
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:539
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:696
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:493
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:491
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:668
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:939
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
bool isConditional() const
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:781
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1130
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A constant pointer value that points to null.
Definition Constants.h:563
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:255
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2324
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2072
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2627
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1516
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1952
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1812
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1223
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2308
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1200
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1854
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1867
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1406
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2166
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2040
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1194
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2249
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2418
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1576
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1440
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:293
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
BasicBlock * getSuccessor(unsigned i=0) const
'undef' values are things that do not have specified contents.
Definition Constants.h:1445
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:907
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:832
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
bool use_empty() const
Definition Value.h:347
iterator_range< use_iterator > uses()
Definition Value.h:381
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition Metadata.cpp:64
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:538
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1702
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1791
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2200
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1158
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2863
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3116
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3398
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3905
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1717
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1596
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:355
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1527
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:312
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276