LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
299 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
300 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
301 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
302
303 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
304 IRBuilder<> &Builder);
305 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
306 SelectInst *Select,
307 IRBuilder<> &Builder);
308 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
309 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
310 Instruction *TI, Instruction *I1,
311 SmallVectorImpl<Instruction *> &OtherSuccTIs,
312 ArrayRef<BasicBlock *> UniqueSuccessors);
313 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
314 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
315 BasicBlock *TrueBB, BasicBlock *FalseBB,
316 uint32_t TrueWeight, uint32_t FalseWeight);
317 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
318 const DataLayout &DL);
319 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
320 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
321 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
322
323public:
324 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
325 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
326 const SimplifyCFGOptions &Opts)
327 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
328 assert((!DTU || !DTU->hasPostDomTree()) &&
329 "SimplifyCFG is not yet capable of maintaining validity of a "
330 "PostDomTree, so don't ask for it.");
331 }
332
333 bool simplifyOnce(BasicBlock *BB);
334 bool run(BasicBlock *BB);
335
336 // Helper to set Resimplify and return change indication.
337 bool requestResimplify() {
338 Resimplify = true;
339 return true;
340 }
341};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
378 EquivalenceSet->contains(IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(SI2BB)) {
401 if (!SI1Succs.count(Succ))
402 continue;
403 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
426 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
465 return false;
466
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
484 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
495 return false;
496
497 // Overflow arithmetic instruction plus extract value are usually generated
498 // when a division is being replaced. But, in this case, the zero check may
499 // still be kept in the code. In that case it would be worth to hoist these
500 // two instruction out of the basic block. Let's treat this pattern as one
501 // single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
504 ZeroCostInstructions.insert(OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
534 // Normal constant int.
536 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
550 return ConstantInt::get(IntPtrTy, 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(I, m_Not(m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(I, m_NUWTrunc(m_Value(Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
657 (C = getConstantInt(I->getOperand(1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of a unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(ICI->getOperand(0),
709 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(C);
717 Vals.push_back(
718 ConstantInt::get(C->getContext(),
719 C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(ICI->getOperand(0),
732 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(C);
740 Vals.push_back(ConstantInt::get(C->getContext(),
741 C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(0);
763 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
764 Span = Span.subtract(*RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
801 IsEq = true;
802 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
816 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
817 if (Visited.insert(Op1).second)
818 DFT.push_back(Op1);
819 if (Visited.insert(Op0).second)
820 DFT.push_back(Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be match as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
851 Cond = dyn_cast<Instruction>(SI->getCondition());
852 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
853 if (BI->isConditional())
854 Cond = dyn_cast<Instruction>(BI->getCondition());
855 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
856 Cond = dyn_cast<Instruction>(IBI->getAddress());
857 }
858
859 TI->eraseFromParent();
860 if (Cond)
862}
863
864/// Return true if the specified terminator checks
865/// to see if a value is equal to constant integer value.
866Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
867 Value *CV = nullptr;
868 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
869 // Do not permit merging of large switch instructions into their
870 // predecessors unless there is only one predecessor.
871 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
872 CV = SI->getCondition();
873 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
874 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
875 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
876 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
877 CV = ICI->getOperand(0);
878 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
879 if (Trunc->hasNoUnsignedWrap())
880 CV = Trunc->getOperand(0);
881 }
882 }
883
884 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
885 if (CV) {
886 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
887 Value *Ptr = PTII->getPointerOperand();
888 if (DL.hasUnstableRepresentation(Ptr->getType()))
889 return CV;
890 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
891 CV = Ptr;
892 }
893 }
894 return CV;
895}
896
897/// Given a value comparison instruction,
898/// decode all of the 'cases' that it represents and return the 'default' block.
899BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
900 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
901 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
902 Cases.reserve(SI->getNumCases());
903 for (auto Case : SI->cases())
904 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
905 Case.getCaseSuccessor()));
906 return SI->getDefaultDest();
907 }
908
909 BranchInst *BI = cast<BranchInst>(TI);
910 Value *Cond = BI->getCondition();
911 ICmpInst::Predicate Pred;
912 ConstantInt *C;
913 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
914 Pred = ICI->getPredicate();
915 C = getConstantInt(ICI->getOperand(1), DL);
916 } else {
917 Pred = ICmpInst::ICMP_NE;
918 auto *Trunc = cast<TruncInst>(Cond);
919 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
920 }
921 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
922 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
923 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
924}
925
926/// Given a vector of bb/value pairs, remove any entries
927/// in the list that match the specified block.
928static void
930 std::vector<ValueEqualityComparisonCase> &Cases) {
931 llvm::erase(Cases, BB);
932}
933
934/// Return true if there are any keys in C1 that exist in C2 as well.
935static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
936 std::vector<ValueEqualityComparisonCase> &C2) {
937 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
938
939 // Make V1 be smaller than V2.
940 if (V1->size() > V2->size())
941 std::swap(V1, V2);
942
943 if (V1->empty())
944 return false;
945 if (V1->size() == 1) {
946 // Just scan V2.
947 ConstantInt *TheVal = (*V1)[0].Value;
948 for (const ValueEqualityComparisonCase &VECC : *V2)
949 if (TheVal == VECC.Value)
950 return true;
951 }
952
953 // Otherwise, just sort both lists and compare element by element.
954 array_pod_sort(V1->begin(), V1->end());
955 array_pod_sort(V2->begin(), V2->end());
956 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
957 while (i1 != e1 && i2 != e2) {
958 if ((*V1)[i1].Value == (*V2)[i2].Value)
959 return true;
960 if ((*V1)[i1].Value < (*V2)[i2].Value)
961 ++i1;
962 else
963 ++i2;
964 }
965 return false;
966}
967
968/// If TI is known to be a terminator instruction and its block is known to
969/// only have a single predecessor block, check to see if that predecessor is
970/// also a value comparison with the same value, and if that comparison
971/// determines the outcome of this comparison. If so, simplify TI. This does a
972/// very limited form of jump threading.
973bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
974 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
975 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
976 if (!PredVal)
977 return false; // Not a value comparison in predecessor.
978
979 Value *ThisVal = isValueEqualityComparison(TI);
980 assert(ThisVal && "This isn't a value comparison!!");
981 if (ThisVal != PredVal)
982 return false; // Different predicates.
983
984 // TODO: Preserve branch weight metadata, similarly to how
985 // foldValueComparisonIntoPredecessors preserves it.
986
987 // Find out information about when control will move from Pred to TI's block.
988 std::vector<ValueEqualityComparisonCase> PredCases;
989 BasicBlock *PredDef =
990 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
991 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
992
993 // Find information about how control leaves this block.
994 std::vector<ValueEqualityComparisonCase> ThisCases;
995 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
996 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
997
998 // If TI's block is the default block from Pred's comparison, potentially
999 // simplify TI based on this knowledge.
1000 if (PredDef == TI->getParent()) {
1001 // If we are here, we know that the value is none of those cases listed in
1002 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1003 // can simplify TI.
1004 if (!valuesOverlap(PredCases, ThisCases))
1005 return false;
1006
1007 if (isa<BranchInst>(TI)) {
1008 // Okay, one of the successors of this condbr is dead. Convert it to a
1009 // uncond br.
1010 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1011 // Insert the new branch.
1012 Instruction *NI = Builder.CreateBr(ThisDef);
1013 (void)NI;
1014
1015 // Remove PHI node entries for the dead edge.
1016 ThisCases[0].Dest->removePredecessor(PredDef);
1017
1018 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1019 << "Through successor TI: " << *TI << "Leaving: " << *NI
1020 << "\n");
1021
1023
1024 if (DTU)
1025 DTU->applyUpdates(
1026 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1027
1028 return true;
1029 }
1030
1031 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1032 // Okay, TI has cases that are statically dead, prune them away.
1033 SmallPtrSet<Constant *, 16> DeadCases;
1034 for (const ValueEqualityComparisonCase &Case : PredCases)
1035 DeadCases.insert(Case.Value);
1036
1037 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1038 << "Through successor TI: " << *TI);
1039
1040 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1041 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1042 --i;
1043 auto *Successor = i->getCaseSuccessor();
1044 if (DTU)
1045 ++NumPerSuccessorCases[Successor];
1046 if (DeadCases.count(i->getCaseValue())) {
1047 Successor->removePredecessor(PredDef);
1048 SI.removeCase(i);
1049 if (DTU)
1050 --NumPerSuccessorCases[Successor];
1051 }
1052 }
1053
1054 if (DTU) {
1055 std::vector<DominatorTree::UpdateType> Updates;
1056 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1057 if (I.second == 0)
1058 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1059 DTU->applyUpdates(Updates);
1060 }
1061
1062 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1063 return true;
1064 }
1065
1066 // Otherwise, TI's block must correspond to some matched value. Find out
1067 // which value (or set of values) this is.
1068 ConstantInt *TIV = nullptr;
1069 BasicBlock *TIBB = TI->getParent();
1070 for (const auto &[Value, Dest] : PredCases)
1071 if (Dest == TIBB) {
1072 if (TIV)
1073 return false; // Cannot handle multiple values coming to this block.
1074 TIV = Value;
1075 }
1076 assert(TIV && "No edge from pred to succ?");
1077
1078 // Okay, we found the one constant that our value can be if we get into TI's
1079 // BB. Find out which successor will unconditionally be branched to.
1080 BasicBlock *TheRealDest = nullptr;
1081 for (const auto &[Value, Dest] : ThisCases)
1082 if (Value == TIV) {
1083 TheRealDest = Dest;
1084 break;
1085 }
1086
1087 // If not handled by any explicit cases, it is handled by the default case.
1088 if (!TheRealDest)
1089 TheRealDest = ThisDef;
1090
1091 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1092
1093 // Remove PHI node entries for dead edges.
1094 BasicBlock *CheckEdge = TheRealDest;
1095 for (BasicBlock *Succ : successors(TIBB))
1096 if (Succ != CheckEdge) {
1097 if (Succ != TheRealDest)
1098 RemovedSuccs.insert(Succ);
1099 Succ->removePredecessor(TIBB);
1100 } else
1101 CheckEdge = nullptr;
1102
1103 // Insert the new branch.
1104 Instruction *NI = Builder.CreateBr(TheRealDest);
1105 (void)NI;
1106
1107 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1108 << "Through successor TI: " << *TI << "Leaving: " << *NI
1109 << "\n");
1110
1112 if (DTU) {
1113 SmallVector<DominatorTree::UpdateType, 2> Updates;
1114 Updates.reserve(RemovedSuccs.size());
1115 for (auto *RemovedSucc : RemovedSuccs)
1116 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1117 DTU->applyUpdates(Updates);
1118 }
1119 return true;
1120}
1121
1122namespace {
1123
1124/// This class implements a stable ordering of constant
1125/// integers that does not depend on their address. This is important for
1126/// applications that sort ConstantInt's to ensure uniqueness.
1127struct ConstantIntOrdering {
1128 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1129 return LHS->getValue().ult(RHS->getValue());
1130 }
1131};
1132
1133} // end anonymous namespace
1134
1136 ConstantInt *const *P2) {
1137 const ConstantInt *LHS = *P1;
1138 const ConstantInt *RHS = *P2;
1139 if (LHS == RHS)
1140 return 0;
1141 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1142}
1143
1144/// Get Weights of a given terminator, the default weight is at the front
1145/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1146/// metadata.
1148 SmallVectorImpl<uint64_t> &Weights) {
1149 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1150 assert(MD && "Invalid branch-weight metadata");
1151 extractFromBranchWeightMD64(MD, Weights);
1152
1153 // If TI is a conditional eq, the default case is the false case,
1154 // and the corresponding branch-weight data is at index 2. We swap the
1155 // default weight to be the first entry.
1156 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1157 assert(Weights.size() == 2);
1158 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1159 if (!ICI)
1160 return;
1161
1162 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1163 std::swap(Weights.front(), Weights.back());
1164 }
1165}
1166
1168 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1169 Instruction *PTI = PredBlock->getTerminator();
1170
1171 // If we have bonus instructions, clone them into the predecessor block.
1172 // Note that there may be multiple predecessor blocks, so we cannot move
1173 // bonus instructions to a predecessor block.
1174 for (Instruction &BonusInst : *BB) {
1175 if (BonusInst.isTerminator())
1176 continue;
1177
1178 Instruction *NewBonusInst = BonusInst.clone();
1179
1180 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1181 // Unless the instruction has the same !dbg location as the original
1182 // branch, drop it. When we fold the bonus instructions we want to make
1183 // sure we reset their debug locations in order to avoid stepping on
1184 // dead code caused by folding dead branches.
1185 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1186 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1187 mapAtomInstance(DL, VMap);
1188 }
1189
1190 RemapInstruction(NewBonusInst, VMap,
1192
1193 // If we speculated an instruction, we need to drop any metadata that may
1194 // result in undefined behavior, as the metadata might have been valid
1195 // only given the branch precondition.
1196 // Similarly strip attributes on call parameters that may cause UB in
1197 // location the call is moved to.
1198 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1199
1200 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1201 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1202 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1204
1205 NewBonusInst->takeName(&BonusInst);
1206 BonusInst.setName(NewBonusInst->getName() + ".old");
1207 VMap[&BonusInst] = NewBonusInst;
1208
1209 // Update (liveout) uses of bonus instructions,
1210 // now that the bonus instruction has been cloned into predecessor.
1211 // Note that we expect to be in a block-closed SSA form for this to work!
1212 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1213 auto *UI = cast<Instruction>(U.getUser());
1214 auto *PN = dyn_cast<PHINode>(UI);
1215 if (!PN) {
1216 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1217 "If the user is not a PHI node, then it should be in the same "
1218 "block as, and come after, the original bonus instruction.");
1219 continue; // Keep using the original bonus instruction.
1220 }
1221 // Is this the block-closed SSA form PHI node?
1222 if (PN->getIncomingBlock(U) == BB)
1223 continue; // Great, keep using the original bonus instruction.
1224 // The only other alternative is an "use" when coming from
1225 // the predecessor block - here we should refer to the cloned bonus instr.
1226 assert(PN->getIncomingBlock(U) == PredBlock &&
1227 "Not in block-closed SSA form?");
1228 U.set(NewBonusInst);
1229 }
1230 }
1231
1232 // Key Instructions: We may have propagated atom info into the pred. If the
1233 // pred's terminator already has atom info do nothing as merging would drop
1234 // one atom group anyway. If it doesn't, propagte the remapped atom group
1235 // from BB's terminator.
1236 if (auto &PredDL = PTI->getDebugLoc()) {
1237 auto &DL = BB->getTerminator()->getDebugLoc();
1238 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1239 PredDL.isSameSourceLocation(DL)) {
1240 PTI->setDebugLoc(DL);
1241 RemapSourceAtom(PTI, VMap);
1242 }
1243 }
1244}
1245
1246bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1247 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1248 BasicBlock *BB = TI->getParent();
1249 BasicBlock *Pred = PTI->getParent();
1250
1252
1253 // Figure out which 'cases' to copy from SI to PSI.
1254 std::vector<ValueEqualityComparisonCase> BBCases;
1255 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1256
1257 std::vector<ValueEqualityComparisonCase> PredCases;
1258 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1259
1260 // Based on whether the default edge from PTI goes to BB or not, fill in
1261 // PredCases and PredDefault with the new switch cases we would like to
1262 // build.
1263 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1264
1265 // Update the branch weight metadata along the way
1266 SmallVector<uint64_t, 8> Weights;
1267 bool PredHasWeights = hasBranchWeightMD(*PTI);
1268 bool SuccHasWeights = hasBranchWeightMD(*TI);
1269
1270 if (PredHasWeights) {
1271 getBranchWeights(PTI, Weights);
1272 // branch-weight metadata is inconsistent here.
1273 if (Weights.size() != 1 + PredCases.size())
1274 PredHasWeights = SuccHasWeights = false;
1275 } else if (SuccHasWeights)
1276 // If there are no predecessor weights but there are successor weights,
1277 // populate Weights with 1, which will later be scaled to the sum of
1278 // successor's weights
1279 Weights.assign(1 + PredCases.size(), 1);
1280
1281 SmallVector<uint64_t, 8> SuccWeights;
1282 if (SuccHasWeights) {
1283 getBranchWeights(TI, SuccWeights);
1284 // branch-weight metadata is inconsistent here.
1285 if (SuccWeights.size() != 1 + BBCases.size())
1286 PredHasWeights = SuccHasWeights = false;
1287 } else if (PredHasWeights)
1288 SuccWeights.assign(1 + BBCases.size(), 1);
1289
1290 if (PredDefault == BB) {
1291 // If this is the default destination from PTI, only the edges in TI
1292 // that don't occur in PTI, or that branch to BB will be activated.
1293 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1294 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1295 if (PredCases[i].Dest != BB)
1296 PTIHandled.insert(PredCases[i].Value);
1297 else {
1298 // The default destination is BB, we don't need explicit targets.
1299 std::swap(PredCases[i], PredCases.back());
1300
1301 if (PredHasWeights || SuccHasWeights) {
1302 // Increase weight for the default case.
1303 Weights[0] += Weights[i + 1];
1304 std::swap(Weights[i + 1], Weights.back());
1305 Weights.pop_back();
1306 }
1307
1308 PredCases.pop_back();
1309 --i;
1310 --e;
1311 }
1312
1313 // Reconstruct the new switch statement we will be building.
1314 if (PredDefault != BBDefault) {
1315 PredDefault->removePredecessor(Pred);
1316 if (DTU && PredDefault != BB)
1317 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1318 PredDefault = BBDefault;
1319 ++NewSuccessors[BBDefault];
1320 }
1321
1322 unsigned CasesFromPred = Weights.size();
1323 uint64_t ValidTotalSuccWeight = 0;
1324 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1325 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1326 PredCases.push_back(BBCases[i]);
1327 ++NewSuccessors[BBCases[i].Dest];
1328 if (SuccHasWeights || PredHasWeights) {
1329 // The default weight is at index 0, so weight for the ith case
1330 // should be at index i+1. Scale the cases from successor by
1331 // PredDefaultWeight (Weights[0]).
1332 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1333 ValidTotalSuccWeight += SuccWeights[i + 1];
1334 }
1335 }
1336
1337 if (SuccHasWeights || PredHasWeights) {
1338 ValidTotalSuccWeight += SuccWeights[0];
1339 // Scale the cases from predecessor by ValidTotalSuccWeight.
1340 for (unsigned i = 1; i < CasesFromPred; ++i)
1341 Weights[i] *= ValidTotalSuccWeight;
1342 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1343 Weights[0] *= SuccWeights[0];
1344 }
1345 } else {
1346 // If this is not the default destination from PSI, only the edges
1347 // in SI that occur in PSI with a destination of BB will be
1348 // activated.
1349 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1350 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1351 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1352 if (PredCases[i].Dest == BB) {
1353 PTIHandled.insert(PredCases[i].Value);
1354
1355 if (PredHasWeights || SuccHasWeights) {
1356 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1357 std::swap(Weights[i + 1], Weights.back());
1358 Weights.pop_back();
1359 }
1360
1361 std::swap(PredCases[i], PredCases.back());
1362 PredCases.pop_back();
1363 --i;
1364 --e;
1365 }
1366
1367 // Okay, now we know which constants were sent to BB from the
1368 // predecessor. Figure out where they will all go now.
1369 for (const ValueEqualityComparisonCase &Case : BBCases)
1370 if (PTIHandled.count(Case.Value)) {
1371 // If this is one we are capable of getting...
1372 if (PredHasWeights || SuccHasWeights)
1373 Weights.push_back(WeightsForHandled[Case.Value]);
1374 PredCases.push_back(Case);
1375 ++NewSuccessors[Case.Dest];
1376 PTIHandled.erase(Case.Value); // This constant is taken care of
1377 }
1378
1379 // If there are any constants vectored to BB that TI doesn't handle,
1380 // they must go to the default destination of TI.
1381 for (ConstantInt *I : PTIHandled) {
1382 if (PredHasWeights || SuccHasWeights)
1383 Weights.push_back(WeightsForHandled[I]);
1384 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1385 ++NewSuccessors[BBDefault];
1386 }
1387 }
1388
1389 // Okay, at this point, we know which new successor Pred will get. Make
1390 // sure we update the number of entries in the PHI nodes for these
1391 // successors.
1392 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1393 if (DTU) {
1394 SuccsOfPred = {llvm::from_range, successors(Pred)};
1395 Updates.reserve(Updates.size() + NewSuccessors.size());
1396 }
1397 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1398 NewSuccessors) {
1399 for (auto I : seq(NewSuccessor.second)) {
1400 (void)I;
1401 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1402 }
1403 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1404 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1405 }
1406
1407 Builder.SetInsertPoint(PTI);
1408 // Convert pointer to int before we switch.
1409 if (CV->getType()->isPointerTy()) {
1410 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1411 "Should not end up here with unstable pointers");
1412 CV =
1413 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1414 }
1415
1416 // Now that the successors are updated, create the new Switch instruction.
1417 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1418 NewSI->setDebugLoc(PTI->getDebugLoc());
1419 for (ValueEqualityComparisonCase &V : PredCases)
1420 NewSI->addCase(V.Value, V.Dest);
1421
1422 if (PredHasWeights || SuccHasWeights)
1423 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1424 /*ElideAllZero=*/true);
1425
1427
1428 // Okay, last check. If BB is still a successor of PSI, then we must
1429 // have an infinite loop case. If so, add an infinitely looping block
1430 // to handle the case to preserve the behavior of the code.
1431 BasicBlock *InfLoopBlock = nullptr;
1432 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1433 if (NewSI->getSuccessor(i) == BB) {
1434 if (!InfLoopBlock) {
1435 // Insert it at the end of the function, because it's either code,
1436 // or it won't matter if it's hot. :)
1437 InfLoopBlock =
1438 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1439 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1440 if (DTU)
1441 Updates.push_back(
1442 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1443 }
1444 NewSI->setSuccessor(i, InfLoopBlock);
1445 }
1446
1447 if (DTU) {
1448 if (InfLoopBlock)
1449 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1450
1451 Updates.push_back({DominatorTree::Delete, Pred, BB});
1452
1453 DTU->applyUpdates(Updates);
1454 }
1455
1456 ++NumFoldValueComparisonIntoPredecessors;
1457 return true;
1458}
1459
1460/// The specified terminator is a value equality comparison instruction
1461/// (either a switch or a branch on "X == c").
1462/// See if any of the predecessors of the terminator block are value comparisons
1463/// on the same value. If so, and if safe to do so, fold them together.
1464bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1465 IRBuilder<> &Builder) {
1466 BasicBlock *BB = TI->getParent();
1467 Value *CV = isValueEqualityComparison(TI); // CondVal
1468 assert(CV && "Not a comparison?");
1469
1470 bool Changed = false;
1471
1472 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1473 while (!Preds.empty()) {
1474 BasicBlock *Pred = Preds.pop_back_val();
1475 Instruction *PTI = Pred->getTerminator();
1476
1477 // Don't try to fold into itself.
1478 if (Pred == BB)
1479 continue;
1480
1481 // See if the predecessor is a comparison with the same value.
1482 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1483 if (PCV != CV)
1484 continue;
1485
1486 SmallSetVector<BasicBlock *, 4> FailBlocks;
1487 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1488 for (auto *Succ : FailBlocks) {
1489 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1490 return false;
1491 }
1492 }
1493
1494 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1495 Changed = true;
1496 }
1497 return Changed;
1498}
1499
1500// If we would need to insert a select that uses the value of this invoke
1501// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1502// need to do this), we can't hoist the invoke, as there is nowhere to put the
1503// select in this case.
1505 Instruction *I1, Instruction *I2) {
1506 for (BasicBlock *Succ : successors(BB1)) {
1507 for (const PHINode &PN : Succ->phis()) {
1508 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1509 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1510 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1511 return false;
1512 }
1513 }
1514 }
1515 return true;
1516}
1517
1518// Get interesting characteristics of instructions that
1519// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1520// instructions can be reordered across.
1526
1528 unsigned Flags = 0;
1529 if (I->mayReadFromMemory())
1530 Flags |= SkipReadMem;
1531 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1532 // inalloca) across stacksave/stackrestore boundaries.
1533 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1534 Flags |= SkipSideEffect;
1536 Flags |= SkipImplicitControlFlow;
1537 return Flags;
1538}
1539
1540// Returns true if it is safe to reorder an instruction across preceding
1541// instructions in a basic block.
1542static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1543 // Don't reorder a store over a load.
1544 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1545 return false;
1546
1547 // If we have seen an instruction with side effects, it's unsafe to reorder an
1548 // instruction which reads memory or itself has side effects.
1549 if ((Flags & SkipSideEffect) &&
1550 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1551 return false;
1552
1553 // Reordering across an instruction which does not necessarily transfer
1554 // control to the next instruction is speculation.
1556 return false;
1557
1558 // Hoisting of llvm.deoptimize is only legal together with the next return
1559 // instruction, which this pass is not always able to do.
1560 if (auto *CB = dyn_cast<CallBase>(I))
1561 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1562 return false;
1563
1564 // It's also unsafe/illegal to hoist an instruction above its instruction
1565 // operands
1566 BasicBlock *BB = I->getParent();
1567 for (Value *Op : I->operands()) {
1568 if (auto *J = dyn_cast<Instruction>(Op))
1569 if (J->getParent() == BB)
1570 return false;
1571 }
1572
1573 return true;
1574}
1575
1576static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1577
1578/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1579/// instructions \p I1 and \p I2 can and should be hoisted.
1581 const TargetTransformInfo &TTI) {
1582 // If we're going to hoist a call, make sure that the two instructions
1583 // we're commoning/hoisting are both marked with musttail, or neither of
1584 // them is marked as such. Otherwise, we might end up in a situation where
1585 // we hoist from a block where the terminator is a `ret` to a block where
1586 // the terminator is a `br`, and `musttail` calls expect to be followed by
1587 // a return.
1588 auto *C1 = dyn_cast<CallInst>(I1);
1589 auto *C2 = dyn_cast<CallInst>(I2);
1590 if (C1 && C2)
1591 if (C1->isMustTailCall() != C2->isMustTailCall())
1592 return false;
1593
1594 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1595 return false;
1596
1597 // If any of the two call sites has nomerge or convergent attribute, stop
1598 // hoisting.
1599 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1600 if (CB1->cannotMerge() || CB1->isConvergent())
1601 return false;
1602 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1603 if (CB2->cannotMerge() || CB2->isConvergent())
1604 return false;
1605
1606 return true;
1607}
1608
1609/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1610/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1611/// hoistCommonCodeFromSuccessors. e.g. The input:
1612/// I1 DVRs: { x, z },
1613/// OtherInsts: { I2 DVRs: { x, y, z } }
1614/// would result in hoisting only DbgVariableRecord x.
1616 Instruction *TI, Instruction *I1,
1617 SmallVectorImpl<Instruction *> &OtherInsts) {
1618 if (!I1->hasDbgRecords())
1619 return;
1620 using CurrentAndEndIt =
1621 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1622 // Vector of {Current, End} iterators.
1624 Itrs.reserve(OtherInsts.size() + 1);
1625 // Helper lambdas for lock-step checks:
1626 // Return true if this Current == End.
1627 auto atEnd = [](const CurrentAndEndIt &Pair) {
1628 return Pair.first == Pair.second;
1629 };
1630 // Return true if all Current are identical.
1631 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1632 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1634 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1635 });
1636 };
1637
1638 // Collect the iterators.
1639 Itrs.push_back(
1640 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1641 for (Instruction *Other : OtherInsts) {
1642 if (!Other->hasDbgRecords())
1643 return;
1644 Itrs.push_back(
1645 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1646 }
1647
1648 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1649 // the lock-step DbgRecord are identical, hoist all of them to TI.
1650 // This replicates the dbg.* intrinsic behaviour in
1651 // hoistCommonCodeFromSuccessors.
1652 while (none_of(Itrs, atEnd)) {
1653 bool HoistDVRs = allIdentical(Itrs);
1654 for (CurrentAndEndIt &Pair : Itrs) {
1655 // Increment Current iterator now as we may be about to move the
1656 // DbgRecord.
1657 DbgRecord &DR = *Pair.first++;
1658 if (HoistDVRs) {
1659 DR.removeFromParent();
1660 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1661 }
1662 }
1663 }
1664}
1665
1667 const Instruction *I2) {
1668 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1669 return true;
1670
1671 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1672 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1673 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1674 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1675 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1676
1677 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1678 return I1->getOperand(0) == I2->getOperand(1) &&
1679 I1->getOperand(1) == I2->getOperand(0) &&
1680 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1681 }
1682
1683 return false;
1684}
1685
1686/// If the target supports conditional faulting,
1687/// we look for the following pattern:
1688/// \code
1689/// BB:
1690/// ...
1691/// %cond = icmp ult %x, %y
1692/// br i1 %cond, label %TrueBB, label %FalseBB
1693/// FalseBB:
1694/// store i32 1, ptr %q, align 4
1695/// ...
1696/// TrueBB:
1697/// %maskedloadstore = load i32, ptr %b, align 4
1698/// store i32 %maskedloadstore, ptr %p, align 4
1699/// ...
1700/// \endcode
1701///
1702/// and transform it into:
1703///
1704/// \code
1705/// BB:
1706/// ...
1707/// %cond = icmp ult %x, %y
1708/// %maskedloadstore = cload i32, ptr %b, %cond
1709/// cstore i32 %maskedloadstore, ptr %p, %cond
1710/// cstore i32 1, ptr %q, ~%cond
1711/// br i1 %cond, label %TrueBB, label %FalseBB
1712/// FalseBB:
1713/// ...
1714/// TrueBB:
1715/// ...
1716/// \endcode
1717///
1718/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1719/// e.g.
1720///
1721/// \code
1722/// %vcond = bitcast i1 %cond to <1 x i1>
1723/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1724/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1725/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1726/// call void @llvm.masked.store.v1i32.p0
1727/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1728/// %cond.not = xor i1 %cond, true
1729/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1730/// call void @llvm.masked.store.v1i32.p0
1731/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1732/// \endcode
1733///
1734/// So we need to turn hoisted load/store into cload/cstore.
1735///
1736/// \param BI The branch instruction.
1737/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1738/// will be speculated.
1739/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1741 BranchInst *BI,
1742 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1743 std::optional<bool> Invert, Instruction *Sel) {
1744 auto &Context = BI->getParent()->getContext();
1745 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1746 auto *Cond = BI->getOperand(0);
1747 // Construct the condition if needed.
1748 BasicBlock *BB = BI->getParent();
1749 Value *Mask = nullptr;
1750 Value *MaskFalse = nullptr;
1751 Value *MaskTrue = nullptr;
1752 if (Invert.has_value()) {
1753 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1754 Mask = Builder.CreateBitCast(
1755 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1756 VCondTy);
1757 } else {
1758 IRBuilder<> Builder(BI);
1759 MaskFalse = Builder.CreateBitCast(
1760 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1761 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1762 }
1763 auto PeekThroughBitcasts = [](Value *V) {
1764 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1765 V = BitCast->getOperand(0);
1766 return V;
1767 };
1768 for (auto *I : SpeculatedConditionalLoadsStores) {
1769 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1770 if (!Invert.has_value())
1771 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1772 // We currently assume conditional faulting load/store is supported for
1773 // scalar types only when creating new instructions. This can be easily
1774 // extended for vector types in the future.
1775 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1776 auto *Op0 = I->getOperand(0);
1777 CallInst *MaskedLoadStore = nullptr;
1778 if (auto *LI = dyn_cast<LoadInst>(I)) {
1779 // Handle Load.
1780 auto *Ty = I->getType();
1781 PHINode *PN = nullptr;
1782 Value *PassThru = nullptr;
1783 if (Invert.has_value())
1784 for (User *U : I->users()) {
1785 if ((PN = dyn_cast<PHINode>(U))) {
1786 PassThru = Builder.CreateBitCast(
1787 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1788 FixedVectorType::get(Ty, 1));
1789 } else if (auto *Ins = cast<Instruction>(U);
1790 Sel && Ins->getParent() == BB) {
1791 // This happens when store or/and a speculative instruction between
1792 // load and store were hoisted to the BB. Make sure the masked load
1793 // inserted before its use.
1794 // We assume there's one of such use.
1795 Builder.SetInsertPoint(Ins);
1796 }
1797 }
1798 MaskedLoadStore = Builder.CreateMaskedLoad(
1799 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1800 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1801 if (PN)
1802 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1803 I->replaceAllUsesWith(NewLoadStore);
1804 } else {
1805 // Handle Store.
1806 auto *StoredVal = Builder.CreateBitCast(
1807 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1808 MaskedLoadStore = Builder.CreateMaskedStore(
1809 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1810 }
1811 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1812 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1813 //
1814 // !nonnull, !align : Not support pointer type, no need to keep.
1815 // !range: Load type is changed from scalar to vector, but the metadata on
1816 // vector specifies a per-element range, so the semantics stay the
1817 // same. Keep it.
1818 // !annotation: Not impact semantics. Keep it.
1819 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1820 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1821 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1822 // FIXME: DIAssignID is not supported for masked store yet.
1823 // (Verifier::visitDIAssignIDMetadata)
1825 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1826 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1827 });
1828 MaskedLoadStore->copyMetadata(*I);
1829 I->eraseFromParent();
1830 }
1831}
1832
// Returns true when \p I is a simple (non-volatile, non-atomic) load/store
// that the target can lower to a conditional-faulting access, gated by the
// HoistLoadsWithCondFaulting / HoistStoresWithCondFaulting options.
// NOTE(review): this extracted listing is missing the signature line of this
// function and the trailing alignment clause of the return expression.
1834 const TargetTransformInfo &TTI) {
1835 // Do not handle volatile or atomic accesses.
1836 bool IsStore = false;
1837 if (auto *L = dyn_cast<LoadInst>(I)) {
1838 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1839 return false;
1840 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1841 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1842 return false;
1843 IsStore = true;
1844 } else
 // Neither a load nor a store: nothing to speculate here.
1845 return false;
1846
1847 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1848 // That's why we have the alignment limitation.
1849 // FIXME: Update the prototype of the intrinsics?
1850 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1852}
1853
1854/// Hoist any common code in the successor blocks up into the block. This
1855/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1856/// given, only perform hoisting in case all successor blocks contain matching
1857/// instructions only. In that case, all instructions can be hoisted and the
1858/// original branch will be replaced and selects for PHIs are added.
// NOTE(review): one interior line (the shouldHoistCommonInstructions operand
// check inside the all_of below, original line 1996) is missing from this
// extracted listing; the remaining code is kept byte-identical.
1859bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1860 bool AllInstsEqOnly) {
1861 // This does very trivial matching, with limited scanning, to find identical
1862 // instructions in the two blocks. In particular, we don't want to get into
1863 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1864 // such, we currently just scan for obviously identical instructions in an
1865 // identical order, possibly separated by the same number of non-identical
1866 // instructions.
1867 BasicBlock *BB = TI->getParent();
1868 unsigned int SuccSize = succ_size(BB);
1869 if (SuccSize < 2)
1870 return false;
1871
1872 // If either of the blocks has its address taken, then we can't do this fold,
1873 // because the code we'd hoist would no longer run when we jump into the block
1874 // by its address.
1875 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1876 for (auto *Succ : UniqueSuccessors) {
1877 if (Succ->hasAddressTaken())
1878 return false;
1879 // Use getUniquePredecessor instead of getSinglePredecessor to support
1880 // multi-cases successors in switch.
1881 if (Succ->getUniquePredecessor())
1882 continue;
1883 // If Succ has >1 predecessors, continue to check if the Succ contains only
1884 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1885 // can relax the condition based on the assumption that the program would
1886 // never enter Succ and trigger such an UB.
1887 if (isa<UnreachableInst>(*Succ->begin()))
1888 continue;
1889 return false;
1890 }
1891 // The second of pair is a SkipFlags bitmask.
1892 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1893 SmallVector<SuccIterPair, 8> SuccIterPairs;
1894 for (auto *Succ : UniqueSuccessors) {
1895 BasicBlock::iterator SuccItr = Succ->begin();
 // PHI nodes at the top of a successor would need select-merging; bail out.
1896 if (isa<PHINode>(*SuccItr))
1897 return false;
1898 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1899 }
1900
1901 if (AllInstsEqOnly) {
1902 // Check if all instructions in the successor blocks match. This allows
1903 // hoisting all instructions and removing the blocks we are hoisting from,
1904 // so does not add any new instructions.
1905
1906 // Check if sizes and terminators of all successors match.
1907 unsigned Size0 = UniqueSuccessors[0]->size();
1908 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1909 bool AllSame =
1910 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1911 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1912 Succ->size() == Size0;
1913 });
1914 if (!AllSame)
1915 return false;
 // Walk all successors in lockstep from the bottom up, requiring every
 // instruction row to be pairwise identical (up to commutativity).
1916 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1917 while (LRI.isValid()) {
1918 Instruction *I0 = (*LRI)[0];
1919 if (any_of(*LRI, [I0](Instruction *I) {
1920 return !areIdenticalUpToCommutativity(I0, I);
1921 })) {
1922 return false;
1923 }
1924 --LRI;
1925 }
1926 // Now we know that all instructions in all successors can be hoisted. Let
1927 // the loop below handle the hoisting.
1928 }
1929
1930 // Count how many instructions were not hoisted so far. There's a limit on how
1931 // many instructions we skip, serving as a compilation time control as well as
1932 // preventing excessive increase of life ranges.
1933 unsigned NumSkipped = 0;
1934 // If we find an unreachable instruction at the beginning of a basic block, we
1935 // can still hoist instructions from the rest of the basic blocks.
1936 if (SuccIterPairs.size() > 2) {
1937 erase_if(SuccIterPairs,
1938 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1939 if (SuccIterPairs.size() < 2)
1940 return false;
1941 }
1942
1943 bool Changed = false;
1944
1945 for (;;) {
1946 auto *SuccIterPairBegin = SuccIterPairs.begin();
1947 auto &BB1ItrPair = *SuccIterPairBegin++;
1948 auto OtherSuccIterPairRange =
1949 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1950 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1951
1952 Instruction *I1 = &*BB1ItrPair.first;
1953
1954 bool AllInstsAreIdentical = true;
1955 bool HasTerminator = I1->isTerminator();
1956 for (auto &SuccIter : OtherSuccIterRange) {
1957 Instruction *I2 = &*SuccIter;
1958 HasTerminator |= I2->isTerminator();
1959 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1960 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1961 AllInstsAreIdentical = false;
1962 }
1963
1964 SmallVector<Instruction *, 8> OtherInsts;
1965 for (auto &SuccIter : OtherSuccIterRange)
1966 OtherInsts.push_back(&*SuccIter);
1967
1968 // If we are hoisting the terminator instruction, don't move one (making a
1969 // broken BB), instead clone it, and remove BI.
1970 if (HasTerminator) {
1971 // Even if BB, which contains only one unreachable instruction, is ignored
1972 // at the beginning of the loop, we can hoist the terminator instruction.
1973 // If any instructions remain in the block, we cannot hoist terminators.
1974 if (NumSkipped || !AllInstsAreIdentical) {
1975 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1976 return Changed;
1977 }
1978
1979 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1980 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1981 Changed;
1982 }
1983
1984 if (AllInstsAreIdentical) {
1985 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1986 AllInstsAreIdentical =
1987 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1988 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1989 Instruction *I2 = &*Pair.first;
1990 unsigned SkipFlagsBB2 = Pair.second;
1991 // Even if the instructions are identical, it may not
1992 // be safe to hoist them if we have skipped over
1993 // instructions with side effects or their operands
1994 // weren't hoisted.
1995 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1997 });
1998 }
1999
2000 if (AllInstsAreIdentical) {
2001 BB1ItrPair.first++;
2002 // For a normal instruction, we just move one to right before the
2003 // branch, then replace all uses of the other with the first. Finally,
2004 // we remove the now redundant second instruction.
2005 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2006 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2007 // and leave any that were not hoisted behind (by calling moveBefore
2008 // rather than moveBeforePreserving).
2009 I1->moveBefore(TI->getIterator());
2010 for (auto &SuccIter : OtherSuccIterRange) {
2011 Instruction *I2 = &*SuccIter++;
2012 assert(I2 != I1);
2013 if (!I2->use_empty())
2014 I2->replaceAllUsesWith(I1);
 // Merge IR flags/attributes conservatively so I1 is valid for all paths.
2015 I1->andIRFlags(I2);
2016 if (auto *CB = dyn_cast<CallBase>(I1)) {
2017 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2018 assert(Success && "We should not be trying to hoist callbases "
2019 "with non-intersectable attributes");
2020 // For NDEBUG Compile.
2021 (void)Success;
2022 }
2023
2024 combineMetadataForCSE(I1, I2, true);
2025 // I1 and I2 are being combined into a single instruction. Its debug
2026 // location is the merged locations of the original instructions.
2027 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2028 I2->eraseFromParent();
2029 }
2030 if (!Changed)
2031 NumHoistCommonCode += SuccIterPairs.size();
2032 Changed = true;
2033 NumHoistCommonInstrs += SuccIterPairs.size();
2034 } else {
2035 if (NumSkipped >= HoistCommonSkipLimit) {
2036 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2037 return Changed;
2038 }
2039 // We are about to skip over a pair of non-identical instructions. Record
2040 // if any have characteristics that would prevent reordering instructions
2041 // across them.
2042 for (auto &SuccIterPair : SuccIterPairs) {
2043 Instruction *I = &*SuccIterPair.first++;
2044 SuccIterPair.second |= skippedInstrFlags(I);
2045 }
2046 ++NumSkipped;
2047 }
2048 }
2049}
2050
// NOTE(review): several interior lines are missing from this extracted
// listing (the second clause of the passingValueIsAlwaysUndefined check, the
// SmallVector<DebugLoc> Locs declaration, the SelectInst creation line, the
// DominatorTree updates vector declaration, and the old-terminator erase
// call); the remaining code is kept byte-identical.
2051bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2052 Instruction *TI, Instruction *I1,
2053 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2054 ArrayRef<BasicBlock *> UniqueSuccessors) {
2055
2056 auto *BI = dyn_cast<BranchInst>(TI);
2057
2058 bool Changed = false;
2059 BasicBlock *TIParent = TI->getParent();
2060 BasicBlock *BB1 = I1->getParent();
2061
2062 // Use only for an if statement.
2063 auto *I2 = *OtherSuccTIs.begin();
2064 auto *BB2 = I2->getParent();
2065 if (BI) {
2066 assert(OtherSuccTIs.size() == 1);
2067 assert(BI->getSuccessor(0) == I1->getParent());
2068 assert(BI->getSuccessor(1) == I2->getParent());
2069 }
2070
2071 // In the case of an if statement, we try to hoist an invoke.
2072 // FIXME: Can we define a safety predicate for CallBr?
2073 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2074 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2075 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2076 return false;
2077
2078 // TODO: callbr hoisting currently disabled pending further study.
2079 if (isa<CallBrInst>(I1))
2080 return false;
2081
2082 for (BasicBlock *Succ : successors(BB1)) {
2083 for (PHINode &PN : Succ->phis()) {
2084 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2085 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2086 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2087 if (BB1V == BB2V)
2088 continue;
2089
2090 // In the case of an if statement, check for
2091 // passingValueIsAlwaysUndefined here because we would rather eliminate
2092 // undefined control flow than converting it to a select.
2093 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2095 return false;
2096 }
2097 }
2098 }
2099
2100 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2101 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2102 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2103 // Clone the terminator and hoist it into the pred, without any debug info.
2104 Instruction *NT = I1->clone();
2105 NT->insertInto(TIParent, TI->getIterator());
2106 if (!NT->getType()->isVoidTy()) {
2107 I1->replaceAllUsesWith(NT);
2108 for (Instruction *OtherSuccTI : OtherSuccTIs)
2109 OtherSuccTI->replaceAllUsesWith(NT);
2110 NT->takeName(I1);
2111 }
2112 Changed = true;
2113 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2114
2115 // Ensure terminator gets a debug location, even an unknown one, in case
2116 // it involves inlinable calls.
2118 Locs.push_back(I1->getDebugLoc());
2119 for (auto *OtherSuccTI : OtherSuccTIs)
2120 Locs.push_back(OtherSuccTI->getDebugLoc());
2121 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2122
2123 // PHIs created below will adopt NT's merged DebugLoc.
2124 IRBuilder<NoFolder> Builder(NT);
2125
2126 // In the case of an if statement, hoisting one of the terminators from our
2127 // successor is a great thing. Unfortunately, the successors of the if/else
2128 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2129 // must agree for all PHI nodes, so we insert select instruction to compute
2130 // the final result.
2131 if (BI) {
 // Cache selects per (BB1V, BB2V) pair so identical disagreements across
 // multiple PHIs share one select.
2132 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2133 for (BasicBlock *Succ : successors(BB1)) {
2134 for (PHINode &PN : Succ->phis()) {
2135 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2136 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2137 if (BB1V == BB2V)
2138 continue;
2139
2140 // These values do not agree. Insert a select instruction before NT
2141 // that determines the right value.
2142 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2143 if (!SI) {
2144 // Propagate fast-math-flags from phi node to its replacement select.
2146 BI->getCondition(), BB1V, BB2V,
2147 isa<FPMathOperator>(PN) ? &PN : nullptr,
2148 BB1V->getName() + "." + BB2V->getName(), BI));
2149 }
2150
2151 // Make the PHI node use the select for all incoming values for BB1/BB2
2152 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2153 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2154 PN.setIncomingValue(i, SI);
2155 }
2156 }
2157 }
2158
2160
2161 // Update any PHI nodes in our new successors.
2162 for (BasicBlock *Succ : successors(BB1)) {
2163 addPredecessorToBlock(Succ, TIParent, BB1);
2164 if (DTU)
2165 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2166 }
2167
2168 if (DTU) {
2169 // TI might be a switch with multi-cases destination, so we need to care for
2170 // the duplication of successors.
2171 for (BasicBlock *Succ : UniqueSuccessors)
2172 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2173 }
2174
2176 if (DTU)
2177 DTU->applyUpdates(Updates);
2178 return Changed;
2179}
2180
2181// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2182// into variables.
// Returns whether replacing operand \p OpIdx of \p I with a variable (via a
// PHI) is expected to be cheap. NOTE(review): the signature line of this
// function is missing from this extracted listing.
2184 int OpIdx) {
2185 // Divide/Remainder by constant is typically much cheaper than by variable.
2186 if (I->isIntDivRem())
2187 return OpIdx != 1;
 // Intrinsic operands (e.g. memcpy sizes) often must stay constant.
2188 return !isa<IntrinsicInst>(I);
2189}
2190
2191// All instructions in Insts belong to different blocks that all unconditionally
2192// branch to a common successor. Analyze each instruction and return true if it
2193// would be possible to sink them into their successor, creating one common
2194// instruction instead. For every value that would be required to be provided by
2195// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the first two lines of this function's signature and the
// canReplaceOperandWithVariable GEP check near the bottom are missing from
// this extracted listing; the remaining code is kept byte-identical.
2198 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2199 // Prune out obviously bad instructions to move. Each instruction must have
2200 // the same number of uses, and we check later that the uses are consistent.
2201 std::optional<unsigned> NumUses;
2202 for (auto *I : Insts) {
2203 // These instructions may change or break semantics if moved.
2204 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2205 I->getType()->isTokenTy())
2206 return false;
2207
2208 // Do not try to sink an instruction in an infinite loop - it can cause
2209 // this algorithm to infinite loop.
2210 if (I->getParent()->getSingleSuccessor() == I->getParent())
2211 return false;
2212
2213 // Conservatively return false if I is an inline-asm instruction. Sinking
2214 // and merging inline-asm instructions can potentially create arguments
2215 // that cannot satisfy the inline-asm constraints.
2216 // If the instruction has nomerge or convergent attribute, return false.
2217 if (const auto *C = dyn_cast<CallBase>(I))
2218 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2219 return false;
2220
2221 if (!NumUses)
2222 NumUses = I->getNumUses();
2223 else if (NumUses != I->getNumUses())
2224 return false;
2225 }
2226
2227 const Instruction *I0 = Insts.front();
2228 const auto I0MMRA = MMRAMetadata(*I0);
2229 for (auto *I : Insts) {
2230 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2231 return false;
2232
2233 // Treat MMRAs conservatively. This pass can be quite aggressive and
2234 // could drop a lot of MMRAs otherwise.
2235 if (MMRAMetadata(*I) != I0MMRA)
2236 return false;
2237 }
2238
2239 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2240 // then the other phi operands must match the instructions from Insts. This
2241 // also has to hold true for any phi nodes that would be created as a result
2242 // of sinking. Both of these cases are represented by PhiOperands.
2243 for (const Use &U : I0->uses()) {
2244 auto It = PHIOperands.find(&U);
2245 if (It == PHIOperands.end())
2246 // There may be uses in other blocks when sinking into a loop header.
2247 return false;
2248 if (!equal(Insts, It->second))
2249 return false;
2250 }
2251
2252 // For calls to be sinkable, they must all be indirect, or have same callee.
2253 // I.e. if we have two direct calls to different callees, we don't want to
2254 // turn that into an indirect call. Likewise, if we have an indirect call,
2255 // and a direct call, we don't actually want to have a single indirect call.
2256 if (isa<CallBase>(I0)) {
2257 auto IsIndirectCall = [](const Instruction *I) {
2258 return cast<CallBase>(I)->isIndirectCall();
2259 };
2260 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2261 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2262 if (HaveIndirectCalls) {
2263 if (!AllCallsAreIndirect)
2264 return false;
2265 } else {
2266 // All callees must be identical.
2267 Value *Callee = nullptr;
2268 for (const Instruction *I : Insts) {
2269 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2270 if (!Callee)
2271 Callee = CurrCallee;
2272 else if (Callee != CurrCallee)
2273 return false;
2274 }
2275 }
2276 }
2277
 // Any operand that differs across Insts must be PHI-mergeable; record the
 // per-block values in PHIOperands for the caller.
2278 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2279 Value *Op = I0->getOperand(OI);
2280 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2281 assert(I->getNumOperands() == I0->getNumOperands());
2282 return I->getOperand(OI) == I0->getOperand(OI);
2283 };
2284 if (!all_of(Insts, SameAsI0)) {
2287 // We can't create a PHI from this GEP.
2288 return false;
2289 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2290 for (auto *I : Insts)
2291 Ops.push_back(I->getOperand(OI));
2292 }
2293 }
2294 return true;
2295}
2296
2297// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2298// instruction of every block in Blocks to their common successor, commoning
2299// into one instruction.
// NOTE(review): the signature line of this function and the
// SmallVector<Instruction *> Insts declaration are missing from this
// extracted listing; the remaining code is kept byte-identical.
2301 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2302
2303 // canSinkInstructions returning true guarantees that every block has at
2304 // least one non-terminator instruction.
2306 for (auto *BB : Blocks) {
2307 Instruction *I = BB->getTerminator();
2308 I = I->getPrevNode();
2309 Insts.push_back(I);
2310 }
2311
2312 // We don't need to do any more checking here; canSinkInstructions should
2313 // have done it all for us.
2314 SmallVector<Value*, 4> NewOperands;
2315 Instruction *I0 = Insts.front();
2316 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2317 // This check is different to that in canSinkInstructions. There, we
2318 // cared about the global view once simplifycfg (and instcombine) have
2319 // completed - it takes into account PHIs that become trivially
2320 // simplifiable. However here we need a more local view; if an operand
2321 // differs we create a PHI and rely on instcombine to clean up the very
2322 // small mess we may make.
2323 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2324 return I->getOperand(O) != I0->getOperand(O);
2325 });
2326 if (!NeedPHI) {
2327 NewOperands.push_back(I0->getOperand(O));
2328 continue;
2329 }
2330
2331 // Create a new PHI in the successor block and populate it.
2332 auto *Op = I0->getOperand(O);
2333 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2334 auto *PN =
2335 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2336 PN->insertBefore(BBEnd->begin());
2337 for (auto *I : Insts)
2338 PN->addIncoming(I->getOperand(O), I->getParent());
2339 NewOperands.push_back(PN);
2340 }
2341
2342 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2343 // and move it to the start of the successor block.
2344 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2345 I0->getOperandUse(O).set(NewOperands[O]);
2346
2347 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2348
2349 // Update metadata and IR flags, and merge debug locations.
2350 for (auto *I : Insts)
2351 if (I != I0) {
2352 // The debug location for the "common" instruction is the merged locations
2353 // of all the commoned instructions. We start with the original location
2354 // of the "common" instruction and iteratively merge each location in the
2355 // loop below.
2356 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2357 // However, since N-way merge for CallInst is rare, we use the simplified
2358 // API instead of the complex API for N-way merge.
2359 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2360 combineMetadataForCSE(I0, I, true);
2361 I0->andIRFlags(I);
2362 if (auto *CB = dyn_cast<CallBase>(I0)) {
2363 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2364 assert(Success && "We should not be trying to sink callbases "
2365 "with non-intersectable attributes");
2366 // For NDEBUG Compile.
2367 (void)Success;
2368 }
2369 }
2370
2371 for (User *U : make_early_inc_range(I0->users())) {
2372 // canSinkLastInstruction checked that all instructions are only used by
2373 // phi nodes in a way that allows replacing the phi node with the common
2374 // instruction.
2375 auto *PN = cast<PHINode>(U);
2376 PN->replaceAllUsesWith(I0);
2377 PN->eraseFromParent();
2378 }
2379
2380 // Finally nuke all instructions apart from the common instruction.
2381 for (auto *I : Insts) {
2382 if (I == I0)
2383 continue;
2384 // The remaining uses are debug users, replace those with the common inst.
2385 // In most (all?) cases this just introduces a use-before-def.
2386 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2387 I->replaceAllUsesWith(I0);
2388 I->eraseFromParent();
2389 }
2390}
2391
2392/// Check whether BB's predecessors end with unconditional branches. If it is
2393/// true, sink any common code from the predecessors to BB.
2395 DomTreeUpdater *DTU) {
2396 // We support two situations:
2397 // (1) all incoming arcs are unconditional
2398 // (2) there are non-unconditional incoming arcs
2399 //
2400 // (2) is very common in switch defaults and
2401 // else-if patterns;
2402 //
2403 // if (a) f(1);
2404 // else if (b) f(2);
2405 //
2406 // produces:
2407 //
2408 // [if]
2409 // / \
2410 // [f(1)] [if]
2411 // | | \
2412 // | | |
2413 // | [f(2)]|
2414 // \ | /
2415 // [ end ]
2416 //
2417 // [end] has two unconditional predecessor arcs and one conditional. The
2418 // conditional refers to the implicit empty 'else' arc. This conditional
2419 // arc can also be caused by an empty default block in a switch.
2420 //
2421 // In this case, we attempt to sink code from all *unconditional* arcs.
2422 // If we can sink instructions from these arcs (determined during the scan
2423 // phase below) we insert a common successor for all unconditional arcs and
2424 // connect that to [end], to enable sinking:
2425 //
2426 // [if]
2427 // / \
2428 // [x(1)] [if]
2429 // | | \
2430 // | | \
2431 // | [x(2)] |
2432 // \ / |
2433 // [sink.split] |
2434 // \ /
2435 // [ end ]
2436 //
2437 SmallVector<BasicBlock*,4> UnconditionalPreds;
2438 bool HaveNonUnconditionalPredecessors = false;
2439 for (auto *PredBB : predecessors(BB)) {
2440 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2441 if (PredBr && PredBr->isUnconditional())
2442 UnconditionalPreds.push_back(PredBB);
2443 else
2444 HaveNonUnconditionalPredecessors = true;
2445 }
2446 if (UnconditionalPreds.size() < 2)
2447 return false;
2448
2449 // We take a two-step approach to tail sinking. First we scan from the end of
2450 // each block upwards in lockstep. If the n'th instruction from the end of each
2451 // block can be sunk, those instructions are added to ValuesToSink and we
2452 // carry on. If we can sink an instruction but need to PHI-merge some operands
2453 // (because they're not identical in each instruction) we add these to
2454 // PHIOperands.
2455 // We prepopulate PHIOperands with the phis that already exist in BB.
2457 for (PHINode &PN : BB->phis()) {
2459 for (const Use &U : PN.incoming_values())
2460 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2461 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2462 for (BasicBlock *Pred : UnconditionalPreds)
2463 Ops.push_back(*IncomingVals[Pred]);
2464 }
2465
2466 int ScanIdx = 0;
2467 SmallPtrSet<Value*,4> InstructionsToSink;
2468 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2469 while (LRI.isValid() &&
2470 canSinkInstructions(*LRI, PHIOperands)) {
2471 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2472 << "\n");
2473 InstructionsToSink.insert_range(*LRI);
2474 ++ScanIdx;
2475 --LRI;
2476 }
2477
2478 // If no instructions can be sunk, early-return.
2479 if (ScanIdx == 0)
2480 return false;
2481
2482 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2483
2484 if (!followedByDeoptOrUnreachable) {
2485 // Check whether this is the pointer operand of a load/store.
2486 auto IsMemOperand = [](Use &U) {
2487 auto *I = cast<Instruction>(U.getUser());
2488 if (isa<LoadInst>(I))
2489 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2490 if (isa<StoreInst>(I))
2491 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2492 return false;
2493 };
2494
2495 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2496 // actually sink before encountering instruction that is unprofitable to
2497 // sink?
2498 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2499 unsigned NumPHIInsts = 0;
2500 for (Use &U : (*LRI)[0]->operands()) {
2501 auto It = PHIOperands.find(&U);
2502 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2503 return InstructionsToSink.contains(V);
2504 })) {
2505 ++NumPHIInsts;
2506 // Do not separate a load/store from the gep producing the address.
2507 // The gep can likely be folded into the load/store as an addressing
2508 // mode. Additionally, a load of a gep is easier to analyze than a
2509 // load of a phi.
2510 if (IsMemOperand(U) &&
2511 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2512 return false;
2513 // FIXME: this check is overly optimistic. We may end up not sinking
2514 // said instruction, due to the very same profitability check.
2515 // See @creating_too_many_phis in sink-common-code.ll.
2516 }
2517 }
2518 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2519 return NumPHIInsts <= 1;
2520 };
2521
2522 // We've determined that we are going to sink last ScanIdx instructions,
2523 // and recorded them in InstructionsToSink. Now, some instructions may be
2524 // unprofitable to sink. But that determination depends on the instructions
2525 // that we are going to sink.
2526
2527 // First, forward scan: find the first instruction unprofitable to sink,
2528 // recording all the ones that are profitable to sink.
2529 // FIXME: would it be better, after we detect that not all are profitable.
2530 // to either record the profitable ones, or erase the unprofitable ones?
2531 // Maybe we need to choose (at runtime) the one that will touch least
2532 // instrs?
2533 LRI.reset();
2534 int Idx = 0;
2535 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2536 while (Idx < ScanIdx) {
2537 if (!ProfitableToSinkInstruction(LRI)) {
2538 // Too many PHIs would be created.
2539 LLVM_DEBUG(
2540 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2541 break;
2542 }
2543 InstructionsProfitableToSink.insert_range(*LRI);
2544 --LRI;
2545 ++Idx;
2546 }
2547
2548 // If no instructions can be sunk, early-return.
2549 if (Idx == 0)
2550 return false;
2551
2552 // Did we determine that (only) some instructions are unprofitable to sink?
2553 if (Idx < ScanIdx) {
2554 // Okay, some instructions are unprofitable.
2555 ScanIdx = Idx;
2556 InstructionsToSink = InstructionsProfitableToSink;
2557
2558 // But, that may make other instructions unprofitable, too.
2559 // So, do a backward scan, do any earlier instructions become
2560 // unprofitable?
2561 assert(
2562 !ProfitableToSinkInstruction(LRI) &&
2563 "We already know that the last instruction is unprofitable to sink");
2564 ++LRI;
2565 --Idx;
2566 while (Idx >= 0) {
2567 // If we detect that an instruction becomes unprofitable to sink,
2568 // all earlier instructions won't be sunk either,
2569 // so preemptively keep InstructionsProfitableToSink in sync.
2570 // FIXME: is this the most performant approach?
2571 for (auto *I : *LRI)
2572 InstructionsProfitableToSink.erase(I);
2573 if (!ProfitableToSinkInstruction(LRI)) {
2574 // Everything starting with this instruction won't be sunk.
2575 ScanIdx = Idx;
2576 InstructionsToSink = InstructionsProfitableToSink;
2577 }
2578 ++LRI;
2579 --Idx;
2580 }
2581 }
2582
2583 // If no instructions can be sunk, early-return.
2584 if (ScanIdx == 0)
2585 return false;
2586 }
2587
2588 bool Changed = false;
2589
2590 if (HaveNonUnconditionalPredecessors) {
2591 if (!followedByDeoptOrUnreachable) {
2592 // It is always legal to sink common instructions from unconditional
2593 // predecessors. However, if not all predecessors are unconditional,
2594 // this transformation might be pessimizing. So as a rule of thumb,
2595 // don't do it unless we'd sink at least one non-speculatable instruction.
2596 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2597 LRI.reset();
2598 int Idx = 0;
2599 bool Profitable = false;
2600 while (Idx < ScanIdx) {
2601 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2602 Profitable = true;
2603 break;
2604 }
2605 --LRI;
2606 ++Idx;
2607 }
2608 if (!Profitable)
2609 return false;
2610 }
2611
2612 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2613 // We have a conditional edge and we're going to sink some instructions.
2614 // Insert a new block postdominating all blocks we're going to sink from.
2615 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2616 // Edges couldn't be split.
2617 return false;
2618 Changed = true;
2619 }
2620
2621 // Now that we've analyzed all potential sinking candidates, perform the
2622 // actual sink. We iteratively sink the last non-terminator of the source
2623 // blocks into their common successor unless doing so would require too
2624 // many PHI instructions to be generated (currently only one PHI is allowed
2625 // per sunk instruction).
2626 //
2627 // We can use InstructionsToSink to discount values needing PHI-merging that will
2628 // actually be sunk in a later iteration. This allows us to be more
2629 // aggressive in what we sink. This does allow a false positive where we
2630 // sink presuming a later value will also be sunk, but stop half way through
2631 // and never actually sink it which means we produce more PHIs than intended.
2632 // This is unlikely in practice though.
2633 int SinkIdx = 0;
2634 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2635 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2636 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2637 << "\n");
2638
2639 // Because we've sunk every instruction in turn, the current instruction to
2640 // sink is always at index 0.
2641 LRI.reset();
2642
2643 sinkLastInstruction(UnconditionalPreds);
2644 NumSinkCommonInstrs++;
2645 Changed = true;
2646 }
2647 if (SinkIdx != 0)
2648 ++NumSinkCommonCode;
2649 return Changed;
2650}
2651
2652namespace {
2653
2654struct CompatibleSets {
2655 using SetTy = SmallVector<InvokeInst *, 2>;
2656
2658
2659 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2660
2661 SetTy &getCompatibleSet(InvokeInst *II);
2662
2663 void insert(InvokeInst *II);
2664};
2665
2666CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2667 // Perform a linear scan over all the existing sets, see if the new `invoke`
2668 // is compatible with any particular set. Since we know that all the `invokes`
2669 // within a set are compatible, only check the first `invoke` in each set.
2670 // WARNING: at worst, this has quadratic complexity.
2671 for (CompatibleSets::SetTy &Set : Sets) {
2672 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2673 return Set;
2674 }
2675
2676 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2677 return Sets.emplace_back();
2678}
2679
2680void CompatibleSets::insert(InvokeInst *II) {
2681 getCompatibleSet(II).emplace_back(II);
2682}
2683
2684bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2685 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2686
2687 // Can we theoretically merge these `invoke`s?
2688 auto IsIllegalToMerge = [](InvokeInst *II) {
2689 return II->cannotMerge() || II->isInlineAsm();
2690 };
2691 if (any_of(Invokes, IsIllegalToMerge))
2692 return false;
2693
2694 // Either both `invoke`s must be direct,
2695 // or both `invoke`s must be indirect.
2696 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2697 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2698 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2699 if (HaveIndirectCalls) {
2700 if (!AllCallsAreIndirect)
2701 return false;
2702 } else {
2703 // All callees must be identical.
2704 Value *Callee = nullptr;
2705 for (InvokeInst *II : Invokes) {
2706 Value *CurrCallee = II->getCalledOperand();
2707 assert(CurrCallee && "There is always a called operand.");
2708 if (!Callee)
2709 Callee = CurrCallee;
2710 else if (Callee != CurrCallee)
2711 return false;
2712 }
2713 }
2714
2715 // Either both `invoke`s must not have a normal destination,
2716 // or both `invoke`s must have a normal destination,
2717 auto HasNormalDest = [](InvokeInst *II) {
2718 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2719 };
2720 if (any_of(Invokes, HasNormalDest)) {
2721 // Do not merge `invoke` that does not have a normal destination with one
2722 // that does have a normal destination, even though doing so would be legal.
2723 if (!all_of(Invokes, HasNormalDest))
2724 return false;
2725
2726 // All normal destinations must be identical.
2727 BasicBlock *NormalBB = nullptr;
2728 for (InvokeInst *II : Invokes) {
2729 BasicBlock *CurrNormalBB = II->getNormalDest();
2730 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2731 if (!NormalBB)
2732 NormalBB = CurrNormalBB;
2733 else if (NormalBB != CurrNormalBB)
2734 return false;
2735 }
2736
2737 // In the normal destination, the incoming values for these two `invoke`s
2738 // must be compatible.
2739 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2741 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2742 &EquivalenceSet))
2743 return false;
2744 }
2745
2746#ifndef NDEBUG
2747 // All unwind destinations must be identical.
2748 // We know that because we have started from said unwind destination.
2749 BasicBlock *UnwindBB = nullptr;
2750 for (InvokeInst *II : Invokes) {
2751 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2752 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2753 if (!UnwindBB)
2754 UnwindBB = CurrUnwindBB;
2755 else
2756 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2757 }
2758#endif
2759
2760 // In the unwind destination, the incoming values for these two `invoke`s
2761 // must be compatible.
2763 Invokes.front()->getUnwindDest(),
2764 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2765 return false;
2766
2767 // Ignoring arguments, these `invoke`s must be identical,
2768 // including operand bundles.
2769 const InvokeInst *II0 = Invokes.front();
2770 for (auto *II : Invokes.drop_front())
2771 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2772 return false;
2773
2774 // Can we theoretically form the data operands for the merged `invoke`?
2775 auto IsIllegalToMergeArguments = [](auto Ops) {
2776 Use &U0 = std::get<0>(Ops);
2777 Use &U1 = std::get<1>(Ops);
2778 if (U0 == U1)
2779 return false;
2781 U0.getOperandNo());
2782 };
2783 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2784 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2785 IsIllegalToMergeArguments))
2786 return false;
2787
2788 return true;
2789}
2790
2791} // namespace
2792
2793// Merge all invokes in the provided set, all of which are compatible
2794// as per the `CompatibleSets::shouldBelongToSameSet()`.
2796 DomTreeUpdater *DTU) {
2797 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2798
2800 if (DTU)
2801 Updates.reserve(2 + 3 * Invokes.size());
2802
2803 bool HasNormalDest =
2804 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2805
2806 // Clone one of the invokes into a new basic block.
2807 // Since they are all compatible, it doesn't matter which invoke is cloned.
2808 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2809 InvokeInst *II0 = Invokes.front();
2810 BasicBlock *II0BB = II0->getParent();
2811 BasicBlock *InsertBeforeBlock =
2812 II0->getParent()->getIterator()->getNextNode();
2813 Function *Func = II0BB->getParent();
2814 LLVMContext &Ctx = II0->getContext();
2815
2816 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2817 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2818
2819 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2820 // NOTE: all invokes have the same attributes, so no handling needed.
2821 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2822
2823 if (!HasNormalDest) {
2824 // This set does not have a normal destination,
2825 // so just form a new block with unreachable terminator.
2826 BasicBlock *MergedNormalDest = BasicBlock::Create(
2827 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2828 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2829 UI->setDebugLoc(DebugLoc::getTemporary());
2830 MergedInvoke->setNormalDest(MergedNormalDest);
2831 }
2832
2833 // The unwind destination, however, remainds identical for all invokes here.
2834
2835 return MergedInvoke;
2836 }();
2837
2838 if (DTU) {
2839 // Predecessor blocks that contained these invokes will now branch to
2840 // the new block that contains the merged invoke, ...
2841 for (InvokeInst *II : Invokes)
2842 Updates.push_back(
2843 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2844
2845 // ... which has the new `unreachable` block as normal destination,
2846 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2847 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2848 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2849 SuccBBOfMergedInvoke});
2850
2851 // Since predecessor blocks now unconditionally branch to a new block,
2852 // they no longer branch to their original successors.
2853 for (InvokeInst *II : Invokes)
2854 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2855 Updates.push_back(
2856 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2857 }
2858
2859 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2860
2861 // Form the merged operands for the merged invoke.
2862 for (Use &U : MergedInvoke->operands()) {
2863 // Only PHI together the indirect callees and data operands.
2864 if (MergedInvoke->isCallee(&U)) {
2865 if (!IsIndirectCall)
2866 continue;
2867 } else if (!MergedInvoke->isDataOperand(&U))
2868 continue;
2869
2870 // Don't create trivial PHI's with all-identical incoming values.
2871 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2872 return II->getOperand(U.getOperandNo()) != U.get();
2873 });
2874 if (!NeedPHI)
2875 continue;
2876
2877 // Form a PHI out of all the data ops under this index.
2879 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2880 for (InvokeInst *II : Invokes)
2881 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2882
2883 U.set(PN);
2884 }
2885
2886 // We've ensured that each PHI node has compatible (identical) incoming values
2887 // when coming from each of the `invoke`s in the current merge set,
2888 // so update the PHI nodes accordingly.
2889 for (BasicBlock *Succ : successors(MergedInvoke))
2890 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2891 /*ExistPred=*/Invokes.front()->getParent());
2892
2893 // And finally, replace the original `invoke`s with an unconditional branch
2894 // to the block with the merged `invoke`. Also, give that merged `invoke`
2895 // the merged debugloc of all the original `invoke`s.
2896 DILocation *MergedDebugLoc = nullptr;
2897 for (InvokeInst *II : Invokes) {
2898 // Compute the debug location common to all the original `invoke`s.
2899 if (!MergedDebugLoc)
2900 MergedDebugLoc = II->getDebugLoc();
2901 else
2902 MergedDebugLoc =
2903 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2904
2905 // And replace the old `invoke` with an unconditionally branch
2906 // to the block with the merged `invoke`.
2907 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2908 OrigSuccBB->removePredecessor(II->getParent());
2909 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2910 // The unconditional branch is part of the replacement for the original
2911 // invoke, so should use its DebugLoc.
2912 BI->setDebugLoc(II->getDebugLoc());
2913 bool Success = MergedInvoke->tryIntersectAttributes(II);
2914 assert(Success && "Merged invokes with incompatible attributes");
2915 // For NDEBUG Compile
2916 (void)Success;
2917 II->replaceAllUsesWith(MergedInvoke);
2918 II->eraseFromParent();
2919 ++NumInvokesMerged;
2920 }
2921 MergedInvoke->setDebugLoc(MergedDebugLoc);
2922 ++NumInvokeSetsFormed;
2923
2924 if (DTU)
2925 DTU->applyUpdates(Updates);
2926}
2927
2928/// If this block is a `landingpad` exception handling block, categorize all
2929/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2930/// being "mergeable" together, and then merge invokes in each set together.
2931///
2932/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2933/// [...] [...]
2934/// | |
2935/// [invoke0] [invoke1]
2936/// / \ / \
2937/// [cont0] [landingpad] [cont1]
2938/// to:
2939/// [...] [...]
2940/// \ /
2941/// [invoke]
2942/// / \
2943/// [cont] [landingpad]
2944///
2945/// But of course we can only do that if the invokes share the `landingpad`,
2946/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2947/// and the invoked functions are "compatible".
2950 return false;
2951
2952 bool Changed = false;
2953
2954 // FIXME: generalize to all exception handling blocks?
2955 if (!BB->isLandingPad())
2956 return Changed;
2957
2958 CompatibleSets Grouper;
2959
2960 // Record all the predecessors of this `landingpad`. As per verifier,
2961 // the only allowed predecessor is the unwind edge of an `invoke`.
2962 // We want to group "compatible" `invokes` into the same set to be merged.
2963 for (BasicBlock *PredBB : predecessors(BB))
2964 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2965
2966 // And now, merge `invoke`s that were grouped togeter.
2967 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2968 if (Invokes.size() < 2)
2969 continue;
2970 Changed = true;
2971 mergeCompatibleInvokesImpl(Invokes, DTU);
2972 }
2973
2974 return Changed;
2975}
2976
2977namespace {
2978/// Track ephemeral values, which should be ignored for cost-modelling
2979/// purposes. Requires walking instructions in reverse order.
2980class EphemeralValueTracker {
2981 SmallPtrSet<const Instruction *, 32> EphValues;
2982
2983 bool isEphemeral(const Instruction *I) {
2984 if (isa<AssumeInst>(I))
2985 return true;
2986 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2987 all_of(I->users(), [&](const User *U) {
2988 return EphValues.count(cast<Instruction>(U));
2989 });
2990 }
2991
2992public:
2993 bool track(const Instruction *I) {
2994 if (isEphemeral(I)) {
2995 EphValues.insert(I);
2996 return true;
2997 }
2998 return false;
2999 }
3000
3001 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3002};
3003} // namespace
3004
3005/// Determine if we can hoist sink a sole store instruction out of a
3006/// conditional block.
3007///
3008/// We are looking for code like the following:
3009/// BrBB:
3010/// store i32 %add, i32* %arrayidx2
3011/// ... // No other stores or function calls (we could be calling a memory
3012/// ... // function).
3013/// %cmp = icmp ult %x, %y
3014/// br i1 %cmp, label %EndBB, label %ThenBB
3015/// ThenBB:
3016/// store i32 %add5, i32* %arrayidx2
3017/// br label EndBB
3018/// EndBB:
3019/// ...
3020/// We are going to transform this into:
3021/// BrBB:
3022/// store i32 %add, i32* %arrayidx2
3023/// ... //
3024/// %cmp = icmp ult %x, %y
3025/// %add.add5 = select i1 %cmp, i32 %add, %add5
3026/// store i32 %add.add5, i32* %arrayidx2
3027/// ...
3028///
3029/// \return The pointer to the value of the previous store if the store can be
3030/// hoisted into the predecessor block. 0 otherwise.
3032 BasicBlock *StoreBB, BasicBlock *EndBB) {
3033 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3034 if (!StoreToHoist)
3035 return nullptr;
3036
3037 // Volatile or atomic.
3038 if (!StoreToHoist->isSimple())
3039 return nullptr;
3040
3041 Value *StorePtr = StoreToHoist->getPointerOperand();
3042 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3043
3044 // Look for a store to the same pointer in BrBB.
3045 unsigned MaxNumInstToLookAt = 9;
3046 // Skip pseudo probe intrinsic calls which are not really killing any memory
3047 // accesses.
3048 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3049 if (!MaxNumInstToLookAt)
3050 break;
3051 --MaxNumInstToLookAt;
3052
3053 // Could be calling an instruction that affects memory like free().
3054 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3055 return nullptr;
3056
3057 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3058 // Found the previous store to same location and type. Make sure it is
3059 // simple, to avoid introducing a spurious non-atomic write after an
3060 // atomic write.
3061 if (SI->getPointerOperand() == StorePtr &&
3062 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3063 SI->getAlign() >= StoreToHoist->getAlign())
3064 // Found the previous store, return its value operand.
3065 return SI->getValueOperand();
3066 return nullptr; // Unknown store.
3067 }
3068
3069 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3070 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3071 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3072 Value *Obj = getUnderlyingObject(StorePtr);
3073 bool ExplicitlyDereferenceableOnly;
3074 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3076 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3078 (!ExplicitlyDereferenceableOnly ||
3079 isDereferenceablePointer(StorePtr, StoreTy,
3080 LI->getDataLayout()))) {
3081 // Found a previous load, return it.
3082 return LI;
3083 }
3084 }
3085 // The load didn't work out, but we may still find a store.
3086 }
3087 }
3088
3089 return nullptr;
3090}
3091
3092/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3093/// converted to selects.
3095 BasicBlock *EndBB,
3096 unsigned &SpeculatedInstructions,
3097 InstructionCost &Cost,
3098 const TargetTransformInfo &TTI) {
3100 BB->getParent()->hasMinSize()
3103
3104 bool HaveRewritablePHIs = false;
3105 for (PHINode &PN : EndBB->phis()) {
3106 Value *OrigV = PN.getIncomingValueForBlock(BB);
3107 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3108
3109 // FIXME: Try to remove some of the duplication with
3110 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3111 if (ThenV == OrigV)
3112 continue;
3113
3114 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3115 CmpInst::makeCmpResultType(PN.getType()),
3117
3118 // Don't convert to selects if we could remove undefined behavior instead.
3119 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3121 return false;
3122
3123 HaveRewritablePHIs = true;
3124 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3125 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3126 if (!OrigCE && !ThenCE)
3127 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3128
3129 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3130 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3131 InstructionCost MaxCost =
3133 if (OrigCost + ThenCost > MaxCost)
3134 return false;
3135
3136 // Account for the cost of an unfolded ConstantExpr which could end up
3137 // getting expanded into Instructions.
3138 // FIXME: This doesn't account for how many operations are combined in the
3139 // constant expression.
3140 ++SpeculatedInstructions;
3141 if (SpeculatedInstructions > 1)
3142 return false;
3143 }
3144
3145 return HaveRewritablePHIs;
3146}
3147
3149 std::optional<bool> Invert,
3150 const TargetTransformInfo &TTI) {
3151 // If the branch is non-unpredictable, and is predicted to *not* branch to
3152 // the `then` block, then avoid speculating it.
3153 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3154 return true;
3155
3156 uint64_t TWeight, FWeight;
3157 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3158 return true;
3159
3160 if (!Invert.has_value())
3161 return false;
3162
3163 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3164 BranchProbability BIEndProb =
3165 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3166 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3167 return BIEndProb < Likely;
3168}
3169
3170/// Speculate a conditional basic block flattening the CFG.
3171///
3172/// Note that this is a very risky transform currently. Speculating
3173/// instructions like this is most often not desirable. Instead, there is an MI
3174/// pass which can do it with full awareness of the resource constraints.
3175/// However, some cases are "obvious" and we should do directly. An example of
3176/// this is speculating a single, reasonably cheap instruction.
3177///
3178/// There is only one distinct advantage to flattening the CFG at the IR level:
3179/// it makes very common but simplistic optimizations such as are common in
3180/// instcombine and the DAG combiner more powerful by removing CFG edges and
3181/// modeling their effects with easier to reason about SSA value graphs.
3182///
3183///
3184/// An illustration of this transform is turning this IR:
3185/// \code
3186/// BB:
3187/// %cmp = icmp ult %x, %y
3188/// br i1 %cmp, label %EndBB, label %ThenBB
3189/// ThenBB:
3190/// %sub = sub %x, %y
3191/// br label BB2
3192/// EndBB:
3193/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3194/// ...
3195/// \endcode
3196///
3197/// Into this IR:
3198/// \code
3199/// BB:
3200/// %cmp = icmp ult %x, %y
3201/// %sub = sub %x, %y
3202/// %cond = select i1 %cmp, 0, %sub
3203/// ...
3204/// \endcode
3205///
3206/// \returns true if the conditional block is removed.
3207bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3208 BasicBlock *ThenBB) {
3209 if (!Options.SpeculateBlocks)
3210 return false;
3211
3212 // Be conservative for now. FP select instruction can often be expensive.
3213 Value *BrCond = BI->getCondition();
3214 if (isa<FCmpInst>(BrCond))
3215 return false;
3216
3217 BasicBlock *BB = BI->getParent();
3218 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3219 InstructionCost Budget =
3221
3222 // If ThenBB is actually on the false edge of the conditional branch, remember
3223 // to swap the select operands later.
3224 bool Invert = false;
3225 if (ThenBB != BI->getSuccessor(0)) {
3226 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3227 Invert = true;
3228 }
3229 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3230
3231 if (!isProfitableToSpeculate(BI, Invert, TTI))
3232 return false;
3233
3234 // Keep a count of how many times instructions are used within ThenBB when
3235 // they are candidates for sinking into ThenBB. Specifically:
3236 // - They are defined in BB, and
3237 // - They have no side effects, and
3238 // - All of their uses are in ThenBB.
3239 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3240
3241 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3242
3243 unsigned SpeculatedInstructions = 0;
3244 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3245 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3246 Value *SpeculatedStoreValue = nullptr;
3247 StoreInst *SpeculatedStore = nullptr;
3248 EphemeralValueTracker EphTracker;
3249 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3250 // Skip pseudo probes. The consequence is we lose track of the branch
3251 // probability for ThenBB, which is fine since the optimization here takes
3252 // place regardless of the branch probability.
3253 if (isa<PseudoProbeInst>(I)) {
3254 // The probe should be deleted so that it will not be over-counted when
3255 // the samples collected on the non-conditional path are counted towards
3256 // the conditional path. We leave it for the counts inference algorithm to
3257 // figure out a proper count for an unknown probe.
3258 SpeculatedPseudoProbes.push_back(&I);
3259 continue;
3260 }
3261
3262 // Ignore ephemeral values, they will be dropped by the transform.
3263 if (EphTracker.track(&I))
3264 continue;
3265
3266 // Only speculatively execute a single instruction (not counting the
3267 // terminator) for now.
3268 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3270 SpeculatedConditionalLoadsStores.size() <
3272 // Not count load/store into cost if target supports conditional faulting
3273 // b/c it's cheap to speculate it.
3274 if (IsSafeCheapLoadStore)
3275 SpeculatedConditionalLoadsStores.push_back(&I);
3276 else
3277 ++SpeculatedInstructions;
3278
3279 if (SpeculatedInstructions > 1)
3280 return false;
3281
3282 // Don't hoist the instruction if it's unsafe or expensive.
3283 if (!IsSafeCheapLoadStore &&
3285 !(HoistCondStores && !SpeculatedStoreValue &&
3286 (SpeculatedStoreValue =
3287 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3288 return false;
3289 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3292 return false;
3293
3294 // Store the store speculation candidate.
3295 if (!SpeculatedStore && SpeculatedStoreValue)
3296 SpeculatedStore = cast<StoreInst>(&I);
3297
3298 // Do not hoist the instruction if any of its operands are defined but not
3299 // used in BB. The transformation will prevent the operand from
3300 // being sunk into the use block.
3301 for (Use &Op : I.operands()) {
3303 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3304 continue; // Not a candidate for sinking.
3305
3306 ++SinkCandidateUseCounts[OpI];
3307 }
3308 }
3309
3310 // Consider any sink candidates which are only used in ThenBB as costs for
3311 // speculation. Note, while we iterate over a DenseMap here, we are summing
3312 // and so iteration order isn't significant.
3313 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3314 if (Inst->hasNUses(Count)) {
3315 ++SpeculatedInstructions;
3316 if (SpeculatedInstructions > 1)
3317 return false;
3318 }
3319
3320 // Check that we can insert the selects and that it's not too expensive to do
3321 // so.
3322 bool Convert =
3323 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3325 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3326 SpeculatedInstructions, Cost, TTI);
3327 if (!Convert || Cost > Budget)
3328 return false;
3329
3330 // If we get here, we can hoist the instruction and if-convert.
3331 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3332
3333 Instruction *Sel = nullptr;
3334 // Insert a select of the value of the speculated store.
3335 if (SpeculatedStoreValue) {
3336 IRBuilder<NoFolder> Builder(BI);
3337 Value *OrigV = SpeculatedStore->getValueOperand();
3338 Value *TrueV = SpeculatedStore->getValueOperand();
3339 Value *FalseV = SpeculatedStoreValue;
3340 if (Invert)
3341 std::swap(TrueV, FalseV);
3342 Value *S = Builder.CreateSelect(
3343 BrCond, TrueV, FalseV, "spec.store.select", BI);
3344 Sel = cast<Instruction>(S);
3345 SpeculatedStore->setOperand(0, S);
3346 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3347 SpeculatedStore->getDebugLoc());
3348 // The value stored is still conditional, but the store itself is now
3349 // unconditonally executed, so we must be sure that any linked dbg.assign
3350 // intrinsics are tracking the new stored value (the result of the
3351 // select). If we don't, and the store were to be removed by another pass
3352 // (e.g. DSE), then we'd eventually end up emitting a location describing
3353 // the conditional value, unconditionally.
3354 //
3355 // === Before this transformation ===
3356 // pred:
3357 // store %one, %x.dest, !DIAssignID !1
3358 // dbg.assign %one, "x", ..., !1, ...
3359 // br %cond if.then
3360 //
3361 // if.then:
3362 // store %two, %x.dest, !DIAssignID !2
3363 // dbg.assign %two, "x", ..., !2, ...
3364 //
3365 // === After this transformation ===
3366 // pred:
3367 // store %one, %x.dest, !DIAssignID !1
3368 // dbg.assign %one, "x", ..., !1
3369 /// ...
3370 // %merge = select %cond, %two, %one
3371 // store %merge, %x.dest, !DIAssignID !2
3372 // dbg.assign %merge, "x", ..., !2
3373 for (DbgVariableRecord *DbgAssign :
3374 at::getDVRAssignmentMarkers(SpeculatedStore))
3375 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3376 DbgAssign->replaceVariableLocationOp(OrigV, S);
3377 }
3378
3379 // Metadata can be dependent on the condition we are hoisting above.
3380 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3381 // to avoid making it appear as if the condition is a constant, which would
3382 // be misleading while debugging.
3383 // Similarly strip attributes that maybe dependent on condition we are
3384 // hoisting above.
3385 for (auto &I : make_early_inc_range(*ThenBB)) {
3386 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3387 I.dropLocation();
3388 }
3389 I.dropUBImplyingAttrsAndMetadata();
3390
3391 // Drop ephemeral values.
3392 if (EphTracker.contains(&I)) {
3393 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3394 I.eraseFromParent();
3395 }
3396 }
3397
3398 // Hoist the instructions.
3399 // Drop DbgVariableRecords attached to these instructions.
3400 for (auto &It : *ThenBB)
3401 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3402 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3403 // equivalent).
3404 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3405 !DVR || !DVR->isDbgAssign())
3406 It.dropOneDbgRecord(&DR);
3407 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3408 std::prev(ThenBB->end()));
3409
3410 if (!SpeculatedConditionalLoadsStores.empty())
3411 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3412 Sel);
3413
3414 // Insert selects and rewrite the PHI operands.
3415 IRBuilder<NoFolder> Builder(BI);
3416 for (PHINode &PN : EndBB->phis()) {
3417 unsigned OrigI = PN.getBasicBlockIndex(BB);
3418 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3419 Value *OrigV = PN.getIncomingValue(OrigI);
3420 Value *ThenV = PN.getIncomingValue(ThenI);
3421
3422 // Skip PHIs which are trivial.
3423 if (OrigV == ThenV)
3424 continue;
3425
3426 // Create a select whose true value is the speculatively executed value and
3427 // false value is the pre-existing value. Swap them if the branch
3428 // destinations were inverted.
3429 Value *TrueV = ThenV, *FalseV = OrigV;
3430 if (Invert)
3431 std::swap(TrueV, FalseV);
3432 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3433 PN.setIncomingValue(OrigI, V);
3434 PN.setIncomingValue(ThenI, V);
3435 }
3436
3437 // Remove speculated pseudo probes.
3438 for (Instruction *I : SpeculatedPseudoProbes)
3439 I->eraseFromParent();
3440
3441 ++NumSpeculations;
3442 return true;
3443}
3444
3446
3447// Return false if number of blocks searched is too much.
3448static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3449 BlocksSet &ReachesNonLocalUses) {
3450 if (BB == DefBB)
3451 return true;
3452 if (!ReachesNonLocalUses.insert(BB).second)
3453 return true;
3454
3455 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3456 return false;
3457 for (BasicBlock *Pred : predecessors(BB))
3458 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3459 return false;
3460 return true;
3461}
3462
3463/// Return true if we can thread a branch across this block.
3465 BlocksSet &NonLocalUseBlocks) {
3466 int Size = 0;
3467 EphemeralValueTracker EphTracker;
3468
3469 // Walk the loop in reverse so that we can identify ephemeral values properly
3470 // (values only feeding assumes).
3471 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3472 // Can't fold blocks that contain noduplicate or convergent calls.
3473 if (CallInst *CI = dyn_cast<CallInst>(&I))
3474 if (CI->cannotDuplicate() || CI->isConvergent())
3475 return false;
3476
3477 // Ignore ephemeral values which are deleted during codegen.
3478 // We will delete Phis while threading, so Phis should not be accounted in
3479 // block's size.
3480 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3481 if (Size++ > MaxSmallBlockSize)
3482 return false; // Don't clone large BB's.
3483 }
3484
3485 // Record blocks with non-local uses of values defined in the current basic
3486 // block.
3487 for (User *U : I.users()) {
3489 BasicBlock *UsedInBB = UI->getParent();
3490 if (UsedInBB == BB) {
3491 if (isa<PHINode>(UI))
3492 return false;
3493 } else
3494 NonLocalUseBlocks.insert(UsedInBB);
3495 }
3496
3497 // Looks ok, continue checking.
3498 }
3499
3500 return true;
3501}
3502
3504 BasicBlock *To) {
3505 // Don't look past the block defining the value, we might get the value from
3506 // a previous loop iteration.
3507 auto *I = dyn_cast<Instruction>(V);
3508 if (I && I->getParent() == To)
3509 return nullptr;
3510
3511 // We know the value if the From block branches on it.
3512 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3513 if (BI && BI->isConditional() && BI->getCondition() == V &&
3514 BI->getSuccessor(0) != BI->getSuccessor(1))
3515 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3517
3518 return nullptr;
3519}
3520
3521/// If we have a conditional branch on something for which we know the constant
3522/// value in predecessors (e.g. a phi node in the current block), thread edges
3523/// from the predecessor to their ultimate destination.
3524static std::optional<bool>
3526 const DataLayout &DL,
3527 AssumptionCache *AC) {
3529 BasicBlock *BB = BI->getParent();
3530 Value *Cond = BI->getCondition();
3532 if (PN && PN->getParent() == BB) {
3533 // Degenerate case of a single entry PHI.
3534 if (PN->getNumIncomingValues() == 1) {
3536 return true;
3537 }
3538
3539 for (Use &U : PN->incoming_values())
3540 if (auto *CB = dyn_cast<ConstantInt>(U))
3541 KnownValues[CB].insert(PN->getIncomingBlock(U));
3542 } else {
3543 for (BasicBlock *Pred : predecessors(BB)) {
3544 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3545 KnownValues[CB].insert(Pred);
3546 }
3547 }
3548
3549 if (KnownValues.empty())
3550 return false;
3551
3552 // Now we know that this block has multiple preds and two succs.
3553 // Check that the block is small enough and record which non-local blocks use
3554 // values defined in the block.
3555
3556 BlocksSet NonLocalUseBlocks;
3557 BlocksSet ReachesNonLocalUseBlocks;
3558 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3559 return false;
3560
3561 // Jump-threading can only be done to destinations where no values defined
3562 // in BB are live.
3563
3564 // Quickly check if both destinations have uses. If so, jump-threading cannot
3565 // be done.
3566 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3567 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3568 return false;
3569
3570 // Search backward from NonLocalUseBlocks to find which blocks
3571 // reach non-local uses.
3572 for (BasicBlock *UseBB : NonLocalUseBlocks)
3573 // Give up if too many blocks are searched.
3574 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3575 return false;
3576
3577 for (const auto &Pair : KnownValues) {
3578 ConstantInt *CB = Pair.first;
3579 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3580 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3581
3582 // Okay, we now know that all edges from PredBB should be revectored to
3583 // branch to RealDest.
3584 if (RealDest == BB)
3585 continue; // Skip self loops.
3586
3587 // Skip if the predecessor's terminator is an indirect branch.
3588 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3589 return isa<IndirectBrInst>(PredBB->getTerminator());
3590 }))
3591 continue;
3592
3593 // Only revector to RealDest if no values defined in BB are live.
3594 if (ReachesNonLocalUseBlocks.contains(RealDest))
3595 continue;
3596
3597 LLVM_DEBUG({
3598 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3599 << " has value " << *Pair.first << " in predecessors:\n";
3600 for (const BasicBlock *PredBB : Pair.second)
3601 dbgs() << " " << PredBB->getName() << "\n";
3602 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3603 });
3604
3605 // Split the predecessors we are threading into a new edge block. We'll
3606 // clone the instructions into this block, and then redirect it to RealDest.
3607 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3608
3609 // TODO: These just exist to reduce test diff, we can drop them if we like.
3610 EdgeBB->setName(RealDest->getName() + ".critedge");
3611 EdgeBB->moveBefore(RealDest);
3612
3613 // Update PHI nodes.
3614 addPredecessorToBlock(RealDest, EdgeBB, BB);
3615
3616 // BB may have instructions that are being threaded over. Clone these
3617 // instructions into EdgeBB. We know that there will be no uses of the
3618 // cloned instructions outside of EdgeBB.
3619 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3620 ValueToValueMapTy TranslateMap; // Track translated values.
3621 TranslateMap[Cond] = CB;
3622
3623 // RemoveDIs: track instructions that we optimise away while folding, so
3624 // that we can copy DbgVariableRecords from them later.
3625 BasicBlock::iterator SrcDbgCursor = BB->begin();
3626 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3627 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3628 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3629 continue;
3630 }
3631 // Clone the instruction.
3632 Instruction *N = BBI->clone();
3633 // Insert the new instruction into its new home.
3634 N->insertInto(EdgeBB, InsertPt);
3635
3636 if (BBI->hasName())
3637 N->setName(BBI->getName() + ".c");
3638
3639 // Update operands due to translation.
3640 // Key Instructions: Remap all the atom groups.
3641 if (const DebugLoc &DL = BBI->getDebugLoc())
3642 mapAtomInstance(DL, TranslateMap);
3643 RemapInstruction(N, TranslateMap,
3645
3646 // Check for trivial simplification.
3647 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3648 if (!BBI->use_empty())
3649 TranslateMap[&*BBI] = V;
3650 if (!N->mayHaveSideEffects()) {
3651 N->eraseFromParent(); // Instruction folded away, don't need actual
3652 // inst
3653 N = nullptr;
3654 }
3655 } else {
3656 if (!BBI->use_empty())
3657 TranslateMap[&*BBI] = N;
3658 }
3659 if (N) {
3660 // Copy all debug-info attached to instructions from the last we
3661 // successfully clone, up to this instruction (they might have been
3662 // folded away).
3663 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3664 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3665 SrcDbgCursor = std::next(BBI);
3666 // Clone debug-info on this instruction too.
3667 N->cloneDebugInfoFrom(&*BBI);
3668
3669 // Register the new instruction with the assumption cache if necessary.
3670 if (auto *Assume = dyn_cast<AssumeInst>(N))
3671 if (AC)
3672 AC->registerAssumption(Assume);
3673 }
3674 }
3675
3676 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3677 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3678 InsertPt->cloneDebugInfoFrom(BI);
3679
3680 BB->removePredecessor(EdgeBB);
3681 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3682 EdgeBI->setSuccessor(0, RealDest);
3683 EdgeBI->setDebugLoc(BI->getDebugLoc());
3684
3685 if (DTU) {
3687 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3688 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3689 DTU->applyUpdates(Updates);
3690 }
3691
3692 // For simplicity, we created a separate basic block for the edge. Merge
3693 // it back into the predecessor if possible. This not only avoids
3694 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3695 // bypass the check for trivial cycles above.
3696 MergeBlockIntoPredecessor(EdgeBB, DTU);
3697
3698 // Signal repeat, simplifying any other constants.
3699 return std::nullopt;
3700 }
3701
3702 return false;
3703}
3704
3705bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3706 // Note: If BB is a loop header then there is a risk that threading introduces
3707 // a non-canonical loop by moving a back edge. So we avoid this optimization
3708 // for loop headers if NeedCanonicalLoop is set.
3709 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3710 return false;
3711
3712 std::optional<bool> Result;
3713 bool EverChanged = false;
3714 do {
3715 // Note that None means "we changed things, but recurse further."
3716 Result =
3718 EverChanged |= Result == std::nullopt || *Result;
3719 } while (Result == std::nullopt);
3720 return EverChanged;
3721}
3722
3723/// Given a BB that starts with the specified two-entry PHI node,
3724/// see if we can eliminate it.
3727 const DataLayout &DL,
3728 bool SpeculateUnpredictables) {
3729 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3730 // statement", which has a very simple dominance structure. Basically, we
3731 // are trying to find the condition that is being branched on, which
3732 // subsequently causes this merge to happen. We really want control
3733 // dependence information for this check, but simplifycfg can't keep it up
3734 // to date, and this catches most of the cases we care about anyway.
3735 BasicBlock *BB = PN->getParent();
3736
3737 BasicBlock *IfTrue, *IfFalse;
3738 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3739 if (!DomBI)
3740 return false;
3741 Value *IfCond = DomBI->getCondition();
3742 // Don't bother if the branch will be constant folded trivially.
3743 if (isa<ConstantInt>(IfCond))
3744 return false;
3745
3746 BasicBlock *DomBlock = DomBI->getParent();
3749 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3750 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3751 });
3752 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3753 "Will have either one or two blocks to speculate.");
3754
3755 // If the branch is non-unpredictable, see if we either predictably jump to
3756 // the merge bb (if we have only a single 'then' block), or if we predictably
3757 // jump to one specific 'then' block (if we have two of them).
3758 // It isn't beneficial to speculatively execute the code
3759 // from the block that we know is predictably not entered.
3760 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3761 if (!IsUnpredictable) {
3762 uint64_t TWeight, FWeight;
3763 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3764 (TWeight + FWeight) != 0) {
3765 BranchProbability BITrueProb =
3766 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3767 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3768 BranchProbability BIFalseProb = BITrueProb.getCompl();
3769 if (IfBlocks.size() == 1) {
3770 BranchProbability BIBBProb =
3771 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3772 if (BIBBProb >= Likely)
3773 return false;
3774 } else {
3775 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3776 return false;
3777 }
3778 }
3779 }
3780
3781 // Don't try to fold an unreachable block. For example, the phi node itself
3782 // can't be the candidate if-condition for a select that we want to form.
3783 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3784 if (IfCondPhiInst->getParent() == BB)
3785 return false;
3786
3787 // Okay, we found that we can merge this two-entry phi node into a select.
3788 // Doing so would require us to fold *all* two entry phi nodes in this block.
3789 // At some point this becomes non-profitable (particularly if the target
3790 // doesn't support cmov's). Only do this transformation if there are two or
3791 // fewer PHI nodes in this block.
3792 unsigned NumPhis = 0;
3793 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3794 if (NumPhis > 2)
3795 return false;
3796
3797 // Loop over the PHI's seeing if we can promote them all to select
3798 // instructions. While we are at it, keep track of the instructions
3799 // that need to be moved to the dominating block.
3800 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3801 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3802 InstructionCost Cost = 0;
3803 InstructionCost Budget =
3805 if (SpeculateUnpredictables && IsUnpredictable)
3806 Budget += TTI.getBranchMispredictPenalty();
3807
3808 bool Changed = false;
3809 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3810 PHINode *PN = cast<PHINode>(II++);
3811 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3812 PN->replaceAllUsesWith(V);
3813 PN->eraseFromParent();
3814 Changed = true;
3815 continue;
3816 }
3817
3818 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3819 AggressiveInsts, Cost, Budget, TTI, AC,
3820 ZeroCostInstructions) ||
3821 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3822 AggressiveInsts, Cost, Budget, TTI, AC,
3823 ZeroCostInstructions))
3824 return Changed;
3825 }
3826
3827 // If we folded the first phi, PN dangles at this point. Refresh it. If
3828 // we ran out of PHIs then we simplified them all.
3829 PN = dyn_cast<PHINode>(BB->begin());
3830 if (!PN)
3831 return true;
3832
3833 // Return true if at least one of these is a 'not', and another is either
3834 // a 'not' too, or a constant.
3835 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3836 if (!match(V0, m_Not(m_Value())))
3837 std::swap(V0, V1);
3838 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3839 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3840 };
3841
3842 // Don't fold i1 branches on PHIs which contain binary operators or
3843 // (possibly inverted) select form of or/ands, unless one of
3844 // the incoming values is an 'not' and another one is freely invertible.
3845 // These can often be turned into switches and other things.
3846 auto IsBinOpOrAnd = [](Value *V) {
3847 return match(
3849 };
3850 if (PN->getType()->isIntegerTy(1) &&
3851 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3852 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3853 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3854 PN->getIncomingValue(1)))
3855 return Changed;
3856
3857 // If all PHI nodes are promotable, check to make sure that all instructions
3858 // in the predecessor blocks can be promoted as well. If not, we won't be able
3859 // to get rid of the control flow, so it's not worth promoting to select
3860 // instructions.
3861 for (BasicBlock *IfBlock : IfBlocks)
3862 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3863 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3864 // This is not an aggressive instruction that we can promote.
3865 // Because of this, we won't be able to get rid of the control flow, so
3866 // the xform is not worth it.
3867 return Changed;
3868 }
3869
3870 // If either of the blocks has it's address taken, we can't do this fold.
3871 if (any_of(IfBlocks,
3872 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3873 return Changed;
3874
3875 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3876 if (IsUnpredictable) dbgs() << " (unpredictable)";
3877 dbgs() << " T: " << IfTrue->getName()
3878 << " F: " << IfFalse->getName() << "\n");
3879
3880 // If we can still promote the PHI nodes after this gauntlet of tests,
3881 // do all of the PHI's now.
3882
3883 // Move all 'aggressive' instructions, which are defined in the
3884 // conditional parts of the if's up to the dominating block.
3885 for (BasicBlock *IfBlock : IfBlocks)
3886 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3887
3888 IRBuilder<NoFolder> Builder(DomBI);
3889 // Propagate fast-math-flags from phi nodes to replacement selects.
3890 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3891 // Change the PHI node into a select instruction.
3892 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3893 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3894
3895 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3896 isa<FPMathOperator>(PN) ? PN : nullptr,
3897 "", DomBI);
3898 PN->replaceAllUsesWith(Sel);
3899 Sel->takeName(PN);
3900 PN->eraseFromParent();
3901 }
3902
3903 // At this point, all IfBlocks are empty, so our if statement
3904 // has been flattened. Change DomBlock to jump directly to our new block to
3905 // avoid other simplifycfg's kicking in on the diamond.
3906 Builder.CreateBr(BB);
3907
3909 if (DTU) {
3910 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3911 for (auto *Successor : successors(DomBlock))
3912 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3913 }
3914
3915 DomBI->eraseFromParent();
3916 if (DTU)
3917 DTU->applyUpdates(Updates);
3918
3919 return true;
3920}
3921
3924 Value *RHS, const Twine &Name = "") {
3925 // Try to relax logical op to binary op.
3926 if (impliesPoison(RHS, LHS))
3927 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3928 if (Opc == Instruction::And)
3929 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3930 if (Opc == Instruction::Or)
3931 return Builder.CreateLogicalOr(LHS, RHS, Name);
3932 llvm_unreachable("Invalid logical opcode");
3933}
3934
3935/// Return true if either PBI or BI has branch weight available, and store
3936/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3937/// not have branch weight, use 1:1 as its weight.
3939 uint64_t &PredTrueWeight,
3940 uint64_t &PredFalseWeight,
3941 uint64_t &SuccTrueWeight,
3942 uint64_t &SuccFalseWeight) {
3943 bool PredHasWeights =
3944 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3945 bool SuccHasWeights =
3946 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3947 if (PredHasWeights || SuccHasWeights) {
3948 if (!PredHasWeights)
3949 PredTrueWeight = PredFalseWeight = 1;
3950 if (!SuccHasWeights)
3951 SuccTrueWeight = SuccFalseWeight = 1;
3952 return true;
3953 } else {
3954 return false;
3955 }
3956}
3957
3958/// Determine if the two branches share a common destination and deduce a glue
3959/// that joins the branches' conditions to arrive at the common destination if
3960/// that would be profitable.
3961static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3963 const TargetTransformInfo *TTI) {
3964 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3965 "Both blocks must end with a conditional branches.");
3967 "PredBB must be a predecessor of BB.");
3968
3969 // We have the potential to fold the conditions together, but if the
3970 // predecessor branch is predictable, we may not want to merge them.
3971 uint64_t PTWeight, PFWeight;
3972 BranchProbability PBITrueProb, Likely;
3973 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3974 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3975 (PTWeight + PFWeight) != 0) {
3976 PBITrueProb =
3977 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3978 Likely = TTI->getPredictableBranchThreshold();
3979 }
3980
3981 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3982 // Speculate the 2nd condition unless the 1st is probably true.
3983 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3984 return {{BI->getSuccessor(0), Instruction::Or, false}};
3985 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3986 // Speculate the 2nd condition unless the 1st is probably false.
3987 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3988 return {{BI->getSuccessor(1), Instruction::And, false}};
3989 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3990 // Speculate the 2nd condition unless the 1st is probably true.
3991 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3992 return {{BI->getSuccessor(1), Instruction::And, true}};
3993 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3994 // Speculate the 2nd condition unless the 1st is probably false.
3995 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3996 return {{BI->getSuccessor(0), Instruction::Or, true}};
3997 }
3998 return std::nullopt;
3999}
4000
4002 DomTreeUpdater *DTU,
4003 MemorySSAUpdater *MSSAU,
4004 const TargetTransformInfo *TTI) {
4005 BasicBlock *BB = BI->getParent();
4006 BasicBlock *PredBlock = PBI->getParent();
4007
4008 // Determine if the two branches share a common destination.
4009 BasicBlock *CommonSucc;
4011 bool InvertPredCond;
4012 std::tie(CommonSucc, Opc, InvertPredCond) =
4014
4015 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4016
4017 IRBuilder<> Builder(PBI);
4018 // The builder is used to create instructions to eliminate the branch in BB.
4019 // If BB's terminator has !annotation metadata, add it to the new
4020 // instructions.
4021 Builder.CollectMetadataToCopy(BB->getTerminator(),
4022 {LLVMContext::MD_annotation});
4023
4024 // If we need to invert the condition in the pred block to match, do so now.
4025 if (InvertPredCond) {
4026 InvertBranch(PBI, Builder);
4027 }
4028
4029 BasicBlock *UniqueSucc =
4030 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4031
4032 // Before cloning instructions, notify the successor basic block that it
4033 // is about to have a new predecessor. This will update PHI nodes,
4034 // which will allow us to update live-out uses of bonus instructions.
4035 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4036
4037 // Try to update branch weights.
4038 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4039 SmallVector<uint64_t, 2> MDWeights;
4040 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4041 SuccTrueWeight, SuccFalseWeight)) {
4042
4043 if (PBI->getSuccessor(0) == BB) {
4044 // PBI: br i1 %x, BB, FalseDest
4045 // BI: br i1 %y, UniqueSucc, FalseDest
4046 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4047 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4048 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4049 // TrueWeight for PBI * FalseWeight for BI.
4050 // We assume that total weights of a BranchInst can fit into 32 bits.
4051 // Therefore, we will not have overflow using 64-bit arithmetic.
4052 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4053 PredTrueWeight * SuccFalseWeight);
4054 } else {
4055 // PBI: br i1 %x, TrueDest, BB
4056 // BI: br i1 %y, TrueDest, UniqueSucc
4057 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4058 // FalseWeight for PBI * TrueWeight for BI.
4059 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4060 PredFalseWeight * SuccTrueWeight);
4061 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4062 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4063 }
4064
4065 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4066 /*ElideAllZero=*/true);
4067
4068 // TODO: If BB is reachable from all paths through PredBlock, then we
4069 // could replace PBI's branch probabilities with BI's.
4070 } else
4071 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4072
4073 // Now, update the CFG.
4074 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4075
4076 if (DTU)
4077 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4078 {DominatorTree::Delete, PredBlock, BB}});
4079
4080 // If BI was a loop latch, it may have had associated loop metadata.
4081 // We need to copy it to the new latch, that is, PBI.
4082 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4083 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4084
4085 ValueToValueMapTy VMap; // maps original values to cloned values
4087
4088 Module *M = BB->getModule();
4089
4090 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4091 for (DbgVariableRecord &DVR :
4093 RemapDbgRecord(M, &DVR, VMap,
4095 }
4096
4097 // Now that the Cond was cloned into the predecessor basic block,
4098 // or/and the two conditions together.
4099 Value *BICond = VMap[BI->getCondition()];
4100 PBI->setCondition(
4101 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4103 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4104 if (!MDWeights.empty()) {
4105 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4106 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4107 /*IsExpected=*/false, /*ElideAllZero=*/true);
4108 }
4109
4110 ++NumFoldBranchToCommonDest;
4111 return true;
4112}
4113
4114/// Return if an instruction's type or any of its operands' types are a vector
4115/// type.
4116static bool isVectorOp(Instruction &I) {
4117 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4118 return U->getType()->isVectorTy();
4119 });
4120}
4121
4122/// If this basic block is simple enough, and if a predecessor branches to us
4123/// and one of our successors, fold the block into the predecessor and use
4124/// logical operations to pick the right destination.
4126 MemorySSAUpdater *MSSAU,
4127 const TargetTransformInfo *TTI,
4128 unsigned BonusInstThreshold) {
4129 // If this block ends with an unconditional branch,
4130 // let speculativelyExecuteBB() deal with it.
4131 if (!BI->isConditional())
4132 return false;
4133
4134 BasicBlock *BB = BI->getParent();
4138
4140
4142 Cond->getParent() != BB || !Cond->hasOneUse())
4143 return false;
4144
4145 // Finally, don't infinitely unroll conditional loops.
4146 if (is_contained(successors(BB), BB))
4147 return false;
4148
4149 // With which predecessors will we want to deal with?
4151 for (BasicBlock *PredBlock : predecessors(BB)) {
4152 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4153
4154 // Check that we have two conditional branches. If there is a PHI node in
4155 // the common successor, verify that the same value flows in from both
4156 // blocks.
4157 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4158 continue;
4159
4160 // Determine if the two branches share a common destination.
4161 BasicBlock *CommonSucc;
4163 bool InvertPredCond;
4164 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4165 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4166 else
4167 continue;
4168
4169 // Check the cost of inserting the necessary logic before performing the
4170 // transformation.
4171 if (TTI) {
4172 Type *Ty = BI->getCondition()->getType();
4173 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4174 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4175 !isa<CmpInst>(PBI->getCondition())))
4176 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4177
4179 continue;
4180 }
4181
4182 // Ok, we do want to deal with this predecessor. Record it.
4183 Preds.emplace_back(PredBlock);
4184 }
4185
4186 // If there aren't any predecessors into which we can fold,
4187 // don't bother checking the cost.
4188 if (Preds.empty())
4189 return false;
4190
4191 // Only allow this transformation if computing the condition doesn't involve
4192 // too many instructions and these involved instructions can be executed
4193 // unconditionally. We denote all involved instructions except the condition
4194 // as "bonus instructions", and only allow this transformation when the
4195 // number of the bonus instructions we'll need to create when cloning into
4196 // each predecessor does not exceed a certain threshold.
4197 unsigned NumBonusInsts = 0;
4198 bool SawVectorOp = false;
4199 const unsigned PredCount = Preds.size();
4200 for (Instruction &I : *BB) {
4201 // Don't check the branch condition comparison itself.
4202 if (&I == Cond)
4203 continue;
4204 // Ignore the terminator.
4205 if (isa<BranchInst>(I))
4206 continue;
4207 // I must be safe to execute unconditionally.
4209 return false;
4210 SawVectorOp |= isVectorOp(I);
4211
4212 // Account for the cost of duplicating this instruction into each
4213 // predecessor. Ignore free instructions.
4214 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4216 NumBonusInsts += PredCount;
4217
4218 // Early exits once we reach the limit.
4219 if (NumBonusInsts >
4220 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4221 return false;
4222 }
4223
4224 auto IsBCSSAUse = [BB, &I](Use &U) {
4225 auto *UI = cast<Instruction>(U.getUser());
4226 if (auto *PN = dyn_cast<PHINode>(UI))
4227 return PN->getIncomingBlock(U) == BB;
4228 return UI->getParent() == BB && I.comesBefore(UI);
4229 };
4230
4231 // Does this instruction require rewriting of uses?
4232 if (!all_of(I.uses(), IsBCSSAUse))
4233 return false;
4234 }
4235 if (NumBonusInsts >
4236 BonusInstThreshold *
4237 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4238 return false;
4239
4240 // Ok, we have the budget. Perform the transformation.
4241 for (BasicBlock *PredBlock : Preds) {
4242 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4243 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4244 }
4245 return false;
4246}
4247
4248// If there is only one store in BB1 and BB2, return it, otherwise return
4249// nullptr.
4251 StoreInst *S = nullptr;
4252 for (auto *BB : {BB1, BB2}) {
4253 if (!BB)
4254 continue;
4255 for (auto &I : *BB)
4256 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4257 if (S)
4258 // Multiple stores seen.
4259 return nullptr;
4260 else
4261 S = SI;
4262 }
4263 }
4264 return S;
4265}
4266
4268 Value *AlternativeV = nullptr) {
4269 // PHI is going to be a PHI node that allows the value V that is defined in
4270 // BB to be referenced in BB's only successor.
4271 //
4272 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4273 // doesn't matter to us what the other operand is (it'll never get used). We
4274 // could just create a new PHI with an undef incoming value, but that could
4275 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4276 // other PHI. So here we directly look for some PHI in BB's successor with V
4277 // as an incoming operand. If we find one, we use it, else we create a new
4278 // one.
4279 //
4280 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4281 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4282 // where OtherBB is the single other predecessor of BB's only successor.
4283 PHINode *PHI = nullptr;
4284 BasicBlock *Succ = BB->getSingleSuccessor();
4285
4286 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4287 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4288 PHI = cast<PHINode>(I);
4289 if (!AlternativeV)
4290 break;
4291
4292 assert(Succ->hasNPredecessors(2));
4293 auto PredI = pred_begin(Succ);
4294 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4295 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4296 break;
4297 PHI = nullptr;
4298 }
4299 if (PHI)
4300 return PHI;
4301
4302 // If V is not an instruction defined in BB, just return it.
4303 if (!AlternativeV &&
4304 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4305 return V;
4306
4307 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4308 PHI->insertBefore(Succ->begin());
4309 PHI->addIncoming(V, BB);
4310 for (BasicBlock *PredBB : predecessors(Succ))
4311 if (PredBB != BB)
4312 PHI->addIncoming(
4313 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4314 return PHI;
4315}
4316
4318 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4319 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4320 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
// Sink the two conditional stores to Address (one from the P diamond, one
// from the Q diamond) into a single store in PostBB, predicated on the
// disjunction of the two (possibly inverted) branch conditions.
4321 // For every pointer, there must be exactly two stores, one coming from
4322 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4323 // store (to any address) in PTB,PFB or QTB,QFB.
4324 // FIXME: We could relax this restriction with a bit more work and performance
4325 // testing.
4326 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4327 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4328 if (!PStore || !QStore)
4329 return false;
4330
4331 // Now check the stores are compatible.
// Both stores must be simple (unordered, non-atomic-in-effect) and store
// values of the same type so a single merged store can replace them.
4332 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4333 PStore->getValueOperand()->getType() !=
4334 QStore->getValueOperand()->getType())
4335 return false;
4336
4337 // Check that sinking the store won't cause program behavior changes. Sinking
4338 // the store out of the Q blocks won't change any behavior as we're sinking
4339 // from a block to its unconditional successor. But we're moving a store from
4340 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4341 // So we need to check that there are no aliasing loads or stores in
4342 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4343 // operations between PStore and the end of its parent block.
4344 //
4345 // The ideal way to do this is to query AliasAnalysis, but we don't
4346 // preserve AA currently so that is dangerous. Be super safe and just
4347 // check there are no other memory operations at all.
4348 for (auto &I : *QFB->getSinglePredecessor())
4349 if (I.mayReadOrWriteMemory())
4350 return false;
4351 for (auto &I : *QFB)
4352 if (&I != QStore && I.mayReadOrWriteMemory())
4353 return false;
4354 if (QTB)
4355 for (auto &I : *QTB)
4356 if (&I != QStore && I.mayReadOrWriteMemory())
4357 return false;
4358 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4359 I != E; ++I)
4360 if (&*I != PStore && I->mayReadOrWriteMemory())
4361 return false;
4362
4363 // If we're not in aggressive mode, we only optimize if we have some
4364 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4365 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4366 if (!BB)
4367 return true;
4368 // Heuristic: if the block can be if-converted/phi-folded and the
4369 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4370 // thread this store.
4371 InstructionCost Cost = 0;
4372 InstructionCost Budget =
4374 for (auto &I : BB->instructionsWithoutDebug(false)) {
4375 // Consider terminator instruction to be free.
4376 if (I.isTerminator())
4377 continue;
4378 // If this is one the stores that we want to speculate out of this BB,
4379 // then don't count it's cost, consider it to be free.
4380 if (auto *S = dyn_cast<StoreInst>(&I))
4381 if (llvm::find(FreeStores, S))
4382 continue;
4383 // Else, we have a white-list of instructions that we are ak speculating.
4385 return false; // Not in white-list - not worthwhile folding.
4386 // And finally, if this is a non-free instruction that we are okay
4387 // speculating, ensure that we consider the speculation budget.
4388 Cost +=
4389 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4390 if (Cost > Budget)
4391 return false; // Eagerly refuse to fold as soon as we're out of budget.
4392 }
4393 assert(Cost <= Budget &&
4394 "When we run out of budget we will eagerly return from within the "
4395 "per-instruction loop.");
4396 return true;
4397 };
4398
4399 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4401 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4402 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4403 return false;
4404
4405 // If PostBB has more than two predecessors, we need to split it so we can
4406 // sink the store.
4407 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4408 // We know that QFB's only successor is PostBB. And QFB has a single
4409 // predecessor. If QTB exists, then its only successor is also PostBB.
4410 // If QTB does not exist, then QFB's only predecessor has a conditional
4411 // branch to QFB and PostBB.
4412 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4413 BasicBlock *NewBB =
4414 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4415 if (!NewBB)
4416 return false;
4417 PostBB = NewBB;
4418 }
4419
4420 // OK, we're going to sink the stores to PostBB. The store has to be
4421 // conditional though, so first create the predicate.
4422 BranchInst *PBranch =
4424 BranchInst *QBranch =
4426 Value *PCond = PBranch->getCondition();
4427 Value *QCond = QBranch->getCondition();
4428
4430 PStore->getParent());
4432 QStore->getParent(), PPHI);
4433
4434 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4435 IRBuilder<> QB(PostBB, PostBBFirst);
4436 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4437
// If the store actually lives in the false block of its diamond, the
// condition guarding it is the negation of the branch condition — fold
// that into the caller-provided inversion flags.
4438 InvertPCond ^= (PStore->getParent() != PTB);
4439 InvertQCond ^= (QStore->getParent() != QTB);
4440 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4441 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4442
4443 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4444
// Materialize a conditional "then" block guarded by the combined predicate;
// the merged store will be emitted there.
4445 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4446 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4447 /*Unreachable=*/false,
4448 /*BranchWeights=*/nullptr, DTU);
// Propagate branch-weight metadata: the combined branch fires when either
// original (possibly inverted) condition fired.
4449 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4451 SmallVector<uint32_t, 2> PWeights, QWeights;
4452 extractBranchWeights(*PBranch, PWeights);
4453 extractBranchWeights(*QBranch, QWeights);
4454 if (InvertPCond)
4455 std::swap(PWeights[0], PWeights[1]);
4456 if (InvertQCond)
4457 std::swap(QWeights[0], QWeights[1]);
4458 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4460 {CombinedWeights[0], CombinedWeights[1]},
4461 /*IsExpected=*/false, /*ElideAllZero=*/true);
4462 }
4463
4464 QB.SetInsertPoint(T);
4465 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4466 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4467 // Choose the minimum alignment. If we could prove both stores execute, we
4468 // could use biggest one. In this case, though, we only know that one of the
4469 // stores executes. And we don't know it's safe to take the alignment from a
4470 // store that doesn't execute.
4471 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4472
4473 QStore->eraseFromParent();
4474 PStore->eraseFromParent();
4475
4476 return true;
4477}
4478
4480 DomTreeUpdater *DTU, const DataLayout &DL,
4481 const TargetTransformInfo &TTI) {
4482 // The intention here is to find diamonds or triangles (see below) where each
4483 // conditional block contains a store to the same address. Both of these
4484 // stores are conditional, so they can't be unconditionally sunk. But it may
4485 // be profitable to speculatively sink the stores into one merged store at the
4486 // end, and predicate the merged store on the union of the two conditions of
4487 // PBI and QBI.
4488 //
4489 // This can reduce the number of stores executed if both of the conditions are
4490 // true, and can allow the blocks to become small enough to be if-converted.
4491 // This optimization will also chain, so that ladders of test-and-set
4492 // sequences can be if-converted away.
4493 //
4494 // We only deal with simple diamonds or triangles:
4495 //
4496 // PBI or PBI or a combination of the two
4497 // / \ | \
4498 // PTB PFB | PFB
4499 // \ / | /
4500 // QBI QBI
4501 // / \ | \
4502 // QTB QFB | QFB
4503 // \ / | /
4504 // PostBB PostBB
4505 //
4506 // We model triangles as a type of diamond with a nullptr "true" block.
4507 // Triangles are canonicalized so that the fallthrough edge is represented by
4508 // a true condition, as in the diagram above.
4509 BasicBlock *PTB = PBI->getSuccessor(0);
4510 BasicBlock *PFB = PBI->getSuccessor(1);
4511 BasicBlock *QTB = QBI->getSuccessor(0);
4512 BasicBlock *QFB = QBI->getSuccessor(1);
4513 BasicBlock *PostBB = QFB->getSingleSuccessor();
4514
4515 // Make sure we have a good guess for PostBB. If QTB's only successor is
4516 // QFB, then QFB is a better PostBB.
4517 if (QTB->getSingleSuccessor() == QFB)
4518 PostBB = QFB;
4519
4520 // If we couldn't find a good PostBB, stop.
4521 if (!PostBB)
4522 return false;
4523
4524 bool InvertPCond = false, InvertQCond = false;
4525 // Canonicalize fallthroughs to the true branches.
4526 if (PFB == QBI->getParent()) {
4527 std::swap(PFB, PTB);
4528 InvertPCond = true;
4529 }
4530 if (QFB == PostBB) {
4531 std::swap(QFB, QTB);
4532 InvertQCond = true;
4533 }
4534
4535 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4536 // and QFB may not. Model fallthroughs as a nullptr block.
4537 if (PTB == QBI->getParent())
4538 PTB = nullptr;
4539 if (QTB == PostBB)
4540 QTB = nullptr;
4541
4542 // Legality bailouts. We must have at least the non-fallthrough blocks and
4543 // the post-dominating block, and the non-fallthroughs must only have one
4544 // predecessor.
4545 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4546 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4547 };
4548 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4549 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4550 return false;
4551 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4552 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4553 return false;
// NOTE(review): presumably this requires the middle (QBI's) block to be
// referenced only by the two edges of this pattern — confirm intent.
4554 if (!QBI->getParent()->hasNUses(2))
4555 return false;
4556
4557 // OK, this is a sequence of two diamonds or triangles.
4558 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4559 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4560 for (auto *BB : {PTB, PFB}) {
4561 if (!BB)
4562 continue;
4563 for (auto &I : *BB)
4565 PStoreAddresses.insert(SI->getPointerOperand());
4566 }
4567 for (auto *BB : {QTB, QFB}) {
4568 if (!BB)
4569 continue;
4570 for (auto &I : *BB)
4572 QStoreAddresses.insert(SI->getPointerOperand());
4573 }
4574
4575 set_intersect(PStoreAddresses, QStoreAddresses);
4576 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4577 // clear what it contains.
4578 auto &CommonAddresses = PStoreAddresses;
4579
// Attempt the merge once per address that is stored to on both sides; any
// single success marks the CFG as changed.
4580 bool Changed = false;
4581 for (auto *Address : CommonAddresses)
4582 Changed |=
4583 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4584 InvertPCond, InvertQCond, DTU, DL, TTI);
4585 return Changed;
4586}
4587
4588/// If the previous block ended with a widenable branch, determine if reusing
4589/// the target block is profitable and legal. This will have the effect of
4590/// "widening" PBI, but doesn't require us to reason about hosting safety.
///
/// Returns true if BI was retargeted (and the DomTree, if tracked, updated);
/// false if the pattern did not match or the transform was unprofitable.
4592 DomTreeUpdater *DTU) {
4593 // TODO: This can be generalized in two important ways:
4594 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4595 // values from the PBI edge.
4596 // 2) We can sink side effecting instructions into BI's fallthrough
4597 // successor provided they doesn't contribute to computation of
4598 // BI's condition.
4599 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4600 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4601 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4602 !BI->getParent()->getSinglePredecessor())
4603 return false;
4604 if (!IfFalseBB->phis().empty())
4605 return false; // TODO
4606 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4607 // may undo the transform done here.
4608 // TODO: There might be a more fine-grained solution to this.
4609 if (!llvm::succ_empty(IfFalseBB))
4610 return false;
4611 // Use lambda to lazily compute expensive condition after cheap ones.
4612 auto NoSideEffects = [](BasicBlock &BB) {
4613 return llvm::none_of(BB, [](const Instruction &I) {
4614 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4615 });
4616 };
// If BI's false-edge target just deoptimizes, point that edge at the
// widenable branch's failure block instead, mirroring the edge change in
// the DomTree.
4617 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4618 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4619 NoSideEffects(*BI->getParent())) {
4620 auto *OldSuccessor = BI->getSuccessor(1);
4621 OldSuccessor->removePredecessor(BI->getParent());
4622 BI->setSuccessor(1, IfFalseBB);
4623 if (DTU)
4624 DTU->applyUpdates(
4625 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4626 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4627 return true;
4628 }
// Same transform, applied to BI's true edge.
4629 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4630 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4631 NoSideEffects(*BI->getParent())) {
4632 auto *OldSuccessor = BI->getSuccessor(0);
4633 OldSuccessor->removePredecessor(BI->getParent());
4634 BI->setSuccessor(0, IfFalseBB);
4635 if (DTU)
4636 DTU->applyUpdates(
4637 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4638 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4639 return true;
4640 }
4641 return false;
4642}
4643
4644/// If we have a conditional branch as a predecessor of another block,
4645/// this function tries to simplify it. We know
4646/// that PBI and BI are both conditional branches, and BI is in one of the
4647/// successor blocks of PBI - PBI branches to BI.
///
/// Returns true if any simplification was performed.
4649 DomTreeUpdater *DTU,
4650 const DataLayout &DL,
4651 const TargetTransformInfo &TTI) {
4652 assert(PBI->isConditional() && BI->isConditional());
4653 BasicBlock *BB = BI->getParent();
4654
4655 // If this block ends with a branch instruction, and if there is a
4656 // predecessor that ends on a branch of the same condition, make
4657 // this conditional branch redundant.
4658 if (PBI->getCondition() == BI->getCondition() &&
4659 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4660 // Okay, the outcome of this conditional branch is statically
4661 // knowable. If this block had a single pred, handle specially, otherwise
4662 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4663 if (BB->getSinglePredecessor()) {
4664 // Turn this into a branch on constant.
4665 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4666 BI->setCondition(
4667 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4668 return true; // Nuke the branch on constant.
4669 }
4670 }
4671
4672 // If the previous block ended with a widenable branch, determine if reusing
4673 // the target block is profitable and legal. This will have the effect of
4674 // "widening" PBI, but doesn't require us to reason about hosting safety.
4675 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4676 return true;
4677
4678 // If both branches are conditional and both contain stores to the same
4679 // address, remove the stores from the conditionals and create a conditional
4680 // merged store at the end.
4681 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4682 return true;
4683
4684 // If this is a conditional branch in an empty block, and if any
4685 // predecessors are a conditional branch to one of our destinations,
4686 // fold the conditions into logical ops and one cond br.
4687
4688 // Ignore dbg intrinsics.
4689 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4690 return false;
4691
// Determine which successor the two branches share. PBIOp/BIOp record the
// successor-operand index of the common destination in PBI and BI.
4692 int PBIOp, BIOp;
4693 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4694 PBIOp = 0;
4695 BIOp = 0;
4696 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4697 PBIOp = 0;
4698 BIOp = 1;
4699 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4700 PBIOp = 1;
4701 BIOp = 0;
4702 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4703 PBIOp = 1;
4704 BIOp = 1;
4705 } else {
4706 return false;
4707 }
4708
4709 // Check to make sure that the other destination of this branch
4710 // isn't BB itself. If so, this is an infinite loop that will
4711 // keep getting unwound.
4712 if (PBI->getSuccessor(PBIOp) == BB)
4713 return false;
4714
4715 // If predecessor's branch probability to BB is too low don't merge branches.
4716 SmallVector<uint32_t, 2> PredWeights;
4717 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4718 extractBranchWeights(*PBI, PredWeights) &&
4719 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4720
4722 PredWeights[PBIOp],
4723 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4724
4725 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4726 if (CommonDestProb >= Likely)
4727 return false;
4728 }
4729
4730 // Do not perform this transformation if it would require
4731 // insertion of a large number of select instructions. For targets
4732 // without predication/cmovs, this is a big pessimization.
4733
4734 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4735 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4736 unsigned NumPhis = 0;
4737 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4738 ++II, ++NumPhis) {
4739 if (NumPhis > 2) // Disable this xform.
4740 return false;
4741 }
4742
4743 // Finally, if everything is ok, fold the branches to logical ops.
4744 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4745
4746 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4747 << "AND: " << *BI->getParent());
4748
4750
4751 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4752 // branch in it, where one edge (OtherDest) goes back to itself but the other
4753 // exits. We don't *know* that the program avoids the infinite loop
4754 // (even though that seems likely). If we do this xform naively, we'll end up
4755 // recursively unpeeling the loop. Since we know that (after the xform is
4756 // done) that the block *is* infinite if reached, we just make it an obviously
4757 // infinite loop with no cond branch.
4758 if (OtherDest == BB) {
4759 // Insert it at the end of the function, because it's either code,
4760 // or it won't matter if it's hot. :)
4761 BasicBlock *InfLoopBlock =
4762 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4763 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4764 if (DTU)
4765 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4766 OtherDest = InfLoopBlock;
4767 }
4768
4769 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4770
4771 // BI may have other predecessors. Because of this, we leave
4772 // it alone, but modify PBI.
4773
4774 // Make sure we get to CommonDest on True&True directions.
4775 Value *PBICond = PBI->getCondition();
4776 IRBuilder<NoFolder> Builder(PBI);
4777 if (PBIOp)
4778 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4779
4780 Value *BICond = BI->getCondition();
4781 if (BIOp)
4782 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4783
4784 // Merge the conditions.
4785 Value *Cond =
4786 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4787
4788 // Modify PBI to branch on the new condition to the new dests.
4789 PBI->setCondition(Cond);
4790 PBI->setSuccessor(0, CommonDest);
4791 PBI->setSuccessor(1, OtherDest);
4792
4793 if (DTU) {
4794 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4795 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4796
4797 DTU->applyUpdates(Updates);
4798 }
4799
4800 // Update branch weight for PBI.
4801 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4802 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4803 bool HasWeights =
4804 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4805 SuccTrueWeight, SuccFalseWeight);
4806 if (HasWeights) {
4807 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4808 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4809 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4810 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4811 // The weight to CommonDest should be PredCommon * SuccTotal +
4812 // PredOther * SuccCommon.
4813 // The weight to OtherDest should be PredOther * SuccOther.
4814 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4815 PredOther * SuccCommon,
4816 PredOther * SuccOther};
4817
4818 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4819 /*ElideAllZero=*/true);
4820 // Cond may be a select instruction with the first operand set to "true", or
4821 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4823 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4824 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4825 // The select is predicated on PBICond
4827 // The corresponding probabilities are what was referred to above as
4828 // PredCommon and PredOther.
4829 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4830 /*IsExpected=*/false, /*ElideAllZero=*/true);
4831 }
4832 }
4833
4834 // OtherDest may have phi nodes. If so, add an entry from PBI's
4835 // block that are identical to the entries for BI's block.
4836 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4837
4838 // We know that the CommonDest already had an edge from PBI to
4839 // it. If it has PHIs though, the PHIs may have different
4840 // entries for BB and PBI's BB. If so, insert a select to make
4841 // them agree.
4842 for (PHINode &PN : CommonDest->phis()) {
4843 Value *BIV = PN.getIncomingValueForBlock(BB);
4844 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4845 Value *PBIV = PN.getIncomingValue(PBBIdx);
4846 if (BIV != PBIV) {
4847 // Insert a select in PBI to pick the right value.
4849 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4850 PN.setIncomingValue(PBBIdx, NV);
4851 // The select has the same condition as PBI, in the same BB. The
4852 // probabilities don't change.
4853 if (HasWeights) {
4854 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4855 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4856 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4857 /*IsExpected=*/false, /*ElideAllZero=*/true);
4858 }
4859 }
4860 }
4861
4862 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4863 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4864
4865 // This basic block is probably dead. We know it has at least
4866 // one fewer predecessor.
4867 return true;
4868}
4869
4870// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4871// true or to FalseBB if Cond is false.
4872// Takes care of updating the successors and removing the old terminator.
4873// Also makes sure not to introduce new successors by assuming that edges to
4874// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight/FalseWeight seed the branch-weight metadata of the replacement
// conditional branch; they are only used when both destinations survive.
// Always returns true (the terminator is always replaced).
4875bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4876 Value *Cond, BasicBlock *TrueBB,
4877 BasicBlock *FalseBB,
4878 uint32_t TrueWeight,
4879 uint32_t FalseWeight) {
4880 auto *BB = OldTerm->getParent();
4881 // Remove any superfluous successor edges from the CFG.
4882 // First, figure out which successors to preserve.
4883 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4884 // successor.
4885 BasicBlock *KeepEdge1 = TrueBB;
4886 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4887
4888 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4889
4890 // Then remove the rest.
// KeepEdge1/KeepEdge2 are nulled out as their edges are encountered, so a
// non-null value afterwards means the corresponding block was never a
// successor of OldTerm.
4891 for (BasicBlock *Succ : successors(OldTerm)) {
4892 // Make sure only to keep exactly one copy of each edge.
4893 if (Succ == KeepEdge1)
4894 KeepEdge1 = nullptr;
4895 else if (Succ == KeepEdge2)
4896 KeepEdge2 = nullptr;
4897 else {
4898 Succ->removePredecessor(BB,
4899 /*KeepOneInputPHIs=*/true);
4900
4901 if (Succ != TrueBB && Succ != FalseBB)
4902 RemovedSuccessors.insert(Succ);
4903 }
4904 }
4905
4906 IRBuilder<> Builder(OldTerm);
4907 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4908
4909 // Insert an appropriate new terminator.
4910 if (!KeepEdge1 && !KeepEdge2) {
4911 if (TrueBB == FalseBB) {
4912 // We were only looking for one successor, and it was present.
4913 // Create an unconditional branch to it.
4914 Builder.CreateBr(TrueBB);
4915 } else {
4916 // We found both of the successors we were looking for.
4917 // Create a conditional branch sharing the condition of the select.
4918 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4919 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4920 /*IsExpected=*/false, /*ElideAllZero=*/true);
4921 }
4922 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4923 // Neither of the selected blocks were successors, so this
4924 // terminator must be unreachable.
4925 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4926 } else {
4927 // One of the selected values was a successor, but the other wasn't.
4928 // Insert an unconditional branch to the one that was found;
4929 // the edge to the one that wasn't must be unreachable.
4930 if (!KeepEdge1) {
4931 // Only TrueBB was found.
4932 Builder.CreateBr(TrueBB);
4933 } else {
4934 // Only FalseBB was found.
4935 Builder.CreateBr(FalseBB);
4936 }
4937 }
4938
4940
// Mirror the removed CFG edges in the DomTree, if one is being maintained.
4941 if (DTU) {
4942 SmallVector<DominatorTree::UpdateType, 2> Updates;
4943 Updates.reserve(RemovedSuccessors.size());
4944 for (auto *RemovedSuccessor : RemovedSuccessors)
4945 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4946 DTU->applyUpdates(Updates);
4947 }
4948
4949 return true;
4950}
4951
4952// Replaces
4953// (switch (select cond, X, Y)) on constant X, Y
4954// with a branch - conditional if X and Y lead to distinct BBs,
4955// unconditional otherwise.
4956bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4957 SelectInst *Select) {
4958 // Check for constant integer values in the select.
4959 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4960 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4961 if (!TrueVal || !FalseVal)
4962 return false;
4963
4964 // Find the relevant condition and destinations.
4965 Value *Condition = Select->getCondition();
4966 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4967 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4968
4969 // Get weight for TrueBB and FalseBB.
4970 uint32_t TrueWeight = 0, FalseWeight = 0;
4971 SmallVector<uint64_t, 8> Weights;
4972 bool HasWeights = hasBranchWeightMD(*SI);
4973 if (HasWeights) {
4974 getBranchWeights(SI, Weights);
4975 if (Weights.size() == 1 + SI->getNumCases()) {
4976 TrueWeight =
4977 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4978 FalseWeight =
4979 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4980 }
4981 }
4982
4983 // Perform the actual simplification.
4984 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4985 FalseWeight);
4986}
4987
4988// Replaces
4989// (indirectbr (select cond, blockaddress(@fn, BlockA),
4990// blockaddress(@fn, BlockB)))
4991// with
4992// (br cond, BlockA, BlockB).
4993bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4994 SelectInst *SI) {
4995 // Check that both operands of the select are block addresses.
4996 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4997 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4998 if (!TBA || !FBA)
4999 return false;
5000
5001 // Extract the actual blocks.
5002 BasicBlock *TrueBB = TBA->getBasicBlock();
5003 BasicBlock *FalseBB = FBA->getBasicBlock();
5004
5005 // The select's profile becomes the profile of the conditional branch that
5006 // replaces the indirect branch.
// Weights default to zero when no usable !prof metadata is extracted.
5007 SmallVector<uint32_t> SelectBranchWeights(2);
5009 extractBranchWeights(*SI, SelectBranchWeights);
5010 // Perform the actual simplification.
5011 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
5012 SelectBranchWeights[0],
5013 SelectBranchWeights[1]);
5014}
5015
5016/// This is called when we find an icmp instruction
5017/// (a seteq/setne with a constant) as the only instruction in a
5018/// block that ends with an uncond branch. We are looking for a very specific
5019/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5020/// this case, we merge the first two "or's of icmp" into a switch, but then the
5021/// default value goes to an uncond block with a seteq in it, we get something
5022/// like:
5023///
5024/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5025/// DEFAULT:
5026/// %tmp = icmp eq i8 %A, 92
5027/// br label %end
5028/// end:
5029/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5030///
5031/// We prefer to split the edge to 'end' so that there is a true/false entry to
5032/// the PHI, merging the third icmp into the switch.
5033bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5034 ICmpInst *ICI, IRBuilder<> &Builder) {
5035 // Select == nullptr means we assume that there is a hidden no-op select
5036 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5037 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5038}
5039
5040/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5041/// case. This is called when we find an icmp instruction (a seteq/setne with a
5042/// constant) and its following select instruction as the only TWO instructions
5043/// in a block that ends with an uncond branch. We are looking for a very
5044/// specific pattern that occurs when "
5045/// if (A == 1) return C1;
5046/// if (A == 2) return C2;
5047/// if (A < 3) return C3;
5048/// return C4;
5049/// " gets simplified. In this case, we merge the first two "branches of icmp"
5050/// into a switch, but then the default value goes to an uncond block with a lt
5051/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5052/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5053/// get something like:
5054///
5055/// case1:
5056/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5057/// case2:
5058/// br label %end
5059/// DEFAULT:
5060/// %tmp = icmp eq i8 %A, 2
5061/// %val = select i1 %tmp, i8 C3, i8 C4
5062/// br label %end
5063/// end:
5064/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5065///
5066/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5067/// to the PHI, merging the icmp & select into the switch, as follows:
5068///
5069/// case1:
5070/// switch i8 %A, label %DEFAULT [
5071/// i8 0, label %end
5072/// i8 1, label %case2
5073/// i8 2, label %case3
5074/// ]
5075/// case2:
5076/// br label %end
5077/// case3:
5078/// br label %end
5079/// DEFAULT:
5080/// br label %end
5081/// end:
5082/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
5083bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5084 ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
5085 BasicBlock *BB = ICI->getParent();
5086
5087 // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5088 // too complex.
5089 /// TODO: support multi-phis in succ BB of select's BB.
5090 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5091 (Select && !Select->hasOneUse()))
5092 return false;
5093
5094 // The pattern we're looking for is where our only predecessor is a switch on
5095 // 'V' and this block is the default case for the switch. In this case we can
5096 // fold the compared value into the switch to simplify things.
5097 BasicBlock *Pred = BB->getSinglePredecessor();
5098 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5099 return false;
5100
5101 Value *IcmpCond;
5102 ConstantInt *NewCaseVal;
5103 CmpPredicate Predicate;
5104
5105 // Match icmp X, C
5106 if (!match(ICI,
5107 m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5108 return false;
5109
5110 Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
5112 if (!Select) {
5113 // If Select == nullptr, we can assume that there is a hidden no-op select
5114 // just after icmp
5115 SelectCond = ICI;
5116 SelectTrueVal = Builder.getTrue();
5117 SelectFalseVal = Builder.getFalse();
5118 User = ICI->user_back();
5119 } else {
5120 SelectCond = Select->getCondition();
5121 // Check if the select condition is the same as the icmp condition.
5122 if (SelectCond != ICI)
5123 return false;
5124 SelectTrueVal = Select->getTrueValue();
5125 SelectFalseVal = Select->getFalseValue();
5126 User = Select->user_back();
5127 }
5128
5129 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5130 if (SI->getCondition() != IcmpCond)
5131 return false;
5132
5133 // If BB is reachable on a non-default case, then we simply know the value of
5134 // V in this block. Substitute it and constant fold the icmp instruction
5135 // away.
5136 if (SI->getDefaultDest() != BB) {
5137 ConstantInt *VVal = SI->findCaseDest(BB);
5138 assert(VVal && "Should have a unique destination value");
5139 ICI->setOperand(0, VVal);
5140
5141 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5142 ICI->replaceAllUsesWith(V);
5143 ICI->eraseFromParent();
5144 }
5145 // BB is now empty, so it is likely to simplify away.
5146 return requestResimplify();
5147 }
5148
5149 // Ok, the block is reachable from the default dest. If the constant we're
5150 // comparing exists in one of the other edges, then we can constant fold ICI
5151 // and zap it.
5152 if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
5153 Value *V;
5154 if (Predicate == ICmpInst::ICMP_EQ)
5156 else
5158
5159 ICI->replaceAllUsesWith(V);
5160 ICI->eraseFromParent();
5161 // BB is now empty, so it is likely to simplify away.
5162 return requestResimplify();
5163 }
5164
5165 // The use of the select has to be in the 'end' block, by the only PHI node in
5166 // the block.
5167 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5168 PHINode *PHIUse = dyn_cast<PHINode>(User);
5169 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5171 return false;
5172
5173 // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5174 // edge gets SelectTrueVal in the PHI.
5175 Value *DefaultCst = SelectFalseVal;
5176 Value *NewCst = SelectTrueVal;
5177
5178 if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5179 std::swap(DefaultCst, NewCst);
5180
5181 // Replace Select (which is used by the PHI for the default value) with
5182 // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5183 if (Select) {
5184 Select->replaceAllUsesWith(DefaultCst);
5185 Select->eraseFromParent();
5186 } else {
5187 ICI->replaceAllUsesWith(DefaultCst);
5188 }
5189 ICI->eraseFromParent();
5190
5191 SmallVector<DominatorTree::UpdateType, 2> Updates;
5192
5193 // Okay, the switch goes to this block on a default value. Add an edge from
5194 // the switch to the merge point on the compared value.
5195 BasicBlock *NewBB =
5196 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5197 {
5198 SwitchInstProfUpdateWrapper SIW(*SI);
5199 auto W0 = SIW.getSuccessorWeight(0);
5201 if (W0) {
5202 NewW = ((uint64_t(*W0) + 1) >> 1);
5203 SIW.setSuccessorWeight(0, *NewW);
5204 }
5205 SIW.addCase(NewCaseVal, NewBB, NewW);
5206 if (DTU)
5207 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5208 }
5209
5210 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5211 Builder.SetInsertPoint(NewBB);
5212 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5213 Builder.CreateBr(SuccBlock);
5214 PHIUse->addIncoming(NewCst, NewBB);
5215 if (DTU) {
5216 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5217 DTU->applyUpdates(Updates);
5218 }
5219 return true;
5220}
5221
5222/// The specified branch is a conditional branch.
5223/// Check to see if it is branching on an or/and chain of icmp instructions, and
5224/// fold it into a switch instruction if so.
5225bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5226 IRBuilder<> &Builder,
5227 const DataLayout &DL) {
5229 if (!Cond)
5230 return false;
5231
5232 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5233 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5234 // 'setne's and'ed together, collect them.
5235
5236 // Try to gather values from a chain of and/or to be turned into a switch
5237 ConstantComparesGatherer ConstantCompare(Cond, DL);
5238 // Unpack the result
5239 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5240 Value *CompVal = ConstantCompare.CompValue;
5241 unsigned UsedICmps = ConstantCompare.UsedICmps;
5242 Value *ExtraCase = ConstantCompare.Extra;
5243 bool TrueWhenEqual = ConstantCompare.IsEq;
5244
5245 // If we didn't have a multiply compared value, fail.
5246 if (!CompVal)
5247 return false;
5248
5249 // Avoid turning single icmps into a switch.
5250 if (UsedICmps <= 1)
5251 return false;
5252
5253 // There might be duplicate constants in the list, which the switch
5254 // instruction can't handle, remove them now.
5255 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5256 Values.erase(llvm::unique(Values), Values.end());
5257
5258 // If Extra was used, we require at least two switch values to do the
5259 // transformation. A switch with one value is just a conditional branch.
5260 if (ExtraCase && Values.size() < 2)
5261 return false;
5262
5263 SmallVector<uint32_t> BranchWeights;
5264 const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5265 extractBranchWeights(*BI, BranchWeights);
5266
5267 // Figure out which block is which destination.
5268 BasicBlock *DefaultBB = BI->getSuccessor(1);
5269 BasicBlock *EdgeBB = BI->getSuccessor(0);
5270 if (!TrueWhenEqual) {
5271 std::swap(DefaultBB, EdgeBB);
5272 if (HasProfile)
5273 std::swap(BranchWeights[0], BranchWeights[1]);
5274 }
5275
5276 BasicBlock *BB = BI->getParent();
5277
5278 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5279 << " cases into SWITCH. BB is:\n"
5280 << *BB);
5281
5282 SmallVector<DominatorTree::UpdateType, 2> Updates;
5283
5284 // If there are any extra values that couldn't be folded into the switch
5285 // then we evaluate them with an explicit branch first. Split the block
5286 // right before the condbr to handle it.
5287 if (ExtraCase) {
5288 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5289 /*MSSAU=*/nullptr, "switch.early.test");
5290
5291 // Remove the uncond branch added to the old block.
5292 Instruction *OldTI = BB->getTerminator();
5293 Builder.SetInsertPoint(OldTI);
5294
5295 // There can be an unintended UB if extra values are Poison. Before the
5296 // transformation, extra values may not be evaluated according to the
5297 // condition, and it will not raise UB. But after transformation, we are
5298 // evaluating extra values before checking the condition, and it will raise
5299 // UB. It can be solved by adding freeze instruction to extra values.
5300 AssumptionCache *AC = Options.AC;
5301
5302 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5303 ExtraCase = Builder.CreateFreeze(ExtraCase);
5304
5305 // We don't have any info about this condition.
5306 auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
5307 : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5309
5310 OldTI->eraseFromParent();
5311
5312 if (DTU)
5313 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5314
5315 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5316 // for the edge we just added.
5317 addPredecessorToBlock(EdgeBB, BB, NewBB);
5318
5319 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5320 << "\nEXTRABB = " << *BB);
5321 BB = NewBB;
5322 }
5323
5324 Builder.SetInsertPoint(BI);
5325 // Convert pointer to int before we switch.
5326 if (CompVal->getType()->isPointerTy()) {
5327 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5328 "Should not end up here with unstable pointers");
5329 CompVal = Builder.CreatePtrToInt(
5330 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5331 }
5332
5333 // Check if we can represent the values as a contiguous range. If so, we use a
5334 // range check + conditional branch instead of a switch.
5335 if (Values.front()->getValue() - Values.back()->getValue() ==
5336 Values.size() - 1) {
5337 ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5338 Values.back()->getValue(), Values.front()->getValue() + 1);
5339 APInt Offset, RHS;
5340 ICmpInst::Predicate Pred;
5341 RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5342 Value *X = CompVal;
5343 if (!Offset.isZero())
5344 X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5345 Value *Cond =
5346 Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5347 BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5348 if (HasProfile)
5349 setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5350 // We don't need to update PHI nodes since we don't add any new edges.
5351 } else {
5352 // Create the new switch instruction now.
5353 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5354 if (HasProfile) {
5355 // We know the weight of the default case. We don't know the weight of the
5356 // other cases, but rather than completely lose profiling info, we split
5357 // the remaining probability equally over them.
5358 SmallVector<uint32_t> NewWeights(Values.size() + 1);
5359 NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5360 // if TrueWhenEqual.
5361 for (auto &V : drop_begin(NewWeights))
5362 V = BranchWeights[0] / Values.size();
5363 setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5364 }
5365
5366 // Add all of the 'cases' to the switch instruction.
5367 for (ConstantInt *Val : Values)
5368 New->addCase(Val, EdgeBB);
5369
5370 // We added edges from PI to the EdgeBB. As such, if there were any
5371 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5372 // the number of edges added.
5373 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5374 PHINode *PN = cast<PHINode>(BBI);
5375 Value *InVal = PN->getIncomingValueForBlock(BB);
5376 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5377 PN->addIncoming(InVal, BB);
5378 }
5379 }
5380
5381 // Erase the old branch instruction.
5383 if (DTU)
5384 DTU->applyUpdates(Updates);
5385
5386 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5387 return true;
5388}
5389
5390bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5391 if (isa<PHINode>(RI->getValue()))
5392 return simplifyCommonResume(RI);
5393 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5394 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5395 // The resume must unwind the exception that caused control to branch here.
5396 return simplifySingleResume(RI);
5397
5398 return false;
5399}
5400
5401// Check if cleanup block is empty
5403 for (Instruction &I : R) {
5404 auto *II = dyn_cast<IntrinsicInst>(&I);
5405 if (!II)
5406 return false;
5407
5408 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5409 switch (IntrinsicID) {
5410 case Intrinsic::dbg_declare:
5411 case Intrinsic::dbg_value:
5412 case Intrinsic::dbg_label:
5413 case Intrinsic::lifetime_end:
5414 break;
5415 default:
5416 return false;
5417 }
5418 }
5419 return true;
5420}
5421
5422// Simplify resume that is shared by several landing pads (phi of landing pad).
5423bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5424 BasicBlock *BB = RI->getParent();
5425
5426 // Check that there are no other instructions except for debug and lifetime
5427 // intrinsics between the phi's and resume instruction.
5428 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5429 BB->getTerminator()->getIterator())))
5430 return false;
5431
5432 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5433 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5434
5435 // Check incoming blocks to see if any of them are trivial.
5436 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5437 Idx++) {
5438 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5439 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5440
5441 // If the block has other successors, we can not delete it because
5442 // it has other dependents.
5443 if (IncomingBB->getUniqueSuccessor() != BB)
5444 continue;
5445
5446 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5447 // Not the landing pad that caused the control to branch here.
5448 if (IncomingValue != LandingPad)
5449 continue;
5450
5452 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5453 TrivialUnwindBlocks.insert(IncomingBB);
5454 }
5455
5456 // If no trivial unwind blocks, don't do any simplifications.
5457 if (TrivialUnwindBlocks.empty())
5458 return false;
5459
5460 // Turn all invokes that unwind here into calls.
5461 for (auto *TrivialBB : TrivialUnwindBlocks) {
5462 // Blocks that will be simplified should be removed from the phi node.
5463 // Note there could be multiple edges to the resume block, and we need
5464 // to remove them all.
5465 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5466 BB->removePredecessor(TrivialBB, true);
5467
5468 for (BasicBlock *Pred :
5470 removeUnwindEdge(Pred, DTU);
5471 ++NumInvokes;
5472 }
5473
5474 // In each SimplifyCFG run, only the current processed block can be erased.
5475 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5476 // of erasing TrivialBB, we only remove the branch to the common resume
5477 // block so that we can later erase the resume block since it has no
5478 // predecessors.
5479 TrivialBB->getTerminator()->eraseFromParent();
5480 new UnreachableInst(RI->getContext(), TrivialBB);
5481 if (DTU)
5482 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5483 }
5484
5485 // Delete the resume block if all its predecessors have been removed.
5486 if (pred_empty(BB))
5487 DeleteDeadBlock(BB, DTU);
5488
5489 return !TrivialUnwindBlocks.empty();
5490}
5491
5492// Simplify resume that is only used by a single (non-phi) landing pad.
5493bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5494 BasicBlock *BB = RI->getParent();
5495 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5496 assert(RI->getValue() == LPInst &&
5497 "Resume must unwind the exception that caused control to here");
5498
5499 // Check that there are no other instructions except for debug intrinsics.
5501 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5502 return false;
5503
5504 // Turn all invokes that unwind here into calls and delete the basic block.
5505 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5506 removeUnwindEdge(Pred, DTU);
5507 ++NumInvokes;
5508 }
5509
5510 // The landingpad is now unreachable. Zap it.
5511 DeleteDeadBlock(BB, DTU);
5512 return true;
5513}
5514
5516 // If this is a trivial cleanup pad that executes no instructions, it can be
5517 // eliminated. If the cleanup pad continues to the caller, any predecessor
5518 // that is an EH pad will be updated to continue to the caller and any
5519 // predecessor that terminates with an invoke instruction will have its invoke
5520 // instruction converted to a call instruction. If the cleanup pad being
5521 // simplified does not continue to the caller, each predecessor will be
5522 // updated to continue to the unwind destination of the cleanup pad being
5523 // simplified.
5524 BasicBlock *BB = RI->getParent();
5525 CleanupPadInst *CPInst = RI->getCleanupPad();
5526 if (CPInst->getParent() != BB)
5527 // This isn't an empty cleanup.
5528 return false;
5529
5530 // We cannot kill the pad if it has multiple uses. This typically arises
5531 // from unreachable basic blocks.
5532 if (!CPInst->hasOneUse())
5533 return false;
5534
5535 // Check that there are no other instructions except for benign intrinsics.
5537 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5538 return false;
5539
5540 // If the cleanup return we are simplifying unwinds to the caller, this will
5541 // set UnwindDest to nullptr.
5542 BasicBlock *UnwindDest = RI->getUnwindDest();
5543
5544 // We're about to remove BB from the control flow. Before we do, sink any
5545 // PHINodes into the unwind destination. Doing this before changing the
5546 // control flow avoids some potentially slow checks, since we can currently
5547 // be certain that UnwindDest and BB have no common predecessors (since they
5548 // are both EH pads).
5549 if (UnwindDest) {
5550 // First, go through the PHI nodes in UnwindDest and update any nodes that
5551 // reference the block we are removing
5552 for (PHINode &DestPN : UnwindDest->phis()) {
5553 int Idx = DestPN.getBasicBlockIndex(BB);
5554 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5555 assert(Idx != -1);
5556 // This PHI node has an incoming value that corresponds to a control
5557 // path through the cleanup pad we are removing. If the incoming
5558 // value is in the cleanup pad, it must be a PHINode (because we
5559 // verified above that the block is otherwise empty). Otherwise, the
5560 // value is either a constant or a value that dominates the cleanup
5561 // pad being removed.
5562 //
5563 // Because BB and UnwindDest are both EH pads, all of their
5564 // predecessors must unwind to these blocks, and since no instruction
5565 // can have multiple unwind destinations, there will be no overlap in
5566 // incoming blocks between SrcPN and DestPN.
5567 Value *SrcVal = DestPN.getIncomingValue(Idx);
5568 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5569
5570 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5571 for (auto *Pred : predecessors(BB)) {
5572 Value *Incoming =
5573 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5574 DestPN.addIncoming(Incoming, Pred);
5575 }
5576 }
5577
5578 // Sink any remaining PHI nodes directly into UnwindDest.
5579 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5580 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5581 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5582 // If the PHI node has no uses or all of its uses are in this basic
5583 // block (meaning they are debug or lifetime intrinsics), just leave
5584 // it. It will be erased when we erase BB below.
5585 continue;
5586
5587 // Otherwise, sink this PHI node into UnwindDest.
5588 // Any predecessors to UnwindDest which are not already represented
5589 // must be back edges which inherit the value from the path through
5590 // BB. In this case, the PHI value must reference itself.
5591 for (auto *pred : predecessors(UnwindDest))
5592 if (pred != BB)
5593 PN.addIncoming(&PN, pred);
5594 PN.moveBefore(InsertPt);
5595 // Also, add a dummy incoming value for the original BB itself,
5596 // so that the PHI is well-formed until we drop said predecessor.
5597 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5598 }
5599 }
5600
5601 std::vector<DominatorTree::UpdateType> Updates;
5602
5603 // We use make_early_inc_range here because we will remove all predecessors.
5605 if (UnwindDest == nullptr) {
5606 if (DTU) {
5607 DTU->applyUpdates(Updates);
5608 Updates.clear();
5609 }
5610 removeUnwindEdge(PredBB, DTU);
5611 ++NumInvokes;
5612 } else {
5613 BB->removePredecessor(PredBB);
5614 Instruction *TI = PredBB->getTerminator();
5615 TI->replaceUsesOfWith(BB, UnwindDest);
5616 if (DTU) {
5617 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5618 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5619 }
5620 }
5621 }
5622
5623 if (DTU)
5624 DTU->applyUpdates(Updates);
5625
5626 DeleteDeadBlock(BB, DTU);
5627
5628 return true;
5629}
5630
5631// Try to merge two cleanuppads together.
5633 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5634 // with.
5635 BasicBlock *UnwindDest = RI->getUnwindDest();
5636 if (!UnwindDest)
5637 return false;
5638
5639 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5640 // be safe to merge without code duplication.
5641 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5642 return false;
5643
5644 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5645 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5646 if (!SuccessorCleanupPad)
5647 return false;
5648
5649 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5650 // Replace any uses of the successor cleanupad with the predecessor pad
5651 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5652 // funclet bundle operands.
5653 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5654 // Remove the old cleanuppad.
5655 SuccessorCleanupPad->eraseFromParent();
5656 // Now, we simply replace the cleanupret with a branch to the unwind
5657 // destination.
5658 BranchInst::Create(UnwindDest, RI->getParent());
5659 RI->eraseFromParent();
5660
5661 return true;
5662}
5663
5664bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5665 // It is possible to transiantly have an undef cleanuppad operand because we
5666 // have deleted some, but not all, dead blocks.
5667 // Eventually, this block will be deleted.
5668 if (isa<UndefValue>(RI->getOperand(0)))
5669 return false;
5670
5671 if (mergeCleanupPad(RI))
5672 return true;
5673
5674 if (removeEmptyCleanup(RI, DTU))
5675 return true;
5676
5677 return false;
5678}
5679
5680// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5681bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5682 BasicBlock *BB = UI->getParent();
5683
5684 bool Changed = false;
5685
5686 // Ensure that any debug-info records that used to occur after the Unreachable
5687 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5688 // the block.
5690
5691 // Debug-info records on the unreachable inst itself should be deleted, as
5692 // below we delete everything past the final executable instruction.
5693 UI->dropDbgRecords();
5694
5695 // If there are any instructions immediately before the unreachable that can
5696 // be removed, do so.
5697 while (UI->getIterator() != BB->begin()) {
5699 --BBI;
5700
5702 break; // Can not drop any more instructions. We're done here.
5703 // Otherwise, this instruction can be freely erased,
5704 // even if it is not side-effect free.
5705
5706 // Note that deleting EH's here is in fact okay, although it involves a bit
5707 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5708 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5709 // and we can therefore guarantee this block will be erased.
5710
5711 // If we're deleting this, we're deleting any subsequent debug info, so
5712 // delete DbgRecords.
5713 BBI->dropDbgRecords();
5714
5715 // Delete this instruction (any uses are guaranteed to be dead)
5716 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5717 BBI->eraseFromParent();
5718 Changed = true;
5719 }
5720
5721 // If the unreachable instruction is the first in the block, take a gander
5722 // at all of the predecessors of this instruction, and simplify them.
5723 if (&BB->front() != UI)
5724 return Changed;
5725
5726 std::vector<DominatorTree::UpdateType> Updates;
5727
5728 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5729 for (BasicBlock *Predecessor : Preds) {
5730 Instruction *TI = Predecessor->getTerminator();
5731 IRBuilder<> Builder(TI);
5732 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5733 // We could either have a proper unconditional branch,
5734 // or a degenerate conditional branch with matching destinations.
5735 if (all_of(BI->successors(),
5736 [BB](auto *Successor) { return Successor == BB; })) {
5737 new UnreachableInst(TI->getContext(), TI->getIterator());
5738 TI->eraseFromParent();
5739 Changed = true;
5740 } else {
5741 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5742 Value* Cond = BI->getCondition();
5743 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5744 "The destinations are guaranteed to be different here.");
5745 CallInst *Assumption;
5746 if (BI->getSuccessor(0) == BB) {
5747 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5748 Builder.CreateBr(BI->getSuccessor(1));
5749 } else {
5750 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5751 Assumption = Builder.CreateAssumption(Cond);
5752 Builder.CreateBr(BI->getSuccessor(0));
5753 }
5754 if (Options.AC)
5755 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5756
5758 Changed = true;
5759 }
5760 if (DTU)
5761 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5762 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5763 SwitchInstProfUpdateWrapper SU(*SI);
5764 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5765 if (i->getCaseSuccessor() != BB) {
5766 ++i;
5767 continue;
5768 }
5769 BB->removePredecessor(SU->getParent());
5770 i = SU.removeCase(i);
5771 e = SU->case_end();
5772 Changed = true;
5773 }
5774 // Note that the default destination can't be removed!
5775 if (DTU && SI->getDefaultDest() != BB)
5776 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5777 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5778 if (II->getUnwindDest() == BB) {
5779 if (DTU) {
5780 DTU->applyUpdates(Updates);
5781 Updates.clear();
5782 }
5783 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5784 if (!CI->doesNotThrow())
5785 CI->setDoesNotThrow();
5786 Changed = true;
5787 }
5788 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5789 if (CSI->getUnwindDest() == BB) {
5790 if (DTU) {
5791 DTU->applyUpdates(Updates);
5792 Updates.clear();
5793 }
5794 removeUnwindEdge(TI->getParent(), DTU);
5795 Changed = true;
5796 continue;
5797 }
5798
5799 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5800 E = CSI->handler_end();
5801 I != E; ++I) {
5802 if (*I == BB) {
5803 CSI->removeHandler(I);
5804 --I;
5805 --E;
5806 Changed = true;
5807 }
5808 }
5809 if (DTU)
5810 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5811 if (CSI->getNumHandlers() == 0) {
5812 if (CSI->hasUnwindDest()) {
5813 // Redirect all predecessors of the block containing CatchSwitchInst
5814 // to instead branch to the CatchSwitchInst's unwind destination.
5815 if (DTU) {
5816 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5817 Updates.push_back({DominatorTree::Insert,
5818 PredecessorOfPredecessor,
5819 CSI->getUnwindDest()});
5820 Updates.push_back({DominatorTree::Delete,
5821 PredecessorOfPredecessor, Predecessor});
5822 }
5823 }
5824 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5825 } else {
5826 // Rewrite all preds to unwind to caller (or from invoke to call).
5827 if (DTU) {
5828 DTU->applyUpdates(Updates);
5829 Updates.clear();
5830 }
5831 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5832 for (BasicBlock *EHPred : EHPreds)
5833 removeUnwindEdge(EHPred, DTU);
5834 }
5835 // The catchswitch is no longer reachable.
5836 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5837 CSI->eraseFromParent();
5838 Changed = true;
5839 }
5840 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5841 (void)CRI;
5842 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5843 "Expected to always have an unwind to BB.");
5844 if (DTU)
5845 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5846 new UnreachableInst(TI->getContext(), TI->getIterator());
5847 TI->eraseFromParent();
5848 Changed = true;
5849 }
5850 }
5851
5852 if (DTU)
5853 DTU->applyUpdates(Updates);
5854
5855 // If this block is now dead, remove it.
5856 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5857 DeleteDeadBlock(BB, DTU);
5858 return true;
5859 }
5860
5861 return Changed;
5862}
5863
5872
5873static std::optional<ContiguousCasesResult>
5876 BasicBlock *Dest, BasicBlock *OtherDest) {
5877 assert(Cases.size() >= 1);
5878
5880 const APInt &Min = Cases.back()->getValue();
5881 const APInt &Max = Cases.front()->getValue();
5882 APInt Offset = Max - Min;
5883 size_t ContiguousOffset = Cases.size() - 1;
5884 if (Offset == ContiguousOffset) {
5885 return ContiguousCasesResult{
5886 /*Min=*/Cases.back(),
5887 /*Max=*/Cases.front(),
5888 /*Dest=*/Dest,
5889 /*OtherDest=*/OtherDest,
5890 /*Cases=*/&Cases,
5891 /*OtherCases=*/&OtherCases,
5892 };
5893 }
5894 ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
5895 // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5896 // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5897 // contiguous range for the other destination. N.B. If CR is not a full range,
5898 // Max+1 is not equal to Min. It's not continuous in arithmetic.
5899 if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5900 assert(Cases.size() >= 2);
5901 auto *It =
5902 std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
5903 return L->getValue() != R->getValue() + 1;
5904 });
5905 if (It == Cases.end())
5906 return std::nullopt;
5907 auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
5908 if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5909 Cases.size() - 2) {
5910 return ContiguousCasesResult{
5911 /*Min=*/cast<ConstantInt>(
5912 ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
5913 /*Max=*/
5915 ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
5916 /*Dest=*/OtherDest,
5917 /*OtherDest=*/Dest,
5918 /*Cases=*/&OtherCases,
5919 /*OtherCases=*/&Cases,
5920 };
5921 }
5922 }
5923 return std::nullopt;
5924}
5925
5927 DomTreeUpdater *DTU,
5928 bool RemoveOrigDefaultBlock = true) {
5929 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5930 auto *BB = Switch->getParent();
5931 auto *OrigDefaultBlock = Switch->getDefaultDest();
5932 if (RemoveOrigDefaultBlock)
5933 OrigDefaultBlock->removePredecessor(BB);
5934 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5935 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5936 OrigDefaultBlock);
5937 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5939 Switch->setDefaultDest(&*NewDefaultBlock);
5940 if (DTU) {
5942 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5943 if (RemoveOrigDefaultBlock &&
5944 !is_contained(successors(BB), OrigDefaultBlock))
5945 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5946 DTU->applyUpdates(Updates);
5947 }
5948}
5949
5950/// Turn a switch into an integer range comparison and branch.
5951/// Switches with more than 2 destinations are ignored.
5952/// Switches with 1 destination are also ignored.
// NOTE(review): this chunk is a doxygen source listing with gaps; original
// lines 5963-5964 (the CasesA/CasesB vector declarations), 6029-6030 (the
// Offset computation) and 6095 (the unreachable-default cleanup call) are
// not visible here. Code lines below are kept unchanged.
5953bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5954                                             IRBuilder<> &Builder) {
5955  assert(SI->getNumCases() > 1 && "Degenerate switch?");
5956
5957  bool HasDefault = !SI->defaultDestUnreachable();
5958
5959  auto *BB = SI->getParent();
5960  // Partition the cases into two sets with different destinations.
5961  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5962  BasicBlock *DestB = nullptr;
5965
5966  for (auto Case : SI->cases()) {
5967    BasicBlock *Dest = Case.getCaseSuccessor();
5968    if (!DestA)
5969      DestA = Dest;
5970    if (Dest == DestA) {
5971      CasesA.push_back(Case.getCaseValue());
5972      continue;
5973    }
5974    if (!DestB)
5975      DestB = Dest;
5976    if (Dest == DestB) {
5977      CasesB.push_back(Case.getCaseValue());
5978      continue;
5979    }
5980    return false; // More than two destinations.
5981  }
5982  if (!DestB)
5983    return false; // All destinations are the same and the default is unreachable
5984
5985  assert(DestA && DestB &&
5986         "Single-destination switch should have been folded.");
5987  assert(DestA != DestB);
5988  assert(DestB != SI->getDefaultDest());
5989  assert(!CasesB.empty() && "There must be non-default cases.");
5990  assert(!CasesA.empty() || HasDefault);
5991
5992  // Figure out if one of the sets of cases form a contiguous range.
5993  std::optional<ContiguousCasesResult> ContiguousCases;
5994
5995  // Only one icmp is needed when there is only one case.
5996  if (!HasDefault && CasesA.size() == 1)
5997    ContiguousCases = ContiguousCasesResult{
5998        /*Min=*/CasesA[0],
5999        /*Max=*/CasesA[0],
6000        /*Dest=*/DestA,
6001        /*OtherDest=*/DestB,
6002        /*Cases=*/&CasesA,
6003        /*OtherCases=*/&CasesB,
6004    };
6005  else if (CasesB.size() == 1)
6006    ContiguousCases = ContiguousCasesResult{
6007        /*Min=*/CasesB[0],
6008        /*Max=*/CasesB[0],
6009        /*Dest=*/DestB,
6010        /*OtherDest=*/DestA,
6011        /*Cases=*/&CasesB,
6012        /*OtherCases=*/&CasesA,
6013    };
6014  // Correctness: Cases to the default destination cannot be contiguous cases.
6015  else if (!HasDefault)
6016    ContiguousCases =
6017        findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6018
6019  if (!ContiguousCases)
6020    ContiguousCases =
6021        findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6022
6023  if (!ContiguousCases)
6024    return false;
6025
6026  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6027
6028  // Start building the compare and branch.
6030
6031  Constant *NumCases = ConstantInt::get(Offset->getType(),
6032                                        Max->getValue() - Min->getValue() + 1);
6033  BranchInst *NewBI;
  // A one-element range degenerates to a single equality compare.
6034  if (NumCases->isOneValue()) {
6035    assert(Max->getValue() == Min->getValue());
6036    Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6037    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6038  }
6039  // If NumCases overflowed, then all possible values jump to the successor.
6040  else if (NumCases->isNullValue() && !Cases->empty()) {
6041    NewBI = Builder.CreateBr(Dest);
6042  } else {
  // General case: bias the condition so the range starts at zero, then a
  // single unsigned compare tests membership in [Min, Max].
6043    Value *Sub = SI->getCondition();
6044    if (!Offset->isNullValue())
6045      Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6046    Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6047    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6048  }
6049
6050  // Update weight for the newly-created conditional branch.
6051  if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
6052    SmallVector<uint64_t, 8> Weights;
6053    getBranchWeights(SI, Weights);
6054    if (Weights.size() == 1 + SI->getNumCases()) {
6055      uint64_t TrueWeight = 0;
6056      uint64_t FalseWeight = 0;
6057      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6058        if (SI->getSuccessor(I) == Dest)
6059          TrueWeight += Weights[I];
6060        else
6061          FalseWeight += Weights[I];
6062      }
      // Scale both weights down together so each fits in 32 bits; only the
      // ratio matters for the metadata.
6063      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6064        TrueWeight /= 2;
6065        FalseWeight /= 2;
6066      }
6067      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6068                             /*IsExpected=*/false, /*ElideAllZero=*/true);
6069    }
6070  }
6071
6072  // Prune obsolete incoming values off the successors' PHI nodes.
6073  for (auto &PHI : make_early_inc_range(Dest->phis())) {
6074    unsigned PreviousEdges = Cases->size();
6075    if (Dest == SI->getDefaultDest())
6076      ++PreviousEdges;
    // Keep exactly one incoming edge from the switch block for the new
    // branch; drop the rest of the duplicate entries.
6077    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6078      PHI.removeIncomingValue(SI->getParent());
6079  }
6080  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6081    unsigned PreviousEdges = OtherCases->size();
6082    if (OtherDest == SI->getDefaultDest())
6083      ++PreviousEdges;
6084    unsigned E = PreviousEdges - 1;
6085    // Remove all incoming values from OtherDest if OtherDest is unreachable.
6086    if (NewBI->isUnconditional())
6087      ++E;
6088    for (unsigned I = 0; I != E; ++I)
6089      PHI.removeIncomingValue(SI->getParent());
6090  }
6091
6092  // Clean up the default block - it may have phis or other instructions before
6093  // the unreachable terminator.
6094  if (!HasDefault)
6096
6097  auto *UnreachableDefault = SI->getDefaultDest();
6098
6099  // Drop the switch.
6100  SI->eraseFromParent();
6101
6102  if (!HasDefault && DTU)
6103    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6104
6105  return true;
6106}
6107
6108/// Compute masked bits for the condition of a switch
6109/// and use it to remove dead cases.
// NOTE(review): original line 6110 carried the signature (the function is
// presumably eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
// AssumptionCache *AC, const DataLayout &DL) — confirm against the full
// source). Lines 6115, 6122, 6125, 6160, 6167, 6185, 6188 and 6199 (local
// declarations / calls carrying links) are likewise absent from this listing.
6111                                     AssumptionCache *AC,
6112                                     const DataLayout &DL) {
6113  Value *Cond = SI->getCondition();
6114  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
6116  bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6117
6118  // We can also eliminate cases by determining that their values are outside of
6119  // the limited range of the condition based on how many significant (non-sign)
6120  // bits are in the condition value.
6121  unsigned MaxSignificantBitsInCond =
6123
6124  // Gather dead cases.
6126  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6127  SmallVector<BasicBlock *, 8> UniqueSuccessors;
6128  for (const auto &Case : SI->cases()) {
6129    auto *Successor = Case.getCaseSuccessor();
    // Per-successor case counts are only needed to feed the DomTree updater
    // at the bottom, so skip the bookkeeping when DTU is null.
6130    if (DTU) {
6131      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6132      if (Inserted)
6133        UniqueSuccessors.push_back(Successor);
6134      ++It->second;
6135    }
6136    ConstantInt *CaseC = Case.getCaseValue();
6137    const APInt &CaseVal = CaseC->getValue();
    // A case is dead if the known bits contradict it, it needs more
    // significant bits than the condition can carry, or it is not among the
    // collected set of possible condition values.
6138    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6139        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6140        (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6141      DeadCases.push_back(CaseC);
6142      if (DTU)
6143        --NumPerSuccessorCases[Successor];
6144      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6145                        << " is dead.\n");
6146    } else if (IsKnownValuesValid)
6147      KnownValues.erase(CaseC);
6148  }
6149
6150  // If we can prove that the cases must cover all possible values, the
6151  // default destination becomes dead and we can remove it. If we know some
6152  // of the bits in the value, we can use that to more precisely compute the
6153  // number of possible unique case values.
6154  bool HasDefault = !SI->defaultDestUnreachable();
6155  const unsigned NumUnknownBits =
6156      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6157  assert(NumUnknownBits <= Known.getBitWidth());
6158  if (HasDefault && DeadCases.empty()) {
6159    if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
6161      return true;
6162    }
6163
6164    if (NumUnknownBits < 64 /* avoid overflow */) {
6165      uint64_t AllNumCases = 1ULL << NumUnknownBits;
6166      if (SI->getNumCases() == AllNumCases) {
6168        return true;
6169      }
6170      // When only one case value is missing, replace default with that case.
6171      // Eliminating the default branch will provide more opportunities for
6172      // optimization, such as lookup tables.
6173      if (SI->getNumCases() == AllNumCases - 1) {
6174        assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6175        IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6176        if (CondTy->getIntegerBitWidth() > 64 ||
6177            !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6178          return false;
6179
        // XOR of all present case values yields the missing one: the XOR of
        // the whole 2^k domain is zero for k > 1 (see the assert above).
6180        uint64_t MissingCaseVal = 0;
6181        for (const auto &Case : SI->cases())
6182          MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6183        auto *MissingCase = cast<ConstantInt>(
6184            ConstantInt::get(Cond->getType(), MissingCaseVal));
6186        SIW.addCase(MissingCase, SI->getDefaultDest(),
6187                    SIW.getSuccessorWeight(0));
6189                                       /*RemoveOrigDefaultBlock*/ false);
6190        SIW.setSuccessorWeight(0, 0);
6191        return true;
6192      }
6193    }
6194  }
6195
6196  if (DeadCases.empty())
6197    return false;
6198
6200  for (ConstantInt *DeadCase : DeadCases) {
6201    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6202    assert(CaseI != SI->case_default() &&
6203           "Case was not found. Probably mistake in DeadCases forming.");
6204    // Prune unused values from PHI nodes.
6205    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6206    SIW.removeCase(CaseI);
6207  }
6208
  // Tell the DomTree updater about successors that lost their last edge.
6209  if (DTU) {
6210    std::vector<DominatorTree::UpdateType> Updates;
6211    for (auto *Successor : UniqueSuccessors)
6212      if (NumPerSuccessorCases[Successor] == 0)
6213        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6214    DTU->applyUpdates(Updates);
6215  }
6216
6217  return true;
6218}
6219
6220/// If BB would be eligible for simplification by
6221/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6222/// by an unconditional branch), look at the phi node for BB in the successor
6223/// block and see if the incoming value is equal to CaseValue. If so, return
6224/// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): original line 6225 (the signature line naming the function —
// presumably findPHIForConditionForwarding(ConstantInt *CaseValue, ...)) and
// line 6232 (the dyn_cast<BranchInst> initializing Branch) are missing from
// this listing.
6226                                              BasicBlock *BB, int *PhiIndex) {
6227  if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6228    return nullptr; // BB must be empty to be a candidate for simplification.
6229  if (!BB->getSinglePredecessor())
6230    return nullptr; // BB must be dominated by the switch.
6231
6233  if (!Branch || !Branch->isUnconditional())
6234    return nullptr; // Terminator must be unconditional branch.
6235
6236  BasicBlock *Succ = Branch->getSuccessor(0);
6237
  // Return the first phi in the successor whose entry for BB carries exactly
  // the case constant we want to replace with the switch condition.
6238  for (PHINode &PHI : Succ->phis()) {
6239    int Idx = PHI.getBasicBlockIndex(BB);
6240    assert(Idx >= 0 && "PHI has no entry for predecessor?");
6241
6242    Value *InValue = PHI.getIncomingValue(Idx);
6243    if (InValue != CaseValue)
6244      continue;
6245
6246    *PhiIndex = Idx;
6247    return &PHI;
6248  }
6249
6250  return nullptr;
6251}
6252
6253/// Try to forward the condition of a switch instruction to a phi node
6254/// dominated by the switch, if that would mean that some of the destination
6255/// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): original line 6256 (the signature — presumably
// forwardSwitchConditionToPHI(SwitchInst *SI)) is missing from this listing.
6257  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6258
6259  ForwardingNodesMap ForwardingNodes;
6260  BasicBlock *SwitchBlock = SI->getParent();
6261  bool Changed = false;
6262  for (const auto &Case : SI->cases()) {
6263    ConstantInt *CaseValue = Case.getCaseValue();
6264    BasicBlock *CaseDest = Case.getCaseSuccessor();
6265
6266    // Replace phi operands in successor blocks that are using the constant case
6267    // value rather than the switch condition variable:
6268    // switchbb:
6269    // switch i32 %x, label %default [
6270    // i32 17, label %succ
6271    // ...
6272    // succ:
6273    // %r = phi i32 ... [ 17, %switchbb ] ...
6274    // -->
6275    // %r = phi i32 ... [ %x, %switchbb ] ...
6276
6277    for (PHINode &Phi : CaseDest->phis()) {
6278      // This only works if there is exactly 1 incoming edge from the switch to
6279      // a phi. If there is >1, that means multiple cases of the switch map to 1
6280      // value in the phi, and that phi value is not the switch condition. Thus,
6281      // this transform would not make sense (the phi would be invalid because
6282      // a phi can't have different incoming values from the same block).
6283      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6284      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6285          count(Phi.blocks(), SwitchBlock) == 1) {
6286        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6287        Changed = true;
6288      }
6289    }
6290
6291    // Collect phi nodes that are indirectly using this switch's case constants.
6292    int PhiIdx;
6293    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6294      ForwardingNodes[Phi].push_back(PhiIdx);
6295  }
6296
  // Rewrite the collected phi slots to use the switch condition directly.
6297  for (auto &ForwardingNode : ForwardingNodes) {
6298    PHINode *Phi = ForwardingNode.first;
6299    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6300    // Check if it helps to fold PHI.
6301    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6302      continue;
6303
6304    for (int Index : Indexes)
6305      Phi->setIncomingValue(Index, SI->getCondition());
6306    Changed = true;
6307  }
6308
6309  return Changed;
6310}
6311
6312/// Return true if the backend will be able to handle
6313/// initializing an array of constants like C.
// NOTE(review): original line 6314 (the signature — presumably
// validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)),
// lines 6321-6322 (the remaining isa<> checks of the constant-kind test) and
// line 6325 (the dyn_cast<ConstantExpr> guarding the block below) are
// missing from this listing.
6315  if (C->isThreadDependent())
6316    return false;
6317  if (C->isDLLImportDependent())
6318    return false;
6319
6320  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6323    return false;
6324
6326    // Pointer casts and in-bounds GEPs will not prohibit the backend from
6327    // materializing the array of constants.
6328    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6329    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6330      return false;
6331  }
6332
  // Final veto belongs to the target: it may refuse this constant kind.
6333  if (!TTI.shouldBuildLookupTablesForConstant(C))
6334    return false;
6335
6336  return true;
6337}
6338
6339/// If V is a Constant, return it. Otherwise, try to look up
6340/// its constant value in ConstantPool, returning 0 if it's not there.
6341static Constant *
// NOTE(review): original lines 6342-6343 (the parameter list: the Value *V
// being queried and the ConstantPool map) are missing from this listing.
6344  if (Constant *C = dyn_cast<Constant>(V))
6345    return C;
  // Not a literal constant; fall back to the per-block propagation pool
  // (lookup returns null when V has no entry).
6346  return ConstantPool.lookup(V);
6347}
6348
6349/// Try to fold instruction I into a constant. This works for
6350/// simple instructions such as binary operations where both operands are
6351/// constant or can be replaced by constants from the ConstantPool. Returns the
6352/// resulting constant on success, 0 otherwise.
6353static Constant *
// NOTE(review): original lines 6354-6356 (the parameter list and the
// dyn_cast<SelectInst> that introduces Select) and line 6367 (the COps
// vector declaration) are missing from this listing.
6357    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6358    if (!A)
6359      return nullptr;
    // Selects are handled specially: pick the arm chosen by the (known)
    // condition instead of folding operand-wise.
6360    if (A->isAllOnesValue())
6361      return lookupConstant(Select->getTrueValue(), ConstantPool);
6362    if (A->isNullValue())
6363      return lookupConstant(Select->getFalseValue(), ConstantPool);
6364    return nullptr;
6365  }
6366
6368  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6369    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6370      COps.push_back(A);
6371    else
6372      return nullptr;
6373  }
6374
  // All operands resolved to constants; let the generic folder finish (it
  // may still return null if the operation is not foldable).
6375  return ConstantFoldInstOperands(I, COps, DL);
6376}
6377
6378/// Try to determine the resulting constant values in phi nodes
6379/// at the common destination basic block, *CommonDest, for one of the case
6380/// destionations CaseDest corresponding to value CaseVal (0 for the default
6381/// case), of a switch instruction SI.
6382static bool
// NOTE(review): original line 6383 (the first parameter line naming the
// function — presumably getCaseResults(SwitchInst *SI, ConstantInt *CaseVal,
// BasicBlock *CaseDest, ...)), line 6392 (the ConstantPool declaration) and
// line 6409 (the dyn_cast<Instruction> introducing the inner I) are missing
// from this listing. ("destionations" above is a typo carried over from the
// source comment.)
6384                          BasicBlock **CommonDest,
6385                          SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6386                          const DataLayout &DL, const TargetTransformInfo &TTI) {
6387  // The block from which we enter the common destination.
6388  BasicBlock *Pred = SI->getParent();
6389
6390  // If CaseDest is empty except for some side-effect free instructions through
6391  // which we can constant-propagate the CaseVal, continue to its successor.
6393  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6394  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6395    if (I.isTerminator()) {
6396      // If the terminator is a simple branch, continue to the next block.
6397      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6398        return false;
6399      Pred = CaseDest;
6400      CaseDest = I.getSuccessor(0);
6401    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6402      // Instruction is side-effect free and constant.
6403
6404      // If the instruction has uses outside this block or a phi node slot for
6405      // the block, it is not safe to bypass the instruction since it would then
6406      // no longer dominate all its uses.
6407      for (auto &Use : I.uses()) {
6408        User *User = Use.getUser();
6410        if (I->getParent() == CaseDest)
6411          continue;
6412        if (PHINode *Phi = dyn_cast<PHINode>(User))
6413          if (Phi->getIncomingBlock(Use) == CaseDest)
6414            continue;
6415        return false;
6416      }
6417
6418      ConstantPool.insert(std::make_pair(&I, C));
6419    } else {
6420      break;
6421    }
6422  }
6423
6424  // If we did not have a CommonDest before, use the current one.
6425  if (!*CommonDest)
6426    *CommonDest = CaseDest;
6427  // If the destination isn't the common one, abort.
6428  if (CaseDest != *CommonDest)
6429    return false;
6430
6431  // Get the values for this case from phi nodes in the destination block.
6432  for (PHINode &PHI : (*CommonDest)->phis()) {
6433    int Idx = PHI.getBasicBlockIndex(Pred);
6434    if (Idx == -1)
6435      continue;
6436
6437    Constant *ConstVal =
6438        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6439    if (!ConstVal)
6440      return false;
6441
6442    // Be conservative about which kinds of constants we support.
6443    if (!validLookupTableConstant(ConstVal, TTI))
6444      return false;
6445
6446    Res.push_back(std::make_pair(&PHI, ConstVal));
6447  }
6448
  // Success only if at least one phi produced a usable constant.
6449  return Res.size() > 0;
6450}
6451
6452// Helper function used to add CaseVal to the list of cases that generate
6453// Result. Returns the updated number of cases that generate this result.
6454static size_t mapCaseToResult(ConstantInt *CaseVal,
6455 SwitchCaseResultVectorTy &UniqueResults,
6456 Constant *Result) {
6457 for (auto &I : UniqueResults) {
6458 if (I.first == Result) {
6459 I.second.push_back(CaseVal);
6460 return I.second.size();
6461 }
6462 }
6463 UniqueResults.push_back(
6464 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6465 return 1;
6466}
6467
6468// Helper function that initializes a map containing
6469// results for the PHI node of the common destination block for a switch
6470// instruction. Returns false if multiple PHI nodes have been found or if
6471// there is not a common destination block for the switch.
// NOTE(review): original line 6472 (the signature — presumably
// initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, ...)) and line 6511
// (the DefaultResults declaration) are missing from this listing.
6473                                  BasicBlock *&CommonDest,
6474                                  SwitchCaseResultVectorTy &UniqueResults,
6475                                  Constant *&DefaultResult,
6476                                  const DataLayout &DL,
6477                                  const TargetTransformInfo &TTI,
6478                                  uintptr_t MaxUniqueResults) {
6479  for (const auto &I : SI->cases()) {
6480    ConstantInt *CaseVal = I.getCaseValue();
6481
6482    // Resulting value at phi nodes for this case value.
6483    SwitchCaseResultsTy Results;
6484    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6485                        DL, TTI))
6486      return false;
6487
6488    // Only one value per case is permitted.
6489    if (Results.size() > 1)
6490      return false;
6491
6492    // Add the case->result mapping to UniqueResults.
6493    const size_t NumCasesForResult =
6494        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6495
6496    // Early out if there are too many cases for this result.
6497    if (NumCasesForResult > MaxSwitchCasesPerResult)
6498      return false;
6499
6500    // Early out if there are too many unique results.
6501    if (UniqueResults.size() > MaxUniqueResults)
6502      return false;
6503
6504    // Check the PHI consistency.
6505    if (!PHI)
6506      PHI = Results[0].first;
6507    else if (PHI != Results[0].first)
6508      return false;
6509  }
6510  // Find the default result value.
6512  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6513                 DL, TTI);
6514  // If the default value is not found abort unless the default destination
6515  // is unreachable.
6516  DefaultResult =
6517      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6518
6519  return DefaultResult || SI->defaultDestUnreachable();
6520}
6521
6522// Helper function that checks if it is possible to transform a switch with only
6523// two cases (or two cases + default) that produces a result into a select.
6524// TODO: Handle switches with more than 2 cases that map to the same result.
6525// The branch weights correspond to the provided Condition (i.e. if Condition is
6526// modified from the original SwitchInst, the caller must adjust the weights)
// NOTE(review): this listing is missing original lines 6560, 6577, 6624,
// 6650 and 6670 — each was the opening line of a setFittedBranchWeights(...)
// call whose argument lines are still visible below.
6527static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6528                                 Constant *DefaultResult, Value *Condition,
6529                                 IRBuilder<> &Builder, const DataLayout &DL,
6530                                 ArrayRef<uint32_t> BranchWeights) {
6531  // If we are selecting between only two cases transform into a simple
6532  // select or a two-way select if default is possible.
6533  // Example:
6534  // switch (a) { %0 = icmp eq i32 %a, 10
6535  // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6536  // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6537  // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6538  // }
6539
6540  const bool HasBranchWeights =
6541      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6542
6543  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6544      ResultVector[1].second.size() == 1) {
6545    ConstantInt *FirstCase = ResultVector[0].second[0];
6546    ConstantInt *SecondCase = ResultVector[1].second[0];
6547    Value *SelectValue = ResultVector[1].first;
6548    if (DefaultResult) {
6549      Value *ValueCompare =
6550          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6551      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6552                                         DefaultResult, "switch.select");
6553      if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6554          SI && HasBranchWeights) {
6555        // We start with 3 probabilities, where the numerator is the
6556        // corresponding BranchWeights[i], and the denominator is the sum over
6557        // BranchWeights. We want the probability and negative probability of
6558        // Condition == SecondCase.
6559        assert(BranchWeights.size() == 3);
6561            *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6562            /*IsExpected=*/false, /*ElideAllZero=*/true);
6563      }
6564    }
6565    Value *ValueCompare =
6566        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6567    Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6568                                      SelectValue, "switch.select");
6569    if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6570      // We may have had a DefaultResult. Base the position of the first and
6571      // second's branch weights accordingly. Also the proability that Condition
6572      // != FirstCase needs to take that into account.
6573      assert(BranchWeights.size() >= 2);
6574      size_t FirstCasePos = (Condition != nullptr);
6575      size_t SecondCasePos = FirstCasePos + 1;
6576      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6578          {BranchWeights[FirstCasePos],
6579           DefaultCase + BranchWeights[SecondCasePos]},
6580          /*IsExpected=*/false, /*ElideAllZero=*/true);
6581    }
6582    return Ret;
6583  }
6584
6585  // Handle the degenerate case where two cases have the same result value.
6586  if (ResultVector.size() == 1 && DefaultResult) {
6587    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6588    unsigned CaseCount = CaseValues.size();
6589    // n bits group cases map to the same result:
6590    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6591    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6592    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6593    if (isPowerOf2_32(CaseCount)) {
6594      ConstantInt *MinCaseVal = CaseValues[0];
6595      // If there are bits that are set exclusively by CaseValues, we
6596      // can transform the switch into a select if the conjunction of
6597      // all the values uniquely identify CaseValues.
6598      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6599
6600      // Find the minimum value and compute the and of all the case values.
6601      for (auto *Case : CaseValues) {
6602        if (Case->getValue().slt(MinCaseVal->getValue()))
6603          MinCaseVal = Case;
6604        AndMask &= Case->getValue();
6605      }
6606      KnownBits Known = computeKnownBits(Condition, DL);
6607
6608      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6609        // Compute the number of bits that are free to vary.
6610        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6611
6612        // Check if the number of values covered by the mask is equal
6613        // to the number of cases.
6614        if (FreeBits == Log2_32(CaseCount)) {
6615          Value *And = Builder.CreateAnd(Condition, AndMask);
6616          Value *Cmp = Builder.CreateICmpEQ(
6617              And, Constant::getIntegerValue(And->getType(), AndMask));
6618          Value *Ret =
6619              Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6620          if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6621            // We know there's a Default case. We base the resulting branch
6622            // weights off its probability.
6623            assert(BranchWeights.size() >= 2);
6625                *SI,
6626                {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6627                /*IsExpected=*/false, /*ElideAllZero=*/true);
6628          }
6629          return Ret;
6630        }
6631      }
6632
6633      // Mark the bits case number touched.
6634      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6635      for (auto *Case : CaseValues)
6636        BitMask |= (Case->getValue() - MinCaseVal->getValue());
6637
6638      // Check if cases with the same result can cover all number
6639      // in touched bits.
6640      if (BitMask.popcount() == Log2_32(CaseCount)) {
6641        if (!MinCaseVal->isNullValue())
6642          Condition = Builder.CreateSub(Condition, MinCaseVal);
6643        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6644        Value *Cmp = Builder.CreateICmpEQ(
6645            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6646        Value *Ret =
6647            Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6648        if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6649          assert(BranchWeights.size() >= 2);
6651              *SI,
6652              {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6653              /*IsExpected=*/false, /*ElideAllZero=*/true);
6654        }
6655        return Ret;
6656      }
6657    }
6658
6659    // Handle the degenerate case where two cases have the same value.
6660    if (CaseValues.size() == 2) {
6661      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6662                                         "switch.selectcmp.case1");
6663      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6664                                         "switch.selectcmp.case2");
6665      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6666      Value *Ret =
6667          Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6668      if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6669        assert(BranchWeights.size() >= 2);
6671            *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6672            /*IsExpected=*/false, /*ElideAllZero=*/true);
6673      }
6674      return Ret;
6675    }
6676  }
6677
  // No applicable pattern; caller keeps the switch.
6678  return nullptr;
6679}
6680
6681// Helper function to cleanup a switch instruction that has been converted into
6682// a select, fixing up PHI nodes and basic blocks.
// NOTE(review): original line 6683 (the signature — presumably
// removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, ...)) is missing
// from this listing.
6684                                        Value *SelectValue,
6685                                        IRBuilder<> &Builder,
6686                                        DomTreeUpdater *DTU) {
6687  std::vector<DominatorTree::UpdateType> Updates;
6688
6689  BasicBlock *SelectBB = SI->getParent();
6690  BasicBlock *DestBB = PHI->getParent();
6691
6692  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6693    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6694  Builder.CreateBr(DestBB);
6695
6696  // Remove the switch.
6697
  // Collapse all of DestBB's phi entries coming from SelectBB into a single
  // entry carrying the select result.
6698  PHI->removeIncomingValueIf(
6699      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6700  PHI->addIncoming(SelectValue, SelectBB);
6701
6702  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6703  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6704    BasicBlock *Succ = SI->getSuccessor(i);
6705
6706    if (Succ == DestBB)
6707      continue;
6708    Succ->removePredecessor(SelectBB);
    // Deduplicate: several cases may target the same successor, but the CFG
    // edge is deleted only once.
6709    if (DTU && RemovedSuccessors.insert(Succ).second)
6710      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6711  }
6712  SI->eraseFromParent();
6713  if (DTU)
6714    DTU->applyUpdates(Updates);
6715}
6716
6717/// If a switch is only used to initialize one or more phi nodes in a common
6718/// successor block with only two different constant values, try to replace the
6719/// switch with a select. Returns true if the fold was made.
// NOTE(review): original line 6720 (the signature — presumably
// trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, ...)), line 6736
// (the scope opening before the weight extraction) and line 6738 (the
// extractBranchWeights call initializing HasWeights) are missing from this
// listing.
6721                             DomTreeUpdater *DTU, const DataLayout &DL,
6722                             const TargetTransformInfo &TTI) {
6723  Value *const Cond = SI->getCondition();
6724  PHINode *PHI = nullptr;
6725  BasicBlock *CommonDest = nullptr;
6726  Constant *DefaultResult;
6727  SwitchCaseResultVectorTy UniqueResults;
6728  // Collect all the cases that will deliver the same value from the switch.
6729  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6730                             DL, TTI, /*MaxUniqueResults*/ 2))
6731    return false;
6732
6733  assert(PHI != nullptr && "PHI for value select not found");
6734  Builder.SetInsertPoint(SI);
6735  SmallVector<uint32_t, 4> BranchWeights;
6737    [[maybe_unused]] auto HasWeights =
6739    assert(!HasWeights == (BranchWeights.empty()));
6740  }
6741  assert(BranchWeights.empty() ||
6742         (BranchWeights.size() >=
6743          UniqueResults.size() + (DefaultResult != nullptr)));
6744
6745  Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6746                                          Builder, DL, BranchWeights);
6747  if (!SelectValue)
6748    return false;
6749
  // Select built successfully; rewire phis/CFG and delete the switch.
6750  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6751  return true;
6752}
6753
6754namespace {

6756/// This class finds alternatives for switches to ultimately
6757/// replace the switch.
6758class SwitchReplacement {
6759public:
6760  /// Create a helper for optimizations to use as a switch replacement.
6761  /// Find a better representation for the content of Values,
6762  /// using DefaultValue to fill any holes in the table.
6763  SwitchReplacement(
6764      Module &M, uint64_t TableSize, ConstantInt *Offset,
6765      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6766      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

6768  /// Build instructions with Builder to retrieve values using Index
6769  /// and replace the switch.
6770  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6771                       Function *Func);

6773  /// Return true if a table with TableSize elements of
6774  /// type ElementType would fit in a target-legal register.
6775  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6776                                 Type *ElementType);

6778  /// Return the default value of the switch.
6779  Constant *getDefaultValue();

6781  /// Return true if the replacement is a lookup table.
6782  bool isLookupTable();

6784  /// Return true if the replacement is a bit map.
6785  bool isBitMap();

6787private:
6788  // Depending on the switch, there are different alternatives.
6789  enum {
6790    // For switches where each case contains the same value, we just have to
6791    // store that single value and return it for each lookup.
6792    SingleValueKind,

6794    // For switches where there is a linear relationship between table index
6795    // and values. We calculate the result with a simple multiplication
6796    // and addition instead of a table lookup.
6797    LinearMapKind,

6799    // For small tables with integer elements, we can pack them into a bitmap
6800    // that fits into a target-legal register. Values are retrieved by
6801    // shift and mask operations.
6802    BitMapKind,

6804    // The table is stored as an array of values. Values are retrieved by load
6805    // instructions from the table.
6806    LookupTableKind
  // Which of the above representations the constructor selected.
6807  } Kind;

6809  // The default value of the switch.
6810  Constant *DefaultValue;

6812  // The type of the output values.
6813  Type *ValueType;

6815  // For SingleValueKind, this is the single value.
6816  Constant *SingleValue = nullptr;

6818  // For BitMapKind, this is the bitmap.
6819  ConstantInt *BitMap = nullptr;
6820  IntegerType *BitMapElementTy = nullptr;

6822  // For LinearMapKind, these are the constants used to derive the value.
6823  ConstantInt *LinearOffset = nullptr;
6824  ConstantInt *LinearMultiplier = nullptr;
  // Whether the linear map relies on wrapping arithmetic (no nsw/nuw).
6825  bool LinearMapValWrapped = false;

6827  // For LookupTableKind, this is the table.
6828  Constant *Initializer = nullptr;
6829};

6831} // end anonymous namespace
6832
6833SwitchReplacement::SwitchReplacement(
6834 Module &M, uint64_t TableSize, ConstantInt *Offset,
6835 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6836 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6837 : DefaultValue(DefaultValue) {
6838 assert(Values.size() && "Can't build lookup table without values!");
6839 assert(TableSize >= Values.size() && "Can't fit values in table!");
6840
6841 // If all values in the table are equal, this is that value.
6842 SingleValue = Values.begin()->second;
6843
6844 ValueType = Values.begin()->second->getType();
6845
6846 // Build up the table contents.
6847 SmallVector<Constant *, 64> TableContents(TableSize);
6848 for (const auto &[CaseVal, CaseRes] : Values) {
6849 assert(CaseRes->getType() == ValueType);
6850
6851 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6852 TableContents[Idx] = CaseRes;
6853
6854 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6855 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6856 }
6857
6858 // Fill in any holes in the table with the default result.
6859 if (Values.size() < TableSize) {
6860 assert(DefaultValue &&
6861 "Need a default value to fill the lookup table holes.");
6862 assert(DefaultValue->getType() == ValueType);
6863 for (uint64_t I = 0; I < TableSize; ++I) {
6864 if (!TableContents[I])
6865 TableContents[I] = DefaultValue;
6866 }
6867
6868 // If the default value is poison, all the holes are poison.
6869 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6870
6871 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6872 SingleValue = nullptr;
6873 }
6874
6875 // If each element in the table contains the same value, we only need to store
6876 // that single value.
6877 if (SingleValue) {
6878 Kind = SingleValueKind;
6879 return;
6880 }
6881
6882 // Check if we can derive the value with a linear transformation from the
6883 // table index.
6885 bool LinearMappingPossible = true;
6886 APInt PrevVal;
6887 APInt DistToPrev;
6888 // When linear map is monotonic and signed overflow doesn't happen on
6889 // maximum index, we can attach nsw on Add and Mul.
6890 bool NonMonotonic = false;
6891 assert(TableSize >= 2 && "Should be a SingleValue table.");
6892 // Check if there is the same distance between two consecutive values.
6893 for (uint64_t I = 0; I < TableSize; ++I) {
6894 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6895
6896 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6897 // This is an poison, so it's (probably) a lookup table hole.
6898 // To prevent any regressions from before we switched to using poison as
6899 // the default value, holes will fall back to using the first value.
6900 // This can be removed once we add proper handling for poisons in lookup
6901 // tables.
6902 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6903 }
6904
6905 if (!ConstVal) {
6906 // This is an undef. We could deal with it, but undefs in lookup tables
6907 // are very seldom. It's probably not worth the additional complexity.
6908 LinearMappingPossible = false;
6909 break;
6910 }
6911 const APInt &Val = ConstVal->getValue();
6912 if (I != 0) {
6913 APInt Dist = Val - PrevVal;
6914 if (I == 1) {
6915 DistToPrev = Dist;
6916 } else if (Dist != DistToPrev) {
6917 LinearMappingPossible = false;
6918 break;
6919 }
6920 NonMonotonic |=
6921 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6922 }
6923 PrevVal = Val;
6924 }
6925 if (LinearMappingPossible) {
6926 LinearOffset = cast<ConstantInt>(TableContents[0]);
6927 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6928 APInt M = LinearMultiplier->getValue();
6929 bool MayWrap = true;
6930 if (isIntN(M.getBitWidth(), TableSize - 1))
6931 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6932 LinearMapValWrapped = NonMonotonic || MayWrap;
6933 Kind = LinearMapKind;
6934 return;
6935 }
6936 }
6937
6938 // If the type is integer and the table fits in a register, build a bitmap.
6939 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6941 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6942 for (uint64_t I = TableSize; I > 0; --I) {
6943 TableInt <<= IT->getBitWidth();
6944 // Insert values into the bitmap. Undef values are set to zero.
6945 if (!isa<UndefValue>(TableContents[I - 1])) {
6946 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6947 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6948 }
6949 }
6950 BitMap = ConstantInt::get(M.getContext(), TableInt);
6951 BitMapElementTy = IT;
6952 Kind = BitMapKind;
6953 return;
6954 }
6955
6956 // Store the table in an array.
6957 auto *TableTy = ArrayType::get(ValueType, TableSize);
6958 Initializer = ConstantArray::get(TableTy, TableContents);
6959
6960 Kind = LookupTableKind;
6961}
6962
6963Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6964 const DataLayout &DL, Function *Func) {
6965 switch (Kind) {
6966 case SingleValueKind:
6967 return SingleValue;
6968 case LinearMapKind: {
6969 ++NumLinearMaps;
6970 // Derive the result value from the input value.
6971 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6972 false, "switch.idx.cast");
6973 if (!LinearMultiplier->isOne())
6974 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6975 /*HasNUW = */ false,
6976 /*HasNSW = */ !LinearMapValWrapped);
6977
6978 if (!LinearOffset->isZero())
6979 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6980 /*HasNUW = */ false,
6981 /*HasNSW = */ !LinearMapValWrapped);
6982 return Result;
6983 }
6984 case BitMapKind: {
6985 ++NumBitMaps;
6986 // Type of the bitmap (e.g. i59).
6987 IntegerType *MapTy = BitMap->getIntegerType();
6988
6989 // Cast Index to the same type as the bitmap.
6990 // Note: The Index is <= the number of elements in the table, so
6991 // truncating it to the width of the bitmask is safe.
6992 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6993
6994 // Multiply the shift amount by the element width. NUW/NSW can always be
6995 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6996 // BitMap's bit width.
6997 ShiftAmt = Builder.CreateMul(
6998 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6999 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7000
7001 // Shift down.
7002 Value *DownShifted =
7003 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7004 // Mask off.
7005 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7006 }
7007 case LookupTableKind: {
7008 ++NumLookupTables;
7009 auto *Table =
7010 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7011 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7012 Initializer, "switch.table." + Func->getName());
7013 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7014 // Set the alignment to that of an array items. We will be only loading one
7015 // value out of it.
7016 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7017 Type *IndexTy = DL.getIndexType(Table->getType());
7018 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7019
7020 if (Index->getType() != IndexTy) {
7021 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7022 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7023 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7024 Zext->setNonNeg(
7025 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7026 }
7027
7028 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7029 Value *GEP =
7030 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7031 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7032 }
7033 }
7034 llvm_unreachable("Unknown helper kind!");
7035}
7036
7037bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7038 uint64_t TableSize,
7039 Type *ElementType) {
7040 auto *IT = dyn_cast<IntegerType>(ElementType);
7041 if (!IT)
7042 return false;
7043 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7044 // are <= 15, we could try to narrow the type.
7045
7046 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7047 if (TableSize >= UINT_MAX / IT->getBitWidth())
7048 return false;
7049 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7050}
7051
7053 const DataLayout &DL) {
7054 // Allow any legal type.
7055 if (TTI.isTypeLegal(Ty))
7056 return true;
7057
7058 auto *IT = dyn_cast<IntegerType>(Ty);
7059 if (!IT)
7060 return false;
7061
7062 // Also allow power of 2 integer types that have at least 8 bits and fit in
7063 // a register. These types are common in frontend languages and targets
7064 // usually support loads of these types.
7065 // TODO: We could relax this to any integer that fits in a register and rely
7066 // on ABI alignment and padding in the table to allow the load to be widened.
7067 // Or we could widen the constants and truncate the load.
7068 unsigned BitWidth = IT->getBitWidth();
7069 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7070 DL.fitsInLegalInteger(IT->getBitWidth());
7071}
7072
7073Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7074
7075bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7076
7077bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7078
// Return true when NumCases cases spread over CaseRange values is dense
// enough (>= 40%) to be worth a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Guard the multiplications below against 64-bit overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Density check scaled by 100 to stay in integer arithmetic:
  // NumCases / CaseRange >= MinDensity%.
  const uint64_t ScaledCases = NumCases * 100;
  return ScaledCases >= CaseRange * MinDensity;
}
7090
7092 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7093 uint64_t Range = Diff + 1;
7094 if (Range < Diff)
7095 return false; // Overflow.
7096
7097 return isSwitchDense(Values.size(), Range);
7098}
7099
7100/// Determine whether a lookup table should be built for this switch, based on
7101/// the number of cases, size of the table, and the types of the results.
7102// TODO: We could support larger than legal types by limiting based on the
7103// number of loads required and/or table size. If the constants are small we
7104// could use smaller table entries and extend after the load.
7106 const TargetTransformInfo &TTI,
7107 const DataLayout &DL,
7108 const SmallVector<Type *> &ResultTypes) {
7109 if (SI->getNumCases() > TableSize)
7110 return false; // TableSize overflowed.
7111
7112 bool AllTablesFitInRegister = true;
7113 bool HasIllegalType = false;
7114 for (const auto &Ty : ResultTypes) {
7115 // Saturate this flag to true.
7116 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7117
7118 // Saturate this flag to false.
7119 AllTablesFitInRegister =
7120 AllTablesFitInRegister &&
7121 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7122
7123 // If both flags saturate, we're done. NOTE: This *only* works with
7124 // saturating flags, and all flags have to saturate first due to the
7125 // non-deterministic behavior of iterating over a dense map.
7126 if (HasIllegalType && !AllTablesFitInRegister)
7127 break;
7128 }
7129
7130 // If each table would fit in a register, we should build it anyway.
7131 if (AllTablesFitInRegister)
7132 return true;
7133
7134 // Don't build a table that doesn't fit in-register if it has illegal types.
7135 if (HasIllegalType)
7136 return false;
7137
7138 return isSwitchDense(SI->getNumCases(), TableSize);
7139}
7140
7142 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7143 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7144 const DataLayout &DL, const TargetTransformInfo &TTI) {
7145 if (MinCaseVal.isNullValue())
7146 return true;
7147 if (MinCaseVal.isNegative() ||
7148 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7149 !HasDefaultResults)
7150 return false;
7151 return all_of(ResultTypes, [&](const auto &ResultType) {
7152 return SwitchReplacement::wouldFitInRegister(
7153 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7154 });
7155}
7156
7157/// Try to reuse the switch table index compare. Following pattern:
7158/// \code
7159/// if (idx < tablesize)
7160/// r = table[idx]; // table does not contain default_value
7161/// else
7162/// r = default_value;
7163/// if (r != default_value)
7164/// ...
7165/// \endcode
7166/// Is optimized to:
7167/// \code
7168/// cond = idx < tablesize;
7169/// if (cond)
7170/// r = table[idx];
7171/// else
7172/// r = default_value;
7173/// if (cond)
7174/// ...
7175/// \endcode
7176/// Jump threading will then eliminate the second if(cond).
7178 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
7179 Constant *DefaultValue,
7180 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7182 if (!CmpInst)
7183 return;
7184
7185 // We require that the compare is in the same block as the phi so that jump
7186 // threading can do its work afterwards.
7187 if (CmpInst->getParent() != PhiBlock)
7188 return;
7189
7191 if (!CmpOp1)
7192 return;
7193
7194 Value *RangeCmp = RangeCheckBranch->getCondition();
7195 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7196 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7197
7198 // Check if the compare with the default value is constant true or false.
7199 const DataLayout &DL = PhiBlock->getDataLayout();
7201 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7202 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7203 return;
7204
7205 // Check if the compare with the case values is distinct from the default
7206 // compare result.
7207 for (auto ValuePair : Values) {
7209 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7210 if (!CaseConst || CaseConst == DefaultConst ||
7211 (CaseConst != TrueConst && CaseConst != FalseConst))
7212 return;
7213 }
7214
7215 // Check if the branch instruction dominates the phi node. It's a simple
7216 // dominance check, but sufficient for our needs.
7217 // Although this check is invariant in the calling loops, it's better to do it
7218 // at this late stage. Practically we do it at most once for a switch.
7219 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7220 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7221 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7222 return;
7223 }
7224
7225 if (DefaultConst == FalseConst) {
7226 // The compare yields the same result. We can replace it.
7227 CmpInst->replaceAllUsesWith(RangeCmp);
7228 ++NumTableCmpReuses;
7229 } else {
7230 // The compare yields the same result, just inverted. We can replace it.
7231 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7232 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7233 RangeCheckBranch->getIterator());
7234 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7235 ++NumTableCmpReuses;
7236 }
7237}
7238
7239/// If the switch is only used to initialize one or more phi nodes in a common
7240/// successor block with different constant values, replace the switch with
7241/// lookup tables.
7243 DomTreeUpdater *DTU, const DataLayout &DL,
7244 const TargetTransformInfo &TTI,
7245 bool ConvertSwitchToLookupTable) {
7246 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7247
7248 BasicBlock *BB = SI->getParent();
7249 Function *Fn = BB->getParent();
7250
7251 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7252 // split off a dense part and build a lookup table for that.
7253
7254 // FIXME: This creates arrays of GEPs to constant strings, which means each
7255 // GEP needs a runtime relocation in PIC code. We should just build one big
7256 // string and lookup indices into that.
7257
7258 // Ignore switches with less than three cases. Lookup tables will not make
7259 // them faster, so we don't analyze them.
7260 if (SI->getNumCases() < 3)
7261 return false;
7262
7263 // Figure out the corresponding result for each case value and phi node in the
7264 // common destination, as well as the min and max case values.
7265 assert(!SI->cases().empty());
7266 SwitchInst::CaseIt CI = SI->case_begin();
7267 ConstantInt *MinCaseVal = CI->getCaseValue();
7268 ConstantInt *MaxCaseVal = CI->getCaseValue();
7269
7270 BasicBlock *CommonDest = nullptr;
7271
7272 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7274
7276 SmallVector<Type *> ResultTypes;
7278
7279 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7280 ConstantInt *CaseVal = CI->getCaseValue();
7281 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7282 MinCaseVal = CaseVal;
7283 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7284 MaxCaseVal = CaseVal;
7285
7286 // Resulting value at phi nodes for this case value.
7288 ResultsTy Results;
7289 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7290 Results, DL, TTI))
7291 return false;
7292
7293 // Append the result and result types from this case to the list for each
7294 // phi.
7295 for (const auto &I : Results) {
7296 PHINode *PHI = I.first;
7297 Constant *Value = I.second;
7298 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7299 if (Inserted)
7300 PHIs.push_back(PHI);
7301 It->second.push_back(std::make_pair(CaseVal, Value));
7302 ResultTypes.push_back(PHI->getType());
7303 }
7304 }
7305
7306 // If the table has holes, we need a constant result for the default case
7307 // or a bitmask that fits in a register.
7308 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7309 bool HasDefaultResults =
7310 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7311 DefaultResultsList, DL, TTI);
7312 for (const auto &I : DefaultResultsList) {
7313 PHINode *PHI = I.first;
7314 Constant *Result = I.second;
7315 DefaultResults[PHI] = Result;
7316 }
7317
7318 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7319 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7320 uint64_t TableSize;
7321 ConstantInt *TableIndexOffset;
7322 if (UseSwitchConditionAsTableIndex) {
7323 TableSize = MaxCaseVal->getLimitedValue() + 1;
7324 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7325 } else {
7326 TableSize =
7327 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7328
7329 TableIndexOffset = MinCaseVal;
7330 }
7331
7332 // If the default destination is unreachable, or if the lookup table covers
7333 // all values of the conditional variable, branch directly to the lookup table
7334 // BB. Otherwise, check that the condition is within the case range.
7335 uint64_t NumResults = ResultLists[PHIs[0]].size();
7336 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7337
7338 bool TableHasHoles = (NumResults < TableSize);
7339
7340 // If the table has holes but the default destination doesn't produce any
7341 // constant results, the lookup table entries corresponding to the holes will
7342 // contain poison.
7343 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7344
7345 // If the default destination doesn't produce a constant result but is still
7346 // reachable, and the lookup table has holes, we need to use a mask to
7347 // determine if the current index should load from the lookup table or jump
7348 // to the default case.
7349 // The mask is unnecessary if the table has holes but the default destination
7350 // is unreachable, as in that case the holes must also be unreachable.
7351 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7352 if (NeedMask) {
7353 // As an extra penalty for the validity test we require more cases.
7354 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7355 return false;
7356 if (!DL.fitsInLegalInteger(TableSize))
7357 return false;
7358 }
7359
7360 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7361 return false;
7362
7363 // Compute the table index value.
7364 Value *TableIndex;
7365 if (UseSwitchConditionAsTableIndex) {
7366 TableIndex = SI->getCondition();
7367 if (HasDefaultResults) {
7368 // Grow the table to cover all possible index values to avoid the range
7369 // check. It will use the default result to fill in the table hole later,
7370 // so make sure it exist.
7371 ConstantRange CR =
7372 computeConstantRange(TableIndex, /* ForSigned */ false);
7373 // Grow the table shouldn't have any size impact by checking
7374 // wouldFitInRegister.
7375 // TODO: Consider growing the table also when it doesn't fit in a register
7376 // if no optsize is specified.
7377 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7378 if (!CR.isUpperWrapped() &&
7379 all_of(ResultTypes, [&](const auto &ResultType) {
7380 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7381 ResultType);
7382 })) {
7383 // There may be some case index larger than the UpperBound (unreachable
7384 // case), so make sure the table size does not get smaller.
7385 TableSize = std::max(UpperBound, TableSize);
7386 // The default branch is unreachable after we enlarge the lookup table.
7387 // Adjust DefaultIsReachable to reuse code path.
7388 DefaultIsReachable = false;
7389 }
7390 }
7391 }
7392
7393 // Keep track of the switch replacement for each phi
7395 for (PHINode *PHI : PHIs) {
7396 const auto &ResultList = ResultLists[PHI];
7397
7398 Type *ResultType = ResultList.begin()->second->getType();
7399 // Use any value to fill the lookup table holes.
7401 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7402 StringRef FuncName = Fn->getName();
7403 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7404 ResultList, DefaultVal, DL, FuncName);
7405 PhiToReplacementMap.insert({PHI, Replacement});
7406 }
7407
7408 bool AnyLookupTables = any_of(
7409 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7410 bool AnyBitMaps = any_of(PhiToReplacementMap,
7411 [](auto &KV) { return KV.second.isBitMap(); });
7412
7413 // A few conditions prevent the generation of lookup tables:
7414 // 1. The target does not support lookup tables.
7415 // 2. The "no-jump-tables" function attribute is set.
7416 // However, these objections do not apply to other switch replacements, like
7417 // the bitmap, so we only stop here if any of these conditions are met and we
7418 // want to create a LUT. Otherwise, continue with the switch replacement.
7419 if (AnyLookupTables &&
7420 (!TTI.shouldBuildLookupTables() ||
7421 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7422 return false;
7423
7424 // In the early optimization pipeline, disable formation of lookup tables,
7425 // bit maps and mask checks, as they may inhibit further optimization.
7426 if (!ConvertSwitchToLookupTable &&
7427 (AnyLookupTables || AnyBitMaps || NeedMask))
7428 return false;
7429
7430 Builder.SetInsertPoint(SI);
7431 // TableIndex is the switch condition - TableIndexOffset if we don't
7432 // use the condition directly
7433 if (!UseSwitchConditionAsTableIndex) {
7434 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7435 // we can try to attach nsw.
7436 bool MayWrap = true;
7437 if (!DefaultIsReachable) {
7438 APInt Res =
7439 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7440 (void)Res;
7441 }
7442 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7443 "switch.tableidx", /*HasNUW =*/false,
7444 /*HasNSW =*/!MayWrap);
7445 }
7446
7447 std::vector<DominatorTree::UpdateType> Updates;
7448
7449 // Compute the maximum table size representable by the integer type we are
7450 // switching upon.
7451 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7452 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7453 assert(MaxTableSize >= TableSize &&
7454 "It is impossible for a switch to have more entries than the max "
7455 "representable value of its input integer type's size.");
7456
7457 // Create the BB that does the lookups.
7458 Module &Mod = *CommonDest->getParent()->getParent();
7459 BasicBlock *LookupBB = BasicBlock::Create(
7460 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7461
7462 BranchInst *RangeCheckBranch = nullptr;
7463 BranchInst *CondBranch = nullptr;
7464
7465 Builder.SetInsertPoint(SI);
7466 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7467 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7468 Builder.CreateBr(LookupBB);
7469 if (DTU)
7470 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7471 // Note: We call removeProdecessor later since we need to be able to get the
7472 // PHI value for the default case in case we're using a bit mask.
7473 } else {
7474 Value *Cmp = Builder.CreateICmpULT(
7475 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7476 RangeCheckBranch =
7477 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7478 CondBranch = RangeCheckBranch;
7479 if (DTU)
7480 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7481 }
7482
7483 // Populate the BB that does the lookups.
7484 Builder.SetInsertPoint(LookupBB);
7485
7486 if (NeedMask) {
7487 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7488 // re-purposed to do the hole check, and we create a new LookupBB.
7489 BasicBlock *MaskBB = LookupBB;
7490 MaskBB->setName("switch.hole_check");
7491 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7492 CommonDest->getParent(), CommonDest);
7493
7494 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7495 // unnecessary illegal types.
7496 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7497 APInt MaskInt(TableSizePowOf2, 0);
7498 APInt One(TableSizePowOf2, 1);
7499 // Build bitmask; fill in a 1 bit for every case.
7500 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7501 for (const auto &Result : ResultList) {
7502 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7503 .getLimitedValue();
7504 MaskInt |= One << Idx;
7505 }
7506 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7507
7508 // Get the TableIndex'th bit of the bitmask.
7509 // If this bit is 0 (meaning hole) jump to the default destination,
7510 // else continue with table lookup.
7511 IntegerType *MapTy = TableMask->getIntegerType();
7512 Value *MaskIndex =
7513 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7514 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7515 Value *LoBit = Builder.CreateTrunc(
7516 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7517 CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7518 if (DTU) {
7519 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7520 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7521 }
7522 Builder.SetInsertPoint(LookupBB);
7523 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7524 }
7525
7526 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7527 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7528 // do not delete PHINodes here.
7529 SI->getDefaultDest()->removePredecessor(BB,
7530 /*KeepOneInputPHIs=*/true);
7531 if (DTU)
7532 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7533 }
7534
7535 for (PHINode *PHI : PHIs) {
7536 const ResultListTy &ResultList = ResultLists[PHI];
7537 auto Replacement = PhiToReplacementMap.at(PHI);
7538 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7539 // Do a small peephole optimization: re-use the switch table compare if
7540 // possible.
7541 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7542 BasicBlock *PhiBlock = PHI->getParent();
7543 // Search for compare instructions which use the phi.
7544 for (auto *User : PHI->users()) {
7545 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7546 Replacement.getDefaultValue(), ResultList);
7547 }
7548 }
7549
7550 PHI->addIncoming(Result, LookupBB);
7551 }
7552
7553 Builder.CreateBr(CommonDest);
7554 if (DTU)
7555 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7556
7557 SmallVector<uint32_t> BranchWeights;
7558 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7559 extractBranchWeights(*SI, BranchWeights);
7560 uint64_t ToLookupWeight = 0;
7561 uint64_t ToDefaultWeight = 0;
7562
7563 // Remove the switch.
7564 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7565 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7566 BasicBlock *Succ = SI->getSuccessor(I);
7567
7568 if (Succ == SI->getDefaultDest()) {
7569 if (HasBranchWeights)
7570 ToDefaultWeight += BranchWeights[I];
7571 continue;
7572 }
7573 Succ->removePredecessor(BB);
7574 if (DTU && RemovedSuccessors.insert(Succ).second)
7575 Updates.push_back({DominatorTree::Delete, BB, Succ});
7576 if (HasBranchWeights)
7577 ToLookupWeight += BranchWeights[I];
7578 }
7579 SI->eraseFromParent();
7580 if (HasBranchWeights)
7581 setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
7582 /*IsExpected=*/false);
7583 if (DTU)
7584 DTU->applyUpdates(Updates);
7585
7586 if (NeedMask)
7587 ++NumLookupTablesHoles;
7588 return true;
7589}
7590
7591/// Try to transform a switch that has "holes" in it to a contiguous sequence
7592/// of cases.
7593///
7594/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7595/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7596///
7597/// This converts a sparse switch into a dense switch which allows better
7598/// lowering and could also allow transforming into a lookup table.
7600 const DataLayout &DL,
7601 const TargetTransformInfo &TTI) {
7602 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7603 if (CondTy->getIntegerBitWidth() > 64 ||
7604 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7605 return false;
7606 // Only bother with this optimization if there are more than 3 switch cases;
7607 // SDAG will only bother creating jump tables for 4 or more cases.
7608 if (SI->getNumCases() < 4)
7609 return false;
7610
7611 // This transform is agnostic to the signedness of the input or case values. We
7612 // can treat the case values as signed or unsigned. We can optimize more common
7613 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7614 // as signed.
7616 for (const auto &C : SI->cases())
7617 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7618 llvm::sort(Values);
7619
7620 // If the switch is already dense, there's nothing useful to do here.
7621 if (isSwitchDense(Values))
7622 return false;
7623
7624 // First, transform the values such that they start at zero and ascend.
7625 int64_t Base = Values[0];
7626 for (auto &V : Values)
7627 V -= (uint64_t)(Base);
7628
7629 // Now we have signed numbers that have been shifted so that, given enough
7630 // precision, there are no negative values. Since the rest of the transform
7631 // is bitwise only, we switch now to an unsigned representation.
7632
7633 // This transform can be done speculatively because it is so cheap - it
7634 // results in a single rotate operation being inserted.
7635
7636 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7637 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7638 // less than 64.
7639 unsigned Shift = 64;
7640 for (auto &V : Values)
7641 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7642 assert(Shift < 64);
7643 if (Shift > 0)
7644 for (auto &V : Values)
7645 V = (int64_t)((uint64_t)V >> Shift);
7646
7647 if (!isSwitchDense(Values))
7648 // Transform didn't create a dense switch.
7649 return false;
7650
7651 // The obvious transform is to shift the switch condition right and emit a
7652 // check that the condition actually cleanly divided by GCD, i.e.
7653 // C & (1 << Shift - 1) == 0
7654 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7655 //
7656 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7657 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7658 // are nonzero then the switch condition will be very large and will hit the
7659 // default case.
7660
7661 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7662 Builder.SetInsertPoint(SI);
7663 Value *Sub =
7664 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7665 Value *Rot = Builder.CreateIntrinsic(
7666 Ty, Intrinsic::fshl,
7667 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7668 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7669
7670 for (auto Case : SI->cases()) {
7671 auto *Orig = Case.getCaseValue();
7672 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7673 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7674 }
7675 return true;
7676}
7677
7678/// Tries to transform the switch when the condition is umin with a constant.
7679/// In that case, the default branch can be replaced by the constant's branch.
7680/// This method also removes dead cases when the simplification cannot replace
7681/// the default branch.
7682///
7683/// For example:
7684/// switch(umin(a, 3)) {
7685/// case 0:
7686/// case 1:
7687/// case 2:
7688/// case 3:
7689/// case 4:
7690/// // ...
7691/// default:
7692/// unreachable
7693/// }
7694///
7695/// Transforms into:
7696///
7697/// switch(a) {
7698/// case 0:
7699/// case 1:
7700/// case 2:
7701/// default:
7702/// // This is case 3
7703/// }
7705 Value *A;
7707
7708 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7709 return false;
7710
7713 BasicBlock *BB = SIW->getParent();
7714
7715 // Dead cases are removed even when the simplification fails.
7716 // A case is dead when its value is higher than the Constant.
7717 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7718 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7719 ++I;
7720 continue;
7721 }
7722 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7723 DeadCaseBB->removePredecessor(BB);
7724 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
7725 I = SIW->removeCase(I);
7726 E = SIW->case_end();
7727 }
7728
7729 auto Case = SI->findCaseValue(Constant);
7730 // If the case value is not found, `findCaseValue` returns the default case.
7731 // In this scenario, since there is no explicit `case 3:`, the simplification
7732 // fails. The simplification also fails when the switch’s default destination
7733 // is reachable.
7734 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7735 if (DTU)
7736 DTU->applyUpdates(Updates);
7737 return !Updates.empty();
7738 }
7739
7740 BasicBlock *Unreachable = SI->getDefaultDest();
7741 SIW.replaceDefaultDest(Case);
7742 SIW.removeCase(Case);
7743 SIW->setCondition(A);
7744
7745 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7746
7747 if (DTU)
7748 DTU->applyUpdates(Updates);
7749
7750 return true;
7751}
7752
7753/// Tries to transform switch of powers of two to reduce switch range.
7754/// For example, switch like:
7755/// switch (C) { case 1: case 2: case 64: case 128: }
7756/// will be transformed to:
7757/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7758///
7759/// This transformation allows better lowering and may transform the switch
7760/// instruction into a sequence of bit manipulation and a smaller
7761/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7762/// address of the jump target, and indirectly jump to it).
// NOTE(review): the opening line of the signature (doxygen line 7763,
// presumably "static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI,
// IRBuilder<> &Builder,") was lost in extraction; only the trailing
// parameters survive below — confirm against upstream.
7764                                     DomTreeUpdater *DTU,
7765                                     const DataLayout &DL,
7766                                     const TargetTransformInfo &TTI) {
7767  Value *Condition = SI->getCondition();
7768  LLVMContext &Context = SI->getContext();
7769  auto *CondTy = cast<IntegerType>(Condition->getType());
7770
  // Case values are manipulated as uint64_t below, so conditions wider than
  // 64 bits (or not legal for the target) are rejected.
7771  if (CondTy->getIntegerBitWidth() > 64 ||
7772      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7773    return false;
7774
7775  // Ensure trailing zeroes count intrinsic emission is not too expensive.
7776  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7777                                {Condition, ConstantInt::getTrue(Context)});
7778  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7779      TTI::TCC_Basic * 2)
7780    return false;
7781
7782  // Only bother with this optimization if there are more than 3 switch cases.
7783  // SDAG will start emitting jump tables for 4 or more cases.
7784  if (SI->getNumCases() < 4)
7785    return false;
7786
7787  // Check that switch cases are powers of two.
  // NOTE(review): the declaration of `Values` (doxygen line 7788) was lost in
  // extraction; presumably a SmallVector of uint64_t — confirm upstream.
7789  for (const auto &Case : SI->cases()) {
7790    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7791    if (llvm::has_single_bit(CaseValue))
7792      Values.push_back(CaseValue);
7793    else
7794      return false;
7795  }
7796
7797  // isSwichDense requires case values to be sorted.
7798  llvm::sort(Values);
7799  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7800                                        llvm::countr_zero(Values.front()) + 1))
7801    // Transform is unable to generate dense switch.
7802    return false;
7803
7804  Builder.SetInsertPoint(SI);
7805
7806  if (!SI->defaultDestUnreachable()) {
7807    // Let non-power-of-two inputs jump to the default case, when the latter is
7808    // reachable.
7809    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7810    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7811
7812    auto *OrigBB = SI->getParent();
7813    auto *DefaultCaseBB = SI->getDefaultDest();
7814    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7815    auto It = OrigBB->getTerminator()->getIterator();
7816    SmallVector<uint32_t> Weights;
7817    auto HasWeights =
    // NOTE(review): the right-hand side initializing `HasWeights` (doxygen
    // line 7818, presumably an extractBranchWeights call) was lost in
    // extraction.
7819    auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
7820    if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
7821      // IsPow2 covers a subset of the cases in which we'd go to the default
7822      // label. The other is those powers of 2 that don't appear in the case
7823      // statement. We don't know the distribution of the values coming in, so
7824      // the safest is to split 50-50 the original probability to `default`.
7825      uint64_t OrigDenominator =
      // NOTE(review): the expression summing the original weights (doxygen
      // line 7826) was lost in extraction.
7827      SmallVector<uint64_t> NewWeights(2);
7828      NewWeights[1] = Weights[0] / 2;
7829      NewWeights[0] = OrigDenominator - NewWeights[1];
7830      setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7831      // The probability of executing the default block stays constant. It was
7832      // p_d = Weights[0] / OrigDenominator
7833      // we rewrite as W/D
7834      // We want to find the probability of the default branch of the switch
7835      // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7836      // i.e. the original probability is the probability we go to the default
7837      // branch from the BI branch, or we take the default branch on the SI.
7838      // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7839      // This matches using W/2 for the default branch probability numerator and
7840      // D-W/2 as the denominator.
7841      Weights[0] = NewWeights[1];
7842      uint64_t CasesDenominator = OrigDenominator - Weights[0];
7843      for (auto &W : drop_begin(Weights))
7844        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7845
7846      setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7847    }
7848    // BI is handling the default case for SI, and so should share its DebugLoc.
7849    BI->setDebugLoc(SI->getDebugLoc());
7850    It->eraseFromParent();
7851
7852    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7853    if (DTU)
7854      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7855  }
7856
7857  // Replace each case with its trailing zeros number.
7858  for (auto &Case : SI->cases()) {
7859    auto *OrigValue = Case.getCaseValue();
7860    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7861                                   OrigValue->getValue().countr_zero()))(
7862  }
7863
7864  // Replace condition with its trailing zeros number.
7865  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7866      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7867
7868  SI->setCondition(ConditionTrailingZeros);
7869
7870  return true;
7871}
7872
7873/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7874/// the same destination.
// NOTE(review): the first line of the signature (doxygen line 7875,
// presumably "static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI,
// IRBuilder<> &Builder,") was lost in extraction.
7876                                        DomTreeUpdater *DTU) {
7877  auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7878  if (!Cmp || !Cmp->hasOneUse())
7879    return false;
7880
  // NOTE(review): the declaration of `Weights` (doxygen line 7881, presumably
  // a SmallVector of uint32_t) was lost in extraction.
7882  bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7883  if (!HasWeights)
7884    Weights.resize(4); // Avoid checking HasWeights everywhere.
7885
7886  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7887  int64_t Res;
7888  BasicBlock *Succ, *OtherSucc;
7889  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7890  BasicBlock *Unreachable = nullptr;
7891
7892  if (SI->getNumCases() == 2) {
7893    // Find which of 1, 0 or -1 is missing (handled by default dest).
7894    SmallSet<int64_t, 3> Missing;
7895    Missing.insert(1);
7896    Missing.insert(0);
7897    Missing.insert(-1);
7898
7899    Succ = SI->getDefaultDest();
7900    SuccWeight = Weights[0];
7901    OtherSucc = nullptr;
7902    for (auto &Case : SI->cases()) {
7903      std::optional<int64_t> Val =
7904          Case.getCaseValue()->getValue().trySExtValue();
7905      if (!Val)
7906        return false;
7907      if (!Missing.erase(*Val))
7908        return false;
      // Both explicit cases must share a single destination for the fold.
7909      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7910        return false;
7911      OtherSucc = Case.getCaseSuccessor();
7912      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7913    }
7914
7915    assert(Missing.size() == 1 && "Should have one case left");
7916    Res = *Missing.begin();
7917  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7918    // Normalize so that Succ is taken once and OtherSucc twice.
7919    Unreachable = SI->getDefaultDest();
7920    Succ = OtherSucc = nullptr;
7921    for (auto &Case : SI->cases()) {
7922      BasicBlock *NewSucc = Case.getCaseSuccessor();
7923      uint32_t Weight = Weights[Case.getSuccessorIndex()];
7924      if (!OtherSucc || OtherSucc == NewSucc) {
7925        OtherSucc = NewSucc;
7926        OtherSuccWeight += Weight;
7927      } else if (!Succ) {
7928        Succ = NewSucc;
7929        SuccWeight = Weight;
7930      } else if (Succ == NewSucc) {
7931        std::swap(Succ, OtherSucc);
7932        std::swap(SuccWeight, OtherSuccWeight);
7933      } else
7934        return false;
7935    }
7936    for (auto &Case : SI->cases()) {
7937      std::optional<int64_t> Val =
7938          Case.getCaseValue()->getValue().trySExtValue();
7939      if (!Val || (Val != 1 && Val != 0 && Val != -1))
7940        return false;
7941      if (Case.getCaseSuccessor() == Succ) {
7942        Res = *Val;
7943        break;
7944      }
7945    }
7946  } else {
7947    return false;
7948  }
7949
7950  // Determine predicate for the missing case.
  // NOTE(review): the declaration of `Pred` (doxygen line 7951, presumably
  // ICmpInst::Predicate) was lost in extraction.
7952  switch (Res) {
7953  case 1:
7954    Pred = ICmpInst::ICMP_UGT;
7955    break;
7956  case 0:
7957    Pred = ICmpInst::ICMP_EQ;
7958    break;
7959  case -1:
7960    Pred = ICmpInst::ICMP_ULT;
7961    break;
7962  }
7963  if (Cmp->isSigned())
7964    Pred = ICmpInst::getSignedPredicate(Pred);
7965
7966  MDNode *NewWeights = nullptr;
7967  if (HasWeights)
7968    NewWeights = MDBuilder(SI->getContext())
7969                     .createBranchWeights(SuccWeight, OtherSuccWeight);
7970
7971  BasicBlock *BB = SI->getParent();
7972  Builder.SetInsertPoint(SI->getIterator());
7973  Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7974  Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7975                       SI->getMetadata(LLVMContext::MD_unpredictable));
7976  OtherSucc->removePredecessor(BB);
7977  if (Unreachable)
7978    Unreachable->removePredecessor(BB);
7979  SI->eraseFromParent();
7980  Cmp->eraseFromParent();
7981  if (DTU && Unreachable)
7982    DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7983  return true;
7984}
7985
7986/// Checking whether two cases of SI are equal depends on the contents of the
7987/// BasicBlock and the incoming values of their successor PHINodes.
7988/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7989/// calling this function on each BasicBlock every time isEqual is called,
7990/// especially since the same BasicBlock may be passed as an argument multiple
7991/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7992/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7993/// of the incoming values.
// NOTE(review): the opening of this DenseMapInfo specialization (doxygen
// lines 7994-7997) and the bodies of getEmptyKey/getTombstoneKey were lost in
// extraction; only fragments of the two key getters remain below.
7998
8001    return static_cast<SwitchSuccWrapper *>(
8003  }
8005    return static_cast<SwitchSuccWrapper *>(
8007  }
8008  static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
8009    BasicBlock *Succ = SSW->Dest;
    // NOTE(review): the line producing `BI` from Succ's terminator (doxygen
    // line 8010) was lost in extraction.
8011    assert(BI->isUnconditional() &&
8012           "Only supporting unconditional branches for now");
8013    assert(BI->getNumSuccessors() == 1 &&
8014           "Expected unconditional branches to have one successor");
8015    assert(Succ->size() == 1 && "Expected just a single branch in the BB");
8016
8017    // Since we assume the BB is just a single BranchInst with a single
8018    // successor, we hash as the BB and the incoming Values of its successor
8019    // PHIs. Initially, we tried to just use the successor BB as the hash, but
8020    // including the incoming PHI values leads to better performance.
8021    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8022    // time and passing it in SwitchSuccWrapper, but this slowed down the
8023    // average compile time without having any impact on the worst case compile
8024    // time.
8025    BasicBlock *BB = BI->getSuccessor(0);
8026    SmallVector<Value *> PhiValsForBB;
8027    for (PHINode &Phi : BB->phis())
8028      PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
8029
8030    return hash_combine(BB, hash_combine_range(PhiValsForBB));
8031  }
8032  static bool isEqual(const SwitchSuccWrapper *LHS,
8033                      const SwitchSuccWrapper *RHS) {
    // NOTE(review): the lines fetching the empty/tombstone sentinels into
    // EKey/TKey (doxygen lines 8034-8035) were lost in extraction; sentinel
    // keys only compare equal to themselves.
8036    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
8037      return LHS == RHS;
8038
8039    BasicBlock *A = LHS->Dest;
8040    BasicBlock *B = RHS->Dest;
8041
8042    // FIXME: we checked that the size of A and B are both 1 in
8043    // simplifyDuplicateSwitchArms to make the Case list smaller to
8044    // improve performance. If we decide to support BasicBlocks with more
8045    // than just a single instruction, we need to check that A.size() ==
8046    // B.size() here, and we need to check more than just the BranchInsts
8047    // for equality.
8048
8049    BranchInst *ABI = cast<BranchInst>(A->getTerminator());
8050    BranchInst *BBI = cast<BranchInst>(B->getTerminator());
8051    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
8052           "Only supporting unconditional branches for now");
8053    if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
8054      return false;
8055
8056    // Need to check that PHIs in successor have matching values
8057    BasicBlock *Succ = ABI->getSuccessor(0);
8058    for (PHINode &Phi : Succ->phis()) {
8059      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8060      if (PredIVs[A] != PredIVs[B])
8061        return false;
8062    }
8063
8064    return true;
8065  }
8066};
8067
8068bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8069                                                 DomTreeUpdater *DTU) {
8070  // Build Cases. Skip BBs that are not candidates for simplification. Mark
8071  // PHINodes which need to be processed into PhiPredIVs. We decide to process
8072  // an entire PHI at once after the loop, opposed to calling
8073  // getIncomingValueForBlock inside this loop, since each call to
8074  // getIncomingValueForBlock is O(|Preds|).
  // NOTE(review): the declarations of the local containers used below (Cases,
  // Seen, Phis, PhiPredIVs, BBToSuccessorIndexes; doxygen lines 8075-8079)
  // were lost in extraction.
8080  Cases.reserve(SI->getNumSuccessors());
8081
8082  for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
8083    BasicBlock *BB = SI->getSuccessor(I);
8084
8085    // FIXME: Support more than just a single BranchInst. One way we could do
8086    // this is by taking a hashing approach of all insts in BB.
8087    if (BB->size() != 1)
8088      continue;
8089
8090    // FIXME: Relax that the terminator is a BranchInst by checking for equality
8091    // on other kinds of terminators. We decide to only support unconditional
8092    // branches for now for compile time reasons.
8093    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
8094    if (!BI || BI->isConditional())
8095      continue;
8096
    // Already-seen BB: just record the additional successor index so all
    // occurrences get rewritten together.
8097    if (!Seen.insert(BB).second) {
8098      auto It = BBToSuccessorIndexes.find(BB);
8099      if (It != BBToSuccessorIndexes.end())
8100        It->second.emplace_back(I);
8101      continue;
8102    }
8103
8104    // FIXME: This case needs some extra care because the terminators other than
8105    // SI need to be updated. For now, consider only backedges to the SI.
8106    if (BB->getUniquePredecessor() != SI->getParent())
8107      continue;
8108
8109    // Keep track of which PHIs we need as keys in PhiPredIVs below.
8110    for (BasicBlock *Succ : BI->successors())
    // NOTE(review): the loop body collecting Succ's PHIs into `Phis` (doxygen
    // line 8111) was lost in extraction.
8112
8113    // Add the successor only if not previously visited.
8114    Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
8115    BBToSuccessorIndexes[BB].emplace_back(I);
8116  }
8117
8118  // Precompute a data structure to improve performance of isEqual for
8119  // SwitchSuccWrapper.
8120  PhiPredIVs.reserve(Phis.size());
8121  for (PHINode *Phi : Phis) {
8122    auto &IVs =
8123        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8124    for (auto &IV : Phi->incoming_values())
8125      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8126  }
8127
8128  // Build a set such that if the SwitchSuccWrapper exists in the set and
8129  // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
8130  // which is not in the set should be replaced with the one in the set. If the
8131  // SwitchSuccWrapper is not in the set, then it should be added to the set so
8132  // other SwitchSuccWrappers can check against it in the same manner. We use
8133  // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
8134  // around information to isEquality, getHashValue, and when doing the
8135  // replacement with better performance.
8136  DenseSet<const SwitchSuccWrapper *> ReplaceWith;
8137  ReplaceWith.reserve(Cases.size());
8138
  // NOTE(review): the declaration of `Updates` (doxygen line 8139) was lost
  // in extraction; it collects DominatorTree updates applied below.
8140  Updates.reserve(ReplaceWith.size());
8141  bool MadeChange = false;
8142  for (auto &SSW : Cases) {
8143    // SSW is a candidate for simplification. If we find a duplicate BB,
8144    // replace it.
8145    const auto [It, Inserted] = ReplaceWith.insert(&SSW);
8146    if (!Inserted) {
8147      // We know that SI's parent BB no longer dominates the old case successor
8148      // since we are making it dead.
8149      Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
8150      const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
8151      for (unsigned Idx : Successors)
8152        SI->setSuccessor(Idx, (*It)->Dest);
8153      MadeChange = true;
8154    }
8155  }
8156
8157  if (DTU)
8158    DTU->applyUpdates(Updates);
8159
8160  return MadeChange;
8161}
8162
8163bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  // Driver: tries each switch simplification in a fixed order; the first one
  // that fires requests another round of simplification.
8164  BasicBlock *BB = SI->getParent();
8165
8166  if (isValueEqualityComparison(SI)) {
8167    // If we only have one predecessor, and if it is a branch on this value,
8168    // see if that predecessor totally determines the outcome of this switch.
8169    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8170      if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8171        return requestResimplify();
8172
8173    Value *Cond = SI->getCondition();
8174    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8175      if (simplifySwitchOnSelect(SI, Select))
8176        return requestResimplify();
8177
8178    // If the block only contains the switch, see if we can fold the block
8179    // away into any preds.
8180    if (SI == &*BB->instructionsWithoutDebug(false).begin())
8181      if (foldValueComparisonIntoPredecessors(SI, Builder))
8182        return requestResimplify();
8183  }
8184
8185  // Try to transform the switch into an icmp and a branch.
8186  // The conversion from switch to comparison may lose information on
8187  // impossible switch values, so disable it early in the pipeline.
8188  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8189    return requestResimplify();
8190
8191  // Remove unreachable cases.
8192  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8193    return requestResimplify();
8194
8195  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8196    return requestResimplify();
8197
8198  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8199    return requestResimplify();
8200
8201  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8202    return requestResimplify();
8203
8204  // The conversion of switches to arithmetic or lookup table is disabled in
8205  // the early optimization pipeline, as it may lose information or make the
8206  // resulting code harder to analyze.
8207  if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8208    if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8209                             Options.ConvertSwitchToLookupTable))
8210      return requestResimplify();
8211
8212  if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8213    return requestResimplify();
8214
8215  if (reduceSwitchRange(SI, Builder, DL, TTI))
8216    return requestResimplify();
8217
8218  if (HoistCommon &&
8219      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8220    return requestResimplify();
8221
8222  if (simplifyDuplicateSwitchArms(SI, DTU))
8223    return requestResimplify();
8224
8225  if (simplifySwitchWhenUMin(SI, DTU))
8226    return requestResimplify();
8227
  // No transform applied; leave the switch unchanged.
8228  return false;
8229}
8230
8231bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8232  BasicBlock *BB = IBI->getParent();
8233  bool Changed = false;
8234  SmallVector<uint32_t> BranchWeights;
8235  const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8236                                extractBranchWeights(*IBI, BranchWeights);
8237
  // Accumulate the profile weight per destination so that merging duplicate
  // destinations below does not lose profile mass.
8238  DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8239  if (HasBranchWeights)
8240    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8241      TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8242
8243  // Eliminate redundant destinations.
8244  SmallPtrSet<Value *, 8> Succs;
8245  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8246  for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8247    BasicBlock *Dest = IBI->getDestination(I);
    // Drop a destination if its address is not taken (it can never be the
    // target) or if it already appears earlier in the destination list.
8248    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8249      if (!Dest->hasAddressTaken())
8250        RemovedSuccs.insert(Dest);
8251      Dest->removePredecessor(BB);
8252      IBI->removeDestination(I);
8253      --I;
8254      --E;
8255      Changed = true;
8256    }
8257  }
8258
8259  if (DTU) {
8260    std::vector<DominatorTree::UpdateType> Updates;
8261    Updates.reserve(RemovedSuccs.size());
8262    for (auto *RemovedSucc : RemovedSuccs)
8263      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8264    DTU->applyUpdates(Updates);
8265  }
8266
8267  if (IBI->getNumDestinations() == 0) {
8268    // If the indirectbr has no successors, change it to unreachable.
8269    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    // NOTE(review): the line disposing of the now-dead indirectbr (doxygen
    // line 8270) was lost in extraction.
8271    return true;
8272  }
8273
8274  if (IBI->getNumDestinations() == 1) {
8275    // If the indirectbr has one successor, change it to a direct branch.
    // NOTE(review): the lines creating the direct branch and disposing of the
    // indirectbr (doxygen lines 8276-8277) were lost in extraction.
8278    return true;
8279  }
8280  if (HasBranchWeights) {
8281    SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8282    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8283      NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8284    setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8285  }
8286  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8287    if (simplifyIndirectBrOnSelect(IBI, SI))
8288      return requestResimplify();
8289  }
8290  return Changed;
8291}
8292
8293/// Given an block with only a single landing pad and a unconditional branch
8294/// try to find another basic block which this one can be merged with.  This
8295/// handles cases where we have multiple invokes with unique landing pads, but
8296/// a shared handler.
8297///
8298/// We specifically choose to not worry about merging non-empty blocks
8299/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8300/// practice, the optimizer produces empty landing pad blocks quite frequently
8301/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8302/// sinking in this file)
8303///
8304/// This is primarily a code size optimization. We need to avoid performing
8305/// any transform which might inhibit optimization (such as our ability to
8306/// specialize a particular handler via tail commoning). We do this by not
8307/// merging any blocks which require us to introduce a phi. Since the same
8308/// values are flowing through both blocks, we don't lose any ability to
8309/// specialize. If anything, we make such specialization more likely.
8310///
8311/// TODO - This transformation could remove entries from a phi in the target
8312/// block when the inputs in the phi are the same for the two blocks being
8313/// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): the first line of the signature (doxygen line 8314,
// presumably "static bool tryToMergeLandingPad(LandingPadInst *LPad,
// BranchInst *BI,") was lost in extraction.
8315                                 BasicBlock *BB, DomTreeUpdater *DTU) {
8316  auto Succ = BB->getUniqueSuccessor();
8317  assert(Succ);
8318  // If there's a phi in the successor block, we'd likely have to introduce
8319  // a phi into the merged landing pad block.
8320  if (isa<PHINode>(*Succ->begin()))
8321    return false;
8322
  // Scan sibling predecessors of the shared successor for a block identical
  // to BB (same landing pad, same branch).
8323  for (BasicBlock *OtherPred : predecessors(Succ)) {
8324    if (BB == OtherPred)
8325      continue;
8326    BasicBlock::iterator I = OtherPred->begin();
    // NOTE(review): the dyn_cast producing `LPad2` from *I (doxygen line
    // 8327) was lost in extraction.
8328    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8329      continue;
8330    ++I;
    // NOTE(review): the dyn_cast producing `BI2` from *I (doxygen line 8331)
    // was lost in extraction.
8332    if (!BI2 || !BI2->isIdenticalTo(BI))
8333      continue;
8334
8335    std::vector<DominatorTree::UpdateType> Updates;
8336
8337    // We've found an identical block.  Update our predecessors to take that
8338    // path instead and make ourselves dead.
    // NOTE(review): the declaration of `UniquePreds` (doxygen line 8339) was
    // lost in extraction; presumably the deduplicated predecessors of BB.
8340    for (BasicBlock *Pred : UniquePreds) {
8341      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8342      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8343             "unexpected successor");
8344      II->setUnwindDest(OtherPred);
8345      if (DTU) {
8346        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8347        Updates.push_back({DominatorTree::Delete, Pred, BB});
8348      }
8349    }
8350
    // NOTE(review): the declaration of `UniqueSuccs` (doxygen line 8351) was
    // lost in extraction; presumably the deduplicated successors of BB.
8352    for (BasicBlock *Succ : UniqueSuccs) {
8353      Succ->removePredecessor(BB);
8354      if (DTU)
8355        Updates.push_back({DominatorTree::Delete, BB, Succ});
8356    }
8357
    // BB is now dead: replace its branch with unreachable.
8358    IRBuilder<> Builder(BI);
8359    Builder.CreateUnreachable();
8360    BI->eraseFromParent();
8361    if (DTU)
8362      DTU->applyUpdates(Updates);
8363    return true;
8364  }
8365  return false;
8366}
8367
8368bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8369 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8370 : simplifyCondBranch(Branch, Builder);
8371}
8372
8373bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
8374                                          IRBuilder<> &Builder) {
8375  BasicBlock *BB = BI->getParent();
8376  BasicBlock *Succ = BI->getSuccessor(0);
8377
8378  // If the Terminator is the only non-phi instruction, simplify the block.
8379  // If LoopHeader is provided, check if the block or its successor is a loop
8380  // header. (This is for early invocations before loop simplify and
8381  // vectorization to keep canonical loop forms for nested loops. These blocks
8382  // can be eliminated when the pass is invoked later in the back-end.)
8383  // Note that if BB has only one predecessor then we do not introduce new
8384  // backedge, so we can eliminate BB.
8385  bool NeedCanonicalLoop =
8386      Options.NeedCanonicalLoop &&
8387      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8388       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
  // NOTE(review): the declaration of the iterator `I` (doxygen line 8389) was
  // lost in extraction; it points at BB's first non-PHI/non-debug
  // instruction, which the checks below advance.
8390  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8391      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8392    return true;
8393
8394  // If the only instruction in the block is a seteq/setne comparison against a
8395  // constant, try to simplify the block.
8396  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8397    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8398      ++I;
8399      if (I->isTerminator() &&
8400          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8401        return true;
8402      if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8403          tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8404                                                      Builder))
8405        return true;
8406    }
8407  }
8408
8409  // See if we can merge an empty landing pad block with another which is
8410  // equivalent.
8411  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8412    ++I;
8413    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8414      return true;
8415  }
8416
8417  // If this basic block is ONLY a compare and a branch, and if a predecessor
8418  // branches to us and our successor, fold the comparison into the
8419  // predecessor and use logical operations to update the incoming value
8420  // for PHI nodes in common successor.
8421  if (Options.SpeculateBlocks &&
8422      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8423                             Options.BonusInstThreshold))
8424    return requestResimplify();
8425  return false;
8426}
8427
// NOTE(review): the signature of this helper (doxygen line 8428) was lost in
// extraction. From the body: it returns the single block that is the unique
// predecessor of every predecessor of BB, or nullptr if no such common
// grand-predecessor exists.
8429  BasicBlock *PredPred = nullptr;
8430  for (auto *P : predecessors(BB)) {
8431    BasicBlock *PPred = P->getSinglePredecessor();
    // Fail if any predecessor has multiple predecessors, or if two
    // predecessors disagree on their single predecessor.
8432    if (!PPred || (PredPred && PredPred != PPred))
8433      return nullptr;
8434    PredPred = PPred;
8435  }
8436  return PredPred;
8437}
8438
8439/// Fold the following pattern:
8440/// bb0:
8441///   br i1 %cond1, label %bb1, label %bb2
8442/// bb1:
8443///   br i1 %cond2, label %bb3, label %bb4
8444/// bb2:
8445///   br i1 %cond2, label %bb4, label %bb3
8446/// bb3:
8447///   ...
8448/// bb4:
8449///   ...
8450/// into
8451/// bb0:
8452///   %cond = xor i1 %cond1, %cond2
8453///   br i1 %cond, label %bb4, label %bb3
8454/// bb3:
8455///   ...
8456/// bb4:
8457///   ...
8458/// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): the signature line (doxygen line 8459, presumably
// "static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {")
// was lost in extraction.
8460  BasicBlock *BB = BI->getParent();
8461  BasicBlock *BB1 = BI->getSuccessor(0);
8462  BasicBlock *BB2 = BI->getSuccessor(1);
  // A "simple" successor contains only a conditional branch whose targets are
  // distinct from both itself and BB, and whose targets have no PHIs.
8463  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8464    if (Succ == BB)
8465      return false;
8466    if (&Succ->front() != Succ->getTerminator())
8467      return false;
8468    SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8469    if (!SuccBI || !SuccBI->isConditional())
8470      return false;
8471    BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8472    BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8473    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8474           !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8475  };
8476  BranchInst *BB1BI, *BB2BI;
8477  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8478    return false;
8479
  // The two inner branches must test the same condition with swapped targets.
8480  if (BB1BI->getCondition() != BB2BI->getCondition() ||
8481      BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8482      BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8483    return false;
8484
8485  BasicBlock *BB3 = BB1BI->getSuccessor(0);
8486  BasicBlock *BB4 = BB1BI->getSuccessor(1);
8487  IRBuilder<> Builder(BI);
8488  BI->setCondition(
8489      Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8490  BB1->removePredecessor(BB);
8491  BI->setSuccessor(0, BB4);
8492  BB2->removePredecessor(BB);
8493  BI->setSuccessor(1, BB3);
8494  if (DTU) {
    // NOTE(review): the declaration of `Updates` (doxygen line 8495) was lost
    // in extraction.
8496    Updates.push_back({DominatorTree::Delete, BB, BB1});
8497    Updates.push_back({DominatorTree::Insert, BB, BB4});
8498    Updates.push_back({DominatorTree::Delete, BB, BB2});
8499    Updates.push_back({DominatorTree::Insert, BB, BB3});
8500
8501    DTU->applyUpdates(Updates);
8502  }
  // Combine the weights of the two two-hop paths into each surviving edge;
  // any branch lacking metadata contributes a neutral 1:1 split.
8503  bool HasWeight = false;
8504  uint64_t BBTWeight, BBFWeight;
8505  if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8506    HasWeight = true;
8507  else
8508    BBTWeight = BBFWeight = 1;
8509  uint64_t BB1TWeight, BB1FWeight;
8510  if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8511    HasWeight = true;
8512  else
8513    BB1TWeight = BB1FWeight = 1;
8514  uint64_t BB2TWeight, BB2FWeight;
8515  if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8516    HasWeight = true;
8517  else
8518    BB2TWeight = BB2FWeight = 1;
8519  if (HasWeight) {
8520    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8521                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8522    setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8523                           /*ElideAllZero=*/true);
8524  }
8525  return true;
8526}
8527
// Peephole-simplify a two-way conditional branch terminator. Tries a fixed
// sequence of transforms (equality-comparison folding into predecessors,
// icmp-chain -> switch, dominating-condition implication, folding into a
// common destination, hoisting/sinking/speculating successor code, value
// threading, cond-branch merging, diamond store merging, nested-branch
// merging) and returns true — usually via requestResimplify() so the caller
// re-runs the whole pipeline — as soon as any one of them fires.
8528 bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8529   assert(
// NOTE(review): extraction dropped source line 8530 here (part of the assert
// condition, presumably that BI is conditional) — verify against upstream.
8531       BI->getSuccessor(0) != BI->getSuccessor(1) &&
8532       "Tautological conditional branch should have been eliminated already.");
8533
8534   BasicBlock *BB = BI->getParent();
  // Bail out when the pass options disable this transform, or the function is
  // built for fuzzing (where CFG simplification hides coverage signal).
8535   if (!Options.SimplifyCondBranch ||
8536       BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8537     return false;
8538
8539   // Conditional branch
8540   if (isValueEqualityComparison(BI)) {
8541     // If we only have one predecessor, and if it is a branch on this value,
8542     // see if that predecessor totally determines the outcome of this
8543     // switch.
8544     if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8545       if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8546         return requestResimplify();
8547
8548     // This block must be empty, except for the setcond inst, if it exists.
8549     // Ignore dbg and pseudo intrinsics.
8550     auto I = BB->instructionsWithoutDebug(true).begin();
8551     if (&*I == BI) {
8552       if (foldValueComparisonIntoPredecessors(BI, Builder))
8553         return requestResimplify();
8554     } else if (&*I == cast<Instruction>(BI->getCondition())) {
       // The only non-debug instruction before the branch is its own
       // condition; the fold is still legal in that case.
8555       ++I;
8556       if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8557         return requestResimplify();
8558     }
8559   }
8560
8561   // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8562   if (simplifyBranchOnICmpChain(BI, Builder, DL))
8563     return true;
8564
8565   // If this basic block has dominating predecessor blocks and the dominating
8566   // blocks' conditions imply BI's condition, we know the direction of BI.
8567   std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8568   if (Imp) {
8569     // Turn this into a branch on constant.
8570     auto *OldCond = BI->getCondition();
8571     ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8572                              : ConstantInt::getFalse(BB->getContext());
8573     BI->setCondition(TorF);
// NOTE(review): extraction dropped source line 8574 here (presumably dead-code
// cleanup of OldCond, which is otherwise unused) — verify against upstream.
8575     return requestResimplify();
8576   }
8577
8578   // If this basic block is ONLY a compare and a branch, and if a predecessor
8579   // branches to us and one of our successors, fold the comparison into the
8580   // predecessor and use logical operations to pick the right destination.
8581   if (Options.SpeculateBlocks &&
8582       foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8583                              Options.BonusInstThreshold))
8584     return requestResimplify();
8585
8586   // We have a conditional branch to two blocks that are only reachable
8587   // from BI. We know that the condbr dominates the two blocks, so see if
8588   // there is any identical code in the "then" and "else" blocks. If so, we
8589   // can hoist it up to the branching block.
8590   if (BI->getSuccessor(0)->getSinglePredecessor()) {
8591     if (BI->getSuccessor(1)->getSinglePredecessor()) {
8592       if (HoistCommon &&
8593           hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8594         return requestResimplify();
8595
       // If the target supports conditionally-faulting loads/stores, try to
       // hoist cheap loads/stores from both successors under the condition.
8596       if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8597           isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8598         SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
         // Succeed only if every non-terminator instruction in both
         // successors is a safe, cheap load/store (and the count stays
         // under the threshold — see the elision note below).
8599         auto CanSpeculateConditionalLoadsStores = [&]() {
8600           for (auto *Succ : successors(BB)) {
8601             for (Instruction &I : *Succ) {
8602               if (I.isTerminator()) {
8603                 if (I.getNumSuccessors() > 1)
8604                   return false;
8605                 continue;
8606               } else if (!isSafeCheapLoadStore(&I, TTI) ||
8607                          SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): extraction dropped source line 8608 here (the size-limit
// constant compared against) — verify against upstream.
8609                 return false;
8610               }
8611               SpeculatedConditionalLoadsStores.push_back(&I);
8612             }
8613           }
8614           return !SpeculatedConditionalLoadsStores.empty();
8615         };
8616
8617         if (CanSpeculateConditionalLoadsStores()) {
8618           hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8619                                       std::nullopt, nullptr);
8620           return requestResimplify();
8621         }
8622       }
8623     } else {
8624       // If Successor #1 has multiple preds, we may be able to conditionally
8625       // execute Successor #0 if it branches to Successor #1.
8626       Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8627       if (Succ0TI->getNumSuccessors() == 1 &&
8628           Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8629         if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8630           return requestResimplify();
8631     }
8632   } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8633     // If Successor #0 has multiple preds, we may be able to conditionally
8634     // execute Successor #1 if it branches to Successor #0.
8635     Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8636     if (Succ1TI->getNumSuccessors() == 1 &&
8637         Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8638       if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8639         return requestResimplify();
8640   }
8641
8642   // If this is a branch on something for which we know the constant value in
8643   // predecessors (e.g. a phi node in the current block), thread control
8644   // through this block.
8645   if (foldCondBranchOnValueKnownInPredecessor(BI))
8646     return requestResimplify();
8647
8648   // Scan predecessor blocks for conditional branches.
8649   for (BasicBlock *Pred : predecessors(BB))
8650     if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8651       if (PBI != BI && PBI->isConditional())
8652         if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8653           return requestResimplify();
8654
8655   // Look for diamond patterns.
8656   if (MergeCondStores)
8657     if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8658       if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8659         if (PBI != BI && PBI->isConditional())
8660           if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8661             return requestResimplify();
8662
8663   // Look for nested conditional branches.
8664   if (mergeNestedCondBranch(BI, DTU))
8665     return requestResimplify();
8666
8667   return false;
8668 }
8669
8670 /// Check if passing a value to an instruction will cause undefined behavior.
///
/// \p V is the (constant) value being passed in place of \p I's uses; \p I is
/// the instruction standing in for that value (e.g. a PHI node). Returns true
/// when the first handled use of \p I is an operation for which a null/undef
/// operand is immediate UB (load/store through null, call of null, div/rem by
/// zero, noundef/nonnull violations, assume(false), ...).
/// \p PtrValueMayBeModified records that the pointer may no longer be exactly
/// null after a GEP, which weakens some of the nonnull-based conclusions.
8671 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8672   assert(V->getType() == I->getType() && "Mismatched types");
// NOTE(review): extraction dropped source line 8673 here (it defines C,
// presumably `Constant *C = dyn_cast<Constant>(V);`) — verify upstream.
8674   if (!C)
8675     return false;
8676
  // No uses -> the value can never trigger UB through I.
8677   if (I->use_empty())
8678     return false;
8679
8680   if (C->isNullValue() || isa<UndefValue>(C)) {
8681     // Only look at the first use we can handle, avoid hurting compile time with
8682     // long uselists
8683     auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8684       auto *Use = cast<Instruction>(U.getUser());
8685       // Change this list when we want to add new instructions.
8686       switch (Use->getOpcode()) {
8687       default:
8688         return false;
8689       case Instruction::GetElementPtr:
8690       case Instruction::Ret:
8691       case Instruction::BitCast:
8692       case Instruction::Load:
8693       case Instruction::Store:
8694       case Instruction::Call:
8695       case Instruction::CallBr:
8696       case Instruction::Invoke:
8697       case Instruction::UDiv:
8698       case Instruction::URem:
8699         // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8700         // implemented to avoid code complexity as it is unclear how useful such
8701         // logic is.
8702       case Instruction::SDiv:
8703       case Instruction::SRem:
8704         return true;
8705       }
8706     });
8707     if (FindUse == I->use_end())
8708       return false;
8709     auto &Use = *FindUse;
8710     auto *User = cast<Instruction>(Use.getUser());
8711     // Bail out if User is not in the same BB as I or User == I or User comes
8712     // before I in the block. The latter two can be the case if User is a
8713     // PHI node.
8714     if (User->getParent() != I->getParent() || User == I ||
8715         User->comesBefore(I))
8716       return false;
8717
8718     // Now make sure that there are no instructions in between that can alter
8719     // control flow (eg. calls)
8720     auto InstrRange =
8721         make_range(std::next(I->getIterator()), User->getIterator());
8722     if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): extraction dropped source line 8723 (the lambda body, the
// per-instruction control-flow-transfer check) — verify upstream.
8724         }))
8725       return false;
8726
8727     // Look through GEPs. A load from a GEP derived from NULL is still undefined
// NOTE(review): extraction dropped source line 8728 (presumably the
// `dyn_cast<GetElementPtrInst>(User)` guard defining GEP) — verify upstream.
8729       if (GEP->getPointerOperand() == I) {
8730         // The type of GEP may differ from the type of base pointer.
8731         // Bail out on vector GEPs, as they are not handled by other checks.
8732         if (GEP->getType()->isVectorTy())
8733           return false;
8734         // The current base address is null, there are four cases to consider:
8735         // getelementptr (TY, null, 0) -> null
8736         // getelementptr (TY, null, not zero) -> may be modified
8737         // getelementptr inbounds (TY, null, 0) -> null
8738         // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8739         // undefined?
8740         if (!GEP->hasAllZeroIndices() &&
8741             (!GEP->isInBounds() ||
8742              NullPointerIsDefined(GEP->getFunction(),
8743                                   GEP->getPointerAddressSpace())))
8744           PtrValueMayBeModified = true;
        // Recurse through the GEP to examine its own (single handled) use.
8745         return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8746       }
8747
8748     // Look through return.
8749     if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8750       bool HasNoUndefAttr =
8751           Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8752       // Return undefined to a noundef return value is undefined.
8753       if (isa<UndefValue>(C) && HasNoUndefAttr)
8754         return true;
8755       // Return null to a nonnull+noundef return value is undefined.
8756       if (C->isNullValue() && HasNoUndefAttr &&
8757           Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8758         return !PtrValueMayBeModified;
8759       }
8760     }
8761
8762     // Load from null is undefined.
8763     if (LoadInst *LI = dyn_cast<LoadInst>(User))
8764       if (!LI->isVolatile())
8765         return !NullPointerIsDefined(LI->getFunction(),
8766                                      LI->getPointerAddressSpace());
8767
8768     // Store to null is undefined.
// NOTE(review): extraction dropped source line 8769 (presumably the
// `dyn_cast<StoreInst>(User)` guard defining SI) — verify upstream.
8770       if (!SI->isVolatile())
8771         return (!NullPointerIsDefined(SI->getFunction(),
8772                                       SI->getPointerAddressSpace())) &&
8773                SI->getPointerOperand() == I;
8774
8775     // llvm.assume(false/undef) always triggers immediate UB.
8776     if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8777       // Ignore assume operand bundles.
8778       if (I == Assume->getArgOperand(0))
8779         return true;
8780     }
8781
8782     if (auto *CB = dyn_cast<CallBase>(User)) {
      // In address spaces where null is a valid pointer, none of the
      // null-based conclusions below hold.
8783       if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8784         return false;
8785       // A call to null is undefined.
8786       if (CB->getCalledOperand() == I)
8787         return true;
8788
8789       if (CB->isArgOperand(&Use)) {
8790         unsigned ArgIdx = CB->getArgOperandNo(&Use);
8791         // Passing null to a nonnnull+noundef argument is undefined.
// NOTE(review): extraction dropped source line 8792 (the first half of this
// condition, presumably `if (C->isNullValue() &&`) — verify upstream.
8793             CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8794           return !PtrValueMayBeModified;
8795         // Passing undef to a noundef argument is undefined.
8796         if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8797           return true;
8798       }
8799     }
8800     // Div/Rem by zero is immediate UB
8801     if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8802       return true;
8803   }
8804   return false;
8805 }
8806
8807 /// If BB has an incoming value that will always trigger undefined behavior
8808 /// (eg. null pointer dereference), remove the branch leading here.
///
/// Scans every PHI in \p BB; when an incoming value would be immediate UB
/// (per passingValueIsAlwaysUndefined), the edge from that predecessor is
/// severed: unconditional branches become `unreachable`, conditional branches
/// are rewired to the other successor (with an `assume` preserving the
/// implied condition), and switch edges are redirected to a fresh
/// `unreachable` block. Returns true after removing at most one edge.
// NOTE(review): extraction dropped source line 8809 here (the function
// signature's first line, presumably
// `static bool removeUndefIntroducingPredecessor(BasicBlock *BB,`) — verify
// against upstream.
8810                                               DomTreeUpdater *DTU,
8811                                               AssumptionCache *AC) {
8812   for (PHINode &PHI : BB->phis())
8813     for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8814       if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8815         BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8816         Instruction *T = Predecessor->getTerminator();
8817         IRBuilder<> Builder(T);
8818         if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8819           BB->removePredecessor(Predecessor);
8820           // Turn unconditional branches into unreachables and remove the dead
8821           // destination from conditional branches.
8822           if (BI->isUnconditional())
8823             Builder.CreateUnreachable();
8824           else {
8825             // Preserve guarding condition in assume, because it might not be
8826             // inferrable from any dominating condition.
8827             Value *Cond = BI->getCondition();
8828             CallInst *Assumption;
8829             if (BI->getSuccessor(0) == BB)
8830               Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8831             else
8832               Assumption = Builder.CreateAssumption(Cond);
8833             if (AC)
8834               AC->registerAssumption(cast<AssumeInst>(Assumption));
            // Replace the conditional branch with an unconditional branch to
            // the surviving successor.
8835             Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8836                                                        : BI->getSuccessor(0));
8837           }
8838           BI->eraseFromParent();
8839           if (DTU)
8840             DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8841           return true;
8842         } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8843           // Redirect all branches leading to UB into
8844           // a newly created unreachable block.
8845           BasicBlock *Unreachable = BasicBlock::Create(
8846               Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8847           Builder.SetInsertPoint(Unreachable);
8848           // The new block contains only one instruction: Unreachable
8849           Builder.CreateUnreachable();
8850           for (const auto &Case : SI->cases())
8851             if (Case.getCaseSuccessor() == BB) {
8852               BB->removePredecessor(Predecessor);
8853               Case.setSuccessor(Unreachable);
8854             }
8855           if (SI->getDefaultDest() == BB) {
8856             BB->removePredecessor(Predecessor);
8857             SI->setDefaultDest(Unreachable);
8858           }
8859
8860           if (DTU)
8861             DTU->applyUpdates(
8862                 { { DominatorTree::Insert, Predecessor, Unreachable },
8863                   { DominatorTree::Delete, Predecessor, BB } });
8864           return true;
8865         }
8866       }
8867
  // No UB-introducing incoming edge found.
8868   return false;
8869 }
8870
// Perform a single round of CFG simplification on \p BB: delete it if dead,
// constant-fold its terminator, de-duplicate PHIs, remove UB-introducing
// predecessors, merge it into its predecessor, sink common code, fold
// two-entry PHIs, and finally dispatch on the terminator kind to the
// specialized simplify* routines. Returns true if anything changed; may also
// set the Resimplify flag via requestResimplify().
8871 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8872   bool Changed = false;
8873
8874   assert(BB && BB->getParent() && "Block not embedded in function!");
8875   assert(BB->getTerminator() && "Degenerate basic block encountered!");
8876
8877   // Remove basic blocks that have no predecessors (except the entry block)...
8878   // or that just have themself as a predecessor. These are unreachable.
8879   if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8880       BB->getSinglePredecessor() == BB) {
8881     LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8882     DeleteDeadBlock(BB, DTU);
8883     return true;
8884   }
8885
8886   // Check to see if we can constant propagate this terminator instruction
8887   // away...
8888   Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8889                                     /*TLI=*/nullptr, DTU);
8890
8891   // Check for and eliminate duplicate PHI nodes in this block.
// NOTE(review): extraction dropped source line 8892 here (the call performing
// the PHI de-duplication described above) — verify against upstream.
8893
8894   // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): extraction dropped source line 8895 here (the guard calling
// removeUndefIntroducingPredecessor) — verify against upstream.
8896     return requestResimplify();
8897
8898   // Merge basic blocks into their predecessor if there is only one distinct
8899   // pred, and if there is only one distinct successor of the predecessor, and
8900   // if there are no PHI nodes.
8901   if (MergeBlockIntoPredecessor(BB, DTU))
8902     return true;
8903
8904   if (SinkCommon && Options.SinkCommonInsts)
8905     if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8906         mergeCompatibleInvokes(BB, DTU)) {
8907       // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8908       // so we may now how duplicate PHI's.
8909       // Let's rerun EliminateDuplicatePHINodes() first,
8910       // before foldTwoEntryPHINode() potentially converts them into select's,
8911       // after which we'd need a whole EarlyCSE pass run to cleanup them.
8912       return true;
8913     }
8914
8915   IRBuilder<> Builder(BB);
8916
8917   if (Options.SpeculateBlocks &&
8918       !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8919     // If there is a trivial two-entry PHI node in this basic block, and we can
8920     // eliminate it, do so now.
8921     if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8922       if (PN->getNumIncomingValues() == 2)
8923         if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8924                                 Options.SpeculateUnpredictables))
8925           return true;
8926   }
8927
  // Dispatch to the terminator-specific simplification routine.
// NOTE(review): extraction dropped source line 8928 here (the declaration of
// Terminator, presumably `Instruction *Terminator = BB->getTerminator();`) —
// verify against upstream.
8929   Builder.SetInsertPoint(Terminator);
8930   switch (Terminator->getOpcode()) {
8931   case Instruction::Br:
8932     Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8933     break;
8934   case Instruction::Resume:
8935     Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8936     break;
8937   case Instruction::CleanupRet:
8938     Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8939     break;
8940   case Instruction::Switch:
8941     Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8942     break;
8943   case Instruction::Unreachable:
8944     Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8945     break;
8946   case Instruction::IndirectBr:
8947     Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8948     break;
8949   }
8950
8951   return Changed;
8952 }
8953
8954bool SimplifyCFGOpt::run(BasicBlock *BB) {
8955 bool Changed = false;
8956
8957 // Repeated simplify BB as long as resimplification is requested.
8958 do {
8959 Resimplify = false;
8960
8961 // Perform one round of simplifcation. Resimplify flag will be set if
8962 // another iteration is requested.
8963 Changed |= simplifyOnce(BB);
8964 } while (Resimplify);
8965
8966 return Changed;
8967}
8968
// Public entry point: construct a SimplifyCFGOpt for this block's function
// (using the block's DataLayout) and run it to a fixed point on BB.
// NOTE(review): extraction dropped source lines 8969-8970 here (the first
// lines of this function's signature — presumably
// `bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
//  DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,`) — verify
// against upstream.
8971                        ArrayRef<WeakVH> LoopHeaders) {
8972   return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8973                         Options)
8974       .run(BB);
8975 }
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1167
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1575
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
back - Get the last element.
Definition ArrayRef.h:151
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:939
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1130
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:255
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1934
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1850
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
LLVM_ABI CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:293
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:292
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:24
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:195
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1700
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2088
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1789
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2140
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3094
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3368
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3875
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1715
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2132
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1594
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2100
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:320
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276