LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
/// Driver for the per-block CFG peephole simplifications in this file.
/// `run(BB)` repeatedly applies the transformations below until none of them
/// requests a resimplification.
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;    // Cost model used to gate transforms.
  DomTreeUpdater *DTU;               // May be null; updated on CFG edits.
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;      // Blocks that must not be removed.
  const SimplifyCFGOptions &Options; // Tuning knobs supplied by the pass.
  bool Resimplify;                   // Set when a transform wants another run.

  // Value-equality comparison folding (switches and eq/ne branches).
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // Simplifications keyed on the kind of terminator instruction.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);

  // Hoisting/sinking and speculation helpers.
  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);
  bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
                                                   SelectInst *Select,
                                                   IRBuilder<> &Builder);
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs,
      ArrayRef<BasicBlock *> UniqueSuccessors);
  bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
378 EquivalenceSet->contains(IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(SI2BB)) {
401 if (!SI1Succs.count(Succ))
402 continue;
403 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
426 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
465 return false;
466
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
484 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
495 return false;
496
497 // Overflow arithmetic instruction plus extract value are usually generated
498 // when a division is being replaced. But, in this case, the zero check may
499 // still be kept in the code. In that case it would be worth to hoist these
500 // two instruction out of the basic block. Let's treat this pattern as one
501 // single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
504 ZeroCostInstructions.insert(OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
534 // Normal constant int.
536 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
550 return ConstantInt::get(IntPtrTy, 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(I, m_Not(m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(I, m_NUWTrunc(m_Value(Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
657 (C = getConstantInt(I->getOperand(1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of a unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(ICI->getOperand(0),
709 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(C);
717 Vals.push_back(
718 ConstantInt::get(C->getContext(),
719 C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(ICI->getOperand(0),
732 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(C);
740 Vals.push_back(ConstantInt::get(C->getContext(),
741 C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(0);
763 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
764 Span = Span.subtract(*RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
801 IsEq = true;
802 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
816 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
817 if (Visited.insert(Op1).second)
818 DFT.push_back(Op1);
819 if (Visited.insert(Op0).second)
820 DFT.push_back(Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be match as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
851 Cond = dyn_cast<Instruction>(SI->getCondition());
852 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
853 if (BI->isConditional())
854 Cond = dyn_cast<Instruction>(BI->getCondition());
855 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
856 Cond = dyn_cast<Instruction>(IBI->getAddress());
857 }
858
859 TI->eraseFromParent();
860 if (Cond)
862}
863
864/// Return true if the specified terminator checks
865/// to see if a value is equal to constant integer value.
866Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
867 Value *CV = nullptr;
868 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
869 // Do not permit merging of large switch instructions into their
870 // predecessors unless there is only one predecessor.
871 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
872 CV = SI->getCondition();
873 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
874 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
875 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
876 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
877 CV = ICI->getOperand(0);
878 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
879 if (Trunc->hasNoUnsignedWrap())
880 CV = Trunc->getOperand(0);
881 }
882 }
883
884 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
885 if (CV) {
886 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
887 Value *Ptr = PTII->getPointerOperand();
888 if (DL.hasUnstableRepresentation(Ptr->getType()))
889 return CV;
890 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
891 CV = Ptr;
892 }
893 }
894 return CV;
895}
896
897/// Given a value comparison instruction,
898/// decode all of the 'cases' that it represents and return the 'default' block.
899BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
900 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
901 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
902 Cases.reserve(SI->getNumCases());
903 for (auto Case : SI->cases())
904 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
905 Case.getCaseSuccessor()));
906 return SI->getDefaultDest();
907 }
908
909 BranchInst *BI = cast<BranchInst>(TI);
910 Value *Cond = BI->getCondition();
911 ICmpInst::Predicate Pred;
912 ConstantInt *C;
913 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
914 Pred = ICI->getPredicate();
915 C = getConstantInt(ICI->getOperand(1), DL);
916 } else {
917 Pred = ICmpInst::ICMP_NE;
918 auto *Trunc = cast<TruncInst>(Cond);
919 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
920 }
921 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
922 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
923 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
924}
925
 926/// Given a vector of bb/value pairs, remove any entries
 927/// in the list that match the specified block.
 928static void
// NOTE(review): the first parameter line (presumably "eliminateBlockCases(
// BasicBlock *BB," on original line 929) was lost in extraction — confirm
// against the upstream source.
 930 std::vector<ValueEqualityComparisonCase> &Cases) {
// Erases every case equal to BB — presumably via an operator== against
// BasicBlock* declared with ValueEqualityComparisonCase outside this view.
 931 llvm::erase(Cases, BB);
 932}
933
934/// Return true if there are any keys in C1 that exist in C2 as well.
935static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
936 std::vector<ValueEqualityComparisonCase> &C2) {
937 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
938
939 // Make V1 be smaller than V2.
940 if (V1->size() > V2->size())
941 std::swap(V1, V2);
942
943 if (V1->empty())
944 return false;
945 if (V1->size() == 1) {
946 // Just scan V2.
947 ConstantInt *TheVal = (*V1)[0].Value;
948 for (const ValueEqualityComparisonCase &VECC : *V2)
949 if (TheVal == VECC.Value)
950 return true;
951 }
952
953 // Otherwise, just sort both lists and compare element by element.
954 array_pod_sort(V1->begin(), V1->end());
955 array_pod_sort(V2->begin(), V2->end());
956 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
957 while (i1 != e1 && i2 != e2) {
958 if ((*V1)[i1].Value == (*V2)[i2].Value)
959 return true;
960 if ((*V1)[i1].Value < (*V2)[i2].Value)
961 ++i1;
962 else
963 ++i2;
964 }
965 return false;
966}
967
 968/// If TI is known to be a terminator instruction and its block is known to
 969/// only have a single predecessor block, check to see if that predecessor is
 970/// also a value comparison with the same value, and if that comparison
 971/// determines the outcome of this comparison. If so, simplify TI. This does a
 972/// very limited form of jump threading.
 973bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
 974 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
 975 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
 976 if (!PredVal)
 977 return false; // Not a value comparison in predecessor.
 978
 979 Value *ThisVal = isValueEqualityComparison(TI);
 980 assert(ThisVal && "This isn't a value comparison!!");
 981 if (ThisVal != PredVal)
 982 return false; // Different predicates.
 983
 984 // TODO: Preserve branch weight metadata, similarly to how
 985 // foldValueComparisonIntoPredecessors preserves it.
 986
 987 // Find out information about when control will move from Pred to TI's block.
 988 std::vector<ValueEqualityComparisonCase> PredCases;
 989 BasicBlock *PredDef =
 990 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
 991 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
 992
 993 // Find information about how control leaves this block.
 994 std::vector<ValueEqualityComparisonCase> ThisCases;
 995 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
 996 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
 997
 998 // If TI's block is the default block from Pred's comparison, potentially
 999 // simplify TI based on this knowledge.
1000 if (PredDef == TI->getParent()) {
1001 // If we are here, we know that the value is none of those cases listed in
1002 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1003 // can simplify TI.
1004 if (!valuesOverlap(PredCases, ThisCases))
1005 return false;
1006
1007 if (isa<BranchInst>(TI)) {
1008 // Okay, one of the successors of this condbr is dead. Convert it to a
1009 // uncond br.
1010 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1011 // Insert the new branch.
1012 Instruction *NI = Builder.CreateBr(ThisDef);
1013 (void)NI;
1014
1015 // Remove PHI node entries for the dead edge.
1016 ThisCases[0].Dest->removePredecessor(PredDef);
1017
1018 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1019 << "Through successor TI: " << *TI << "Leaving: " << *NI
1020 << "\n");
1021
// NOTE(review): a statement on original line 1022 (presumably the call that
// erases the old terminator and DCEs its condition) was lost in extraction —
// confirm against upstream before relying on this listing.
1023
1024 if (DTU)
1025 DTU->applyUpdates(
1026 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1027
1028 return true;
1029 }
1030
// The wrapper presumably keeps !prof branch-weight metadata consistent while
// cases are removed below.
1031 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1032 // Okay, TI has cases that are statically dead, prune them away.
1033 SmallPtrSet<Constant *, 16> DeadCases;
1034 for (const ValueEqualityComparisonCase &Case : PredCases)
1035 DeadCases.insert(Case.Value);
1036
1037 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1038 << "Through successor TI: " << *TI);
1039
// Walk the cases in reverse so removing the current case does not disturb
// the ones not yet visited.
1040 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1041 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1042 --i;
1043 auto *Successor = i->getCaseSuccessor();
1044 if (DTU)
1045 ++NumPerSuccessorCases[Successor];
1046 if (DeadCases.count(i->getCaseValue())) {
1047 Successor->removePredecessor(PredDef);
1048 SI.removeCase(i);
1049 if (DTU)
1050 --NumPerSuccessorCases[Successor];
1051 }
1052 }
1053
// Only edges whose per-successor case count dropped to zero are truly gone.
1054 if (DTU) {
1055 std::vector<DominatorTree::UpdateType> Updates;
1056 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1057 if (I.second == 0)
1058 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1059 DTU->applyUpdates(Updates);
1060 }
1061
1062 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1063 return true;
1064 }
1065
1066 // Otherwise, TI's block must correspond to some matched value. Find out
1067 // which value (or set of values) this is.
1068 ConstantInt *TIV = nullptr;
1069 BasicBlock *TIBB = TI->getParent();
1070 for (const auto &[Value, Dest] : PredCases)
1071 if (Dest == TIBB) {
1072 if (TIV)
1073 return false; // Cannot handle multiple values coming to this block.
1074 TIV = Value;
1075 }
1076 assert(TIV && "No edge from pred to succ?");
1077
1078 // Okay, we found the one constant that our value can be if we get into TI's
1079 // BB. Find out which successor will unconditionally be branched to.
1080 BasicBlock *TheRealDest = nullptr;
1081 for (const auto &[Value, Dest] : ThisCases)
1082 if (Value == TIV) {
1083 TheRealDest = Dest;
1084 break;
1085 }
1086
1087 // If not handled by any explicit cases, it is handled by the default case.
1088 if (!TheRealDest)
1089 TheRealDest = ThisDef;
1090
1091 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1092
// CheckEdge lets exactly one edge to TheRealDest survive; duplicate edges to
// the same block still have their PHI entries removed.
1093 // Remove PHI node entries for dead edges.
1094 BasicBlock *CheckEdge = TheRealDest;
1095 for (BasicBlock *Succ : successors(TIBB))
1096 if (Succ != CheckEdge) {
1097 if (Succ != TheRealDest)
1098 RemovedSuccs.insert(Succ);
1099 Succ->removePredecessor(TIBB);
1100 } else
1101 CheckEdge = nullptr;
1102
1103 // Insert the new branch.
1104 Instruction *NI = Builder.CreateBr(TheRealDest);
1105 (void)NI;
1106
1107 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1108 << "Through successor TI: " << *TI << "Leaving: " << *NI
1109 << "\n");
1110
// NOTE(review): a statement on original line 1111 (presumably erasing the old
// terminator and DCE'ing its condition) was lost in extraction.
1112 if (DTU) {
1113 SmallVector<DominatorTree::UpdateType, 2> Updates;
1114 Updates.reserve(RemovedSuccs.size());
1115 for (auto *RemovedSucc : RemovedSuccs)
1116 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1117 DTU->applyUpdates(Updates);
1118 }
1119 return true;
1120}
1121
1122namespace {
1123
1124/// This class implements a stable ordering of constant
1125/// integers that does not depend on their address. This is important for
1126/// applications that sort ConstantInt's to ensure uniqueness.
1127struct ConstantIntOrdering {
// Strict weak ordering by unsigned APInt value; usable as the comparator of
// a std::set/std::map keyed on ConstantInt*.
1128 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1129 return LHS->getValue().ult(RHS->getValue());
1130 }
1131};
1132
1133} // end anonymous namespace
1134
// NOTE(review): the signature line (original 1135, presumably "static int
// constantIntSortPredicate(ConstantInt *const *P1,") was lost in extraction.
// qsort-style comparator over ConstantInt* elements.
1136 ConstantInt *const *P2) {
1137 const ConstantInt *LHS = *P1;
1138 const ConstantInt *RHS = *P2;
// Identical pointers are identical constants.
1139 if (LHS == RHS)
1140 return 0;
// Returns 1 when LHS is the smaller value, i.e. this comparator yields a
// descending unsigned-value order under the qsort contract.
1141 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1142}
1143
1144/// Get Weights of a given terminator, the default weight is at the front
1145/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1146/// metadata.
// NOTE(review): the signature line (original 1147, presumably "static void
// getBranchWeights(Instruction *TI,") was lost in extraction.
1148 SmallVectorImpl<uint64_t> &Weights) {
1149 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
// Callers must only pass terminators that actually carry !prof weights.
1150 assert(MD && "Invalid branch-weight metadata");
1151 extractFromBranchWeightMD64(MD, Weights);
1152
1153 // If TI is a conditional eq, the default case is the false case,
1154 // and the corresponding branch-weight data is at index 2. We swap the
1155 // default weight to be the first entry.
1156 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1157 assert(Weights.size() == 2);
1158 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
// Non-icmp (trunc-based) conditions behave like ICMP_NE, whose default
// (true-successor) weight is already at the front — nothing to swap.
1159 if (!ICI)
1160 return;
1161
1162 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1163 std::swap(Weights.front(), Weights.back());
1164 }
1165}
1166
// Clones BB's non-terminator ("bonus") instructions into PredBlock before its
// terminator, remapping operands through VMap and patching up block-closed
// SSA uses.
// NOTE(review): the function's first signature line (original 1167) was lost
// in extraction — confirm the name/linkage against upstream.
1168 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1169 Instruction *PTI = PredBlock->getTerminator();
1170
1171 // If we have bonus instructions, clone them into the predecessor block.
1172 // Note that there may be multiple predecessor blocks, so we cannot move
1173 // bonus instructions to a predecessor block.
1174 for (Instruction &BonusInst : *BB) {
1175 if (BonusInst.isTerminator())
1176 continue;
1177
1178 Instruction *NewBonusInst = BonusInst.clone();
1179
1180 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1181 // Unless the instruction has the same !dbg location as the original
1182 // branch, drop it. When we fold the bonus instructions we want to make
1183 // sure we reset their debug locations in order to avoid stepping on
1184 // dead code caused by folding dead branches.
1185 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1186 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1187 mapAtomInstance(DL, VMap);
1188 }
1189
1190 RemapInstruction(NewBonusInst, VMap,
// NOTE(review): the remap-flags argument line (original 1191) was lost in
// extraction — likely RF_NoModuleLevelChanges | RF_IgnoreMissingLocals.
1192
1193 // If we speculated an instruction, we need to drop any metadata that may
1194 // result in undefined behavior, as the metadata might have been valid
1195 // only given the branch precondition.
1196 // Similarly strip attributes on call parameters that may cause UB in
1197 // location the call is moved to.
1198 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1199
1200 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1201 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1202 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
// NOTE(review): the remap-flags argument line (original 1203) was lost in
// extraction here as well.
1204
1205 NewBonusInst->takeName(&BonusInst);
1206 BonusInst.setName(NewBonusInst->getName() + ".old");
1207 VMap[&BonusInst] = NewBonusInst;
1208
1209 // Update (liveout) uses of bonus instructions,
1210 // now that the bonus instruction has been cloned into predecessor.
1211 // Note that we expect to be in a block-closed SSA form for this to work!
1212 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1213 auto *UI = cast<Instruction>(U.getUser());
1214 auto *PN = dyn_cast<PHINode>(UI);
1215 if (!PN) {
1216 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1217 "If the user is not a PHI node, then it should be in the same "
1218 "block as, and come after, the original bonus instruction.");
1219 continue; // Keep using the original bonus instruction.
1220 }
1221 // Is this the block-closed SSA form PHI node?
1222 if (PN->getIncomingBlock(U) == BB)
1223 continue; // Great, keep using the original bonus instruction.
1224 // The only other alternative is an "use" when coming from
1225 // the predecessor block - here we should refer to the cloned bonus instr.
1226 assert(PN->getIncomingBlock(U) == PredBlock &&
1227 "Not in block-closed SSA form?");
1228 U.set(NewBonusInst);
1229 }
1230 }
1231
1232 // Key Instructions: We may have propagated atom info into the pred. If the
1233 // pred's terminator already has atom info do nothing as merging would drop
1234 // one atom group anyway. If it doesn't, propagte the remapped atom group
1235 // from BB's terminator.
1236 if (auto &PredDL = PTI->getDebugLoc()) {
1237 auto &DL = BB->getTerminator()->getDebugLoc();
1238 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1239 PredDL.isSameSourceLocation(DL)) {
1240 PTI->setDebugLoc(DL);
1241 RemapSourceAtom(PTI, VMap);
1242 }
1243 }
1244}
1245
// Folds the value-equality terminator TI of BB into the comparison terminator
// PTI of its predecessor Pred, producing a single merged switch in Pred and
// merging/rescaling !prof branch weights where both sides have them.
1246bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1247 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1248 BasicBlock *BB = TI->getParent();
1249 BasicBlock *Pred = PTI->getParent();
1250
// NOTE(review): a declaration on original line 1251 (the "Updates" container
// used with DTU below) was lost in extraction.
1252
1253 // Figure out which 'cases' to copy from SI to PSI.
1254 std::vector<ValueEqualityComparisonCase> BBCases;
1255 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1256
1257 std::vector<ValueEqualityComparisonCase> PredCases;
1258 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1259
1260 // Based on whether the default edge from PTI goes to BB or not, fill in
1261 // PredCases and PredDefault with the new switch cases we would like to
1262 // build.
1263 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1264
1265 // Update the branch weight metadata along the way
1266 SmallVector<uint64_t, 8> Weights;
1267 bool PredHasWeights = hasBranchWeightMD(*PTI);
1268 bool SuccHasWeights = hasBranchWeightMD(*TI);
1269
1270 if (PredHasWeights) {
1271 getBranchWeights(PTI, Weights);
1272 // branch-weight metadata is inconsistent here.
1273 if (Weights.size() != 1 + PredCases.size())
1274 PredHasWeights = SuccHasWeights = false;
1275 } else if (SuccHasWeights)
1276 // If there are no predecessor weights but there are successor weights,
1277 // populate Weights with 1, which will later be scaled to the sum of
1278 // successor's weights
1279 Weights.assign(1 + PredCases.size(), 1);
1280
1281 SmallVector<uint64_t, 8> SuccWeights;
1282 if (SuccHasWeights) {
1283 getBranchWeights(TI, SuccWeights);
1284 // branch-weight metadata is inconsistent here.
1285 if (SuccWeights.size() != 1 + BBCases.size())
1286 PredHasWeights = SuccHasWeights = false;
1287 } else if (PredHasWeights)
1288 SuccWeights.assign(1 + BBCases.size(), 1);
1289
1290 if (PredDefault == BB) {
1291 // If this is the default destination from PTI, only the edges in TI
1292 // that don't occur in PTI, or that branch to BB will be activated.
1293 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1294 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1295 if (PredCases[i].Dest != BB)
1296 PTIHandled.insert(PredCases[i].Value);
1297 else {
1298 // The default destination is BB, we don't need explicit targets.
// Swap-with-last + pop erases case i in O(1); i/e are rewound to re-examine
// the element that was swapped into slot i.
1299 std::swap(PredCases[i], PredCases.back());
1300
1301 if (PredHasWeights || SuccHasWeights) {
1302 // Increase weight for the default case.
1303 Weights[0] += Weights[i + 1];
1304 std::swap(Weights[i + 1], Weights.back());
1305 Weights.pop_back();
1306 }
1307
1308 PredCases.pop_back();
1309 --i;
1310 --e;
1311 }
1312
1313 // Reconstruct the new switch statement we will be building.
1314 if (PredDefault != BBDefault) {
1315 PredDefault->removePredecessor(Pred);
1316 if (DTU && PredDefault != BB)
1317 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1318 PredDefault = BBDefault;
1319 ++NewSuccessors[BBDefault];
1320 }
1321
1322 unsigned CasesFromPred = Weights.size();
1323 uint64_t ValidTotalSuccWeight = 0;
1324 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1325 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1326 PredCases.push_back(BBCases[i]);
1327 ++NewSuccessors[BBCases[i].Dest];
1328 if (SuccHasWeights || PredHasWeights) {
1329 // The default weight is at index 0, so weight for the ith case
1330 // should be at index i+1. Scale the cases from successor by
1331 // PredDefaultWeight (Weights[0]).
1332 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1333 ValidTotalSuccWeight += SuccWeights[i + 1];
1334 }
1335 }
1336
1337 if (SuccHasWeights || PredHasWeights) {
1338 ValidTotalSuccWeight += SuccWeights[0];
1339 // Scale the cases from predecessor by ValidTotalSuccWeight.
1340 for (unsigned i = 1; i < CasesFromPred; ++i)
1341 Weights[i] *= ValidTotalSuccWeight;
1342 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1343 Weights[0] *= SuccWeights[0];
1344 }
1345 } else {
1346 // If this is not the default destination from PSI, only the edges
1347 // in SI that occur in PSI with a destination of BB will be
1348 // activated.
1349 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1350 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1351 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1352 if (PredCases[i].Dest == BB) {
1353 PTIHandled.insert(PredCases[i].Value);
1354
1355 if (PredHasWeights || SuccHasWeights) {
1356 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1357 std::swap(Weights[i + 1], Weights.back());
1358 Weights.pop_back();
1359 }
1360
1361 std::swap(PredCases[i], PredCases.back());
1362 PredCases.pop_back();
1363 --i;
1364 --e;
1365 }
1366
1367 // Okay, now we know which constants were sent to BB from the
1368 // predecessor. Figure out where they will all go now.
1369 for (const ValueEqualityComparisonCase &Case : BBCases)
1370 if (PTIHandled.count(Case.Value)) {
1371 // If this is one we are capable of getting...
1372 if (PredHasWeights || SuccHasWeights)
1373 Weights.push_back(WeightsForHandled[Case.Value]);
1374 PredCases.push_back(Case);
1375 ++NewSuccessors[Case.Dest];
1376 PTIHandled.erase(Case.Value); // This constant is taken care of
1377 }
1378
1379 // If there are any constants vectored to BB that TI doesn't handle,
1380 // they must go to the default destination of TI.
1381 for (ConstantInt *I : PTIHandled) {
1382 if (PredHasWeights || SuccHasWeights)
1383 Weights.push_back(WeightsForHandled[I]);
1384 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1385 ++NewSuccessors[BBDefault];
1386 }
1387 }
1388
1389 // Okay, at this point, we know which new successor Pred will get. Make
1390 // sure we update the number of entries in the PHI nodes for these
1391 // successors.
1392 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1393 if (DTU) {
1394 SuccsOfPred = {llvm::from_range, successors(Pred)};
1395 Updates.reserve(Updates.size() + NewSuccessors.size());
1396 }
1397 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1398 NewSuccessors) {
// Add one PHI entry per case edge that now reaches this successor from Pred.
1399 for (auto I : seq(NewSuccessor.second)) {
1400 (void)I;
1401 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1402 }
1403 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1404 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1405 }
1406
1407 Builder.SetInsertPoint(PTI);
1408 // Convert pointer to int before we switch.
1409 if (CV->getType()->isPointerTy()) {
1410 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1411 "Should not end up here with unstable pointers");
// CV is an in/out reference parameter: the caller keeps comparing against
// the converted value on later iterations.
1412 CV =
1413 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1414 }
1415
1416 // Now that the successors are updated, create the new Switch instruction.
1417 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1418 NewSI->setDebugLoc(PTI->getDebugLoc());
1419 for (ValueEqualityComparisonCase &V : PredCases)
1420 NewSI->addCase(V.Value, V.Dest);
1421
1422 if (PredHasWeights || SuccHasWeights)
1423 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1424 /*ElideAllZero=*/true);
1425
// NOTE(review): a statement on original line 1426 (presumably erasing PTI and
// DCE'ing its condition) was lost in extraction.
1427
1428 // Okay, last check. If BB is still a successor of PSI, then we must
1429 // have an infinite loop case. If so, add an infinitely looping block
1430 // to handle the case to preserve the behavior of the code.
1431 BasicBlock *InfLoopBlock = nullptr;
1432 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1433 if (NewSI->getSuccessor(i) == BB) {
1434 if (!InfLoopBlock) {
1435 // Insert it at the end of the function, because it's either code,
1436 // or it won't matter if it's hot. :)
1437 InfLoopBlock =
1438 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1439 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1440 if (DTU)
1441 Updates.push_back(
1442 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1443 }
1444 NewSI->setSuccessor(i, InfLoopBlock);
1445 }
1446
1447 if (DTU) {
1448 if (InfLoopBlock)
1449 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1450
1451 Updates.push_back({DominatorTree::Delete, Pred, BB});
1452
1453 DTU->applyUpdates(Updates);
1454 }
1455
1456 ++NumFoldValueComparisonIntoPredecessors;
1457 return true;
1458}
1459
1460/// The specified terminator is a value equality comparison instruction
1461/// (either a switch or a branch on "X == c").
1462/// See if any of the predecessors of the terminator block are value comparisons
1463/// on the same value. If so, and if safe to do so, fold them together.
1464bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1465 IRBuilder<> &Builder) {
1466 BasicBlock *BB = TI->getParent();
1467 Value *CV = isValueEqualityComparison(TI); // CondVal
1468 assert(CV && "Not a comparison?");
1469
1470 bool Changed = false;
1471
1472 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1473 while (!Preds.empty()) {
1474 BasicBlock *Pred = Preds.pop_back_val();
1475 Instruction *PTI = Pred->getTerminator();
1476
1477 // Don't try to fold into itself.
1478 if (Pred == BB)
1479 continue;
1480
1481 // See if the predecessor is a comparison with the same value.
1482 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1483 if (PCV != CV)
1484 continue;
1485
1486 SmallSetVector<BasicBlock *, 4> FailBlocks;
1487 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1488 for (auto *Succ : FailBlocks) {
1489 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1490 return false;
1491 }
1492 }
1493
1494 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1495 Changed = true;
1496 }
1497 return Changed;
1498}
1499
1500// If we would need to insert a select that uses the value of this invoke
1501// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1502// need to do this), we can't hoist the invoke, as there is nowhere to put the
1503// select in this case.
// NOTE(review): the signature line (original 1504, presumably "static bool
// isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,") was lost in
// extraction.
1505 Instruction *I1, Instruction *I2) {
1506 for (BasicBlock *Succ : successors(BB1)) {
1507 for (const PHINode &PN : Succ->phis()) {
1508 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1509 Value *BB2V = PN.getIncomingValueForBlock(BB2);
// Unsafe only when a successor PHI distinguishes the two blocks AND one of
// the differing incoming values is the invoke itself.
1510 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1511 return false;
1512 }
1513 }
1514 }
1515 return true;
1516}
1517
1518// Get interesting characteristics of instructions that
1519// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1520// instructions can be reordered across.
// NOTE(review): the enum defining SkipReadMem / SkipSideEffect /
// SkipImplicitControlFlow (original lines 1521-1525) and the function
// signature (original 1527) were lost in extraction.
1526
1528 unsigned Flags = 0;
1529 if (I->mayReadFromMemory())
1530 Flags |= SkipReadMem;
1531 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1532 // inalloca) across stacksave/stackrestore boundaries.
1533 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1534 Flags |= SkipSideEffect;
// NOTE(review): the guarding condition on original line 1535 (presumably a
// check that I is not guaranteed to transfer execution to its successor) was
// lost in extraction.
1536 Flags |= SkipImplicitControlFlow;
1537 return Flags;
1538}
1539
1540// Returns true if it is safe to reorder an instruction across preceding
1541// instructions in a basic block.
1542static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
// Flags describes what was seen between I and the hoist point (see
// skippedInstrFlags above).
1543 // Don't reorder a store over a load.
1544 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1545 return false;
1546
1547 // If we have seen an instruction with side effects, it's unsafe to reorder an
1548 // instruction which reads memory or itself has side effects.
1549 if ((Flags & SkipSideEffect) &&
1550 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1551 return false;
1552
1553 // Reordering across an instruction which does not necessarily transfer
1554 // control to the next instruction is speculation.
// NOTE(review): the condition on original line 1555 (presumably testing
// SkipImplicitControlFlow together with whether I may be speculated) was lost
// in extraction.
1556 return false;
1557
1558 // Hoisting of llvm.deoptimize is only legal together with the next return
1559 // instruction, which this pass is not always able to do.
1560 if (auto *CB = dyn_cast<CallBase>(I))
1561 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1562 return false;
1563
1564 // It's also unsafe/illegal to hoist an instruction above its instruction
1565 // operands
1566 BasicBlock *BB = I->getParent();
1567 for (Value *Op : I->operands()) {
1568 if (auto *J = dyn_cast<Instruction>(Op))
1569 if (J->getParent() == BB)
1570 return false;
1571 }
1572
1573 return true;
1574}
1575
1576static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1577
1578/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1579/// instructions \p I1 and \p I2 can and should be hoisted.
// NOTE(review): the signature line (original 1580, presumably "static bool
// shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,") was lost
// in extraction.
1581 const TargetTransformInfo &TTI) {
1582 // If we're going to hoist a call, make sure that the two instructions
1583 // we're commoning/hoisting are both marked with musttail, or neither of
1584 // them is marked as such. Otherwise, we might end up in a situation where
1585 // we hoist from a block where the terminator is a `ret` to a block where
1586 // the terminator is a `br`, and `musttail` calls expect to be followed by
1587 // a return.
1588 auto *C1 = dyn_cast<CallInst>(I1);
1589 auto *C2 = dyn_cast<CallInst>(I2);
1590 if (C1 && C2)
1591 if (C1->isMustTailCall() != C2->isMustTailCall())
1592 return false;
1593
// Let the target veto hoisting (e.g. when it would lengthen live ranges of
// expensive-to-rematerialize values).
1594 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1595 return false;
1596
1597 // If any of the two call sites has nomerge or convergent attribute, stop
1598 // hoisting.
1599 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1600 if (CB1->cannotMerge() || CB1->isConvergent())
1601 return false;
1602 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1603 if (CB2->cannotMerge() || CB2->isConvergent())
1604 return false;
1605
1606 return true;
1607}
1608
1609/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1610/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1611/// hoistCommonCodeFromSuccessors. e.g. The input:
1612/// I1 DVRs: { x, z },
1613/// OtherInsts: { I2 DVRs: { x, y, z } }
1614/// would result in hoisting only DbgVariableRecord x.
// NOTE(review): the signature line (original 1615, presumably "static void
// hoistLockstepIdenticalDbgVariableRecords(") was lost in extraction.
1616 Instruction *TI, Instruction *I1,
1617 SmallVectorImpl<Instruction *> &OtherInsts) {
1618 if (!I1->hasDbgRecords())
1619 return;
1620 using CurrentAndEndIt =
1621 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1622 // Vector of {Current, End} iterators.
// NOTE(review): the declaration of Itrs (original line 1623, presumably
// "SmallVector<CurrentAndEndIt> Itrs;") was lost in extraction.
1624 Itrs.reserve(OtherInsts.size() + 1);
1625 // Helper lambdas for lock-step checks:
1626 // Return true if this Current == End.
1627 auto atEnd = [](const CurrentAndEndIt &Pair) {
1628 return Pair.first == Pair.second;
1629 };
1630 // Return true if all Current are identical.
1631 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1632 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
// NOTE(review): the inner lambda's header line (original 1633) was lost in
// extraction; the body compares each Current record to the first one.
1634 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1635 });
1636 };
1637
1638 // Collect the iterators.
1639 Itrs.push_back(
1640 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
// If any instruction has no records at all, nothing can match in lock-step.
1641 for (Instruction *Other : OtherInsts) {
1642 if (!Other->hasDbgRecords())
1643 return;
1644 Itrs.push_back(
1645 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1646 }
1647
1648 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1649 // the lock-step DbgRecord are identical, hoist all of them to TI.
1650 // This replicates the dbg.* intrinsic behaviour in
1651 // hoistCommonCodeFromSuccessors.
1652 while (none_of(Itrs, atEnd)) {
1653 bool HoistDVRs = allIdentical(Itrs);
1654 for (CurrentAndEndIt &Pair : Itrs) {
1655 // Increment Current iterator now as we may be about to move the
1656 // DbgRecord.
1657 DbgRecord &DR = *Pair.first++;
1658 if (HoistDVRs) {
1659 DR.removeFromParent();
1660 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1661 }
1662 }
1663 }
1664}
1665
// Returns true if I1 and I2 perform the same operation, allowing for a swap
// of the two operands of a commutative operation or a swapped-predicate
// compare.
// NOTE(review): the signature line (original 1666, presumably "static bool
// areIdenticalUpToCommutativity(const Instruction *I1,") was lost in
// extraction.
1667 const Instruction *I2) {
1668 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1669 return true;
1670
// "x < y" is the same comparison as "y > x": match swapped predicate with
// swapped operands.
1671 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1672 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1673 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1674 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1675 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1676
// For a commutative op, the first two operands may be exchanged, but any
// remaining operands must match exactly.
1677 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1678 return I1->getOperand(0) == I2->getOperand(1) &&
1679 I1->getOperand(1) == I2->getOperand(0) &&
1680 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1681 }
1682
1683 return false;
1684}
1685
1686/// If the target supports conditional faulting,
1687/// we look for the following pattern:
1688/// \code
1689/// BB:
1690/// ...
1691/// %cond = icmp ult %x, %y
1692/// br i1 %cond, label %TrueBB, label %FalseBB
1693/// FalseBB:
1694/// store i32 1, ptr %q, align 4
1695/// ...
1696/// TrueBB:
1697/// %maskedloadstore = load i32, ptr %b, align 4
1698/// store i32 %maskedloadstore, ptr %p, align 4
1699/// ...
1700/// \endcode
1701///
1702/// and transform it into:
1703///
1704/// \code
1705/// BB:
1706/// ...
1707/// %cond = icmp ult %x, %y
1708/// %maskedloadstore = cload i32, ptr %b, %cond
1709/// cstore i32 %maskedloadstore, ptr %p, %cond
1710/// cstore i32 1, ptr %q, ~%cond
1711/// br i1 %cond, label %TrueBB, label %FalseBB
1712/// FalseBB:
1713/// ...
1714/// TrueBB:
1715/// ...
1716/// \endcode
1717///
1718/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1719/// e.g.
1720///
1721/// \code
1722/// %vcond = bitcast i1 %cond to <1 x i1>
1723/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1724/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1725/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1726/// call void @llvm.masked.store.v1i32.p0
1727/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1728/// %cond.not = xor i1 %cond, true
1729/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1730/// call void @llvm.masked.store.v1i32.p0
1731/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1732/// \endcode
1733///
1734/// So we need to turn hoisted load/store into cload/cstore.
1735///
1736/// \param BI The branch instruction.
1737/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1738/// will be speculated.
1739/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
/// \param Sel If non-null, used as the insertion point for building the mask
///            instead of the last speculated load/store (see the IRBuilder
///            construction below) — presumably a select created for the
///            hoisted PHI; TODO confirm against the caller.
// NOTE(review): the declaration line itself (source line 1740) is missing from
// this extraction — per the doc comment this is hoistConditionalLoadsStores;
// verify the exact signature against upstream LLVM before editing.
1741 BranchInst *BI,
1742 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1743 std::optional<bool> Invert, Instruction *Sel) {
1744 auto &Context = BI->getParent()->getContext();
// The masked load/store intrinsics take a <1 x i1> mask; it is built from the
// scalar i1 branch condition via bitcast.
1745 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1746 auto *Cond = BI->getOperand(0);
1747 // Construct the condition if needed.
1748 BasicBlock *BB = BI->getParent();
1749 Value *Mask = nullptr;
1750 Value *MaskFalse = nullptr;
1751 Value *MaskTrue = nullptr;
// Triangle CFG (Invert has a value): a single mask, negated when speculating
// the false successor. Otherwise: one mask per successor, chosen per
// instruction in the loop below.
1752 if (Invert.has_value()) {
1753 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1754 Mask = Builder.CreateBitCast(
1755 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1756 VCondTy);
1757 } else {
1758 IRBuilder<> Builder(BI);
1759 MaskFalse = Builder.CreateBitCast(
1760 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1761 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1762 }
1763 auto PeekThroughBitcasts = [](Value *V) {
1764 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1765 V = BitCast->getOperand(0);
1766 return V;
1767 };
1768 for (auto *I : SpeculatedConditionalLoadsStores) {
1769 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1770 if (!Invert.has_value())
1771 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1772 // We currently assume conditional faulting load/store is supported for
1773 // scalar types only when creating new instructions. This can be easily
1774 // extended for vector types in the future.
1775 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1776 auto *Op0 = I->getOperand(0);
1777 CallInst *MaskedLoadStore = nullptr;
1778 if (auto *LI = dyn_cast<LoadInst>(I)) {
1779 // Handle Load.
1780 auto *Ty = I->getType();
1781 PHINode *PN = nullptr;
1782 Value *PassThru = nullptr;
// In the triangle case the load result may feed a PHI in the join block; that
// PHI's incoming value from BB becomes the masked load's pass-through value.
1783 if (Invert.has_value())
1784 for (User *U : I->users()) {
1785 if ((PN = dyn_cast<PHINode>(U))) {
1786 PassThru = Builder.CreateBitCast(
1787 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1788 FixedVectorType::get(Ty, 1));
1789 } else if (auto *Ins = cast<Instruction>(U);
1790 Sel && Ins->getParent() == BB) {
1791 // This happens when store or/and a speculative instruction between
1792 // load and store were hoisted to the BB. Make sure the masked load
1793 // inserted before its use.
1794 // We assume there's one of such use.
1795 Builder.SetInsertPoint(Ins);
1796 }
1797 }
1798 MaskedLoadStore = Builder.CreateMaskedLoad(
1799 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1800 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1801 if (PN)
1802 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1803 I->replaceAllUsesWith(NewLoadStore);
1804 } else {
1805 // Handle Store.
1806 auto *StoredVal = Builder.CreateBitCast(
1807 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1808 MaskedLoadStore = Builder.CreateMaskedStore(
1809 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1810 }
1811 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1812 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1813 //
1814 // !nonnull, !align : Not support pointer type, no need to keep.
1815 // !range: Load type is changed from scalar to vector, but the metadata on
1816 // vector specifies a per-element range, so the semantics stay the
1817 // same. Keep it.
1818 // !annotation: Not impact semantics. Keep it.
1819 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1820 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges))
1821 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1822 // FIXME: DIAssignID is not supported for masked store yet.
1823 // (Verifier::visitDIAssignIDMetadata)
// NOTE(review): source line 1824 is missing from this extraction; verify
// against upstream whether code or comment text was dropped here.
1825 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1826 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1827 });
1828 MaskedLoadStore->copyMetadata(*I);
1829 I->eraseFromParent();
1830 }
1831}
1832
// NOTE(review): the first signature line (source line 1833) is missing from
// this extraction; the function takes the instruction under test (referenced
// below as I) plus the TTI — verify the exact name/signature against upstream.
// Returns true if I is a simple (non-volatile, non-atomic) load/store that
// the target can execute as a conditional-faulting (masked) access and that
// the relevant HoistLoadsWithCondFaulting / HoistStoresWithCondFaulting
// options allow hoisting.
1834 const TargetTransformInfo &TTI) {
1835 // Not handle volatile or atomic.
1836 bool IsStore = false;
1837 if (auto *L = dyn_cast<LoadInst>(I)) {
1838 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1839 return false;
1840 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1841 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1842 return false;
1843 IsStore = true;
1844 } else
1845 return false;
1846
1847 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1848 // That's why we have the alignment limitation.
1849 // FIXME: Update the prototype of the intrinsics?
1850 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
// NOTE(review): source line 1851 — the second operand of this && (per the
// comment above, an alignment-limit check) — is missing from this extraction;
// verify against upstream.
1852}
1853
1854/// Hoist any common code in the successor blocks up into the block. This
1855/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1856/// given, only perform hoisting in case all successors blocks contain matching
1857/// instructions only. In that case, all instructions can be hoisted and the
1858/// original branch will be replaced and selects for PHIs are added.
/// \param TI The terminator of the block whose successors are scanned.
/// \param AllInstsEqOnly Require every successor to consist solely of
///        matching instructions before hoisting anything.
/// \returns true if any instruction was hoisted (IR changed).
1859bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1860 bool AllInstsEqOnly) {
1861 // This does very trivial matching, with limited scanning, to find identical
1862 // instructions in the two blocks. In particular, we don't want to get into
1863 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1864 // such, we currently just scan for obviously identical instructions in an
1865 // identical order, possibly separated by the same number of non-identical
1866 // instructions.
1867 BasicBlock *BB = TI->getParent();
1868 unsigned int SuccSize = succ_size(BB);
1869 if (SuccSize < 2)
1870 return false;
1871
1872 // If either of the blocks has it's address taken, then we can't do this fold,
1873 // because the code we'd hoist would no longer run when we jump into the block
1874 // by it's address.
1875 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1876 for (auto *Succ : UniqueSuccessors) {
1877 if (Succ->hasAddressTaken())
1878 return false;
1879 // Use getUniquePredecessor instead of getSinglePredecessor to support
1880 // multi-cases successors in switch.
1881 if (Succ->getUniquePredecessor())
1882 continue;
1883 // If Succ has >1 predecessors, continue to check if the Succ contains only
1884 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1885 // can relax the condition based on the assumptiom that the program would
1886 // never enter Succ and trigger such an UB.
1887 if (isa<UnreachableInst>(*Succ->begin()))
1888 continue;
1889 return false;
1890 }
1891 // The second of pair is a SkipFlags bitmask.
1892 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1893 SmallVector<SuccIterPair, 8> SuccIterPairs;
1894 for (auto *Succ : UniqueSuccessors) {
1895 BasicBlock::iterator SuccItr = Succ->begin();
1896 if (isa<PHINode>(*SuccItr))
1897 return false;
1898 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1899 }
1900
1901 if (AllInstsEqOnly) {
1902 // Check if all instructions in the successor blocks match. This allows
1903 // hoisting all instructions and removing the blocks we are hoisting from,
1904 // so does not add any new instructions.
1905
1906 // Check if sizes and terminators of all successors match.
1907 bool AllSame =
1908 none_of(UniqueSuccessors, [&UniqueSuccessors](BasicBlock *Succ) {
1909 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1910 Instruction *Term = Succ->getTerminator();
1911 return !Term->isSameOperationAs(Term0) ||
1912 !equal(Term->operands(), Term0->operands()) ||
1913 UniqueSuccessors[0]->size() != Succ->size();
1914 });
1915 if (!AllSame)
1916 return false;
1917 if (AllSame) {
1918 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1919 while (LRI.isValid()) {
1920 Instruction *I0 = (*LRI)[0];
1921 if (any_of(*LRI, [I0](Instruction *I) {
1922 return !areIdenticalUpToCommutativity(I0, I);
1923 })) {
1924 return false;
1925 }
1926 --LRI;
1927 }
1928 }
1929 // Now we know that all instructions in all successors can be hoisted. Let
1930 // the loop below handle the hoisting.
1931 }
1932
1933 // Count how many instructions were not hoisted so far. There's a limit on how
1934 // many instructions we skip, serving as a compilation time control as well as
1935 // preventing excessive increase of life ranges.
1936 unsigned NumSkipped = 0;
1937 // If we find an unreachable instruction at the beginning of a basic block, we
1938 // can still hoist instructions from the rest of the basic blocks.
1939 if (SuccIterPairs.size() > 2) {
1940 erase_if(SuccIterPairs,
1941 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1942 if (SuccIterPairs.size() < 2)
1943 return false;
1944 }
1945
1946 bool Changed = false;
1947
// Main lockstep scan: walk all successors in parallel from their first
// instruction, hoisting identical instructions and skipping (up to a limit)
// mismatched pairs.
1948 for (;;) {
1949 auto *SuccIterPairBegin = SuccIterPairs.begin();
1950 auto &BB1ItrPair = *SuccIterPairBegin++;
1951 auto OtherSuccIterPairRange =
1952 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1953 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1954
1955 Instruction *I1 = &*BB1ItrPair.first;
1956
1957 bool AllInstsAreIdentical = true;
1958 bool HasTerminator = I1->isTerminator();
1959 for (auto &SuccIter : OtherSuccIterRange) {
1960 Instruction *I2 = &*SuccIter;
1961 HasTerminator |= I2->isTerminator();
1962 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1963 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1964 AllInstsAreIdentical = false;
1965 }
1966
1967 SmallVector<Instruction *, 8> OtherInsts;
1968 for (auto &SuccIter : OtherSuccIterRange)
1969 OtherInsts.push_back(&*SuccIter);
1970
1971 // If we are hoisting the terminator instruction, don't move one (making a
1972 // broken BB), instead clone it, and remove BI.
1973 if (HasTerminator) {
1974 // Even if BB, which contains only one unreachable instruction, is ignored
1975 // at the beginning of the loop, we can hoist the terminator instruction.
1976 // If any instructions remain in the block, we cannot hoist terminators.
1977 if (NumSkipped || !AllInstsAreIdentical) {
1978 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1979 return Changed;
1980 }
1981
1982 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1983 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1984 Changed;
1985 }
1986
1987 if (AllInstsAreIdentical) {
1988 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1989 AllInstsAreIdentical =
1990 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1991 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1992 Instruction *I2 = &*Pair.first;
1993 unsigned SkipFlagsBB2 = Pair.second;
1994 // Even if the instructions are identical, it may not
1995 // be safe to hoist them if we have skipped over
1996 // instructions with side effects or their operands
1997 // weren't hoisted.
1998 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
// NOTE(review): source line 1999 — the second operand of this && — is missing
// from this extraction; verify against upstream.
2000 });
2001 }
2002
2003 if (AllInstsAreIdentical) {
2004 BB1ItrPair.first++;
2005 // For a normal instruction, we just move one to right before the
2006 // branch, then replace all uses of the other with the first. Finally,
2007 // we remove the now redundant second instruction.
2008 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2009 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2010 // and leave any that were not hoisted behind (by calling moveBefore
2011 // rather than moveBeforePreserving).
2012 I1->moveBefore(TI->getIterator());
2013 for (auto &SuccIter : OtherSuccIterRange) {
2014 Instruction *I2 = &*SuccIter++;
2015 assert(I2 != I1);
2016 if (!I2->use_empty())
2017 I2->replaceAllUsesWith(I1);
2018 I1->andIRFlags(I2);
2019 if (auto *CB = dyn_cast<CallBase>(I1)) {
2020 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2021 assert(Success && "We should not be trying to hoist callbases "
2022 "with non-intersectable attributes");
2023 // For NDEBUG Compile.
2024 (void)Success;
2025 }
2026
2027 combineMetadataForCSE(I1, I2, true);
2028 // I1 and I2 are being combined into a single instruction. Its debug
2029 // location is the merged locations of the original instructions.
2030 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2031 I2->eraseFromParent();
2032 }
2033 if (!Changed)
2034 NumHoistCommonCode += SuccIterPairs.size();
2035 Changed = true;
2036 NumHoistCommonInstrs += SuccIterPairs.size();
2037 } else {
2038 if (NumSkipped >= HoistCommonSkipLimit) {
2039 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2040 return Changed;
2041 }
2042 // We are about to skip over a pair of non-identical instructions. Record
2043 // if any have characteristics that would prevent reordering instructions
2044 // across them.
2045 for (auto &SuccIterPair : SuccIterPairs) {
2046 Instruction *I = &*SuccIterPair.first++;
2047 SuccIterPair.second |= skippedInstrFlags(I);
2048 }
2049 ++NumSkipped;
2050 }
2051 }
2052}
2053
// Hoist the identical terminators I1/OtherSuccTIs of TI's successors up into
// TI's block: clone one terminator before TI, RAUW the originals, insert
// selects for any PHIs whose incoming values disagree (two-way branch case
// only), and rewire CFG/DomTree edges. Returns true if the IR changed.
2054bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2055 Instruction *TI, Instruction *I1,
2056 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2057 ArrayRef<BasicBlock *> UniqueSuccessors) {
2058
2059 auto *BI = dyn_cast<BranchInst>(TI);
2060
2061 bool Changed = false;
2062 BasicBlock *TIParent = TI->getParent();
2063 BasicBlock *BB1 = I1->getParent();
2064
2065 // Use only for an if statement.
2066 auto *I2 = *OtherSuccTIs.begin();
2067 auto *BB2 = I2->getParent();
2068 if (BI) {
2069 assert(OtherSuccTIs.size() == 1);
2070 assert(BI->getSuccessor(0) == I1->getParent());
2071 assert(BI->getSuccessor(1) == I2->getParent());
2072 }
2073
2074 // In the case of an if statement, we try to hoist an invoke.
2075 // FIXME: Can we define a safety predicate for CallBr?
2076 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2077 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2078 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2079 return false;
2080
2081 // TODO: callbr hoisting currently disabled pending further study.
2082 if (isa<CallBrInst>(I1))
2083 return false;
2084
2085 for (BasicBlock *Succ : successors(BB1)) {
2086 for (PHINode &PN : Succ->phis()) {
2087 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2088 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2089 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2090 if (BB1V == BB2V)
2091 continue;
2092
2093 // In the case of an if statement, check for
2094 // passingValueIsAlwaysUndefined here because we would rather eliminate
2095 // undefined control flow then converting it to a select.
2096 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
// NOTE(review): source line 2097 — the remaining operand(s) of this || — is
// missing from this extraction; verify against upstream.
2098 return false;
2099 }
2100 }
2101 }
2102
2103 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2104 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2105 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2106 // Clone the terminator and hoist it into the pred, without any debug info.
2107 Instruction *NT = I1->clone();
2108 NT->insertInto(TIParent, TI->getIterator());
2109 if (!NT->getType()->isVoidTy()) {
2110 I1->replaceAllUsesWith(NT);
2111 for (Instruction *OtherSuccTI : OtherSuccTIs)
2112 OtherSuccTI->replaceAllUsesWith(NT);
2113 NT->takeName(I1);
2114 }
2115 Changed = true;
2116 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2117
2118 // Ensure terminator gets a debug location, even an unknown one, in case
2119 // it involves inlinable calls.
// NOTE(review): source line 2120 — presumably the declaration of the Locs
// container used below — is missing from this extraction; verify upstream.
2121 Locs.push_back(I1->getDebugLoc());
2122 for (auto *OtherSuccTI : OtherSuccTIs)
2123 Locs.push_back(OtherSuccTI->getDebugLoc());
2124 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2125
2126 // PHIs created below will adopt NT's merged DebugLoc.
2127 IRBuilder<NoFolder> Builder(NT);
2128
2129 // In the case of an if statement, hoisting one of the terminators from our
2130 // successor is a great thing. Unfortunately, the successors of the if/else
2131 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2132 // must agree for all PHI nodes, so we insert select instruction to compute
2133 // the final result.
2134 if (BI) {
// InsertedSelects caches one select per distinct (BB1V, BB2V) pair so PHIs
// that disagree on the same value pair share a single select.
2135 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2136 for (BasicBlock *Succ : successors(BB1)) {
2137 for (PHINode &PN : Succ->phis()) {
2138 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2139 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2140 if (BB1V == BB2V)
2141 continue;
2142
2143 // These values do not agree. Insert a select instruction before NT
2144 // that determines the right value.
2145 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2146 if (!SI) {
2147 // Propagate fast-math-flags from phi node to its replacement select.
// NOTE(review): source line 2148 — the start of the select-creating call whose
// arguments follow — is missing from this extraction; verify upstream.
2149 BI->getCondition(), BB1V, BB2V,
2150 isa<FPMathOperator>(PN) ? &PN : nullptr,
2151 BB1V->getName() + "." + BB2V->getName(), BI));
2152 }
2153
2154 // Make the PHI node use the select for all incoming values for BB1/BB2
2155 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2156 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2157 PN.setIncomingValue(i, SI);
2158 }
2159 }
2160 }
2161
// NOTE(review): source line 2162 — presumably the declaration of the Updates
// container used below — is missing from this extraction; verify upstream.
2163
2164 // Update any PHI nodes in our new successors.
2165 for (BasicBlock *Succ : successors(BB1)) {
2166 addPredecessorToBlock(Succ, TIParent, BB1);
2167 if (DTU)
2168 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2169 }
2170
2171 if (DTU) {
2172 // TI might be a switch with multi-cases destination, so we need to care for
2173 // the duplication of successors.
2174 for (BasicBlock *Succ : UniqueSuccessors)
2175 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2176 }
2177
// NOTE(review): source line 2178 — presumably the statement removing/erasing
// the original terminator TI — is missing from this extraction; verify
// upstream.
2179 if (DTU)
2180 DTU->applyUpdates(Updates);
2181 return Changed;
2182}
2183
2184// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2185// into variables.
// Returns true if replacing operand OpIdx of I with a variable (e.g. a PHI of
// differing constants) is not expected to pessimize codegen.
// NOTE(review): the signature line (source line 2186) is missing from this
// extraction — the function takes the instruction I and the operand index;
// verify the exact name/signature against upstream.
2187 int OpIdx) {
2188 // Divide/Remainder by constant is typically much cheaper than by variable.
2189 if (I->isIntDivRem())
2190 return OpIdx != 1;
2191 return !isa<IntrinsicInst>(I);
2192}
2193
2194// All instructions in Insts belong to different blocks that all unconditionally
2195// branch to a common successor. Analyze each instruction and return true if it
2196// would be possible to sink them into their successor, creating one common
2197// instruction instead. For every value that would be required to be provided by
2198// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the signature lines (source lines 2199-2200) are missing from
// this extraction — the function takes the lockstep instruction group Insts
// plus the PHIOperands out-map; verify the exact signature against upstream.
2201 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2202 // Prune out obviously bad instructions to move. Each instruction must have
2203 // the same number of uses, and we check later that the uses are consistent.
2204 std::optional<unsigned> NumUses;
2205 for (auto *I : Insts) {
2206 // These instructions may change or break semantics if moved.
2207 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2208 I->getType()->isTokenTy())
2209 return false;
2210
2211 // Do not try to sink an instruction in an infinite loop - it can cause
2212 // this algorithm to infinite loop.
2213 if (I->getParent()->getSingleSuccessor() == I->getParent())
2214 return false;
2215
2216 // Conservatively return false if I is an inline-asm instruction. Sinking
2217 // and merging inline-asm instructions can potentially create arguments
2218 // that cannot satisfy the inline-asm constraints.
2219 // If the instruction has nomerge or convergent attribute, return false.
2220 if (const auto *C = dyn_cast<CallBase>(I))
2221 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2222 return false;
2223
2224 if (!NumUses)
2225 NumUses = I->getNumUses();
2226 else if (NumUses != I->getNumUses())
2227 return false;
2228 }
2229
2230 const Instruction *I0 = Insts.front();
2231 const auto I0MMRA = MMRAMetadata(*I0);
2232 for (auto *I : Insts) {
2233 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2234 return false;
2235
2236 // Treat MMRAs conservatively. This pass can be quite aggressive and
2237 // could drop a lot of MMRAs otherwise.
2238 if (MMRAMetadata(*I) != I0MMRA)
2239 return false;
2240 }
2241
2242 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2243 // then the other phi operands must match the instructions from Insts. This
2244 // also has to hold true for any phi nodes that would be created as a result
2245 // of sinking. Both of these cases are represented by PhiOperands.
2246 for (const Use &U : I0->uses()) {
2247 auto It = PHIOperands.find(&U);
2248 if (It == PHIOperands.end())
2249 // There may be uses in other blocks when sinking into a loop header.
2250 return false;
2251 if (!equal(Insts, It->second))
2252 return false;
2253 }
2254
2255 // For calls to be sinkable, they must all be indirect, or have same callee.
2256 // I.e. if we have two direct calls to different callees, we don't want to
2257 // turn that into an indirect call. Likewise, if we have an indirect call,
2258 // and a direct call, we don't actually want to have a single indirect call.
2259 if (isa<CallBase>(I0)) {
2260 auto IsIndirectCall = [](const Instruction *I) {
2261 return cast<CallBase>(I)->isIndirectCall();
2262 };
2263 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2264 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2265 if (HaveIndirectCalls) {
2266 if (!AllCallsAreIndirect)
2267 return false;
2268 } else {
2269 // All callees must be identical.
2270 Value *Callee = nullptr;
2271 for (const Instruction *I : Insts) {
2272 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2273 if (!Callee)
2274 Callee = CurrCallee;
2275 else if (Callee != CurrCallee)
2276 return false;
2277 }
2278 }
2279 }
2280
// For each operand position that differs across Insts, record the per-block
// values in PHIOperands (a PHI would have to be created to sink).
2281 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2282 Value *Op = I0->getOperand(OI);
2283 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2284 assert(I->getNumOperands() == I0->getNumOperands());
2285 return I->getOperand(OI) == I0->getOperand(OI);
2286 };
2287 if (!all_of(Insts, SameAsI0)) {
// NOTE(review): source lines 2288-2289 are missing from this extraction —
// presumably the condition guarding the 'return false' below (a check that
// the operand cannot be replaced with a variable); verify against upstream.
2290 // We can't create a PHI from this GEP.
2291 return false;
2292 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2293 for (auto *I : Insts)
2294 Ops.push_back(I->getOperand(OI));
2295 }
2296 }
2297 return true;
2298}
2299
2300// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2301// instruction of every block in Blocks to their common successor, commoning
2302// into one instruction.
// NOTE(review): the signature line (source line 2303) is missing from this
// extraction — the function takes the list of predecessor blocks; verify the
// exact name/signature against upstream.
2304 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2305
2306 // canSinkInstructions returning true guarantees that every block has at
2307 // least one non-terminator instruction.
// NOTE(review): source line 2308 — presumably the declaration of the Insts
// container populated below — is missing from this extraction; verify
// upstream.
2309 for (auto *BB : Blocks) {
2310 Instruction *I = BB->getTerminator();
2311 I = I->getPrevNode();
2312 Insts.push_back(I);
2313 }
2314
2315 // We don't need to do any more checking here; canSinkInstructions should
2316 // have done it all for us.
2317 SmallVector<Value*, 4> NewOperands;
2318 Instruction *I0 = Insts.front();
2319 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2320 // This check is different to that in canSinkInstructions. There, we
2321 // cared about the global view once simplifycfg (and instcombine) have
2322 // completed - it takes into account PHIs that become trivially
2323 // simplifiable. However here we need a more local view; if an operand
2324 // differs we create a PHI and rely on instcombine to clean up the very
2325 // small mess we may make.
2326 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2327 return I->getOperand(O) != I0->getOperand(O);
2328 });
2329 if (!NeedPHI) {
2330 NewOperands.push_back(I0->getOperand(O));
2331 continue;
2332 }
2333
2334 // Create a new PHI in the successor block and populate it.
2335 auto *Op = I0->getOperand(O);
2336 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2337 auto *PN =
2338 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2339 PN->insertBefore(BBEnd->begin());
2340 for (auto *I : Insts)
2341 PN->addIncoming(I->getOperand(O), I->getParent());
2342 NewOperands.push_back(PN);
2343 }
2344
2345 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2346 // and move it to the start of the successor block.
2347 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2348 I0->getOperandUse(O).set(NewOperands[O]);
2349
2350 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2351
2352 // Update metadata and IR flags, and merge debug locations.
2353 for (auto *I : Insts)
2354 if (I != I0) {
2355 // The debug location for the "common" instruction is the merged locations
2356 // of all the commoned instructions. We start with the original location
2357 // of the "common" instruction and iteratively merge each location in the
2358 // loop below.
2359 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2360 // However, as N-way merge for CallInst is rare, so we use simplified API
2361 // instead of using complex API for N-way merge.
2362 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2363 combineMetadataForCSE(I0, I, true);
2364 I0->andIRFlags(I);
2365 if (auto *CB = dyn_cast<CallBase>(I0)) {
2366 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2367 assert(Success && "We should not be trying to sink callbases "
2368 "with non-intersectable attributes");
2369 // For NDEBUG Compile.
2370 (void)Success;
2371 }
2372 }
2373
// Any remaining uses of I0 live in PHIs of the sink target; canSinkInstructions
// validated that those PHIs can simply be replaced by the common instruction.
2374 for (User *U : make_early_inc_range(I0->users())) {
2375 // canSinkLastInstruction checked that all instructions are only used by
2376 // phi nodes in a way that allows replacing the phi node with the common
2377 // instruction.
2378 auto *PN = cast<PHINode>(U);
2379 PN->replaceAllUsesWith(I0);
2380 PN->eraseFromParent();
2381 }
2382
2383 // Finally nuke all instructions apart from the common instruction.
2384 for (auto *I : Insts) {
2385 if (I == I0)
2386 continue;
2387 // The remaining uses are debug users, replace those with the common inst.
2388 // In most (all?) cases this just introduces a use-before-def.
2389 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2390 I->replaceAllUsesWith(I0);
2391 I->eraseFromParent();
2392 }
2393}
2394
2395/// Check whether BB's predecessors end with unconditional branches. If it is
2396/// true, sink any common code from the predecessors to BB.
2398 DomTreeUpdater *DTU) {
2399 // We support two situations:
2400 // (1) all incoming arcs are unconditional
2401 // (2) there are non-unconditional incoming arcs
2402 //
2403 // (2) is very common in switch defaults and
2404 // else-if patterns;
2405 //
2406 // if (a) f(1);
2407 // else if (b) f(2);
2408 //
2409 // produces:
2410 //
2411 // [if]
2412 // / \
2413 // [f(1)] [if]
2414 // | | \
2415 // | | |
2416 // | [f(2)]|
2417 // \ | /
2418 // [ end ]
2419 //
2420 // [end] has two unconditional predecessor arcs and one conditional. The
2421 // conditional refers to the implicit empty 'else' arc. This conditional
2422 // arc can also be caused by an empty default block in a switch.
2423 //
2424 // In this case, we attempt to sink code from all *unconditional* arcs.
2425 // If we can sink instructions from these arcs (determined during the scan
2426 // phase below) we insert a common successor for all unconditional arcs and
2427 // connect that to [end], to enable sinking:
2428 //
2429 // [if]
2430 // / \
2431 // [x(1)] [if]
2432 // | | \
2433 // | | \
2434 // | [x(2)] |
2435 // \ / |
2436 // [sink.split] |
2437 // \ /
2438 // [ end ]
2439 //
2440 SmallVector<BasicBlock*,4> UnconditionalPreds;
2441 bool HaveNonUnconditionalPredecessors = false;
2442 for (auto *PredBB : predecessors(BB)) {
2443 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2444 if (PredBr && PredBr->isUnconditional())
2445 UnconditionalPreds.push_back(PredBB);
2446 else
2447 HaveNonUnconditionalPredecessors = true;
2448 }
2449 if (UnconditionalPreds.size() < 2)
2450 return false;
2451
2452 // We take a two-step approach to tail sinking. First we scan from the end of
2453 // each block upwards in lockstep. If the n'th instruction from the end of each
2454 // block can be sunk, those instructions are added to ValuesToSink and we
2455 // carry on. If we can sink an instruction but need to PHI-merge some operands
2456 // (because they're not identical in each instruction) we add these to
2457 // PHIOperands.
2458 // We prepopulate PHIOperands with the phis that already exist in BB.
2460 for (PHINode &PN : BB->phis()) {
2462 for (const Use &U : PN.incoming_values())
2463 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2464 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2465 for (BasicBlock *Pred : UnconditionalPreds)
2466 Ops.push_back(*IncomingVals[Pred]);
2467 }
2468
2469 int ScanIdx = 0;
2470 SmallPtrSet<Value*,4> InstructionsToSink;
2471 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2472 while (LRI.isValid() &&
2473 canSinkInstructions(*LRI, PHIOperands)) {
2474 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2475 << "\n");
2476 InstructionsToSink.insert_range(*LRI);
2477 ++ScanIdx;
2478 --LRI;
2479 }
2480
2481 // If no instructions can be sunk, early-return.
2482 if (ScanIdx == 0)
2483 return false;
2484
2485 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2486
2487 if (!followedByDeoptOrUnreachable) {
2488 // Check whether this is the pointer operand of a load/store.
2489 auto IsMemOperand = [](Use &U) {
2490 auto *I = cast<Instruction>(U.getUser());
2491 if (isa<LoadInst>(I))
2492 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2493 if (isa<StoreInst>(I))
2494 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2495 return false;
2496 };
2497
2498 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2499 // actually sink before encountering instruction that is unprofitable to
2500 // sink?
2501 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2502 unsigned NumPHIInsts = 0;
2503 for (Use &U : (*LRI)[0]->operands()) {
2504 auto It = PHIOperands.find(&U);
2505 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2506 return InstructionsToSink.contains(V);
2507 })) {
2508 ++NumPHIInsts;
2509 // Do not separate a load/store from the gep producing the address.
2510 // The gep can likely be folded into the load/store as an addressing
2511 // mode. Additionally, a load of a gep is easier to analyze than a
2512 // load of a phi.
2513 if (IsMemOperand(U) &&
2514 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2515 return false;
2516 // FIXME: this check is overly optimistic. We may end up not sinking
2517 // said instruction, due to the very same profitability check.
2518 // See @creating_too_many_phis in sink-common-code.ll.
2519 }
2520 }
2521 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2522 return NumPHIInsts <= 1;
2523 };
2524
2525 // We've determined that we are going to sink last ScanIdx instructions,
2526 // and recorded them in InstructionsToSink. Now, some instructions may be
2527 // unprofitable to sink. But that determination depends on the instructions
2528 // that we are going to sink.
2529
2530 // First, forward scan: find the first instruction unprofitable to sink,
2531 // recording all the ones that are profitable to sink.
2532 // FIXME: would it be better, after we detect that not all are profitable.
2533 // to either record the profitable ones, or erase the unprofitable ones?
2534 // Maybe we need to choose (at runtime) the one that will touch least
2535 // instrs?
2536 LRI.reset();
2537 int Idx = 0;
2538 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2539 while (Idx < ScanIdx) {
2540 if (!ProfitableToSinkInstruction(LRI)) {
2541 // Too many PHIs would be created.
2542 LLVM_DEBUG(
2543 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2544 break;
2545 }
2546 InstructionsProfitableToSink.insert_range(*LRI);
2547 --LRI;
2548 ++Idx;
2549 }
2550
2551 // If no instructions can be sunk, early-return.
2552 if (Idx == 0)
2553 return false;
2554
2555 // Did we determine that (only) some instructions are unprofitable to sink?
2556 if (Idx < ScanIdx) {
2557 // Okay, some instructions are unprofitable.
2558 ScanIdx = Idx;
2559 InstructionsToSink = InstructionsProfitableToSink;
2560
2561 // But, that may make other instructions unprofitable, too.
2562 // So, do a backward scan, do any earlier instructions become
2563 // unprofitable?
2564 assert(
2565 !ProfitableToSinkInstruction(LRI) &&
2566 "We already know that the last instruction is unprofitable to sink");
2567 ++LRI;
2568 --Idx;
2569 while (Idx >= 0) {
2570 // If we detect that an instruction becomes unprofitable to sink,
2571 // all earlier instructions won't be sunk either,
2572 // so preemptively keep InstructionsProfitableToSink in sync.
2573 // FIXME: is this the most performant approach?
2574 for (auto *I : *LRI)
2575 InstructionsProfitableToSink.erase(I);
2576 if (!ProfitableToSinkInstruction(LRI)) {
2577 // Everything starting with this instruction won't be sunk.
2578 ScanIdx = Idx;
2579 InstructionsToSink = InstructionsProfitableToSink;
2580 }
2581 ++LRI;
2582 --Idx;
2583 }
2584 }
2585
2586 // If no instructions can be sunk, early-return.
2587 if (ScanIdx == 0)
2588 return false;
2589 }
2590
2591 bool Changed = false;
2592
2593 if (HaveNonUnconditionalPredecessors) {
2594 if (!followedByDeoptOrUnreachable) {
2595 // It is always legal to sink common instructions from unconditional
2596 // predecessors. However, if not all predecessors are unconditional,
2597 // this transformation might be pessimizing. So as a rule of thumb,
2598 // don't do it unless we'd sink at least one non-speculatable instruction.
2599 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2600 LRI.reset();
2601 int Idx = 0;
2602 bool Profitable = false;
2603 while (Idx < ScanIdx) {
2604 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2605 Profitable = true;
2606 break;
2607 }
2608 --LRI;
2609 ++Idx;
2610 }
2611 if (!Profitable)
2612 return false;
2613 }
2614
2615 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2616 // We have a conditional edge and we're going to sink some instructions.
2617 // Insert a new block postdominating all blocks we're going to sink from.
2618 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2619 // Edges couldn't be split.
2620 return false;
2621 Changed = true;
2622 }
2623
2624 // Now that we've analyzed all potential sinking candidates, perform the
2625 // actual sink. We iteratively sink the last non-terminator of the source
2626 // blocks into their common successor unless doing so would require too
2627 // many PHI instructions to be generated (currently only one PHI is allowed
2628 // per sunk instruction).
2629 //
2630 // We can use InstructionsToSink to discount values needing PHI-merging that will
2631 // actually be sunk in a later iteration. This allows us to be more
2632 // aggressive in what we sink. This does allow a false positive where we
2633 // sink presuming a later value will also be sunk, but stop half way through
2634 // and never actually sink it which means we produce more PHIs than intended.
2635 // This is unlikely in practice though.
2636 int SinkIdx = 0;
2637 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2638 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2639 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2640 << "\n");
2641
2642 // Because we've sunk every instruction in turn, the current instruction to
2643 // sink is always at index 0.
2644 LRI.reset();
2645
2646 sinkLastInstruction(UnconditionalPreds);
2647 NumSinkCommonInstrs++;
2648 Changed = true;
2649 }
2650 if (SinkIdx != 0)
2651 ++NumSinkCommonCode;
2652 return Changed;
2653}
2654
2655namespace {
2656
2657struct CompatibleSets {
2658 using SetTy = SmallVector<InvokeInst *, 2>;
2659
2661
2662 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2663
2664 SetTy &getCompatibleSet(InvokeInst *II);
2665
2666 void insert(InvokeInst *II);
2667};
2668
2669CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2670 // Perform a linear scan over all the existing sets, see if the new `invoke`
2671 // is compatible with any particular set. Since we know that all the `invokes`
2672 // within a set are compatible, only check the first `invoke` in each set.
2673 // WARNING: at worst, this has quadratic complexity.
2674 for (CompatibleSets::SetTy &Set : Sets) {
2675 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2676 return Set;
2677 }
2678
2679 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2680 return Sets.emplace_back();
2681}
2682
2683void CompatibleSets::insert(InvokeInst *II) {
2684 getCompatibleSet(II).emplace_back(II);
2685}
2686
2687bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2688 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2689
2690 // Can we theoretically merge these `invoke`s?
2691 auto IsIllegalToMerge = [](InvokeInst *II) {
2692 return II->cannotMerge() || II->isInlineAsm();
2693 };
2694 if (any_of(Invokes, IsIllegalToMerge))
2695 return false;
2696
2697 // Either both `invoke`s must be direct,
2698 // or both `invoke`s must be indirect.
2699 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2700 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2701 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2702 if (HaveIndirectCalls) {
2703 if (!AllCallsAreIndirect)
2704 return false;
2705 } else {
2706 // All callees must be identical.
2707 Value *Callee = nullptr;
2708 for (InvokeInst *II : Invokes) {
2709 Value *CurrCallee = II->getCalledOperand();
2710 assert(CurrCallee && "There is always a called operand.");
2711 if (!Callee)
2712 Callee = CurrCallee;
2713 else if (Callee != CurrCallee)
2714 return false;
2715 }
2716 }
2717
2718 // Either both `invoke`s must not have a normal destination,
2719 // or both `invoke`s must have a normal destination,
2720 auto HasNormalDest = [](InvokeInst *II) {
2721 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2722 };
2723 if (any_of(Invokes, HasNormalDest)) {
2724 // Do not merge `invoke` that does not have a normal destination with one
2725 // that does have a normal destination, even though doing so would be legal.
2726 if (!all_of(Invokes, HasNormalDest))
2727 return false;
2728
2729 // All normal destinations must be identical.
2730 BasicBlock *NormalBB = nullptr;
2731 for (InvokeInst *II : Invokes) {
2732 BasicBlock *CurrNormalBB = II->getNormalDest();
2733 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2734 if (!NormalBB)
2735 NormalBB = CurrNormalBB;
2736 else if (NormalBB != CurrNormalBB)
2737 return false;
2738 }
2739
2740 // In the normal destination, the incoming values for these two `invoke`s
2741 // must be compatible.
2742 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2744 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2745 &EquivalenceSet))
2746 return false;
2747 }
2748
2749#ifndef NDEBUG
2750 // All unwind destinations must be identical.
2751 // We know that because we have started from said unwind destination.
2752 BasicBlock *UnwindBB = nullptr;
2753 for (InvokeInst *II : Invokes) {
2754 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2755 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2756 if (!UnwindBB)
2757 UnwindBB = CurrUnwindBB;
2758 else
2759 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2760 }
2761#endif
2762
2763 // In the unwind destination, the incoming values for these two `invoke`s
2764 // must be compatible.
2766 Invokes.front()->getUnwindDest(),
2767 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2768 return false;
2769
2770 // Ignoring arguments, these `invoke`s must be identical,
2771 // including operand bundles.
2772 const InvokeInst *II0 = Invokes.front();
2773 for (auto *II : Invokes.drop_front())
2774 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2775 return false;
2776
2777 // Can we theoretically form the data operands for the merged `invoke`?
2778 auto IsIllegalToMergeArguments = [](auto Ops) {
2779 Use &U0 = std::get<0>(Ops);
2780 Use &U1 = std::get<1>(Ops);
2781 if (U0 == U1)
2782 return false;
2784 U0.getOperandNo());
2785 };
2786 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2787 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2788 IsIllegalToMergeArguments))
2789 return false;
2790
2791 return true;
2792}
2793
2794} // namespace
2795
2796// Merge all invokes in the provided set, all of which are compatible
2797// as per the `CompatibleSets::shouldBelongToSameSet()`.
2799 DomTreeUpdater *DTU) {
2800 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2801
2803 if (DTU)
2804 Updates.reserve(2 + 3 * Invokes.size());
2805
2806 bool HasNormalDest =
2807 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2808
2809 // Clone one of the invokes into a new basic block.
2810 // Since they are all compatible, it doesn't matter which invoke is cloned.
2811 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2812 InvokeInst *II0 = Invokes.front();
2813 BasicBlock *II0BB = II0->getParent();
2814 BasicBlock *InsertBeforeBlock =
2815 II0->getParent()->getIterator()->getNextNode();
2816 Function *Func = II0BB->getParent();
2817 LLVMContext &Ctx = II0->getContext();
2818
2819 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2820 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2821
2822 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2823 // NOTE: all invokes have the same attributes, so no handling needed.
2824 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2825
2826 if (!HasNormalDest) {
2827 // This set does not have a normal destination,
2828 // so just form a new block with unreachable terminator.
2829 BasicBlock *MergedNormalDest = BasicBlock::Create(
2830 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2831 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2832 UI->setDebugLoc(DebugLoc::getTemporary());
2833 MergedInvoke->setNormalDest(MergedNormalDest);
2834 }
2835
2836 // The unwind destination, however, remainds identical for all invokes here.
2837
2838 return MergedInvoke;
2839 }();
2840
2841 if (DTU) {
2842 // Predecessor blocks that contained these invokes will now branch to
2843 // the new block that contains the merged invoke, ...
2844 for (InvokeInst *II : Invokes)
2845 Updates.push_back(
2846 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2847
2848 // ... which has the new `unreachable` block as normal destination,
2849 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2850 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2851 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2852 SuccBBOfMergedInvoke});
2853
2854 // Since predecessor blocks now unconditionally branch to a new block,
2855 // they no longer branch to their original successors.
2856 for (InvokeInst *II : Invokes)
2857 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2858 Updates.push_back(
2859 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2860 }
2861
2862 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2863
2864 // Form the merged operands for the merged invoke.
2865 for (Use &U : MergedInvoke->operands()) {
2866 // Only PHI together the indirect callees and data operands.
2867 if (MergedInvoke->isCallee(&U)) {
2868 if (!IsIndirectCall)
2869 continue;
2870 } else if (!MergedInvoke->isDataOperand(&U))
2871 continue;
2872
2873 // Don't create trivial PHI's with all-identical incoming values.
2874 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2875 return II->getOperand(U.getOperandNo()) != U.get();
2876 });
2877 if (!NeedPHI)
2878 continue;
2879
2880 // Form a PHI out of all the data ops under this index.
2882 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2883 for (InvokeInst *II : Invokes)
2884 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2885
2886 U.set(PN);
2887 }
2888
2889 // We've ensured that each PHI node has compatible (identical) incoming values
2890 // when coming from each of the `invoke`s in the current merge set,
2891 // so update the PHI nodes accordingly.
2892 for (BasicBlock *Succ : successors(MergedInvoke))
2893 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2894 /*ExistPred=*/Invokes.front()->getParent());
2895
2896 // And finally, replace the original `invoke`s with an unconditional branch
2897 // to the block with the merged `invoke`. Also, give that merged `invoke`
2898 // the merged debugloc of all the original `invoke`s.
2899 DILocation *MergedDebugLoc = nullptr;
2900 for (InvokeInst *II : Invokes) {
2901 // Compute the debug location common to all the original `invoke`s.
2902 if (!MergedDebugLoc)
2903 MergedDebugLoc = II->getDebugLoc();
2904 else
2905 MergedDebugLoc =
2906 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2907
2908 // And replace the old `invoke` with an unconditionally branch
2909 // to the block with the merged `invoke`.
2910 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2911 OrigSuccBB->removePredecessor(II->getParent());
2912 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2913 // The unconditional branch is part of the replacement for the original
2914 // invoke, so should use its DebugLoc.
2915 BI->setDebugLoc(II->getDebugLoc());
2916 bool Success = MergedInvoke->tryIntersectAttributes(II);
2917 assert(Success && "Merged invokes with incompatible attributes");
2918 // For NDEBUG Compile
2919 (void)Success;
2920 II->replaceAllUsesWith(MergedInvoke);
2921 II->eraseFromParent();
2922 ++NumInvokesMerged;
2923 }
2924 MergedInvoke->setDebugLoc(MergedDebugLoc);
2925 ++NumInvokeSetsFormed;
2926
2927 if (DTU)
2928 DTU->applyUpdates(Updates);
2929}
2930
2931/// If this block is a `landingpad` exception handling block, categorize all
2932/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2933/// being "mergeable" together, and then merge invokes in each set together.
2934///
2935/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2936/// [...] [...]
2937/// | |
2938/// [invoke0] [invoke1]
2939/// / \ / \
2940/// [cont0] [landingpad] [cont1]
2941/// to:
2942/// [...] [...]
2943/// \ /
2944/// [invoke]
2945/// / \
2946/// [cont] [landingpad]
2947///
2948/// But of course we can only do that if the invokes share the `landingpad`,
2949/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2950/// and the invoked functions are "compatible".
2953 return false;
2954
2955 bool Changed = false;
2956
2957 // FIXME: generalize to all exception handling blocks?
2958 if (!BB->isLandingPad())
2959 return Changed;
2960
2961 CompatibleSets Grouper;
2962
2963 // Record all the predecessors of this `landingpad`. As per verifier,
2964 // the only allowed predecessor is the unwind edge of an `invoke`.
2965 // We want to group "compatible" `invokes` into the same set to be merged.
2966 for (BasicBlock *PredBB : predecessors(BB))
2967 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2968
2969 // And now, merge `invoke`s that were grouped togeter.
2970 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2971 if (Invokes.size() < 2)
2972 continue;
2973 Changed = true;
2974 mergeCompatibleInvokesImpl(Invokes, DTU);
2975 }
2976
2977 return Changed;
2978}
2979
2980namespace {
2981/// Track ephemeral values, which should be ignored for cost-modelling
2982/// purposes. Requires walking instructions in reverse order.
2983class EphemeralValueTracker {
2984 SmallPtrSet<const Instruction *, 32> EphValues;
2985
2986 bool isEphemeral(const Instruction *I) {
2987 if (isa<AssumeInst>(I))
2988 return true;
2989 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2990 all_of(I->users(), [&](const User *U) {
2991 return EphValues.count(cast<Instruction>(U));
2992 });
2993 }
2994
2995public:
2996 bool track(const Instruction *I) {
2997 if (isEphemeral(I)) {
2998 EphValues.insert(I);
2999 return true;
3000 }
3001 return false;
3002 }
3003
3004 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3005};
3006} // namespace
3007
3008/// Determine if we can hoist sink a sole store instruction out of a
3009/// conditional block.
3010///
3011/// We are looking for code like the following:
3012/// BrBB:
3013/// store i32 %add, i32* %arrayidx2
3014/// ... // No other stores or function calls (we could be calling a memory
3015/// ... // function).
3016/// %cmp = icmp ult %x, %y
3017/// br i1 %cmp, label %EndBB, label %ThenBB
3018/// ThenBB:
3019/// store i32 %add5, i32* %arrayidx2
3020/// br label EndBB
3021/// EndBB:
3022/// ...
3023/// We are going to transform this into:
3024/// BrBB:
3025/// store i32 %add, i32* %arrayidx2
3026/// ... //
3027/// %cmp = icmp ult %x, %y
3028/// %add.add5 = select i1 %cmp, i32 %add, %add5
3029/// store i32 %add.add5, i32* %arrayidx2
3030/// ...
3031///
3032/// \return The pointer to the value of the previous store if the store can be
3033/// hoisted into the predecessor block. 0 otherwise.
3035 BasicBlock *StoreBB, BasicBlock *EndBB) {
3036 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3037 if (!StoreToHoist)
3038 return nullptr;
3039
3040 // Volatile or atomic.
3041 if (!StoreToHoist->isSimple())
3042 return nullptr;
3043
3044 Value *StorePtr = StoreToHoist->getPointerOperand();
3045 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3046
3047 // Look for a store to the same pointer in BrBB.
3048 unsigned MaxNumInstToLookAt = 9;
3049 // Skip pseudo probe intrinsic calls which are not really killing any memory
3050 // accesses.
3051 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3052 if (!MaxNumInstToLookAt)
3053 break;
3054 --MaxNumInstToLookAt;
3055
3056 // Could be calling an instruction that affects memory like free().
3057 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3058 return nullptr;
3059
3060 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3061 // Found the previous store to same location and type. Make sure it is
3062 // simple, to avoid introducing a spurious non-atomic write after an
3063 // atomic write.
3064 if (SI->getPointerOperand() == StorePtr &&
3065 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3066 SI->getAlign() >= StoreToHoist->getAlign())
3067 // Found the previous store, return its value operand.
3068 return SI->getValueOperand();
3069 return nullptr; // Unknown store.
3070 }
3071
3072 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3073 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3074 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3075 Value *Obj = getUnderlyingObject(StorePtr);
3076 bool ExplicitlyDereferenceableOnly;
3077 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3079 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3081 (!ExplicitlyDereferenceableOnly ||
3082 isDereferenceablePointer(StorePtr, StoreTy,
3083 LI->getDataLayout()))) {
3084 // Found a previous load, return it.
3085 return LI;
3086 }
3087 }
3088 // The load didn't work out, but we may still find a store.
3089 }
3090 }
3091
3092 return nullptr;
3093}
3094
3095/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3096/// converted to selects.
3098 BasicBlock *EndBB,
3099 unsigned &SpeculatedInstructions,
3100 InstructionCost &Cost,
3101 const TargetTransformInfo &TTI) {
3103 BB->getParent()->hasMinSize()
3106
3107 bool HaveRewritablePHIs = false;
3108 for (PHINode &PN : EndBB->phis()) {
3109 Value *OrigV = PN.getIncomingValueForBlock(BB);
3110 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3111
3112 // FIXME: Try to remove some of the duplication with
3113 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3114 if (ThenV == OrigV)
3115 continue;
3116
3117 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3118 CmpInst::makeCmpResultType(PN.getType()),
3120
3121 // Don't convert to selects if we could remove undefined behavior instead.
3122 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3124 return false;
3125
3126 HaveRewritablePHIs = true;
3127 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3128 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3129 if (!OrigCE && !ThenCE)
3130 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3131
3132 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3133 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3134 InstructionCost MaxCost =
3136 if (OrigCost + ThenCost > MaxCost)
3137 return false;
3138
3139 // Account for the cost of an unfolded ConstantExpr which could end up
3140 // getting expanded into Instructions.
3141 // FIXME: This doesn't account for how many operations are combined in the
3142 // constant expression.
3143 ++SpeculatedInstructions;
3144 if (SpeculatedInstructions > 1)
3145 return false;
3146 }
3147
3148 return HaveRewritablePHIs;
3149}
3150
3152 std::optional<bool> Invert,
3153 const TargetTransformInfo &TTI) {
3154 // If the branch is non-unpredictable, and is predicted to *not* branch to
3155 // the `then` block, then avoid speculating it.
3156 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3157 return true;
3158
3159 uint64_t TWeight, FWeight;
3160 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3161 return true;
3162
3163 if (!Invert.has_value())
3164 return false;
3165
3166 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3167 BranchProbability BIEndProb =
3168 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3169 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3170 return BIEndProb < Likely;
3171}
3172
3173/// Speculate a conditional basic block flattening the CFG.
3174///
3175/// Note that this is a very risky transform currently. Speculating
3176/// instructions like this is most often not desirable. Instead, there is an MI
3177/// pass which can do it with full awareness of the resource constraints.
3178/// However, some cases are "obvious" and we should do directly. An example of
3179/// this is speculating a single, reasonably cheap instruction.
3180///
3181/// There is only one distinct advantage to flattening the CFG at the IR level:
3182/// it makes very common but simplistic optimizations such as are common in
3183/// instcombine and the DAG combiner more powerful by removing CFG edges and
3184/// modeling their effects with easier to reason about SSA value graphs.
3185///
3186///
3187/// An illustration of this transform is turning this IR:
3188/// \code
3189/// BB:
3190/// %cmp = icmp ult %x, %y
3191/// br i1 %cmp, label %EndBB, label %ThenBB
3192/// ThenBB:
3193/// %sub = sub %x, %y
3194/// br label BB2
3195/// EndBB:
3196/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3197/// ...
3198/// \endcode
3199///
3200/// Into this IR:
3201/// \code
3202/// BB:
3203/// %cmp = icmp ult %x, %y
3204/// %sub = sub %x, %y
3205/// %cond = select i1 %cmp, 0, %sub
3206/// ...
3207/// \endcode
3208///
3209/// \returns true if the conditional block is removed.
3210bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3211 BasicBlock *ThenBB) {
3212 if (!Options.SpeculateBlocks)
3213 return false;
3214
3215 // Be conservative for now. FP select instruction can often be expensive.
3216 Value *BrCond = BI->getCondition();
3217 if (isa<FCmpInst>(BrCond))
3218 return false;
3219
3220 BasicBlock *BB = BI->getParent();
3221 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3222 InstructionCost Budget =
3224
3225 // If ThenBB is actually on the false edge of the conditional branch, remember
3226 // to swap the select operands later.
3227 bool Invert = false;
3228 if (ThenBB != BI->getSuccessor(0)) {
3229 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3230 Invert = true;
3231 }
3232 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3233
3234 if (!isProfitableToSpeculate(BI, Invert, TTI))
3235 return false;
3236
3237 // Keep a count of how many times instructions are used within ThenBB when
3238 // they are candidates for sinking into ThenBB. Specifically:
3239 // - They are defined in BB, and
3240 // - They have no side effects, and
3241 // - All of their uses are in ThenBB.
3242 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3243
3244 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3245
3246 unsigned SpeculatedInstructions = 0;
3247 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3248 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3249 Value *SpeculatedStoreValue = nullptr;
3250 StoreInst *SpeculatedStore = nullptr;
3251 EphemeralValueTracker EphTracker;
3252 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3253 // Skip pseudo probes. The consequence is we lose track of the branch
3254 // probability for ThenBB, which is fine since the optimization here takes
3255 // place regardless of the branch probability.
3256 if (isa<PseudoProbeInst>(I)) {
3257 // The probe should be deleted so that it will not be over-counted when
3258 // the samples collected on the non-conditional path are counted towards
3259 // the conditional path. We leave it for the counts inference algorithm to
3260 // figure out a proper count for an unknown probe.
3261 SpeculatedPseudoProbes.push_back(&I);
3262 continue;
3263 }
3264
3265 // Ignore ephemeral values, they will be dropped by the transform.
3266 if (EphTracker.track(&I))
3267 continue;
3268
3269 // Only speculatively execute a single instruction (not counting the
3270 // terminator) for now.
3271 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3273 SpeculatedConditionalLoadsStores.size() <
3275 // Not count load/store into cost if target supports conditional faulting
3276 // b/c it's cheap to speculate it.
3277 if (IsSafeCheapLoadStore)
3278 SpeculatedConditionalLoadsStores.push_back(&I);
3279 else
3280 ++SpeculatedInstructions;
3281
3282 if (SpeculatedInstructions > 1)
3283 return false;
3284
3285 // Don't hoist the instruction if it's unsafe or expensive.
3286 if (!IsSafeCheapLoadStore &&
3288 !(HoistCondStores && !SpeculatedStoreValue &&
3289 (SpeculatedStoreValue =
3290 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3291 return false;
3292 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3295 return false;
3296
3297 // Store the store speculation candidate.
3298 if (!SpeculatedStore && SpeculatedStoreValue)
3299 SpeculatedStore = cast<StoreInst>(&I);
3300
3301 // Do not hoist the instruction if any of its operands are defined but not
3302 // used in BB. The transformation will prevent the operand from
3303 // being sunk into the use block.
3304 for (Use &Op : I.operands()) {
3306 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3307 continue; // Not a candidate for sinking.
3308
3309 ++SinkCandidateUseCounts[OpI];
3310 }
3311 }
3312
3313 // Consider any sink candidates which are only used in ThenBB as costs for
3314 // speculation. Note, while we iterate over a DenseMap here, we are summing
3315 // and so iteration order isn't significant.
3316 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3317 if (Inst->hasNUses(Count)) {
3318 ++SpeculatedInstructions;
3319 if (SpeculatedInstructions > 1)
3320 return false;
3321 }
3322
3323 // Check that we can insert the selects and that it's not too expensive to do
3324 // so.
3325 bool Convert =
3326 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3328 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3329 SpeculatedInstructions, Cost, TTI);
3330 if (!Convert || Cost > Budget)
3331 return false;
3332
3333 // If we get here, we can hoist the instruction and if-convert.
3334 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3335
3336 Instruction *Sel = nullptr;
3337 // Insert a select of the value of the speculated store.
3338 if (SpeculatedStoreValue) {
3339 IRBuilder<NoFolder> Builder(BI);
3340 Value *OrigV = SpeculatedStore->getValueOperand();
3341 Value *TrueV = SpeculatedStore->getValueOperand();
3342 Value *FalseV = SpeculatedStoreValue;
3343 if (Invert)
3344 std::swap(TrueV, FalseV);
3345 Value *S = Builder.CreateSelect(
3346 BrCond, TrueV, FalseV, "spec.store.select", BI);
3347 Sel = cast<Instruction>(S);
3348 SpeculatedStore->setOperand(0, S);
3349 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3350 SpeculatedStore->getDebugLoc());
3351 // The value stored is still conditional, but the store itself is now
3352 // unconditonally executed, so we must be sure that any linked dbg.assign
3353 // intrinsics are tracking the new stored value (the result of the
3354 // select). If we don't, and the store were to be removed by another pass
3355 // (e.g. DSE), then we'd eventually end up emitting a location describing
3356 // the conditional value, unconditionally.
3357 //
3358 // === Before this transformation ===
3359 // pred:
3360 // store %one, %x.dest, !DIAssignID !1
3361 // dbg.assign %one, "x", ..., !1, ...
3362 // br %cond if.then
3363 //
3364 // if.then:
3365 // store %two, %x.dest, !DIAssignID !2
3366 // dbg.assign %two, "x", ..., !2, ...
3367 //
3368 // === After this transformation ===
3369 // pred:
3370 // store %one, %x.dest, !DIAssignID !1
3371 // dbg.assign %one, "x", ..., !1
3372 /// ...
3373 // %merge = select %cond, %two, %one
3374 // store %merge, %x.dest, !DIAssignID !2
3375 // dbg.assign %merge, "x", ..., !2
3376 for (DbgVariableRecord *DbgAssign :
3377 at::getDVRAssignmentMarkers(SpeculatedStore))
3378 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3379 DbgAssign->replaceVariableLocationOp(OrigV, S);
3380 }
3381
3382 // Metadata can be dependent on the condition we are hoisting above.
3383 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3384 // to avoid making it appear as if the condition is a constant, which would
3385 // be misleading while debugging.
3386 // Similarly strip attributes that maybe dependent on condition we are
3387 // hoisting above.
3388 for (auto &I : make_early_inc_range(*ThenBB)) {
3389 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3390 I.dropLocation();
3391 }
3392 I.dropUBImplyingAttrsAndMetadata();
3393
3394 // Drop ephemeral values.
3395 if (EphTracker.contains(&I)) {
3396 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3397 I.eraseFromParent();
3398 }
3399 }
3400
3401 // Hoist the instructions.
3402 // Drop DbgVariableRecords attached to these instructions.
3403 for (auto &It : *ThenBB)
3404 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3405 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3406 // equivalent).
3407 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3408 !DVR || !DVR->isDbgAssign())
3409 It.dropOneDbgRecord(&DR);
3410 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3411 std::prev(ThenBB->end()));
3412
3413 if (!SpeculatedConditionalLoadsStores.empty())
3414 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3415 Sel);
3416
3417 // Insert selects and rewrite the PHI operands.
3418 IRBuilder<NoFolder> Builder(BI);
3419 for (PHINode &PN : EndBB->phis()) {
3420 unsigned OrigI = PN.getBasicBlockIndex(BB);
3421 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3422 Value *OrigV = PN.getIncomingValue(OrigI);
3423 Value *ThenV = PN.getIncomingValue(ThenI);
3424
3425 // Skip PHIs which are trivial.
3426 if (OrigV == ThenV)
3427 continue;
3428
3429 // Create a select whose true value is the speculatively executed value and
3430 // false value is the pre-existing value. Swap them if the branch
3431 // destinations were inverted.
3432 Value *TrueV = ThenV, *FalseV = OrigV;
3433 if (Invert)
3434 std::swap(TrueV, FalseV);
3435 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3436 PN.setIncomingValue(OrigI, V);
3437 PN.setIncomingValue(ThenI, V);
3438 }
3439
3440 // Remove speculated pseudo probes.
3441 for (Instruction *I : SpeculatedPseudoProbes)
3442 I->eraseFromParent();
3443
3444 ++NumSpeculations;
3445 return true;
3446}
3447
3449
3450// Return false if number of blocks searched is too much.
3451static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3452 BlocksSet &ReachesNonLocalUses) {
3453 if (BB == DefBB)
3454 return true;
3455 if (!ReachesNonLocalUses.insert(BB).second)
3456 return true;
3457
3458 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3459 return false;
3460 for (BasicBlock *Pred : predecessors(BB))
3461 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3462 return false;
3463 return true;
3464}
3465
3466/// Return true if we can thread a branch across this block.
3468 BlocksSet &NonLocalUseBlocks) {
3469 int Size = 0;
3470 EphemeralValueTracker EphTracker;
3471
3472 // Walk the loop in reverse so that we can identify ephemeral values properly
3473 // (values only feeding assumes).
3474 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3475 // Can't fold blocks that contain noduplicate or convergent calls.
3476 if (CallInst *CI = dyn_cast<CallInst>(&I))
3477 if (CI->cannotDuplicate() || CI->isConvergent())
3478 return false;
3479
3480 // Ignore ephemeral values which are deleted during codegen.
3481 // We will delete Phis while threading, so Phis should not be accounted in
3482 // block's size.
3483 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3484 if (Size++ > MaxSmallBlockSize)
3485 return false; // Don't clone large BB's.
3486 }
3487
3488 // Record blocks with non-local uses of values defined in the current basic
3489 // block.
3490 for (User *U : I.users()) {
3492 BasicBlock *UsedInBB = UI->getParent();
3493 if (UsedInBB == BB) {
3494 if (isa<PHINode>(UI))
3495 return false;
3496 } else
3497 NonLocalUseBlocks.insert(UsedInBB);
3498 }
3499
3500 // Looks ok, continue checking.
3501 }
3502
3503 return true;
3504}
3505
3507 BasicBlock *To) {
3508 // Don't look past the block defining the value, we might get the value from
3509 // a previous loop iteration.
3510 auto *I = dyn_cast<Instruction>(V);
3511 if (I && I->getParent() == To)
3512 return nullptr;
3513
3514 // We know the value if the From block branches on it.
3515 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3516 if (BI && BI->isConditional() && BI->getCondition() == V &&
3517 BI->getSuccessor(0) != BI->getSuccessor(1))
3518 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3520
3521 return nullptr;
3522}
3523
3524/// If we have a conditional branch on something for which we know the constant
3525/// value in predecessors (e.g. a phi node in the current block), thread edges
3526/// from the predecessor to their ultimate destination.
3527static std::optional<bool>
3529 const DataLayout &DL,
3530 AssumptionCache *AC) {
3532 BasicBlock *BB = BI->getParent();
3533 Value *Cond = BI->getCondition();
3535 if (PN && PN->getParent() == BB) {
3536 // Degenerate case of a single entry PHI.
3537 if (PN->getNumIncomingValues() == 1) {
3539 return true;
3540 }
3541
3542 for (Use &U : PN->incoming_values())
3543 if (auto *CB = dyn_cast<ConstantInt>(U))
3544 KnownValues[CB].insert(PN->getIncomingBlock(U));
3545 } else {
3546 for (BasicBlock *Pred : predecessors(BB)) {
3547 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3548 KnownValues[CB].insert(Pred);
3549 }
3550 }
3551
3552 if (KnownValues.empty())
3553 return false;
3554
3555 // Now we know that this block has multiple preds and two succs.
3556 // Check that the block is small enough and record which non-local blocks use
3557 // values defined in the block.
3558
3559 BlocksSet NonLocalUseBlocks;
3560 BlocksSet ReachesNonLocalUseBlocks;
3561 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3562 return false;
3563
3564 // Jump-threading can only be done to destinations where no values defined
3565 // in BB are live.
3566
3567 // Quickly check if both destinations have uses. If so, jump-threading cannot
3568 // be done.
3569 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3570 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3571 return false;
3572
3573 // Search backward from NonLocalUseBlocks to find which blocks
3574 // reach non-local uses.
3575 for (BasicBlock *UseBB : NonLocalUseBlocks)
3576 // Give up if too many blocks are searched.
3577 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3578 return false;
3579
3580 for (const auto &Pair : KnownValues) {
3581 ConstantInt *CB = Pair.first;
3582 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3583 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3584
3585 // Okay, we now know that all edges from PredBB should be revectored to
3586 // branch to RealDest.
3587 if (RealDest == BB)
3588 continue; // Skip self loops.
3589
3590 // Skip if the predecessor's terminator is an indirect branch.
3591 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3592 return isa<IndirectBrInst>(PredBB->getTerminator());
3593 }))
3594 continue;
3595
3596 // Only revector to RealDest if no values defined in BB are live.
3597 if (ReachesNonLocalUseBlocks.contains(RealDest))
3598 continue;
3599
3600 LLVM_DEBUG({
3601 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3602 << " has value " << *Pair.first << " in predecessors:\n";
3603 for (const BasicBlock *PredBB : Pair.second)
3604 dbgs() << " " << PredBB->getName() << "\n";
3605 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3606 });
3607
3608 // Split the predecessors we are threading into a new edge block. We'll
3609 // clone the instructions into this block, and then redirect it to RealDest.
3610 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3611
3612 // TODO: These just exist to reduce test diff, we can drop them if we like.
3613 EdgeBB->setName(RealDest->getName() + ".critedge");
3614 EdgeBB->moveBefore(RealDest);
3615
3616 // Update PHI nodes.
3617 addPredecessorToBlock(RealDest, EdgeBB, BB);
3618
3619 // BB may have instructions that are being threaded over. Clone these
3620 // instructions into EdgeBB. We know that there will be no uses of the
3621 // cloned instructions outside of EdgeBB.
3622 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3623 ValueToValueMapTy TranslateMap; // Track translated values.
3624 TranslateMap[Cond] = CB;
3625
3626 // RemoveDIs: track instructions that we optimise away while folding, so
3627 // that we can copy DbgVariableRecords from them later.
3628 BasicBlock::iterator SrcDbgCursor = BB->begin();
3629 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3630 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3631 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3632 continue;
3633 }
3634 // Clone the instruction.
3635 Instruction *N = BBI->clone();
3636 // Insert the new instruction into its new home.
3637 N->insertInto(EdgeBB, InsertPt);
3638
3639 if (BBI->hasName())
3640 N->setName(BBI->getName() + ".c");
3641
3642 // Update operands due to translation.
3643 // Key Instructions: Remap all the atom groups.
3644 if (const DebugLoc &DL = BBI->getDebugLoc())
3645 mapAtomInstance(DL, TranslateMap);
3646 RemapInstruction(N, TranslateMap,
3648
3649 // Check for trivial simplification.
3650 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3651 if (!BBI->use_empty())
3652 TranslateMap[&*BBI] = V;
3653 if (!N->mayHaveSideEffects()) {
3654 N->eraseFromParent(); // Instruction folded away, don't need actual
3655 // inst
3656 N = nullptr;
3657 }
3658 } else {
3659 if (!BBI->use_empty())
3660 TranslateMap[&*BBI] = N;
3661 }
3662 if (N) {
3663 // Copy all debug-info attached to instructions from the last we
3664 // successfully clone, up to this instruction (they might have been
3665 // folded away).
3666 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3667 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3668 SrcDbgCursor = std::next(BBI);
3669 // Clone debug-info on this instruction too.
3670 N->cloneDebugInfoFrom(&*BBI);
3671
3672 // Register the new instruction with the assumption cache if necessary.
3673 if (auto *Assume = dyn_cast<AssumeInst>(N))
3674 if (AC)
3675 AC->registerAssumption(Assume);
3676 }
3677 }
3678
3679 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3680 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3681 InsertPt->cloneDebugInfoFrom(BI);
3682
3683 BB->removePredecessor(EdgeBB);
3684 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3685 EdgeBI->setSuccessor(0, RealDest);
3686 EdgeBI->setDebugLoc(BI->getDebugLoc());
3687
3688 if (DTU) {
3690 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3691 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3692 DTU->applyUpdates(Updates);
3693 }
3694
3695 // For simplicity, we created a separate basic block for the edge. Merge
3696 // it back into the predecessor if possible. This not only avoids
3697 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3698 // bypass the check for trivial cycles above.
3699 MergeBlockIntoPredecessor(EdgeBB, DTU);
3700
3701 // Signal repeat, simplifying any other constants.
3702 return std::nullopt;
3703 }
3704
3705 return false;
3706}
3707
3708bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3709 // Note: If BB is a loop header then there is a risk that threading introduces
3710 // a non-canonical loop by moving a back edge. So we avoid this optimization
3711 // for loop headers if NeedCanonicalLoop is set.
3712 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3713 return false;
3714
3715 std::optional<bool> Result;
3716 bool EverChanged = false;
3717 do {
3718 // Note that None means "we changed things, but recurse further."
3719 Result =
3721 EverChanged |= Result == std::nullopt || *Result;
3722 } while (Result == std::nullopt);
3723 return EverChanged;
3724}
3725
3726/// Given a BB that starts with the specified two-entry PHI node,
3727/// see if we can eliminate it.
3730 const DataLayout &DL,
3731 bool SpeculateUnpredictables) {
3732 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3733 // statement", which has a very simple dominance structure. Basically, we
3734 // are trying to find the condition that is being branched on, which
3735 // subsequently causes this merge to happen. We really want control
3736 // dependence information for this check, but simplifycfg can't keep it up
3737 // to date, and this catches most of the cases we care about anyway.
3738 BasicBlock *BB = PN->getParent();
3739
3740 BasicBlock *IfTrue, *IfFalse;
3741 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3742 if (!DomBI)
3743 return false;
3744 Value *IfCond = DomBI->getCondition();
3745 // Don't bother if the branch will be constant folded trivially.
3746 if (isa<ConstantInt>(IfCond))
3747 return false;
3748
3749 BasicBlock *DomBlock = DomBI->getParent();
3752 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3753 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3754 });
3755 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3756 "Will have either one or two blocks to speculate.");
3757
3758 // If the branch is non-unpredictable, see if we either predictably jump to
3759 // the merge bb (if we have only a single 'then' block), or if we predictably
3760 // jump to one specific 'then' block (if we have two of them).
3761 // It isn't beneficial to speculatively execute the code
3762 // from the block that we know is predictably not entered.
3763 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3764 if (!IsUnpredictable) {
3765 uint64_t TWeight, FWeight;
3766 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3767 (TWeight + FWeight) != 0) {
3768 BranchProbability BITrueProb =
3769 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3770 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3771 BranchProbability BIFalseProb = BITrueProb.getCompl();
3772 if (IfBlocks.size() == 1) {
3773 BranchProbability BIBBProb =
3774 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3775 if (BIBBProb >= Likely)
3776 return false;
3777 } else {
3778 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3779 return false;
3780 }
3781 }
3782 }
3783
3784 // Don't try to fold an unreachable block. For example, the phi node itself
3785 // can't be the candidate if-condition for a select that we want to form.
3786 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3787 if (IfCondPhiInst->getParent() == BB)
3788 return false;
3789
3790 // Okay, we found that we can merge this two-entry phi node into a select.
3791 // Doing so would require us to fold *all* two entry phi nodes in this block.
3792 // At some point this becomes non-profitable (particularly if the target
3793 // doesn't support cmov's). Only do this transformation if there are two or
3794 // fewer PHI nodes in this block.
3795 unsigned NumPhis = 0;
3796 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3797 if (NumPhis > 2)
3798 return false;
3799
3800 // Loop over the PHI's seeing if we can promote them all to select
3801 // instructions. While we are at it, keep track of the instructions
3802 // that need to be moved to the dominating block.
3803 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3804 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3805 InstructionCost Cost = 0;
3806 InstructionCost Budget =
3808 if (SpeculateUnpredictables && IsUnpredictable)
3809 Budget += TTI.getBranchMispredictPenalty();
3810
3811 bool Changed = false;
3812 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3813 PHINode *PN = cast<PHINode>(II++);
3814 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3815 PN->replaceAllUsesWith(V);
3816 PN->eraseFromParent();
3817 Changed = true;
3818 continue;
3819 }
3820
3821 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3822 AggressiveInsts, Cost, Budget, TTI, AC,
3823 ZeroCostInstructions) ||
3824 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3825 AggressiveInsts, Cost, Budget, TTI, AC,
3826 ZeroCostInstructions))
3827 return Changed;
3828 }
3829
3830 // If we folded the first phi, PN dangles at this point. Refresh it. If
3831 // we ran out of PHIs then we simplified them all.
3832 PN = dyn_cast<PHINode>(BB->begin());
3833 if (!PN)
3834 return true;
3835
3836 // Return true if at least one of these is a 'not', and another is either
3837 // a 'not' too, or a constant.
3838 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3839 if (!match(V0, m_Not(m_Value())))
3840 std::swap(V0, V1);
3841 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3842 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3843 };
3844
3845 // Don't fold i1 branches on PHIs which contain binary operators or
3846 // (possibly inverted) select form of or/ands, unless one of
3847 // the incoming values is an 'not' and another one is freely invertible.
3848 // These can often be turned into switches and other things.
3849 auto IsBinOpOrAnd = [](Value *V) {
3850 return match(
3852 };
3853 if (PN->getType()->isIntegerTy(1) &&
3854 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3855 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3856 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3857 PN->getIncomingValue(1)))
3858 return Changed;
3859
3860 // If all PHI nodes are promotable, check to make sure that all instructions
3861 // in the predecessor blocks can be promoted as well. If not, we won't be able
3862 // to get rid of the control flow, so it's not worth promoting to select
3863 // instructions.
3864 for (BasicBlock *IfBlock : IfBlocks)
3865 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3866 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3867 // This is not an aggressive instruction that we can promote.
3868 // Because of this, we won't be able to get rid of the control flow, so
3869 // the xform is not worth it.
3870 return Changed;
3871 }
3872
3873 // If either of the blocks has it's address taken, we can't do this fold.
3874 if (any_of(IfBlocks,
3875 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3876 return Changed;
3877
3878 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3879 if (IsUnpredictable) dbgs() << " (unpredictable)";
3880 dbgs() << " T: " << IfTrue->getName()
3881 << " F: " << IfFalse->getName() << "\n");
3882
3883 // If we can still promote the PHI nodes after this gauntlet of tests,
3884 // do all of the PHI's now.
3885
3886 // Move all 'aggressive' instructions, which are defined in the
3887 // conditional parts of the if's up to the dominating block.
3888 for (BasicBlock *IfBlock : IfBlocks)
3889 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3890
3891 IRBuilder<NoFolder> Builder(DomBI);
3892 // Propagate fast-math-flags from phi nodes to replacement selects.
3893 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3894 // Change the PHI node into a select instruction.
3895 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3896 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3897
3898 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3899 isa<FPMathOperator>(PN) ? PN : nullptr,
3900 "", DomBI);
3901 PN->replaceAllUsesWith(Sel);
3902 Sel->takeName(PN);
3903 PN->eraseFromParent();
3904 }
3905
3906 // At this point, all IfBlocks are empty, so our if statement
3907 // has been flattened. Change DomBlock to jump directly to our new block to
3908 // avoid other simplifycfg's kicking in on the diamond.
3909 Builder.CreateBr(BB);
3910
3912 if (DTU) {
3913 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3914 for (auto *Successor : successors(DomBlock))
3915 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3916 }
3917
3918 DomBI->eraseFromParent();
3919 if (DTU)
3920 DTU->applyUpdates(Updates);
3921
3922 return true;
3923}
3924
3927 Value *RHS, const Twine &Name = "") {
3928 // Try to relax logical op to binary op.
3929 if (impliesPoison(RHS, LHS))
3930 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3931 if (Opc == Instruction::And)
3932 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3933 if (Opc == Instruction::Or)
3934 return Builder.CreateLogicalOr(LHS, RHS, Name);
3935 llvm_unreachable("Invalid logical opcode");
3936}
3937
3938/// Return true if either PBI or BI has branch weight available, and store
3939/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3940/// not have branch weight, use 1:1 as its weight.
3942 uint64_t &PredTrueWeight,
3943 uint64_t &PredFalseWeight,
3944 uint64_t &SuccTrueWeight,
3945 uint64_t &SuccFalseWeight) {
3946 bool PredHasWeights =
3947 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3948 bool SuccHasWeights =
3949 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3950 if (PredHasWeights || SuccHasWeights) {
3951 if (!PredHasWeights)
3952 PredTrueWeight = PredFalseWeight = 1;
3953 if (!SuccHasWeights)
3954 SuccTrueWeight = SuccFalseWeight = 1;
3955 return true;
3956 } else {
3957 return false;
3958 }
3959}
3960
3961/// Determine if the two branches share a common destination and deduce a glue
3962/// that joins the branches' conditions to arrive at the common destination if
3963/// that would be profitable.
3964static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3966 const TargetTransformInfo *TTI) {
3967 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3968 "Both blocks must end with a conditional branches.");
3970 "PredBB must be a predecessor of BB.");
3971
3972 // We have the potential to fold the conditions together, but if the
3973 // predecessor branch is predictable, we may not want to merge them.
3974 uint64_t PTWeight, PFWeight;
3975 BranchProbability PBITrueProb, Likely;
3976 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3977 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3978 (PTWeight + PFWeight) != 0) {
3979 PBITrueProb =
3980 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3981 Likely = TTI->getPredictableBranchThreshold();
3982 }
3983
3984 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3985 // Speculate the 2nd condition unless the 1st is probably true.
3986 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3987 return {{BI->getSuccessor(0), Instruction::Or, false}};
3988 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3989 // Speculate the 2nd condition unless the 1st is probably false.
3990 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3991 return {{BI->getSuccessor(1), Instruction::And, false}};
3992 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3993 // Speculate the 2nd condition unless the 1st is probably true.
3994 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3995 return {{BI->getSuccessor(1), Instruction::And, true}};
3996 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3997 // Speculate the 2nd condition unless the 1st is probably false.
3998 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3999 return {{BI->getSuccessor(0), Instruction::Or, true}};
4000 }
4001 return std::nullopt;
4002}
4003
4005 DomTreeUpdater *DTU,
4006 MemorySSAUpdater *MSSAU,
4007 const TargetTransformInfo *TTI) {
4008 BasicBlock *BB = BI->getParent();
4009 BasicBlock *PredBlock = PBI->getParent();
4010
4011 // Determine if the two branches share a common destination.
4012 BasicBlock *CommonSucc;
4014 bool InvertPredCond;
4015 std::tie(CommonSucc, Opc, InvertPredCond) =
4017
4018 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4019
4020 IRBuilder<> Builder(PBI);
4021 // The builder is used to create instructions to eliminate the branch in BB.
4022 // If BB's terminator has !annotation metadata, add it to the new
4023 // instructions.
4024 Builder.CollectMetadataToCopy(BB->getTerminator(),
4025 {LLVMContext::MD_annotation});
4026
4027 // If we need to invert the condition in the pred block to match, do so now.
4028 if (InvertPredCond) {
4029 InvertBranch(PBI, Builder);
4030 }
4031
4032 BasicBlock *UniqueSucc =
4033 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4034
4035 // Before cloning instructions, notify the successor basic block that it
4036 // is about to have a new predecessor. This will update PHI nodes,
4037 // which will allow us to update live-out uses of bonus instructions.
4038 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4039
4040 // Try to update branch weights.
4041 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4042 SmallVector<uint64_t, 2> MDWeights;
4043 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4044 SuccTrueWeight, SuccFalseWeight)) {
4045
4046 if (PBI->getSuccessor(0) == BB) {
4047 // PBI: br i1 %x, BB, FalseDest
4048 // BI: br i1 %y, UniqueSucc, FalseDest
4049 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4050 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4051 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4052 // TrueWeight for PBI * FalseWeight for BI.
4053 // We assume that total weights of a BranchInst can fit into 32 bits.
4054 // Therefore, we will not have overflow using 64-bit arithmetic.
4055 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4056 PredTrueWeight * SuccFalseWeight);
4057 } else {
4058 // PBI: br i1 %x, TrueDest, BB
4059 // BI: br i1 %y, TrueDest, UniqueSucc
4060 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4061 // FalseWeight for PBI * TrueWeight for BI.
4062 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4063 PredFalseWeight * SuccTrueWeight);
4064 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4065 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4066 }
4067
4068 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4069 /*ElideAllZero=*/true);
4070
4071 // TODO: If BB is reachable from all paths through PredBlock, then we
4072 // could replace PBI's branch probabilities with BI's.
4073 } else
4074 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4075
4076 // Now, update the CFG.
4077 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4078
4079 if (DTU)
4080 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4081 {DominatorTree::Delete, PredBlock, BB}});
4082
4083 // If BI was a loop latch, it may have had associated loop metadata.
4084 // We need to copy it to the new latch, that is, PBI.
4085 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4086 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4087
4088 ValueToValueMapTy VMap; // maps original values to cloned values
4090
4091 Module *M = BB->getModule();
4092
4093 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4094 for (DbgVariableRecord &DVR :
4096 RemapDbgRecord(M, &DVR, VMap,
4098 }
4099
4100 // Now that the Cond was cloned into the predecessor basic block,
4101 // or/and the two conditions together.
4102 Value *BICond = VMap[BI->getCondition()];
4103 PBI->setCondition(
4104 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4106 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4107 if (!MDWeights.empty()) {
4108 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4109 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4110 /*IsExpected=*/false, /*ElideAllZero=*/true);
4111 }
4112
4113 ++NumFoldBranchToCommonDest;
4114 return true;
4115}
4116
4117/// Return if an instruction's type or any of its operands' types are a vector
4118/// type.
4119static bool isVectorOp(Instruction &I) {
4120 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4121 return U->getType()->isVectorTy();
4122 });
4123}
4124
4125/// If this basic block is simple enough, and if a predecessor branches to us
4126/// and one of our successors, fold the block into the predecessor and use
4127/// logical operations to pick the right destination.
4129 MemorySSAUpdater *MSSAU,
4130 const TargetTransformInfo *TTI,
4131 unsigned BonusInstThreshold) {
4132 // If this block ends with an unconditional branch,
4133 // let speculativelyExecuteBB() deal with it.
4134 if (!BI->isConditional())
4135 return false;
4136
4137 BasicBlock *BB = BI->getParent();
4141
4143
4145 Cond->getParent() != BB || !Cond->hasOneUse())
4146 return false;
4147
4148 // Finally, don't infinitely unroll conditional loops.
4149 if (is_contained(successors(BB), BB))
4150 return false;
4151
4152 // With which predecessors will we want to deal with?
4154 for (BasicBlock *PredBlock : predecessors(BB)) {
4155 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4156
4157 // Check that we have two conditional branches. If there is a PHI node in
4158 // the common successor, verify that the same value flows in from both
4159 // blocks.
4160 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4161 continue;
4162
4163 // Determine if the two branches share a common destination.
4164 BasicBlock *CommonSucc;
4166 bool InvertPredCond;
4167 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4168 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4169 else
4170 continue;
4171
4172 // Check the cost of inserting the necessary logic before performing the
4173 // transformation.
4174 if (TTI) {
4175 Type *Ty = BI->getCondition()->getType();
4176 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4177 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4178 !isa<CmpInst>(PBI->getCondition())))
4179 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4180
4182 continue;
4183 }
4184
4185 // Ok, we do want to deal with this predecessor. Record it.
4186 Preds.emplace_back(PredBlock);
4187 }
4188
4189 // If there aren't any predecessors into which we can fold,
4190 // don't bother checking the cost.
4191 if (Preds.empty())
4192 return false;
4193
4194 // Only allow this transformation if computing the condition doesn't involve
4195 // too many instructions and these involved instructions can be executed
4196 // unconditionally. We denote all involved instructions except the condition
4197 // as "bonus instructions", and only allow this transformation when the
4198 // number of the bonus instructions we'll need to create when cloning into
4199 // each predecessor does not exceed a certain threshold.
4200 unsigned NumBonusInsts = 0;
4201 bool SawVectorOp = false;
4202 const unsigned PredCount = Preds.size();
4203 for (Instruction &I : *BB) {
4204 // Don't check the branch condition comparison itself.
4205 if (&I == Cond)
4206 continue;
4207 // Ignore the terminator.
4208 if (isa<BranchInst>(I))
4209 continue;
4210 // I must be safe to execute unconditionally.
4212 return false;
4213 SawVectorOp |= isVectorOp(I);
4214
4215 // Account for the cost of duplicating this instruction into each
4216 // predecessor. Ignore free instructions.
4217 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4219 NumBonusInsts += PredCount;
4220
4221 // Early exits once we reach the limit.
4222 if (NumBonusInsts >
4223 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4224 return false;
4225 }
4226
4227 auto IsBCSSAUse = [BB, &I](Use &U) {
4228 auto *UI = cast<Instruction>(U.getUser());
4229 if (auto *PN = dyn_cast<PHINode>(UI))
4230 return PN->getIncomingBlock(U) == BB;
4231 return UI->getParent() == BB && I.comesBefore(UI);
4232 };
4233
4234 // Does this instruction require rewriting of uses?
4235 if (!all_of(I.uses(), IsBCSSAUse))
4236 return false;
4237 }
4238 if (NumBonusInsts >
4239 BonusInstThreshold *
4240 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4241 return false;
4242
4243 // Ok, we have the budget. Perform the transformation.
4244 for (BasicBlock *PredBlock : Preds) {
4245 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4246 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4247 }
4248 return false;
4249}
4250
4251// If there is only one store in BB1 and BB2, return it, otherwise return
4252// nullptr.
4254 StoreInst *S = nullptr;
4255 for (auto *BB : {BB1, BB2}) {
4256 if (!BB)
4257 continue;
4258 for (auto &I : *BB)
4259 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4260 if (S)
4261 // Multiple stores seen.
4262 return nullptr;
4263 else
4264 S = SI;
4265 }
4266 }
4267 return S;
4268}
4269
4271 Value *AlternativeV = nullptr) {
4272 // PHI is going to be a PHI node that allows the value V that is defined in
4273 // BB to be referenced in BB's only successor.
4274 //
4275 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4276 // doesn't matter to us what the other operand is (it'll never get used). We
4277 // could just create a new PHI with an undef incoming value, but that could
4278 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4279 // other PHI. So here we directly look for some PHI in BB's successor with V
4280 // as an incoming operand. If we find one, we use it, else we create a new
4281 // one.
4282 //
4283 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4284 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4285 // where OtherBB is the single other predecessor of BB's only successor.
4286 PHINode *PHI = nullptr;
4287 BasicBlock *Succ = BB->getSingleSuccessor();
4288
4289 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4290 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4291 PHI = cast<PHINode>(I);
4292 if (!AlternativeV)
4293 break;
4294
4295 assert(Succ->hasNPredecessors(2));
4296 auto PredI = pred_begin(Succ);
4297 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4298 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4299 break;
4300 PHI = nullptr;
4301 }
4302 if (PHI)
4303 return PHI;
4304
4305 // If V is not an instruction defined in BB, just return it.
4306 if (!AlternativeV &&
4307 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4308 return V;
4309
4310 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4311 PHI->insertBefore(Succ->begin());
4312 PHI->addIncoming(V, BB);
4313 for (BasicBlock *PredBB : predecessors(Succ))
4314 if (PredBB != BB)
4315 PHI->addIncoming(
4316 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4317 return PHI;
4318}
4319
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // Merge the conditional store to Address found in PTB/PFB with the one
  // found in QTB/QFB into a single store in PostBB, predicated on the
  // disjunction of the two branch conditions. Returns true on success.
  //
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible: both unordered (non-atomic or
  // at most monotonic-free "simple" semantics) and storing the same type.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    // A nullptr block models a fallthrough edge: nothing to pay for.
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
    // NOTE(review): the initializer of Budget was dropped in extraction —
    // restore from upstream before relying on this code.
    for (auto &I : BB->instructionsWithoutDebug(false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one of the stores that we want to speculate out of this BB,
      // then don't count its cost, consider it to be free.
      if (auto *S = dyn_cast<StoreInst>(&I))
        // NOTE(review): llvm::find returns an iterator (a raw pointer here),
        // and end() is non-null, so this condition is always true —
        // presumably llvm::is_contained was intended. Confirm upstream.
        if (llvm::find(FreeStores, S))
          continue;
      // Else, we have a white-list of instructions that we are okay
      // speculating.
      // NOTE(review): the white-list membership check itself was dropped in
      // extraction.
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
  // NOTE(review): the leading guard of this condition (an aggressive-mode
  // flag check, judging by the comment above IsWorthwhile) was dropped in
  // extraction.
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  // NOTE(review): the initializers of PBranch and QBranch (presumably casts
  // of the store blocks' predecessor terminators) were dropped in extraction.
  BranchInst *PBranch =
  BranchInst *QBranch =
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

  // NOTE(review): the definitions of PPHI and QPHI (calls making the stored
  // values available in the successor; QPHI is used below) lost their opening
  // lines in extraction.
      PStore->getParent());
      QStore->getParent(), PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  // If a store lives in the false block of its diamond, invert its condition
  // so that the predicate means "the store executes".
  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;

  // The merged store must run iff either original store would have run.
  Value *CombinedPred = QB.CreateOr(PPred, QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  // NOTE(review): the tail of this condition and the call receiving the
  // combined weights were dropped in extraction.
  if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(*PBranch, PWeights);
    extractBranchWeights(*QBranch, QWeights);
    // Normalize so index 0 means "store executes" for both branches.
    if (InvertPCond)
      std::swap(PWeights[0], PWeights[1]);
    if (InvertQCond)
      std::swap(QWeights[0], QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
                           {CombinedWeights[0], CombinedWeights[1]},
                           /*IsExpected=*/false, /*ElideAllZero=*/true);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
  // Keep only alias metadata that both original stores agree on.
  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}
4481
                                    DomTreeUpdater *DTU, const DataLayout &DL,
                                    const TargetTransformInfo &TTI) {
  // NOTE(review): the opening line of this definition (the function name and
  // the PBI/QBI parameters) was dropped in extraction.
  //
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or      PBI       or a combination of the two
  //    /   \               | \
  //   PTB  PFB             |  PFB
  //    \   /               | /
  //     QBI                QBI
  //    /   \               | \
  //   QTB  QFB             |  QFB
  //    \   /               | /
  //    PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      // NOTE(review): the dyn_cast<StoreInst> guard defining SI on this
      // insert was dropped in extraction.
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      // NOTE(review): same dropped dyn_cast<StoreInst> guard as above.
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  bool Changed = false;
  // Attempt the merge separately for every address stored on both paths.
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4590
4591/// If the previous block ended with a widenable branch, determine if reusing
4592/// the target block is profitable and legal. This will have the effect of
4593/// "widening" PBI, but doesn't require us to reason about hosting safety.
4595 DomTreeUpdater *DTU) {
4596 // TODO: This can be generalized in two important ways:
4597 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4598 // values from the PBI edge.
4599 // 2) We can sink side effecting instructions into BI's fallthrough
4600 // successor provided they doesn't contribute to computation of
4601 // BI's condition.
4602 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4603 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4604 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4605 !BI->getParent()->getSinglePredecessor())
4606 return false;
4607 if (!IfFalseBB->phis().empty())
4608 return false; // TODO
4609 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4610 // may undo the transform done here.
4611 // TODO: There might be a more fine-grained solution to this.
4612 if (!llvm::succ_empty(IfFalseBB))
4613 return false;
4614 // Use lambda to lazily compute expensive condition after cheap ones.
4615 auto NoSideEffects = [](BasicBlock &BB) {
4616 return llvm::none_of(BB, [](const Instruction &I) {
4617 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4618 });
4619 };
4620 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4621 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4622 NoSideEffects(*BI->getParent())) {
4623 auto *OldSuccessor = BI->getSuccessor(1);
4624 OldSuccessor->removePredecessor(BI->getParent());
4625 BI->setSuccessor(1, IfFalseBB);
4626 if (DTU)
4627 DTU->applyUpdates(
4628 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4629 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4630 return true;
4631 }
4632 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4633 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4634 NoSideEffects(*BI->getParent())) {
4635 auto *OldSuccessor = BI->getSuccessor(0);
4636 OldSuccessor->removePredecessor(BI->getParent());
4637 BI->setSuccessor(0, IfFalseBB);
4638 if (DTU)
4639 DTU->applyUpdates(
4640 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4641 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4642 return true;
4643 }
4644 return false;
4645}
4646
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
/// Returns true if any simplification was performed.
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  // NOTE(review): the opening line of this definition (the function name and
  // the PBI/BI parameters) was dropped in extraction.
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  // Determine which successor the two branches share. PBIOp/BIOp index the
  // shared destination in each branch: 0 = true successor, 1 = false.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    // The branches have no destination in common; nothing to fold.
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

    // NOTE(review): the opening of the CommonDestProb definition (a
    // BranchProbability built from these operands) was dropped in extraction.
        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());

  // NOTE(review): the declaration of the DomTree `Updates` vector used below
  // was dropped in extraction.

  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    // "Common" selects the weight of the edge into CommonDest; "Other" the
    // weight of the non-shared edge, for each of the two original branches.
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};

    setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
    // Cond may be a select instruction with the first operand set to "true", or
    // the second to "false" (see how createLogicalOp works for `and` and `or`)
    // NOTE(review): a line (possibly a guard on this block) was dropped in
    // extraction here.
    if (auto *SI = dyn_cast<SelectInst>(Cond)) {
      assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
      // The select is predicated on PBICond
      // The corresponding probabilities are what was referred to above as
      // PredCommon and PredOther.
      setFittedBranchWeights(*SI, {PredCommon, PredOther},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  addPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      // NOTE(review): the opening of the `NV` definition (used two lines
      // below) was dropped in extraction.
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // The select has the same condition as PBI, in the same BB. The
      // probabilities don't change.
      if (HasWeights) {
        uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
        setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4872
4873// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4874// true or to FalseBB if Cond is false.
4875// Takes care of updating the successors and removing the old terminator.
4876// Also makes sure not to introduce new successors by assuming that edges to
4877// non-successor TrueBBs and FalseBBs aren't reachable.
4878bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4879 Value *Cond, BasicBlock *TrueBB,
4880 BasicBlock *FalseBB,
4881 uint32_t TrueWeight,
4882 uint32_t FalseWeight) {
4883 auto *BB = OldTerm->getParent();
4884 // Remove any superfluous successor edges from the CFG.
4885 // First, figure out which successors to preserve.
4886 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4887 // successor.
4888 BasicBlock *KeepEdge1 = TrueBB;
4889 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4890
4891 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4892
4893 // Then remove the rest.
4894 for (BasicBlock *Succ : successors(OldTerm)) {
4895 // Make sure only to keep exactly one copy of each edge.
4896 if (Succ == KeepEdge1)
4897 KeepEdge1 = nullptr;
4898 else if (Succ == KeepEdge2)
4899 KeepEdge2 = nullptr;
4900 else {
4901 Succ->removePredecessor(BB,
4902 /*KeepOneInputPHIs=*/true);
4903
4904 if (Succ != TrueBB && Succ != FalseBB)
4905 RemovedSuccessors.insert(Succ);
4906 }
4907 }
4908
4909 IRBuilder<> Builder(OldTerm);
4910 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4911
4912 // Insert an appropriate new terminator.
4913 if (!KeepEdge1 && !KeepEdge2) {
4914 if (TrueBB == FalseBB) {
4915 // We were only looking for one successor, and it was present.
4916 // Create an unconditional branch to it.
4917 Builder.CreateBr(TrueBB);
4918 } else {
4919 // We found both of the successors we were looking for.
4920 // Create a conditional branch sharing the condition of the select.
4921 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4922 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4923 /*IsExpected=*/false, /*ElideAllZero=*/true);
4924 }
4925 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4926 // Neither of the selected blocks were successors, so this
4927 // terminator must be unreachable.
4928 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4929 } else {
4930 // One of the selected values was a successor, but the other wasn't.
4931 // Insert an unconditional branch to the one that was found;
4932 // the edge to the one that wasn't must be unreachable.
4933 if (!KeepEdge1) {
4934 // Only TrueBB was found.
4935 Builder.CreateBr(TrueBB);
4936 } else {
4937 // Only FalseBB was found.
4938 Builder.CreateBr(FalseBB);
4939 }
4940 }
4941
4943
4944 if (DTU) {
4945 SmallVector<DominatorTree::UpdateType, 2> Updates;
4946 Updates.reserve(RemovedSuccessors.size());
4947 for (auto *RemovedSuccessor : RemovedSuccessors)
4948 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4949 DTU->applyUpdates(Updates);
4950 }
4951
4952 return true;
4953}
4954
4955// Replaces
4956// (switch (select cond, X, Y)) on constant X, Y
4957// with a branch - conditional if X and Y lead to distinct BBs,
4958// unconditional otherwise.
4959bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4960 SelectInst *Select) {
4961 // Check for constant integer values in the select.
4962 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4963 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4964 if (!TrueVal || !FalseVal)
4965 return false;
4966
4967 // Find the relevant condition and destinations.
4968 Value *Condition = Select->getCondition();
4969 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4970 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4971
4972 // Get weight for TrueBB and FalseBB.
4973 uint32_t TrueWeight = 0, FalseWeight = 0;
4974 SmallVector<uint64_t, 8> Weights;
4975 bool HasWeights = hasBranchWeightMD(*SI);
4976 if (HasWeights) {
4977 getBranchWeights(SI, Weights);
4978 if (Weights.size() == 1 + SI->getNumCases()) {
4979 TrueWeight =
4980 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4981 FalseWeight =
4982 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4983 }
4984 }
4985
4986 // Perform the actual simplification.
4987 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4988 FalseWeight);
4989}
4990
// Replaces
//  (indirectbr (select cond, blockaddress(@fn, BlockA),
//                            blockaddress(@fn, BlockB)))
// with
//  (br cond, BlockA, BlockB).
bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
                                                SelectInst *SI) {
  // Check that both operands of the select are block addresses.
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
  if (!TBA || !FBA)
    return false;

  // Extract the actual blocks.
  BasicBlock *TrueBB = TBA->getBasicBlock();
  BasicBlock *FalseBB = FBA->getBasicBlock();

  // The select's profile becomes the profile of the conditional branch that
  // replaces the indirect branch.
  // Zero-initialized, so the branch gets all-zero (elidable) weights if the
  // extraction below does not populate them.
  SmallVector<uint32_t> SelectBranchWeights(2);
  // NOTE(review): the indentation suggests this call was guarded by a
  // condition on a preceding line that was dropped in extraction — confirm
  // against upstream.
    extractBranchWeights(*SI, SelectBranchWeights);
  // Perform the actual simplification.
  return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
                                    SelectBranchWeights[0],
                                    SelectBranchWeights[1]);
}
5018
5019/// This is called when we find an icmp instruction
5020/// (a seteq/setne with a constant) as the only instruction in a
5021/// block that ends with an uncond branch. We are looking for a very specific
5022/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5023/// this case, we merge the first two "or's of icmp" into a switch, but then the
5024/// default value goes to an uncond block with a seteq in it, we get something
5025/// like:
5026///
5027/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5028/// DEFAULT:
5029/// %tmp = icmp eq i8 %A, 92
5030/// br label %end
5031/// end:
5032/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5033///
5034/// We prefer to split the edge to 'end' so that there is a true/false entry to
5035/// the PHI, merging the third icmp into the switch.
5036bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5037 ICmpInst *ICI, IRBuilder<> &Builder) {
5038 // Select == nullptr means we assume that there is a hidden no-op select
5039 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5040 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5041}
5042
5043/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5044/// case. This is called when we find an icmp instruction (a seteq/setne with a
5045/// constant) and its following select instruction as the only TWO instructions
5046/// in a block that ends with an uncond branch. We are looking for a very
5047/// specific pattern that occurs when "
5048/// if (A == 1) return C1;
5049/// if (A == 2) return C2;
5050/// if (A < 3) return C3;
5051/// return C4;
5052/// " gets simplified. In this case, we merge the first two "branches of icmp"
5053/// into a switch, but then the default value goes to an uncond block with a lt
5054/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5055/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5056/// get something like:
5057///
5058/// case1:
5059/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5060/// case2:
5061/// br label %end
5062/// DEFAULT:
5063/// %tmp = icmp eq i8 %A, 2
5064/// %val = select i1 %tmp, i8 C3, i8 C4
5065/// br label %end
5066/// end:
5067/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5068///
5069/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5070/// to the PHI, merging the icmp & select into the switch, as follows:
5071///
5072/// case1:
5073/// switch i8 %A, label %DEFAULT [
5074/// i8 0, label %end
5075/// i8 1, label %case2
5076/// i8 2, label %case3
5077/// ]
5078/// case2:
5079/// br label %end
5080/// case3:
5081/// br label %end
5082/// DEFAULT:
5083/// br label %end
5084/// end:
5085/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
  // too complex.
  /// TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(ICI,
             m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
    return false;

  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after icmp
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  // The predecessor switch must test the very value the icmp compares;
  // otherwise the case value cannot be folded into it.
  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
    Value *V;
    if (Predicate == ICmpInst::ICMP_EQ)
    else

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(User);
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(DefaultCst, NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    if (W0) {
      // Split the old default weight (rounding up) so the new case and the
      // remaining default edge share it.
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(NewCaseVal, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5224
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual) {
    // For a chain of inequalities the destinations swap roles; keep any
    // profile weights consistent with the swap.
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  // N.B. At this point Values is sorted so that front() is the largest
  // constant and back() the smallest, hence the subtraction below.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      // BB already had one incoming entry; only size-1 new ones are needed.
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5392
5393bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5394 if (isa<PHINode>(RI->getValue()))
5395 return simplifyCommonResume(RI);
5396 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5397 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5398 // The resume must unwind the exception that caused control to branch here.
5399 return simplifySingleResume(RI);
5400
5401 return false;
5402}
5403
// Check if cleanup block is "empty": it may contain only debug-info and
// lifetime-end intrinsics, which carry no cleanup semantics.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    // Any non-intrinsic instruction makes the range non-empty.
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      // Benign; keep scanning.
      break;
    default:
      return false;
    }
  }
  return true;
}
5424
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // Non-emptiness was established above, so this always reports a change.
  return !TrivialUnwindBlocks.empty();
}
5494
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5517
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush pending updates before removeUnwindEdge mutates the CFG so the
        // updater never sees a stale edge.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5633
// Try to merge two cleanuppads together.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5666
5667bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5668 // It is possible to transiantly have an undef cleanuppad operand because we
5669 // have deleted some, but not all, dead blocks.
5670 // Eventually, this block will be deleted.
5671 if (isa<UndefValue>(RI->getOperand(0)))
5672 return false;
5673
5674 if (mergeCleanupPad(RI))
5675 return true;
5676
5677 if (removeEmptyCleanup(RI, DTU))
5678 return true;
5679
5680 return false;
5681}
5682
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Rewrite every predecessor's terminator so it no longer transfers control
  // into this unreachable block.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        if (BI->getSuccessor(0) == BB) {
          // The true edge is dead: record that the condition must be false and
          // branch unconditionally to the surviving successor.
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        // removeCase invalidates iterators; refresh the end iterator.
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The invoke's unwind path was unreachable, so the resulting call
        // cannot throw; mark it nounwind.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5866
5875
static std::optional<ContiguousCasesResult>
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  // Cases is sorted with front() holding the largest value and back() the
  // smallest; the set is contiguous exactly when max - min == size - 1.
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Walk the descending-sorted list to find where contiguity breaks.
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    // Verify the two descending runs together account for all case values.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5928
                                          DomTreeUpdater *DTU,
                                          bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  // Create a fresh block containing only an 'unreachable' and make it the
  // switch's new default destination.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only record the old default edge as deleted if no case edge still
    // targets the original default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5952
// NOTE(review): this doxygen-extracted listing dropped several original lines
// in this function: the CasesA/CasesB small-vector declarations (orig. lines
// 5966-5967), the `Offset` constant derived from Min (orig. line 6033), and
// the default-block cleanup call guarded by `if (!HasDefault)` (orig. line
// 6098). Reconcile with upstream SimplifyCFG.cpp before editing.
5953 /// Turn a switch into an integer range comparison and branch.
5954 /// Switches with more than 2 destinations are ignored.
5955 /// Switches with 1 destination are also ignored.
5956 bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5957 IRBuilder<> &Builder) {
5958 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5959
// The default destination counts as a real destination only when it is
// reachable.
5960 bool HasDefault = !SI->defaultDestUnreachable();
5961
5962 auto *BB = SI->getParent();
5963 // Partition the cases into two sets with different destinations.
5964 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5965 BasicBlock *DestB = nullptr;
5968
5969 for (auto Case : SI->cases()) {
5970 BasicBlock *Dest = Case.getCaseSuccessor();
5971 if (!DestA)
5972 DestA = Dest;
5973 if (Dest == DestA) {
5974 CasesA.push_back(Case.getCaseValue());
5975 continue;
5976 }
5977 if (!DestB)
5978 DestB = Dest;
5979 if (Dest == DestB) {
5980 CasesB.push_back(Case.getCaseValue());
5981 continue;
5982 }
5983 return false; // More than two destinations.
5984 }
5985 if (!DestB)
5986 return false; // All destinations are the same and the default is unreachable
5987
5988 assert(DestA && DestB &&
5989 "Single-destination switch should have been folded.");
5990 assert(DestA != DestB);
5991 assert(DestB != SI->getDefaultDest());
5992 assert(!CasesB.empty() && "There must be non-default cases.");
5993 assert(!CasesA.empty() || HasDefault);
5994
5995 // Figure out if one of the sets of cases form a contiguous range.
5996 std::optional<ContiguousCasesResult> ContiguousCases;
5997
5998 // Only one icmp is needed when there is only one case.
5999 if (!HasDefault && CasesA.size() == 1)
6000 ContiguousCases = ContiguousCasesResult{
6001 /*Min=*/CasesA[0],
6002 /*Max=*/CasesA[0],
6003 /*Dest=*/DestA,
6004 /*OtherDest=*/DestB,
6005 /*Cases=*/&CasesA,
6006 /*OtherCases=*/&CasesB,
6007 };
6008 else if (CasesB.size() == 1)
6009 ContiguousCases = ContiguousCasesResult{
6010 /*Min=*/CasesB[0],
6011 /*Max=*/CasesB[0],
6012 /*Dest=*/DestB,
6013 /*OtherDest=*/DestA,
6014 /*Cases=*/&CasesB,
6015 /*OtherCases=*/&CasesA,
6016 };
6017 // Correctness: Cases to the default destination cannot be contiguous cases.
6018 else if (!HasDefault)
6019 ContiguousCases =
6020 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6021
6022 if (!ContiguousCases)
6023 ContiguousCases =
6024 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6025
6026 if (!ContiguousCases)
6027 return false;
6028
6029 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6030
6031 // Start building the compare and branch.
6032
6034 Constant *NumCases = ConstantInt::get(Offset->getType(),
6035 Max->getValue() - Min->getValue() + 1);
6036 BranchInst *NewBI;
// A range of exactly one value degenerates to a plain equality compare.
6037 if (NumCases->isOneValue()) {
6038 assert(Max->getValue() == Min->getValue());
6039 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6040 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6041 }
6042 // If NumCases overflowed, then all possible values jump to the successor.
6043 else if (NumCases->isNullValue() && !Cases->empty()) {
6044 NewBI = Builder.CreateBr(Dest);
6045 } else {
// General case: bias the condition so the contiguous range starts at zero,
// then a single unsigned compare tests range membership.
6046 Value *Sub = SI->getCondition();
6047 if (!Offset->isNullValue())
6048 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6049 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6050 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6051 }
6052
6053 // Update weight for the newly-created conditional branch.
6054 if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
6055 SmallVector<uint64_t, 8> Weights;
6056 getBranchWeights(SI, Weights);
6057 if (Weights.size() == 1 + SI->getNumCases()) {
6058 uint64_t TrueWeight = 0;
6059 uint64_t FalseWeight = 0;
6060 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6061 if (SI->getSuccessor(I) == Dest)
6062 TrueWeight += Weights[I];
6063 else
6064 FalseWeight += Weights[I];
6065 }
// Halve both weights together until each fits in 32 bits; this preserves
// their ratio.
6066 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6067 TrueWeight /= 2;
6068 FalseWeight /= 2;
6069 }
6070 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6071 /*IsExpected=*/false, /*ElideAllZero=*/true);
6072 }
6073 }
6074
6075 // Prune obsolete incoming values off the successors' PHI nodes.
// Each successor previously had one PHI entry per case edge (plus one for
// the default edge, if it pointed there); exactly one entry is kept for the
// single surviving edge from the new branch.
6076 for (auto &PHI : make_early_inc_range(Dest->phis())) {
6077 unsigned PreviousEdges = Cases->size();
6078 if (Dest == SI->getDefaultDest())
6079 ++PreviousEdges;
6080 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6081 PHI.removeIncomingValue(SI->getParent());
6082 }
6083 for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6084 unsigned PreviousEdges = OtherCases->size();
6085 if (OtherDest == SI->getDefaultDest())
6086 ++PreviousEdges;
6087 unsigned E = PreviousEdges - 1;
6088 // Remove all incoming values from OtherDest if OtherDest is unreachable.
6089 if (NewBI->isUnconditional())
6090 ++E;
6091 for (unsigned I = 0; I != E; ++I)
6092 PHI.removeIncomingValue(SI->getParent());
6093 }
6094
6095 // Clean up the default block - it may have phis or other instructions before
6096 // the unreachable terminator.
6097 if (!HasDefault)
6099
6100 auto *UnreachableDefault = SI->getDefaultDest();
6101
6102 // Drop the switch.
6103 SI->eraseFromParent();
6104
6105 if (!HasDefault && DTU)
6106 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6107
6108 return true;
6109 }
6110
// NOTE(review): the listing dropped the function signature head (orig. line
// 6113, `static bool eliminateDeadSwitchCases(...)`), the KnownValues set
// declaration (6118), the MaxSignificantBitsInCond initializer (6125), the
// DeadCases vector declaration (6128), the unreachable-default calls at
// 6163/6170/6191, and the SwitchInstProfUpdateWrapper declarations near
// 6188/6202. Reconcile with upstream before editing.
6111 /// Compute masked bits for the condition of a switch
6112 /// and use it to remove dead cases.
6114 AssumptionCache *AC,
6115 const DataLayout &DL) {
6116 Value *Cond = SI->getCondition();
6117 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
6119 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6120
6121 // We can also eliminate cases by determining that their values are outside of
6122 // the limited range of the condition based on how many significant (non-sign)
6123 // bits are in the condition value.
6124 unsigned MaxSignificantBitsInCond =
6126
6127 // Gather dead cases.
6129 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6130 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6131 for (const auto &Case : SI->cases()) {
6132 auto *Successor = Case.getCaseSuccessor();
// Track, per successor, how many case edges point at it so we can tell
// below whether removing dead cases disconnects that successor entirely.
6133 if (DTU) {
6134 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6135 if (Inserted)
6136 UniqueSuccessors.push_back(Successor);
6137 ++It->second;
6138 }
6139 ConstantInt *CaseC = Case.getCaseValue();
6140 const APInt &CaseVal = CaseC->getValue();
// A case is dead if it conflicts with a known-zero/known-one bit, exceeds
// the condition's significant-bit range, or is absent from the enumerated
// set of possible values.
6141 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6142 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6143 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6144 DeadCases.push_back(CaseC);
6145 if (DTU)
6146 --NumPerSuccessorCases[Successor];
6147 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6148 << " is dead.\n");
6149 } else if (IsKnownValuesValid)
6150 KnownValues.erase(CaseC);
6151 }
6152
6153 // If we can prove that the cases must cover all possible values, the
6154 // default destination becomes dead and we can remove it. If we know some
6155 // of the bits in the value, we can use that to more precisely compute the
6156 // number of possible unique case values.
6157 bool HasDefault = !SI->defaultDestUnreachable();
6158 const unsigned NumUnknownBits =
6159 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6160 assert(NumUnknownBits <= Known.getBitWidth());
6161 if (HasDefault && DeadCases.empty()) {
6162 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
6164 return true;
6165 }
6166
6167 if (NumUnknownBits < 64 /* avoid overflow */) {
6168 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6169 if (SI->getNumCases() == AllNumCases) {
6171 return true;
6172 }
6173 // When only one case value is missing, replace default with that case.
6174 // Eliminating the default branch will provide more opportunities for
6175 // optimization, such as lookup tables.
6176 if (SI->getNumCases() == AllNumCases - 1) {
6177 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6178 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6179 if (CondTy->getIntegerBitWidth() > 64 ||
6180 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6181 return false;
6182
// XOR-folding all present case values yields the single missing value:
// the XOR of a complete 2^k value set is zero for k > 1 (hence the
// NumUnknownBits > 1 assert above).
6183 uint64_t MissingCaseVal = 0;
6184 for (const auto &Case : SI->cases())
6185 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6186 auto *MissingCase = cast<ConstantInt>(
6187 ConstantInt::get(Cond->getType(), MissingCaseVal));
6189 SIW.addCase(MissingCase, SI->getDefaultDest(),
6190 SIW.getSuccessorWeight(0));
6192 /*RemoveOrigDefaultBlock*/ false);
6193 SIW.setSuccessorWeight(0, 0);
6194 return true;
6195 }
6196 }
6197 }
6198
6199 if (DeadCases.empty())
6200 return false;
6202
6203 for (ConstantInt *DeadCase : DeadCases) {
6204 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6205 assert(CaseI != SI->case_default() &&
6206 "Case was not found. Probably mistake in DeadCases forming.");
6207 // Prune unused values from PHI nodes.
6208 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6209 SIW.removeCase(CaseI);
6210 }
6211
// Tell the dominator tree about successors that lost their last case edge.
6212 if (DTU) {
6213 std::vector<DominatorTree::UpdateType> Updates;
6214 for (auto *Successor : UniqueSuccessors)
6215 if (NumPerSuccessorCases[Successor] == 0)
6216 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6217 DTU->applyUpdates(Updates);
6218 }
6219
6220 return true;
6221 }
6222
// NOTE(review): the listing dropped the signature head (orig. line 6228,
// `static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,`)
// and the terminator-branch declaration at orig. line 6235. Reconcile with
// upstream before editing.
6223 /// If BB would be eligible for simplification by
6224 /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6225 /// by an unconditional branch), look at the phi node for BB in the successor
6226 /// block and see if the incoming value is equal to CaseValue. If so, return
6227 /// the phi node, and set PhiIndex to BB's index in the phi node.
6229 BasicBlock *BB, int *PhiIndex) {
6230 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6231 return nullptr; // BB must be empty to be a candidate for simplification.
6232 if (!BB->getSinglePredecessor())
6233 return nullptr; // BB must be dominated by the switch.
6234
6236 if (!Branch || !Branch->isUnconditional())
6237 return nullptr; // Terminator must be unconditional branch.
6238
6239 BasicBlock *Succ = Branch->getSuccessor(0);
6240
// Scan the successor's PHIs for one whose incoming value along the BB edge
// is exactly the case constant; that entry can later be rewritten to the
// switch condition.
6241 for (PHINode &PHI : Succ->phis()) {
6242 int Idx = PHI.getBasicBlockIndex(BB);
6243 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6244
6245 Value *InValue = PHI.getIncomingValue(Idx);
6246 if (InValue != CaseValue)
6247 continue;
6248
6249 *PhiIndex = Idx;
6250 return &PHI;
6251 }
6252
6253 return nullptr;
6254 }
6255
// NOTE(review): the listing dropped the signature line (orig. line 6259,
// `static bool forwardSwitchConditionToPHI(SwitchInst *SI) {`). Reconcile
// with upstream before editing.
6256 /// Try to forward the condition of a switch instruction to a phi node
6257 /// dominated by the switch, if that would mean that some of the destination
6258 /// blocks of the switch can be folded away. Return true if a change is made.
6260 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6261
6262 ForwardingNodesMap ForwardingNodes;
6263 BasicBlock *SwitchBlock = SI->getParent();
6264 bool Changed = false;
6265 for (const auto &Case : SI->cases()) {
6266 ConstantInt *CaseValue = Case.getCaseValue();
6267 BasicBlock *CaseDest = Case.getCaseSuccessor();
6268
6269 // Replace phi operands in successor blocks that are using the constant case
6270 // value rather than the switch condition variable:
6271 // switchbb:
6272 // switch i32 %x, label %default [
6273 // i32 17, label %succ
6274 // ...
6275 // succ:
6276 // %r = phi i32 ... [ 17, %switchbb ] ...
6277 // -->
6278 // %r = phi i32 ... [ %x, %switchbb ] ...
6279
6280 for (PHINode &Phi : CaseDest->phis()) {
6281 // This only works if there is exactly 1 incoming edge from the switch to
6282 // a phi. If there is >1, that means multiple cases of the switch map to 1
6283 // value in the phi, and that phi value is not the switch condition. Thus,
6284 // this transform would not make sense (the phi would be invalid because
6285 // a phi can't have different incoming values from the same block).
6286 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6287 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6288 count(Phi.blocks(), SwitchBlock) == 1) {
6289 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6290 Changed = true;
6291 }
6292 }
6293
6294 // Collect phi nodes that are indirectly using this switch's case constants.
6295 int PhiIdx;
6296 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6297 ForwardingNodes[Phi].push_back(PhiIdx);
6298 }
6299
6300 for (auto &ForwardingNode : ForwardingNodes) {
6301 PHINode *Phi = ForwardingNode.first;
6302 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6303 // Check if it helps to fold PHI.
// Forwarding a single entry is only profitable if the condition already
// appears among the phi's incoming values (so entries can merge).
6304 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6305 continue;
6306
6307 for (int Index : Indexes)
6308 Phi->setIncomingValue(Index, SI->getCondition());
6309 Changed = true;
6310 }
6311
6312 return Changed;
6313 }
6314
// NOTE(review): the listing dropped the signature (orig. line 6317,
// `static bool validLookupTableConstant(Constant *C, const
// TargetTransformInfo &TTI) {`), the remaining isa<> operands of the type
// check (orig. lines 6324-6325), and the ConstantExpr dyn_cast head (orig.
// line 6328). Reconcile with upstream before editing.
6315 /// Return true if the backend will be able to handle
6316 /// initializing an array of constants like C.
// Thread-local or DLL-import-dependent constants cannot be emitted into a
// static table.
6318 if (C->isThreadDependent())
6319 return false;
6320 if (C->isDLLImportDependent())
6321 return false;
6322
6323 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6326 return false;
6327
6329 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6330 // materializing the array of constants.
6331 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6332 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6333 return false;
6334 }
6335
// Final say belongs to the target: some targets veto lookup tables for
// particular constants.
6336 if (!TTI.shouldBuildLookupTablesForConstant(C))
6337 return false;
6338
6339 return true;
6340 }
6341
// NOTE(review): the listing dropped the rest of the signature (orig. lines
// 6345-6346, naming parameters `Value *V` and the ConstantPool map).
// Reconcile with upstream before editing.
6342 /// If V is a Constant, return it. Otherwise, try to look up
6343 /// its constant value in ConstantPool, returning 0 if it's not there.
6344 static Constant *
// Literal constants win; otherwise fall back to values previously folded
// into the pool (lookup returns null on a miss).
6347 if (Constant *C = dyn_cast<Constant>(V))
6348 return C;
6349 return ConstantPool.lookup(V);
6350 }
6351
// NOTE(review): the listing dropped the signature and the SelectInst
// dispatch head (orig. lines 6357-6359) and the COps vector declaration
// (orig. line 6370). Reconcile with upstream before editing.
6352 /// Try to fold instruction I into a constant. This works for
6353 /// simple instructions such as binary operations where both operands are
6354 /// constant or can be replaced by constants from the ConstantPool. Returns the
6355 /// resulting constant on success, 0 otherwise.
6356 static Constant *
// Selects are handled specially: resolve the condition through the pool,
// then return whichever arm it picks (also resolved through the pool).
6360 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6361 if (!A)
6362 return nullptr;
6363 if (A->isAllOnesValue())
6364 return lookupConstant(Select->getTrueValue(), ConstantPool);
6365 if (A->isNullValue())
6366 return lookupConstant(Select->getFalseValue(), ConstantPool);
6367 return nullptr;
6368 }
6369
// General path: every operand must resolve to a constant, then delegate to
// the target-independent constant folder.
6371 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6372 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6373 COps.push_back(A);
6374 else
6375 return nullptr;
6376 }
6377
6378 return ConstantFoldInstOperands(I, COps, DL);
6379 }
6380
// NOTE(review): the listing dropped the parameter line naming the function
// (orig. line 6386, `getCaseResults(SwitchInst *SI, ConstantInt *CaseVal,
// BasicBlock *CaseDest,`), the ConstantPool declaration (orig. line 6395),
// and the user-to-instruction cast line (orig. line 6412). Reconcile with
// upstream before editing.
6381 /// Try to determine the resulting constant values in phi nodes
6382 /// at the common destination basic block, *CommonDest, for one of the case
6383 /// destionations CaseDest corresponding to value CaseVal (0 for the default
6384 /// case), of a switch instruction SI.
6385 static bool
6387 BasicBlock **CommonDest,
6388 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6389 const DataLayout &DL, const TargetTransformInfo &TTI) {
6390 // The block from which we enter the common destination.
6391 BasicBlock *Pred = SI->getParent();
6392
6393 // If CaseDest is empty except for some side-effect free instructions through
6394 // which we can constant-propagate the CaseVal, continue to its successor.
// Seed the pool: within this case, the switch condition is known to equal
// the case value.
6396 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6397 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6398 if (I.isTerminator()) {
6399 // If the terminator is a simple branch, continue to the next block.
6400 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6401 return false;
6402 Pred = CaseDest;
6403 CaseDest = I.getSuccessor(0);
6404 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6405 // Instruction is side-effect free and constant.
6406
6407 // If the instruction has uses outside this block or a phi node slot for
6408 // the block, it is not safe to bypass the instruction since it would then
6409 // no longer dominate all its uses.
6410 for (auto &Use : I.uses()) {
6411 User *User = Use.getUser();
6413 if (I->getParent() == CaseDest)
6414 continue;
6415 if (PHINode *Phi = dyn_cast<PHINode>(User))
6416 if (Phi->getIncomingBlock(Use) == CaseDest)
6417 continue;
6418 return false;
6419 }
6420
6421 ConstantPool.insert(std::make_pair(&I, C));
6422 } else {
6423 break;
6424 }
6425 }
6426
6427 // If we did not have a CommonDest before, use the current one.
6428 if (!*CommonDest)
6429 *CommonDest = CaseDest;
6430 // If the destination isn't the common one, abort.
6431 if (CaseDest != *CommonDest)
6432 return false;
6433
6434 // Get the values for this case from phi nodes in the destination block.
6435 for (PHINode &PHI : (*CommonDest)->phis()) {
6436 int Idx = PHI.getBasicBlockIndex(Pred);
6437 if (Idx == -1)
6438 continue;
6439
6440 Constant *ConstVal =
6441 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6442 if (!ConstVal)
6443 return false;
6444
6445 // Be conservative about which kinds of constants we support.
6446 if (!validLookupTableConstant(ConstVal, TTI))
6447 return false;
6448
6449 Res.push_back(std::make_pair(&PHI, ConstVal));
6450 }
6451
// Success requires at least one (phi, constant) pair.
6452 return Res.size() > 0;
6453 }
6454
6455// Helper function used to add CaseVal to the list of cases that generate
6456// Result. Returns the updated number of cases that generate this result.
6457static size_t mapCaseToResult(ConstantInt *CaseVal,
6458 SwitchCaseResultVectorTy &UniqueResults,
6459 Constant *Result) {
6460 for (auto &I : UniqueResults) {
6461 if (I.first == Result) {
6462 I.second.push_back(CaseVal);
6463 return I.second.size();
6464 }
6465 }
6466 UniqueResults.push_back(
6467 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6468 return 1;
6469}
6470
// NOTE(review): the listing dropped the signature head (orig. line 6475,
// `static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,`) and
// the DefaultResults vector declaration (orig. line 6514). Reconcile with
// upstream before editing.
6471 // Helper function that initializes a map containing
6472 // results for the PHI node of the common destination block for a switch
6473 // instruction. Returns false if multiple PHI nodes have been found or if
6474 // there is not a common destination block for the switch.
6476 BasicBlock *&CommonDest,
6477 SwitchCaseResultVectorTy &UniqueResults,
6478 Constant *&DefaultResult,
6479 const DataLayout &DL,
6480 const TargetTransformInfo &TTI,
6481 uintptr_t MaxUniqueResults) {
6482 for (const auto &I : SI->cases()) {
6483 ConstantInt *CaseVal = I.getCaseValue();
6484
6485 // Resulting value at phi nodes for this case value.
6486 SwitchCaseResultsTy Results;
6487 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6488 DL, TTI))
6489 return false;
6490
6491 // Only one value per case is permitted.
6492 if (Results.size() > 1)
6493 return false;
6494
6495 // Add the case->result mapping to UniqueResults.
6496 const size_t NumCasesForResult =
6497 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6498
6499 // Early out if there are too many cases for this result.
6500 if (NumCasesForResult > MaxSwitchCasesPerResult)
6501 return false;
6502
6503 // Early out if there are too many unique results.
6504 if (UniqueResults.size() > MaxUniqueResults)
6505 return false;
6506
6507 // Check the PHI consistency.
// All cases must feed the same single PHI node.
6508 if (!PHI)
6509 PHI = Results[0].first;
6510 else if (PHI != Results[0].first)
6511 return false;
6512 }
6513 // Find the default result value.
// A null CaseVal marks the default destination for getCaseResults.
6515 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6516 DL, TTI);
6517 // If the default value is not found abort unless the default destination
6518 // is unreachable.
6519 DefaultResult =
6520 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6521
6522 return DefaultResult || SI->defaultDestUnreachable();
6523 }
6524
// NOTE(review): the listing dropped the `setFittedBranchWeights(` call heads
// preceding each weight-setting argument list (orig. lines 6563, 6580, 6627,
// 6653 and 6673). Reconcile with upstream before editing.
6525 // Helper function that checks if it is possible to transform a switch with only
6526 // two cases (or two cases + default) that produces a result into a select.
6527 // TODO: Handle switches with more than 2 cases that map to the same result.
6528 // The branch weights correspond to the provided Condition (i.e. if Condition is
6529 // modified from the original SwitchInst, the caller must adjust the weights)
6530 static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6531 Constant *DefaultResult, Value *Condition,
6532 IRBuilder<> &Builder, const DataLayout &DL,
6533 ArrayRef<uint32_t> BranchWeights) {
6534 // If we are selecting between only two cases transform into a simple
6535 // select or a two-way select if default is possible.
6536 // Example:
6537 // switch (a) { %0 = icmp eq i32 %a, 10
6538 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6539 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6540 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6541 // }
6542
6543 const bool HasBranchWeights =
6544 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6545
6546 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6547 ResultVector[1].second.size() == 1) {
6548 ConstantInt *FirstCase = ResultVector[0].second[0];
6549 ConstantInt *SecondCase = ResultVector[1].second[0];
6550 Value *SelectValue = ResultVector[1].first;
6551 if (DefaultResult) {
6552 Value *ValueCompare =
6553 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6554 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6555 DefaultResult, "switch.select");
6556 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6557 SI && HasBranchWeights) {
6558 // We start with 3 probabilities, where the numerator is the
6559 // corresponding BranchWeights[i], and the denominator is the sum over
6560 // BranchWeights. We want the probability and negative probability of
6561 // Condition == SecondCase.
6562 assert(BranchWeights.size() == 3);
6564 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6565 /*IsExpected=*/false, /*ElideAllZero=*/true);
6566 }
6567 }
6568 Value *ValueCompare =
6569 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6570 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6571 SelectValue, "switch.select");
6572 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6573 // We may have had a DefaultResult. Base the position of the first and
6574 // second's branch weights accordingly. Also the proability that Condition
6575 // != FirstCase needs to take that into account.
6576 assert(BranchWeights.size() >= 2);
6577 size_t FirstCasePos = (Condition != nullptr);
6578 size_t SecondCasePos = FirstCasePos + 1;
6579 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6581 {BranchWeights[FirstCasePos],
6582 DefaultCase + BranchWeights[SecondCasePos]},
6583 /*IsExpected=*/false, /*ElideAllZero=*/true);
6584 }
6585 return Ret;
6586 }
6587
6588 // Handle the degenerate case where two cases have the same result value.
6589 if (ResultVector.size() == 1 && DefaultResult) {
6590 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6591 unsigned CaseCount = CaseValues.size();
6592 // n bits group cases map to the same result:
6593 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6594 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6595 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6596 if (isPowerOf2_32(CaseCount)) {
6597 ConstantInt *MinCaseVal = CaseValues[0];
6598 // If there are bits that are set exclusively by CaseValues, we
6599 // can transform the switch into a select if the conjunction of
6600 // all the values uniquely identify CaseValues.
6601 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6602
6603 // Find the minimum value and compute the and of all the case values.
6604 for (auto *Case : CaseValues) {
6605 if (Case->getValue().slt(MinCaseVal->getValue()))
6606 MinCaseVal = Case;
6607 AndMask &= Case->getValue();
6608 }
6609 KnownBits Known = computeKnownBits(Condition, DL);
6610
6611 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6612 // Compute the number of bits that are free to vary.
6613 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6614
6615 // Check if the number of values covered by the mask is equal
6616 // to the number of cases.
6617 if (FreeBits == Log2_32(CaseCount)) {
6618 Value *And = Builder.CreateAnd(Condition, AndMask);
6619 Value *Cmp = Builder.CreateICmpEQ(
6620 And, Constant::getIntegerValue(And->getType(), AndMask));
6621 Value *Ret =
6622 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6623 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6624 // We know there's a Default case. We base the resulting branch
6625 // weights off its probability.
6626 assert(BranchWeights.size() >= 2);
6628 *SI,
6629 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6630 /*IsExpected=*/false, /*ElideAllZero=*/true);
6631 }
6632 return Ret;
6633 }
6634 }
6635
6636 // Mark the bits case number touched.
6637 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6638 for (auto *Case : CaseValues)
6639 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6640
6641 // Check if cases with the same result can cover all number
6642 // in touched bits.
6643 if (BitMask.popcount() == Log2_32(CaseCount)) {
// Rebase the condition so the smallest case value maps to zero, then
// test that no bit outside the touched mask is set.
6644 if (!MinCaseVal->isNullValue())
6645 Condition = Builder.CreateSub(Condition, MinCaseVal);
6646 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6647 Value *Cmp = Builder.CreateICmpEQ(
6648 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6649 Value *Ret =
6650 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6651 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6652 assert(BranchWeights.size() >= 2);
6654 *SI,
6655 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6656 /*IsExpected=*/false, /*ElideAllZero=*/true);
6657 }
6658 return Ret;
6659 }
6660 }
6661
6662 // Handle the degenerate case where two cases have the same value.
6663 if (CaseValues.size() == 2) {
6664 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6665 "switch.selectcmp.case1");
6666 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6667 "switch.selectcmp.case2");
6668 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6669 Value *Ret =
6670 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6671 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6672 assert(BranchWeights.size() >= 2);
6674 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6675 /*IsExpected=*/false, /*ElideAllZero=*/true);
6676 }
6677 return Ret;
6678 }
6679 }
6680
// No profitable select form was found.
6681 return nullptr;
6682 }
6683
// NOTE(review): the listing dropped the signature head (orig. line 6686,
// `static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,`).
// Reconcile with upstream before editing.
6684 // Helper function to cleanup a switch instruction that has been converted into
6685 // a select, fixing up PHI nodes and basic blocks.
6687 Value *SelectValue,
6688 IRBuilder<> &Builder,
6689 DomTreeUpdater *DTU) {
6690 std::vector<DominatorTree::UpdateType> Updates;
6691
6692 BasicBlock *SelectBB = SI->getParent();
6693 BasicBlock *DestBB = PHI->getParent();
6694
// The new unconditional branch replaces all switch edges with a single
// SelectBB -> DestBB edge; record its insertion only if it did not exist.
6695 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6696 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6697 Builder.CreateBr(DestBB);
6698
6699 // Remove the switch.
6700
// Collapse all of SelectBB's incoming entries on the PHI into one entry
// carrying the computed select value.
6701 PHI->removeIncomingValueIf(
6702 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6703 PHI->addIncoming(SelectValue, SelectBB);
6704
6705 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6706 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6707 BasicBlock *Succ = SI->getSuccessor(i);
6708
6709 if (Succ == DestBB)
6710 continue;
6711 Succ->removePredecessor(SelectBB);
// Deduplicate DT updates: a successor may be reached by several cases.
6712 if (DTU && RemovedSuccessors.insert(Succ).second)
6713 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6714 }
6715 SI->eraseFromParent();
6716 if (DTU)
6717 DTU->applyUpdates(Updates);
6718 }
6719
// NOTE(review): the listing dropped the signature head (orig. line 6723,
// `static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,`)
// and the branch-weight extraction lines around orig. lines 6739 and 6741.
// Reconcile with upstream before editing.
6720 /// If a switch is only used to initialize one or more phi nodes in a common
6721 /// successor block with only two different constant values, try to replace the
6722 /// switch with a select. Returns true if the fold was made.
6724 DomTreeUpdater *DTU, const DataLayout &DL,
6725 const TargetTransformInfo &TTI) {
6726 Value *const Cond = SI->getCondition();
6727 PHINode *PHI = nullptr;
6728 BasicBlock *CommonDest = nullptr;
6729 Constant *DefaultResult;
6730 SwitchCaseResultVectorTy UniqueResults;
6731 // Collect all the cases that will deliver the same value from the switch.
// Two unique results at most — that is what a (chain of) select can encode.
6732 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6733 DL, TTI, /*MaxUniqueResults*/ 2))
6734 return false;
6735
6736 assert(PHI != nullptr && "PHI for value select not found");
6737 Builder.SetInsertPoint(SI);
6738 SmallVector<uint32_t, 4> BranchWeights;
6740 [[maybe_unused]] auto HasWeights =
6742 assert(!HasWeights == (BranchWeights.empty()));
6743 }
6744 assert(BranchWeights.empty() ||
6745 (BranchWeights.size() >=
6746 UniqueResults.size() + (DefaultResult != nullptr)));
6747
6748 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6749 Builder, DL, BranchWeights);
6750 if (!SelectValue)
6751 return false;
6752
6753 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6754 return true;
6755 }
6756
6757 namespace {
6758
6759 /// This class finds alternatives for switches to ultimately
6760 /// replace the switch.
// NOTE(review): only the declaration is visible here; the constructor (which
// selects Kind from the case values) and the member functions are defined
// further down in the file.
6761 class SwitchReplacement {
6762 public:
6763 /// Create a helper for optimizations to use as a switch replacement.
6764 /// Find a better representation for the content of Values,
6765 /// using DefaultValue to fill any holes in the table.
6766 SwitchReplacement(
6767 Module &M, uint64_t TableSize, ConstantInt *Offset,
6768 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6769 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6770
6771 /// Build instructions with Builder to retrieve values using Index
6772 /// and replace the switch.
6773 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6774 Function *Func);
6775
6776 /// Return true if a table with TableSize elements of
6777 /// type ElementType would fit in a target-legal register.
6778 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6779 Type *ElementType);
6780
6781 /// Return the default value of the switch.
6782 Constant *getDefaultValue();
6783
6784 /// Return true if the replacement is a lookup table.
6785 bool isLookupTable();
6786
6787 /// Return true if the replacement is a bit map.
6788 bool isBitMap();
6789
6790 private:
6791 // Depending on the switch, there are different alternatives.
6792 enum {
6793 // For switches where each case contains the same value, we just have to
6794 // store that single value and return it for each lookup.
6795 SingleValueKind,
6796
6797 // For switches where there is a linear relationship between table index
6798 // and values. We calculate the result with a simple multiplication
6799 // and addition instead of a table lookup.
6800 LinearMapKind,
6801
6802 // For small tables with integer elements, we can pack them into a bitmap
6803 // that fits into a target-legal register. Values are retrieved by
6804 // shift and mask operations.
6805 BitMapKind,
6806
6807 // The table is stored as an array of values. Values are retrieved by load
6808 // instructions from the table.
6809 LookupTableKind
6810 } Kind;
6811
6812 // The default value of the switch.
6813 Constant *DefaultValue;
6814
6815 // The type of the output values.
6816 Type *ValueType;
6817
6818 // For SingleValueKind, this is the single value.
6819 Constant *SingleValue = nullptr;
6820
6821 // For BitMapKind, this is the bitmap.
6822 ConstantInt *BitMap = nullptr;
6823 IntegerType *BitMapElementTy = nullptr;
6824
6825 // For LinearMapKind, these are the constants used to derive the value.
6826 ConstantInt *LinearOffset = nullptr;
6827 ConstantInt *LinearMultiplier = nullptr;
// Whether the linear map arithmetic may wrap; when false, nsw can be
// attached to the generated Add/Mul (see the constructor below).
6828 bool LinearMapValWrapped = false;
6829
6830 // For LookupTableKind, this is the table.
6831 Constant *Initializer = nullptr;
6832 };
6833
6834 } // end anonymous namespace
6835
// Analyze a switch's (case value, result) pairs and select the cheapest
// replacement strategy — single value, linear map, packed bitmap, or an
// in-memory lookup table — recording the constants needed to emit it later.
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Slot index relative to the smallest case value (Offset).
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Poison entries don't invalidate a single-value table; any other
    // differing result does.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  // NOTE(review): a line is missing from this rendering here; the extra
  // closing brace below suggests this section is guarded, presumably by
  // `if (isa<IntegerType>(ValueType)) {` — confirm against upstream.
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is an poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        // Record whether the sequence ever moves against the sign of the
        // common step; that forbids the nsw flags below.
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      // Note: this local `M` shadows the Module parameter `M` above.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    // NOTE(review): a declaration line is missing from this rendering here;
    // `IT` below is presumably
    // `IntegerType *IT = cast<IntegerType>(ValueType);` — confirm upstream.
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6965
6966Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6967 const DataLayout &DL, Function *Func) {
6968 switch (Kind) {
6969 case SingleValueKind:
6970 return SingleValue;
6971 case LinearMapKind: {
6972 ++NumLinearMaps;
6973 // Derive the result value from the input value.
6974 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6975 false, "switch.idx.cast");
6976 if (!LinearMultiplier->isOne())
6977 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6978 /*HasNUW = */ false,
6979 /*HasNSW = */ !LinearMapValWrapped);
6980
6981 if (!LinearOffset->isZero())
6982 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6983 /*HasNUW = */ false,
6984 /*HasNSW = */ !LinearMapValWrapped);
6985 return Result;
6986 }
6987 case BitMapKind: {
6988 ++NumBitMaps;
6989 // Type of the bitmap (e.g. i59).
6990 IntegerType *MapTy = BitMap->getIntegerType();
6991
6992 // Cast Index to the same type as the bitmap.
6993 // Note: The Index is <= the number of elements in the table, so
6994 // truncating it to the width of the bitmask is safe.
6995 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6996
6997 // Multiply the shift amount by the element width. NUW/NSW can always be
6998 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6999 // BitMap's bit width.
7000 ShiftAmt = Builder.CreateMul(
7001 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
7002 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7003
7004 // Shift down.
7005 Value *DownShifted =
7006 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7007 // Mask off.
7008 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7009 }
7010 case LookupTableKind: {
7011 ++NumLookupTables;
7012 auto *Table =
7013 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7014 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7015 Initializer, "switch.table." + Func->getName());
7016 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7017 // Set the alignment to that of an array items. We will be only loading one
7018 // value out of it.
7019 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7020 Type *IndexTy = DL.getIndexType(Table->getType());
7021 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7022
7023 if (Index->getType() != IndexTy) {
7024 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7025 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7026 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7027 Zext->setNonNeg(
7028 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7029 }
7030
7031 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7032 Value *GEP =
7033 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7034 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7035 }
7036 }
7037 llvm_unreachable("Unknown helper kind!");
7038}
7039
7040bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7041 uint64_t TableSize,
7042 Type *ElementType) {
7043 auto *IT = dyn_cast<IntegerType>(ElementType);
7044 if (!IT)
7045 return false;
7046 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7047 // are <= 15, we could try to narrow the type.
7048
7049 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7050 if (TableSize >= UINT_MAX / IT->getBitWidth())
7051 return false;
7052 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7053}
7054
// Return true when a lookup-table result of type Ty is cheap for the target:
// either the type is legal, or it is a power-of-2 integer of at least 8 bits
// that fits in a legal integer register.
// NOTE(review): the first line of this function's signature is missing from
// this rendering; based on the call sites it is presumably
// `static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo
// &TTI,` — confirm against upstream.
                                          const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}
7075
7076Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7077
7078bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7079
7080bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7081
// Return true when NumCases cases spread over CaseRange values are dense
// enough (>= 40%) for a table-based lowering to pay off.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // Minimum density (in percent) for a profitable jump table; this mirrors
  // the optsize/minsize default in TargetLoweringBase::isSuitableForJumpTable,
  // which this function was based on.
  const uint64_t MinDensity = 40;

  // Guard against overflow in the products below.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  return NumCases * 100 >= CaseRange * MinDensity;
}
7093
// NOTE(review): the signature line is missing from this rendering; this is
// presumably the overload `static bool isSwitchDense(ArrayRef<int64_t>
// Values)` — confirm against upstream.
  // Values is sorted ascending by the caller, so back() - front() is the span
  // of the case values.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  return isSwitchDense(Values.size(), Range);
}
7102
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
// NOTE(review): the first line of the signature is missing from this
// rendering; presumably
// `static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,` —
// confirm against upstream.
                                   const TargetTransformInfo &TTI,
                                   const DataLayout &DL,
                                   const SmallVector<Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &Ty : ResultTypes) {
    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  return isSwitchDense(SI->getNumCases(), TableSize);
}
7143
// Decide whether the switch condition can index the table directly (no
// subtraction of the minimum case value).
// NOTE(review): the first line of the signature is missing from this
// rendering; presumably
// `static bool shouldUseSwitchConditionAsTableIndex(` — confirm upstream.
    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
    bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
    const DataLayout &DL, const TargetTransformInfo &TTI) {
  // A zero-based switch already indexes from zero.
  if (MinCaseVal.isNullValue())
    return true;
  // Negative minimum, a u64-max maximum (the grown TableSize would overflow),
  // or a missing default result rule this out.
  if (MinCaseVal.isNegative() ||
      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
      !HasDefaultResults)
    return false;
  // Only worthwhile when every result still fits in a register with the
  // enlarged (MaxCaseVal + 1 entry) table.
  return all_of(ResultTypes, [&](const auto &ResultType) {
    return SwitchReplacement::wouldFitInRegister(
        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
  });
}
7159
/// Try to reuse the switch table index compare. Following pattern:
/// \code
///     if (idx < tablesize)
///        r = table[idx]; // table does not contain default_value
///     else
///        r = default_value;
///     if (r != default_value)
///        ...
/// \endcode
/// Is optimized to:
/// \code
///     cond = idx < tablesize;
///     if (cond)
///        r = table[idx];
///     else
///        r = default_value;
///     if (cond)
///        ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
// NOTE(review): the first line of the signature is missing from this
// rendering; presumably `static void reuseTableCompare(` — confirm upstream.
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // NOTE(review): the line initializing `CmpInst` is missing from this
  // rendering — presumably a dyn_cast of PhiUser to a compare instruction.
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // NOTE(review): the line initializing `CmpOp1` is missing from this
  // rendering — presumably the compare's second operand as a Constant.
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
  // NOTE(review): the start of the statement computing `DefaultConst` is
  // missing from this rendering (presumably a constant fold of the compare
  // against the default value).
      CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
    // NOTE(review): the start of the statement computing `CaseConst` is
    // missing from this rendering.
        CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7241
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
// NOTE(review): the first line of the signature is missing from this
// rendering; based on the parameters below it is presumably
// `static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,` —
// confirm against upstream.
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI,
                                bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  // NOTE(review): declaration lines are missing from this rendering around
  // here; the uses below imply something like
  //   SmallDenseMap<PHINode *, ResultListTy> ResultLists;
  //   SmallDenseMap<PHINode *, Constant *> DefaultResults;
  //   SmallVector<PHINode *, 4> PHIs;
  // — confirm against upstream.

  SmallVector<Type *> ResultTypes;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    // NOTE(review): the line defining the `ResultsTy` alias is missing from
    // this rendering (presumably a SmallVector of (PHINode *, Constant *)
    // pairs) — confirm upstream.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exist.
      ConstantRange CR =
          computeConstantRange(TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  // NOTE(review): the declaration of `PhiToReplacementMap` is missing from
  // this rendering (presumably a map from PHINode * to SwitchReplacement) —
  // confirm upstream.
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
    // NOTE(review): the line starting this statement (`Constant *DefaultVal
    // =`) is missing from this rendering — confirm upstream.
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  BranchInst *RangeCheckBranch = nullptr;
  BranchInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removeProdecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7593
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
// NOTE(review): the first line of the signature is missing from this
// rendering; presumably
// `static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,` —
// confirm against upstream.
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  // NOTE(review): the declaration of `Values` is missing from this rendering
  // (presumably a SmallVector<int64_t, 4>) — confirm upstream.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  //   C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Rewrite each case value to its rebased, right-shifted equivalent.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7680
7681/// Tries to transform the switch when the condition is umin with a constant.
7682/// In that case, the default branch can be replaced by the constant's branch.
7683/// This method also removes dead cases when the simplification cannot replace
7684/// the default branch.
7685///
7686/// For example:
7687/// switch(umin(a, 3)) {
7688/// case 0:
7689/// case 1:
7690/// case 2:
7691/// case 3:
7692/// case 4:
7693/// // ...
7694/// default:
7695/// unreachable
7696/// }
7697///
7698/// Transforms into:
7699///
7700/// switch(a) {
7701/// case 0:
7702/// case 1:
7703/// case 2:
7704/// default:
7705/// // This is case 3
7706/// }
7708 Value *A;
// NOTE(review): this rendering elides the function signature (orig. line 7707)
// and the declarations of `Constant`, `Updates`, and the
// SwitchInstProfUpdateWrapper `SIW` (orig. lines 7709, 7714-7715). The lines
// below are preserved verbatim.
7710
7711 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7712 return false;
7713
7716 BasicBlock *BB = SIW->getParent();
7717
7718 // Dead cases are removed even when the simplification fails.
7719 // A case is dead when its value is higher than the Constant.
7720 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7721 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7722 ++I;
7723 continue;
7724 }
// Unlink the dead case: drop the PHI edge from BB, record the DT edge
// deletion, and erase the case. The end iterator is refreshed because
// removeCase invalidates it.
7725 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7726 DeadCaseBB->removePredecessor(BB);
7727 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
7728 I = SIW->removeCase(I);
7729 E = SIW->case_end();
7730 }
7731
7732 auto Case = SI->findCaseValue(Constant);
7733 // If the case value is not found, `findCaseValue` returns the default case.
7734 // In this scenario, since there is no explicit `case 3:`, the simplification
7735 // fails. The simplification also fails when the switch’s default destination
7736 // is reachable.
7737 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7738 if (DTU)
7739 DTU->applyUpdates(Updates);
// Report a change only if dead-case removal above actually did something.
7740 return !Updates.empty();
7741 }
7742
// Full simplification: route the (unreachable) default edge to Constant's
// case block, drop that case, and switch directly on the umin operand `A`.
7743 BasicBlock *Unreachable = SI->getDefaultDest();
7744 SIW.replaceDefaultDest(Case);
7745 SIW.removeCase(Case);
7746 SIW->setCondition(A);
7747
7748 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7749
7750 if (DTU)
7751 DTU->applyUpdates(Updates);
7752
7753 return true;
7754}
7755
7756/// Tries to transform switch of powers of two to reduce switch range.
7757/// For example, switch like:
7758/// switch (C) { case 1: case 2: case 64: case 128: }
7759/// will be transformed to:
7760/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7761///
7762/// This transformation allows better lowering and may transform the switch
7763/// instruction into a sequence of bit manipulation and a smaller
7764/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7765/// address of the jump target, and indirectly jump to it).
7767 DomTreeUpdater *DTU,
// NOTE(review): the first signature line (orig. 7766) and the declarations /
// initializers on orig. lines 7791 (`Values`), 7821 (the extractBranchWeights
// call initializing `HasWeights`), and 7829 (the `OrigDenominator`
// initializer) are elided in this rendering; remaining lines are verbatim.
7768 const DataLayout &DL,
7769 const TargetTransformInfo &TTI) {
7770 Value *Condition = SI->getCondition();
7771 LLVMContext &Context = SI->getContext();
7772 auto *CondTy = cast<IntegerType>(Condition->getType());
7773
// Bail out for conditions wider than 64 bits or illegal for the target.
7774 if (CondTy->getIntegerBitWidth() > 64 ||
7775 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7776 return false;
7777
7778 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7779 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7780 {Condition, ConstantInt::getTrue(Context)});
7781 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7782 TTI::TCC_Basic * 2)
7783 return false;
7784
7785 // Only bother with this optimization if there are more than 3 switch cases.
7786 // SDAG will start emitting jump tables for 4 or more cases.
7787 if (SI->getNumCases() < 4)
7788 return false;
7789
7790 // Check that switch cases are powers of two.
7792 for (const auto &Case : SI->cases()) {
7793 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7794 if (llvm::has_single_bit(CaseValue))
7795 Values.push_back(CaseValue);
7796 else
7797 return false;
7798 }
7799
7800 // isSwitchDense requires case values to be sorted.
7801 llvm::sort(Values);
7802 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7803 llvm::countr_zero(Values.front()) + 1))
7804 // Transform is unable to generate dense switch.
7805 return false;
7806
7807 Builder.SetInsertPoint(SI);
7808
7809 if (!SI->defaultDestUnreachable()) {
7810 // Let non-power-of-two inputs jump to the default case, when the latter is
7811 // reachable.
7812 auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7813 auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7814
7815 auto *OrigBB = SI->getParent();
7816 auto *DefaultCaseBB = SI->getDefaultDest();
// Split before SI so a new guarding branch (IsPow2) can be placed in OrigBB.
7817 BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7818 auto It = OrigBB->getTerminator()->getIterator();
7819 SmallVector<uint32_t> Weights;
7820 auto HasWeights =
7822 auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
7823 if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
7824 // IsPow2 covers a subset of the cases in which we'd go to the default
7825 // label. The other is those powers of 2 that don't appear in the case
7826 // statement. We don't know the distribution of the values coming in, so
7827 // the safest is to split 50-50 the original probability to `default`.
7828 uint64_t OrigDenominator =
7830 SmallVector<uint64_t> NewWeights(2);
7831 NewWeights[1] = Weights[0] / 2;
7832 NewWeights[0] = OrigDenominator - NewWeights[1];
7833 setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7834 // The probability of executing the default block stays constant. It was
7835 // p_d = Weights[0] / OrigDenominator
7836 // we rewrite as W/D
7837 // We want to find the probability of the default branch of the switch
7838 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7839 // i.e. the original probability is the probability we go to the default
7840 // branch from the BI branch, or we take the default branch on the SI.
7841 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7842 // This matches using W/2 for the default branch probability numerator and
7843 // D-W/2 as the denominator.
7844 Weights[0] = NewWeights[1];
7845 uint64_t CasesDenominator = OrigDenominator - Weights[0];
7846 for (auto &W : drop_begin(Weights))
7847 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7848
7849 setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7850 }
7851 // BI is handling the default case for SI, and so should share its DebugLoc.
7852 BI->setDebugLoc(SI->getDebugLoc());
// Erase the unconditional terminator SplitBlock created; BI replaces it.
7853 It->eraseFromParent();
7854
7855 addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7856 if (DTU)
7857 DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7858 }
7859
7860 // Replace each case with its trailing zeros number.
7861 for (auto &Case : SI->cases()) {
7862 auto *OrigValue = Case.getCaseValue();
7863 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7864 OrigValue->getValue().countr_zero()));
7865 }
7866
7867 // Replace condition with its trailing zeros number.
7868 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7869 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7870
7871 SI->setCondition(ConditionTrailingZeros);
7872
7873 return true;
7874}
7875
7876/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7877/// the same destination.
7879 DomTreeUpdater *DTU) {
// NOTE(review): the first signature line (orig. 7878) and the declarations of
// `Weights` (orig. 7884) and `Pred` (orig. 7954) are elided in this
// rendering; the remaining lines are preserved verbatim.
7880 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7881 if (!Cmp || !Cmp->hasOneUse())
7882 return false;
7883
7885 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7886 if (!HasWeights)
7887 Weights.resize(4); // Avoid checking HasWeights everywhere.
7888
7889 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7890 int64_t Res;
7891 BasicBlock *Succ, *OtherSucc;
7892 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7893 BasicBlock *Unreachable = nullptr;
7894
7895 if (SI->getNumCases() == 2) {
7896 // Find which of 1, 0 or -1 is missing (handled by default dest).
7897 SmallSet<int64_t, 3> Missing;
7898 Missing.insert(1);
7899 Missing.insert(0);
7900 Missing.insert(-1);
7901
7902 Succ = SI->getDefaultDest();
7903 SuccWeight = Weights[0];
7904 OtherSucc = nullptr;
7905 for (auto &Case : SI->cases()) {
7906 std::optional<int64_t> Val =
7907 Case.getCaseValue()->getValue().trySExtValue();
7908 if (!Val)
7909 return false;
// Reject duplicate or out-of-range case values, and require both
// explicit cases to share one successor.
7910 if (!Missing.erase(*Val))
7911 return false;
7912 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7913 return false;
7914 OtherSucc = Case.getCaseSuccessor();
7915 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7916 }
7917
7918 assert(Missing.size() == 1 && "Should have one case left");
7919 Res = *Missing.begin();
7920 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7921 // Normalize so that Succ is taken once and OtherSucc twice.
7922 Unreachable = SI->getDefaultDest();
7923 Succ = OtherSucc = nullptr;
7924 for (auto &Case : SI->cases()) {
7925 BasicBlock *NewSucc = Case.getCaseSuccessor();
7926 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7927 if (!OtherSucc || OtherSucc == NewSucc) {
7928 OtherSucc = NewSucc;
7929 OtherSuccWeight += Weight;
7930 } else if (!Succ) {
7931 Succ = NewSucc;
7932 SuccWeight = Weight;
7933 } else if (Succ == NewSucc) {
7934 std::swap(Succ, OtherSucc);
7935 std::swap(SuccWeight, OtherSuccWeight);
7936 } else
7937 return false;
7938 }
// Verify all three case values are in {-1, 0, 1} and find the one value
// that maps to the singleton successor.
7939 for (auto &Case : SI->cases()) {
7940 std::optional<int64_t> Val =
7941 Case.getCaseValue()->getValue().trySExtValue();
7942 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7943 return false;
7944 if (Case.getCaseSuccessor() == Succ) {
7945 Res = *Val;
7946 break;
7947 }
7948 }
7949 } else {
7950 return false;
7951 }
7952
7953 // Determine predicate for the missing case.
7955 switch (Res) {
7956 case 1:
7957 Pred = ICmpInst::ICMP_UGT;
7958 break;
7959 case 0:
7960 Pred = ICmpInst::ICMP_EQ;
7961 break;
7962 case -1:
7963 Pred = ICmpInst::ICMP_ULT;
7964 break;
7965 }
7966 if (Cmp->isSigned())
7967 Pred = ICmpInst::getSignedPredicate(Pred);
7968
7969 MDNode *NewWeights = nullptr;
7970 if (HasWeights)
7971 NewWeights = MDBuilder(SI->getContext())
7972 .createBranchWeights(SuccWeight, OtherSuccWeight);
7973
// Replace the switch with `br (icmp pred lhs, rhs), Succ, OtherSucc`,
// preserving unpredictable metadata and cleaning up PHI edges / DT.
7974 BasicBlock *BB = SI->getParent();
7975 Builder.SetInsertPoint(SI->getIterator());
7976 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7977 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7978 SI->getMetadata(LLVMContext::MD_unpredictable));
7979 OtherSucc->removePredecessor(BB);
7980 if (Unreachable)
7981 Unreachable->removePredecessor(BB);
7982 SI->eraseFromParent();
7983 Cmp->eraseFromParent();
7984 if (DTU && Unreachable)
7985 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7986 return true;
7987}
7988
7989/// Checking whether two cases of SI are equal depends on the contents of the
7990/// BasicBlock and the incoming values of their successor PHINodes.
7991/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7992/// calling this function on each BasicBlock every time isEqual is called,
7993/// especially since the same BasicBlock may be passed as an argument multiple
7994/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7995/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7996/// of the incoming values.
8001
// NOTE(review): the struct header and the empty/tombstone key helpers
// (orig. lines 7997-8010, partially elided here) are not fully visible in
// this rendering — presumably this is a DenseMapInfo specialization for
// SwitchSuccWrapper* (getEmptyKey / getTombstoneKey / getHashValue /
// isEqual); confirm against the full source. Visible lines are verbatim.
8004 return static_cast<SwitchSuccWrapper *>(
8006 }
8008 return static_cast<SwitchSuccWrapper *>(
8010 }
8011 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
8012 BasicBlock *Succ = SSW->Dest;
8014 assert(BI->isUnconditional() &&
8015 "Only supporting unconditional branches for now");
8016 assert(BI->getNumSuccessors() == 1 &&
8017 "Expected unconditional branches to have one successor");
8018 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
8019
8020 // Since we assume the BB is just a single BranchInst with a single
8021 // successor, we hash as the BB and the incoming Values of its successor
8022 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8023 // including the incoming PHI values leads to better performance.
8024 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8025 // time and passing it in SwitchSuccWrapper, but this slowed down the
8026 // average compile time without having any impact on the worst case compile
8027 // time.
8028 BasicBlock *BB = BI->getSuccessor(0);
8029 SmallVector<Value *> PhiValsForBB;
8030 for (PHINode &Phi : BB->phis())
// NOTE(review): the inner lookup is keyed by BB (the PHI's parent); the
// incoming edge of interest comes from Succ — verify the intended key
// against the PhiPredIVs construction in simplifyDuplicateSwitchArms.
8031 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
8032
8033 return hash_combine(BB, hash_combine_range(PhiValsForBB));
8034 }
8035 static bool isEqual(const SwitchSuccWrapper *LHS,
8036 const SwitchSuccWrapper *RHS) {
// Sentinel keys (empty/tombstone) only compare equal to themselves.
8039 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
8040 return LHS == RHS;
8041
8042 BasicBlock *A = LHS->Dest;
8043 BasicBlock *B = RHS->Dest;
8044
8045 // FIXME: we checked that the size of A and B are both 1 in
8046 // simplifyDuplicateSwitchArms to make the Case list smaller to
8047 // improve performance. If we decide to support BasicBlocks with more
8048 // than just a single instruction, we need to check that A.size() ==
8049 // B.size() here, and we need to check more than just the BranchInsts
8050 // for equality.
8051
8052 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
8053 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
8054 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
8055 "Only supporting unconditional branches for now");
8056 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
8057 return false;
8058
8059 // Need to check that PHIs in successor have matching values
8060 BasicBlock *Succ = ABI->getSuccessor(0);
8061 for (PHINode &Phi : Succ->phis()) {
8062 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8063 if (PredIVs[A] != PredIVs[B])
8064 return false;
8065 }
8066
8067 return true;
8068 }
8069};
8070
8071bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8072 DomTreeUpdater *DTU) {
8073 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8074 // PHINodes which need to be processed into PhiPredIVs. We decide to process
8075 // an entire PHI at once after the loop, opposed to calling
8076 // getIncomingValueForBlock inside this loop, since each call to
8077 // getIncomingValueForBlock is O(|Preds|).
// NOTE(review): the declarations of `Cases`, `Seen`, `BBToSuccessorIndexes`,
// `PhiPredIVs`, and `Phis` (orig. lines 8078-8082), the body of the PHI
// collection loop (orig. 8114), and the `Updates` declaration (orig. 8142)
// are elided in this rendering; remaining lines are verbatim.
8083 Cases.reserve(SI->getNumSuccessors());
8084
8085 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
8086 BasicBlock *BB = SI->getSuccessor(I);
8087
8088 // FIXME: Support more than just a single BranchInst. One way we could do
8089 // this is by taking a hashing approach of all insts in BB.
8090 if (BB->size() != 1)
8091 continue;
8092
8093 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8094 // on other kinds of terminators. We decide to only support unconditional
8095 // branches for now for compile time reasons.
8096 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
8097 if (!BI || BI->isConditional())
8098 continue;
8099
// Already-seen candidate block: just record this extra successor index.
8100 if (!Seen.insert(BB).second) {
8101 auto It = BBToSuccessorIndexes.find(BB);
8102 if (It != BBToSuccessorIndexes.end())
8103 It->second.emplace_back(I);
8104 continue;
8105 }
8106
8107 // FIXME: This case needs some extra care because the terminators other than
8108 // SI need to be updated. For now, consider only backedges to the SI.
8109 if (BB->getUniquePredecessor() != SI->getParent())
8110 continue;
8111
8112 // Keep track of which PHIs we need as keys in PhiPredIVs below.
8113 for (BasicBlock *Succ : BI->successors())
8115
8116 // Add the successor only if not previously visited.
8117 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
8118 BBToSuccessorIndexes[BB].emplace_back(I);
8119 }
8120
8121 // Precompute a data structure to improve performance of isEqual for
8122 // SwitchSuccWrapper.
8123 PhiPredIVs.reserve(Phis.size());
8124 for (PHINode *Phi : Phis) {
8125 auto &IVs =
8126 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8127 for (auto &IV : Phi->incoming_values())
8128 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8129 }
8130
8131 // Build a set such that if the SwitchSuccWrapper exists in the set and
8132 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
8133 // which is not in the set should be replaced with the one in the set. If the
8134 // SwitchSuccWrapper is not in the set, then it should be added to the set so
8135 // other SwitchSuccWrappers can check against it in the same manner. We use
8136 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
8137 // around information to isEquality, getHashValue, and when doing the
8138 // replacement with better performance.
8139 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
8140 ReplaceWith.reserve(Cases.size());
8141
// NOTE(review): ReplaceWith is empty at this point, so this reserves 0
// elements; Cases.size() looks like the intended capacity — confirm.
8143 Updates.reserve(ReplaceWith.size());
8144 bool MadeChange = false;
8145 for (auto &SSW : Cases) {
8146 // SSW is a candidate for simplification. If we find a duplicate BB,
8147 // replace it.
8148 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
8149 if (!Inserted) {
8150 // We know that SI's parent BB no longer dominates the old case successor
8151 // since we are making it dead.
8152 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
8153 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
8154 for (unsigned Idx : Successors)
8155 SI->setSuccessor(Idx, (*It)->Dest);
8156 MadeChange = true;
8157 }
8158 }
8159
8160 if (DTU)
8161 DTU->applyUpdates(Updates);
8162
8163 return MadeChange;
8164}
8165
8166bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8167 BasicBlock *BB = SI->getParent();
8168
8169 if (isValueEqualityComparison(SI)) {
8170 // If we only have one predecessor, and if it is a branch on this value,
8171 // see if that predecessor totally determines the outcome of this switch.
8172 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8173 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8174 return requestResimplify();
8175
8176 Value *Cond = SI->getCondition();
8177 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8178 if (simplifySwitchOnSelect(SI, Select))
8179 return requestResimplify();
8180
8181 // If the block only contains the switch, see if we can fold the block
8182 // away into any preds.
8183 if (SI == &*BB->instructionsWithoutDebug(false).begin())
8184 if (foldValueComparisonIntoPredecessors(SI, Builder))
8185 return requestResimplify();
8186 }
8187
8188 // Try to transform the switch into an icmp and a branch.
8189 // The conversion from switch to comparison may lose information on
8190 // impossible switch values, so disable it early in the pipeline.
8191 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8192 return requestResimplify();
8193
8194 // Remove unreachable cases.
8195 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8196 return requestResimplify();
8197
8198 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8199 return requestResimplify();
8200
8201 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8202 return requestResimplify();
8203
8204 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8205 return requestResimplify();
8206
8207 // The conversion of switches to arithmetic or lookup table is disabled in
8208 // the early optimization pipeline, as it may lose information or make the
8209 // resulting code harder to analyze.
8210 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8211 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8212 Options.ConvertSwitchToLookupTable))
8213 return requestResimplify();
8214
8215 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8216 return requestResimplify();
8217
8218 if (reduceSwitchRange(SI, Builder, DL, TTI))
8219 return requestResimplify();
8220
8221 if (HoistCommon &&
8222 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8223 return requestResimplify();
8224
8225 if (simplifyDuplicateSwitchArms(SI, DTU))
8226 return requestResimplify();
8227
8228 if (simplifySwitchWhenUMin(SI, DTU))
8229 return requestResimplify();
8230
8231 return false;
8232}
8233
8234bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// NOTE(review): the lines erasing the indirectbr and creating the direct
// branch in the 0- and 1-destination paths (orig. lines 8273, 8279-8280)
// are elided in this rendering; remaining lines are preserved verbatim.
8235 BasicBlock *BB = IBI->getParent();
8236 bool Changed = false;
8237 SmallVector<uint32_t> BranchWeights;
8238 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8239 extractBranchWeights(*IBI, BranchWeights);
8240
// Accumulate the total profile weight per unique destination so weights can
// be rebuilt after duplicate destinations are removed.
8241 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8242 if (HasBranchWeights)
8243 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8244 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8245
8246 // Eliminate redundant destinations.
8247 SmallPtrSet<Value *, 8> Succs;
8248 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8249 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8250 BasicBlock *Dest = IBI->getDestination(I);
// Drop destinations whose address is never taken (can't be targeted) and
// duplicates already in Succs; indices are adjusted after removal.
8251 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8252 if (!Dest->hasAddressTaken())
8253 RemovedSuccs.insert(Dest);
8254 Dest->removePredecessor(BB);
8255 IBI->removeDestination(I);
8256 --I;
8257 --E;
8258 Changed = true;
8259 }
8260 }
8261
8262 if (DTU) {
8263 std::vector<DominatorTree::UpdateType> Updates;
8264 Updates.reserve(RemovedSuccs.size());
8265 for (auto *RemovedSucc : RemovedSuccs)
8266 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8267 DTU->applyUpdates(Updates);
8268 }
8269
8270 if (IBI->getNumDestinations() == 0) {
8271 // If the indirectbr has no successors, change it to unreachable.
8272 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8274 return true;
8275 }
8276
8277 if (IBI->getNumDestinations() == 1) {
8278 // If the indirectbr has one successor, change it to a direct branch.
8281 return true;
8282 }
8283 if (HasBranchWeights) {
8284 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8285 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8286 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8287 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8288 }
8289 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8290 if (simplifyIndirectBrOnSelect(IBI, SI))
8291 return requestResimplify();
8292 }
8293 return Changed;
8294}
8295
8296/// Given an block with only a single landing pad and a unconditional branch
8297/// try to find another basic block which this one can be merged with. This
8298/// handles cases where we have multiple invokes with unique landing pads, but
8299/// a shared handler.
8300///
8301/// We specifically choose to not worry about merging non-empty blocks
8302/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8303/// practice, the optimizer produces empty landing pad blocks quite frequently
8304/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8305/// sinking in this file)
8306///
8307/// This is primarily a code size optimization. We need to avoid performing
8308/// any transform which might inhibit optimization (such as our ability to
8309/// specialize a particular handler via tail commoning). We do this by not
8310/// merging any blocks which require us to introduce a phi. Since the same
8311/// values are flowing through both blocks, we don't lose any ability to
8312/// specialize. If anything, we make such specialization more likely.
8313///
8314/// TODO - This transformation could remove entries from a phi in the target
8315/// block when the inputs in the phi are the same for the two blocks being
8316/// merged. In some cases, this could result in removal of the PHI entirely.
8318 BasicBlock *BB, DomTreeUpdater *DTU) {
// NOTE(review): the first signature line (orig. 8317) and the declarations
// of `LPad2`, `BI2`, `UniquePreds`, and `UniqueSuccs` (orig. lines 8330,
// 8334, 8342, 8354) are elided in this rendering; remaining lines are
// preserved verbatim.
8319 auto Succ = BB->getUniqueSuccessor();
8320 assert(Succ);
8321 // If there's a phi in the successor block, we'd likely have to introduce
8322 // a phi into the merged landing pad block.
8323 if (isa<PHINode>(*Succ->begin()))
8324 return false;
8325
// Look for a sibling predecessor of Succ that is an identical landing-pad
// block (same landingpad, same unconditional branch).
8326 for (BasicBlock *OtherPred : predecessors(Succ)) {
8327 if (BB == OtherPred)
8328 continue;
8329 BasicBlock::iterator I = OtherPred->begin();
8331 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8332 continue;
8333 ++I;
8335 if (!BI2 || !BI2->isIdenticalTo(BI))
8336 continue;
8337
8338 std::vector<DominatorTree::UpdateType> Updates;
8339
8340 // We've found an identical block. Update our predecessors to take that
8341 // path instead and make ourselves dead.
8343 for (BasicBlock *Pred : UniquePreds) {
8344 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8345 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8346 "unexpected successor");
8347 II->setUnwindDest(OtherPred);
8348 if (DTU) {
8349 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8350 Updates.push_back({DominatorTree::Delete, Pred, BB});
8351 }
8352 }
8353
8355 for (BasicBlock *Succ : UniqueSuccs) {
8356 Succ->removePredecessor(BB);
8357 if (DTU)
8358 Updates.push_back({DominatorTree::Delete, BB, Succ});
8359 }
8360
// BB is now dead: replace its terminator with unreachable.
8361 IRBuilder<> Builder(BI);
8362 Builder.CreateUnreachable();
8363 BI->eraseFromParent();
8364 if (DTU)
8365 DTU->applyUpdates(Updates);
8366 return true;
8367 }
8368 return false;
8369}
8370
8371bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8372 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8373 : simplifyCondBranch(Branch, Builder);
8374}
8375
8376bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// NOTE(review): the initialization of the iterator `I` (orig. line 8392,
// presumably the first non-PHI/debug instruction of BB — confirm against the
// full source) is elided in this rendering; remaining lines are verbatim.
8377 IRBuilder<> &Builder) {
8378 BasicBlock *BB = BI->getParent();
8379 BasicBlock *Succ = BI->getSuccessor(0);
8380
8381 // If the Terminator is the only non-phi instruction, simplify the block.
8382 // If LoopHeader is provided, check if the block or its successor is a loop
8383 // header. (This is for early invocations before loop simplify and
8384 // vectorization to keep canonical loop forms for nested loops. These blocks
8385 // can be eliminated when the pass is invoked later in the back-end.)
8386 // Note that if BB has only one predecessor then we do not introduce new
8387 // backedge, so we can eliminate BB.
8388 bool NeedCanonicalLoop =
8389 Options.NeedCanonicalLoop &&
8390 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8391 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
8393 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8394 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8395 return true;
8396
8397 // If the only instruction in the block is a seteq/setne comparison against a
8398 // constant, try to simplify the block.
8399 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8400 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8401 ++I;
8402 if (I->isTerminator() &&
8403 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8404 return true;
// Also handle the icmp+select+branch shape.
8405 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8406 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8407 Builder))
8408 return true;
8409 }
8410 }
8411
8412 // See if we can merge an empty landing pad block with another which is
8413 // equivalent.
8414 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8415 ++I;
8416 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8417 return true;
8418 }
8419
8420 // If this basic block is ONLY a compare and a branch, and if a predecessor
8421 // branches to us and our successor, fold the comparison into the
8422 // predecessor and use logical operations to update the incoming value
8423 // for PHI nodes in common successor.
8424 if (Options.SpeculateBlocks &&
8425 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8426 Options.BonusInstThreshold))
8427 return requestResimplify();
8428 return false;
8429}
8430
8432 BasicBlock *PredPred = nullptr;
// NOTE(review): the signature line (orig. 8431) is elided in this rendering.
// From the body: this helper returns the one block that is the single
// predecessor of every predecessor of BB, or nullptr if no such unique
// block exists.
8433 for (auto *P : predecessors(BB)) {
8434 BasicBlock *PPred = P->getSinglePredecessor();
// Fail if any predecessor has multiple predecessors, or if two
// predecessors disagree on their (single) predecessor.
8435 if (!PPred || (PredPred && PredPred != PPred))
8436 return nullptr;
8437 PredPred = PPred;
8438 }
8439 return PredPred;
8440}
8441
8442/// Fold the following pattern:
8443/// bb0:
8444/// br i1 %cond1, label %bb1, label %bb2
8445/// bb1:
8446/// br i1 %cond2, label %bb3, label %bb4
8447/// bb2:
8448/// br i1 %cond2, label %bb4, label %bb3
8449/// bb3:
8450/// ...
8451/// bb4:
8452/// ...
8453/// into
8454/// bb0:
8455/// %cond = xor i1 %cond1, %cond2
8456/// br i1 %cond, label %bb4, label %bb3
8457/// bb3:
8458/// ...
8459/// bb4:
8460/// ...
8461/// NOTE: %cond2 always dominates the terminator of bb0.
8463 BasicBlock *BB = BI->getParent();
// NOTE(review): the function signature (orig. line 8462) and the `Updates`
// declaration (orig. line 8498) are elided in this rendering; remaining
// lines are preserved verbatim. See the preceding header comment for the
// bb0/bb1/bb2 -> xor fold this implements.
8464 BasicBlock *BB1 = BI->getSuccessor(0);
8465 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor holds nothing but a conditional branch, does not
// loop back into BB or itself, and its targets start without PHIs (so no
// incoming values need rewriting).
8466 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8467 if (Succ == BB)
8468 return false;
8469 if (&Succ->front() != Succ->getTerminator())
8470 return false;
8471 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8472 if (!SuccBI || !SuccBI->isConditional())
8473 return false;
8474 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8475 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8476 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8477 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8478 };
8479 BranchInst *BB1BI, *BB2BI;
8480 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8481 return false;
8482
// Both inner branches must test the same condition with swapped targets.
8483 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8484 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8485 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8486 return false;
8487
8488 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8489 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8490 IRBuilder<> Builder(BI);
// Branch directly on cond1 ^ cond2: true selects BB4, false selects BB3.
8491 BI->setCondition(
8492 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8493 BB1->removePredecessor(BB);
8494 BI->setSuccessor(0, BB4);
8495 BB2->removePredecessor(BB);
8496 BI->setSuccessor(1, BB3);
8497 if (DTU) {
8499 Updates.push_back({DominatorTree::Delete, BB, BB1});
8500 Updates.push_back({DominatorTree::Insert, BB, BB4});
8501 Updates.push_back({DominatorTree::Delete, BB, BB2});
8502 Updates.push_back({DominatorTree::Insert, BB, BB3});
8503
8504 DTU->applyUpdates(Updates);
8505 }
// Recompute branch weights from the three original branches; each missing
// profile defaults to 1:1 so partial metadata is still combined.
8506 bool HasWeight = false;
8507 uint64_t BBTWeight, BBFWeight;
8508 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8509 HasWeight = true;
8510 else
8511 BBTWeight = BBFWeight = 1;
8512 uint64_t BB1TWeight, BB1FWeight;
8513 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8514 HasWeight = true;
8515 else
8516 BB1TWeight = BB1FWeight = 1;
8517 uint64_t BB2TWeight, BB2FWeight;
8518 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8519 HasWeight = true;
8520 else
8521 BB2TWeight = BB2FWeight = 1;
8522 if (HasWeight) {
8523 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8524 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8525 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8526 /*ElideAllZero=*/true);
8527 }
8528 return true;
8529}
8530
// Peephole-simplify the conditional branch `BI` terminating its parent block.
// Tries, in order: folding value-equality comparisons into predecessors,
// turning an icmp chain into a switch, resolving the condition from a
// dominating branch, folding into a common destination, hoisting/speculating
// code from the two successors, threading on values known in predecessors,
// condbr-to-condbr simplification, conditional-store merging, and nested
// condbr merging. Returns true when a transform fired (typically via
// requestResimplify() so this block is re-examined on the next round).
8531 bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8532 assert(
// NOTE(review): embedded source numbering jumps 8532 -> 8534; a line of this
// assert (presumably checking BI->isConditional()) is missing from this
// excerpt — verify against the upstream file.
8534 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8535 "Tautological conditional branch should have been eliminated already.");
8536
8537 BasicBlock *BB = BI->getParent();
// Respect pass options: bail out entirely if condbr simplification is
// disabled or the function is built for fuzzing.
8538 if (!Options.SimplifyCondBranch ||
8539 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8540 return false;
8541
8542 // Conditional branch
8543 if (isValueEqualityComparison(BI)) {
8544 // If we only have one predecessor, and if it is a branch on this value,
8545 // see if that predecessor totally determines the outcome of this
8546 // switch.
8547 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8548 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8549 return requestResimplify();
8550
8551 // This block must be empty, except for the setcond inst, if it exists.
8552 // Ignore dbg and pseudo intrinsics.
8553 auto I = BB->instructionsWithoutDebug(true).begin();
8554 if (&*I == BI) {
8555 if (foldValueComparisonIntoPredecessors(BI, Builder))
8556 return requestResimplify();
8557 } else if (&*I == cast<Instruction>(BI->getCondition())) {
// The only non-debug instruction before the branch is its own condition;
// that still counts as "empty enough" to fold into predecessors.
8558 ++I;
8559 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8560 return requestResimplify();
8561 }
8562 }
8563
8564 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8565 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8566 return true;
8567
8568 // If this basic block has dominating predecessor blocks and the dominating
8569 // blocks' conditions imply BI's condition, we know the direction of BI.
8570 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8571 if (Imp) {
8572 // Turn this into a branch on constant.
8573 auto *OldCond = BI->getCondition();
8574 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8575 : ConstantInt::getFalse(BB->getContext());
8576 BI->setCondition(TorF);
// NOTE(review): numbering jumps 8576 -> 8578; the line cleaning up the
// now-dead OldCond appears to be missing from this excerpt.
8578 return requestResimplify();
8579 }
8580
8581 // If this basic block is ONLY a compare and a branch, and if a predecessor
8582 // branches to us and one of our successors, fold the comparison into the
8583 // predecessor and use logical operations to pick the right destination.
8584 if (Options.SpeculateBlocks &&
8585 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8586 Options.BonusInstThreshold))
8587 return requestResimplify();
8588
8589 // We have a conditional branch to two blocks that are only reachable
8590 // from BI. We know that the condbr dominates the two blocks, so see if
8591 // there is any identical code in the "then" and "else" blocks. If so, we
8592 // can hoist it up to the branching block.
8593 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8594 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8595 if (HoistCommon &&
8596 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8597 return requestResimplify();
8598
// If the target supports conditional faulting, try to hoist cheap
// loads/stores out of both successors as conditional accesses.
8599 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8600 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8601 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
// Returns true iff every successor contains only safe, cheap
// loads/stores (plus a single-successor terminator), collecting the
// candidates into SpeculatedConditionalLoadsStores as it scans.
8602 auto CanSpeculateConditionalLoadsStores = [&]() {
8603 for (auto *Succ : successors(BB)) {
8604 for (Instruction &I : *Succ) {
8605 if (I.isTerminator()) {
8606 if (I.getNumSuccessors() > 1)
8607 return false;
8608 continue;
8609 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8610 SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): numbering jumps 8610 -> 8612; the size-limit operand (a
// threshold constant) is missing from this excerpt.
8612 return false;
8613 }
8614 SpeculatedConditionalLoadsStores.push_back(&I);
8615 }
8616 }
8617 return !SpeculatedConditionalLoadsStores.empty();
8618 };
8619
8620 if (CanSpeculateConditionalLoadsStores()) {
8621 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8622 std::nullopt, nullptr);
8623 return requestResimplify();
8624 }
8625 }
8626 } else {
8627 // If Successor #1 has multiple preds, we may be able to conditionally
8628 // execute Successor #0 if it branches to Successor #1.
8629 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8630 if (Succ0TI->getNumSuccessors() == 1 &&
8631 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8632 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8633 return requestResimplify();
8634 }
8635 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8636 // If Successor #0 has multiple preds, we may be able to conditionally
8637 // execute Successor #1 if it branches to Successor #0.
8638 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8639 if (Succ1TI->getNumSuccessors() == 1 &&
8640 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8641 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8642 return requestResimplify();
8643 }
8644
8645 // If this is a branch on something for which we know the constant value in
8646 // predecessors (e.g. a phi node in the current block), thread control
8647 // through this block.
8648 if (foldCondBranchOnValueKnownInPredecessor(BI))
8649 return requestResimplify();
8650
8651 // Scan predecessor blocks for conditional branches.
8652 for (BasicBlock *Pred : predecessors(BB))
8653 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8654 if (PBI != BI && PBI->isConditional())
8655 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8656 return requestResimplify();
8657
8658 // Look for diamond patterns.
8659 if (MergeCondStores)
8660 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8661 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8662 if (PBI != BI && PBI->isConditional())
8663 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8664 return requestResimplify();
8665
8666 // Look for nested conditional branches.
8667 if (mergeNestedCondBranch(BI, DTU))
8668 return requestResimplify();
8669
8670 return false;
8671}
8672
8673/// Check if passing a value to an instruction will cause undefined behavior.
/// Only null and undef constants are considered. The function inspects the
/// first handleable use of I (GEP/ret/load/store/call/div-rem/...) within the
/// same block and recurses through GEPs and calls where appropriate.
/// \param V the incoming constant candidate (non-constants return false).
/// \param I the instruction (often a PHI) that would receive V.
/// \param PtrValueMayBeModified true when V may no longer point exactly at
///        null (e.g. displaced by a non-zero GEP), which weakens the
///        null-based conclusions below.
8674 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8675 assert(V->getType() == I->getType() && "Mismatched types");
// NOTE(review): numbering jumps 8675 -> 8677; the line defining C
// (presumably `Constant *C = dyn_cast<Constant>(V);`) is missing from this
// excerpt — verify against the upstream file.
8677 if (!C)
8678 return false;
8679
8680 if (I->use_empty())
8681 return false;
8682
8683 if (C->isNullValue() || isa<UndefValue>(C)) {
8684 // Only look at the first use we can handle, avoid hurting compile time with
8685 // long uselists
8686 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8687 auto *Use = cast<Instruction>(U.getUser());
8688 // Change this list when we want to add new instructions.
8689 switch (Use->getOpcode()) {
8690 default:
8691 return false;
8692 case Instruction::GetElementPtr:
8693 case Instruction::Ret:
8694 case Instruction::BitCast:
8695 case Instruction::Load:
8696 case Instruction::Store:
8697 case Instruction::Call:
8698 case Instruction::CallBr:
8699 case Instruction::Invoke:
8700 case Instruction::UDiv:
8701 case Instruction::URem:
8702 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8703 // implemented to avoid code complexity as it is unclear how useful such
8704 // logic is.
8705 case Instruction::SDiv:
8706 case Instruction::SRem:
8707 return true;
8708 }
8709 });
8710 if (FindUse == I->use_end())
8711 return false;
8712 auto &Use = *FindUse;
8713 auto *User = cast<Instruction>(Use.getUser());
8714 // Bail out if User is not in the same BB as I or User == I or User comes
8715 // before I in the block. The latter two can be the case if User is a
8716 // PHI node.
8717 if (User->getParent() != I->getParent() || User == I ||
8718 User->comesBefore(I))
8719 return false;
8720
8721 // Now make sure that there are no instructions in between that can alter
8722 // control flow (eg. calls)
8723 auto InstrRange =
8724 make_range(std::next(I->getIterator()), User->getIterator());
8725 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): numbering jumps 8725 -> 8727; the lambda body (presumably a
// guaranteed-transfer-to-successor check) is missing from this excerpt.
8727 }))
8728 return false;
8729
8730 // Look through GEPs. A load from a GEP derived from NULL is still undefined
// NOTE(review): numbering jumps 8730 -> 8732; the line binding GEP
// (presumably `if (GetElementPtrInst *GEP = dyn_cast<...>(User))`) is
// missing from this excerpt.
8732 if (GEP->getPointerOperand() == I) {
8733 // The type of GEP may differ from the type of base pointer.
8734 // Bail out on vector GEPs, as they are not handled by other checks.
8735 if (GEP->getType()->isVectorTy())
8736 return false;
8737 // The current base address is null, there are four cases to consider:
8738 // getelementptr (TY, null, 0) -> null
8739 // getelementptr (TY, null, not zero) -> may be modified
8740 // getelementptr inbounds (TY, null, 0) -> null
8741 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8742 // undefined?
8743 if (!GEP->hasAllZeroIndices() &&
8744 (!GEP->isInBounds() ||
8745 NullPointerIsDefined(GEP->getFunction(),
8746 GEP->getPointerAddressSpace())))
8747 PtrValueMayBeModified = true;
8748 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8749 }
8750
8751 // Look through return.
8752 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8753 bool HasNoUndefAttr =
8754 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8755 // Return undefined to a noundef return value is undefined.
8756 if (isa<UndefValue>(C) && HasNoUndefAttr)
8757 return true;
8758 // Return null to a nonnull+noundef return value is undefined.
8759 if (C->isNullValue() && HasNoUndefAttr &&
8760 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8761 return !PtrValueMayBeModified;
8762 }
8763 }
8764
8765 // Load from null is undefined.
8766 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8767 if (!LI->isVolatile())
8768 return !NullPointerIsDefined(LI->getFunction(),
8769 LI->getPointerAddressSpace());
8770
8771 // Store to null is undefined.
// NOTE(review): numbering jumps 8771 -> 8773; the line binding SI
// (presumably `if (StoreInst *SI = dyn_cast<StoreInst>(User))`) is missing
// from this excerpt.
8773 if (!SI->isVolatile())
8774 return (!NullPointerIsDefined(SI->getFunction(),
8775 SI->getPointerAddressSpace())) &&
8776 SI->getPointerOperand() == I;
8777
8778 // llvm.assume(false/undef) always triggers immediate UB.
8779 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8780 // Ignore assume operand bundles.
8781 if (I == Assume->getArgOperand(0))
8782 return true;
8783 }
8784
8785 if (auto *CB = dyn_cast<CallBase>(User)) {
8786 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8787 return false;
8788 // A call to null is undefined.
8789 if (CB->getCalledOperand() == I)
8790 return true;
8791
8792 if (CB->isArgOperand(&Use)) {
8793 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8794 // Passing null to a nonnull+noundef argument is undefined.
// NOTE(review): numbering jumps 8794 -> 8796; the condition's first line
// (presumably `if (C->isNullValue() &&`) is missing from this excerpt.
8796 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8797 return !PtrValueMayBeModified;
8798 // Passing undef to a noundef argument is undefined.
8799 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8800 return true;
8801 }
8802 }
8803 // Div/Rem by zero is immediate UB
8804 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8805 return true;
8806 }
8807 return false;
8808}
8809
8810/// If BB has an incoming value that will always trigger undefined behavior
8811/// (eg. null pointer dereference), remove the branch leading here.
/// Scans each PHI in BB; the first incoming value proven UB-producing (via
/// passingValueIsAlwaysUndefined) has its edge from the offending predecessor
/// removed: unconditional branches become `unreachable`, conditional branches
/// keep only the live destination (preserving the guard as an llvm.assume),
/// and switch edges are redirected to a fresh unreachable block. Returns
/// true if any edge was removed; the DomTreeUpdater, when provided, is kept
/// in sync with every CFG edit.
// NOTE(review): numbering jumps 8811 -> 8813; the first line of the
// signature (return type, function name, and BasicBlock *BB parameter) is
// missing from this excerpt — verify against the upstream file.
8813 DomTreeUpdater *DTU,
8814 AssumptionCache *AC) {
8815 for (PHINode &PHI : BB->phis())
8816 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8817 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8818 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8819 Instruction *T = Predecessor->getTerminator();
8820 IRBuilder<> Builder(T);
8821 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8822 BB->removePredecessor(Predecessor);
8823 // Turn unconditional branches into unreachables and remove the dead
8824 // destination from conditional branches.
8825 if (BI->isUnconditional())
8826 Builder.CreateUnreachable();
8827 else {
8828 // Preserve guarding condition in assume, because it might not be
8829 // inferrable from any dominating condition.
8830 Value *Cond = BI->getCondition();
8831 CallInst *Assumption;
// The edge into BB is dead, so the condition (or its negation, when BB
// was the true destination) must hold — record it as an assumption.
8832 if (BI->getSuccessor(0) == BB)
8833 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8834 else
8835 Assumption = Builder.CreateAssumption(Cond);
8836 if (AC)
8837 AC->registerAssumption(cast<AssumeInst>(Assumption));
8838 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8839 : BI->getSuccessor(0));
8840 }
8841 BI->eraseFromParent();
8842 if (DTU)
8843 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8844 return true;
8845 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8846 // Redirect all branches leading to UB into
8847 // a newly created unreachable block.
8848 BasicBlock *Unreachable = BasicBlock::Create(
8849 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8850 Builder.SetInsertPoint(Unreachable);
8851 // The new block contains only one instruction: Unreachable
8852 Builder.CreateUnreachable();
8853 for (const auto &Case : SI->cases())
8854 if (Case.getCaseSuccessor() == BB) {
8855 BB->removePredecessor(Predecessor);
8856 Case.setSuccessor(Unreachable);
8857 }
8858 if (SI->getDefaultDest() == BB) {
8859 BB->removePredecessor(Predecessor);
8860 SI->setDefaultDest(Unreachable);
8861 }
8862
8863 if (DTU)
8864 DTU->applyUpdates(
8865 { { DominatorTree::Insert, Predecessor, Unreachable },
8866 { DominatorTree::Delete, Predecessor, BB } });
8867 return true;
8868 }
8869 }
8870
8871 return false;
8872}
8873
// Run a single round of CFG simplification on BB: delete it if unreachable,
// constant-fold its terminator, remove UB-introducing predecessors, merge it
// into its predecessor, sink/hoist common code, fold two-entry PHIs, and
// finally dispatch on the terminator opcode to the specific simplifier.
// Returns true if anything changed; several transforms return early so the
// driver (run()) revisits the block before attempting the rest.
8874 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8875 bool Changed = false;
8876
8877 assert(BB && BB->getParent() && "Block not embedded in function!");
8878 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8879
8880 // Remove basic blocks that have no predecessors (except the entry block)...
8881 // or that just have themselves as a predecessor. These are unreachable.
8882 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8883 BB->getSinglePredecessor() == BB) {
8884 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8885 DeleteDeadBlock(BB, DTU);
8886 return true;
8887 }
8888
8889 // Check to see if we can constant propagate this terminator instruction
8890 // away...
8891 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8892 /*TLI=*/nullptr, DTU);
8893
8894 // Check for and eliminate duplicate PHI nodes in this block.
// NOTE(review): numbering jumps 8894 -> 8896; the PHI-deduplication call is
// missing from this excerpt.
8896
8897 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): numbering jumps 8897 -> 8899; the guarding `if` (presumably
// calling removeUndefIntroducingPredecessor) is missing from this excerpt.
8899 return requestResimplify();
8900
8901 // Merge basic blocks into their predecessor if there is only one distinct
8902 // pred, and if there is only one distinct successor of the predecessor, and
8903 // if there are no PHI nodes.
8904 if (MergeBlockIntoPredecessor(BB, DTU))
8905 return true;
8906
8907 if (SinkCommon && Options.SinkCommonInsts)
8908 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8909 mergeCompatibleInvokes(BB, DTU)) {
8910 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8911 // so we may now have duplicate PHI's.
8912 // Let's rerun EliminateDuplicatePHINodes() first,
8913 // before foldTwoEntryPHINode() potentially converts them into select's,
8914 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8915 return true;
8916 }
8917
8918 IRBuilder<> Builder(BB);
8919
8920 if (Options.SpeculateBlocks &&
8921 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8922 // If there is a trivial two-entry PHI node in this basic block, and we can
8923 // eliminate it, do so now.
8924 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8925 if (PN->getNumIncomingValues() == 2)
8926 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8927 Options.SpeculateUnpredictables))
8928 return true;
8929 }
8930
// NOTE(review): numbering jumps 8930 -> 8932; the declaration of Terminator
// (presumably `Instruction *Terminator = BB->getTerminator();`) is missing
// from this excerpt.
8932 Builder.SetInsertPoint(Terminator);
// Dispatch to the terminator-specific simplifier.
8933 switch (Terminator->getOpcode()) {
8934 case Instruction::Br:
8935 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8936 break;
8937 case Instruction::Resume:
8938 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8939 break;
8940 case Instruction::CleanupRet:
8941 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8942 break;
8943 case Instruction::Switch:
8944 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8945 break;
8946 case Instruction::Unreachable:
8947 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8948 break;
8949 case Instruction::IndirectBr:
8950 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8951 break;
8952 }
8953
8954 return Changed;
8955}
8956
8957bool SimplifyCFGOpt::run(BasicBlock *BB) {
8958 bool Changed = false;
8959
8960 // Repeated simplify BB as long as resimplification is requested.
8961 do {
8962 Resimplify = false;
8963
8964 // Perform one round of simplifcation. Resimplify flag will be set if
8965 // another iteration is requested.
8966 Changed |= simplifyOnce(BB);
8967 } while (Resimplify);
8968
8969 return Changed;
8970}
8971
// Public entry point: constructs a SimplifyCFGOpt driver for BB's module data
// layout and delegates to SimplifyCFGOpt::run().
// NOTE(review): the opening line(s) of this definition (return type, name,
// and the leading BB/TTI/DTU/Options parameters) are missing from this
// excerpt — verify the full signature against the upstream file.
8974 ArrayRef<WeakVH> LoopHeaders) {
8975 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8976 Options)
8977 .run(BB);
8978}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1167
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1575
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
back - Get the last element.
Definition ArrayRef.h:151
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:939
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1130
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:255
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1934
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1850
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
LLVM_ABI CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:293
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:292
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:24
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:195
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1700
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2088
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1789
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2140
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3094
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3368
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3875
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1715
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2132
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1594
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2100
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:320
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276