LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
// NOTE(review): this region is a doxygen text dump; the declaration lines of
// most cl::opt variables (original lines 101, 112, 117, 127, 131, 135, 141,
// 151, 155, 161, 166, 171, 183, 189, 196, 200, 204, 209) were dropped by the
// extraction, so only their argument lists are visible below. Confirm the
// variable names and cl::opt element types against the upstream file before
// editing this region.
 102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
 103
 104 cl::desc(
 105 "Temporary development switch used to gradually uplift SimplifyCFG "
 106 "into preserving DomTree,"));
 107
 108// Chosen as 2 so as to be cheap, but still to have enough power to fold
 109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
 110// To catch this, we need to fold a compare and a select, hence '2' being the
 111// minimum reasonable default.
 113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
 114 cl::desc(
 115 "Control the amount of phi node folding to perform (default = 2)"));
 116
 118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
 119 cl::desc("Control the maximal total instruction cost that we are willing "
 120 "to speculatively execute to fold a 2-entry PHI node into a "
 121 "select (default = 4)"));
 122
// Master switches for the individual hoist/sink transforms follow; each is an
// internal (cl::Hidden) knob, on by default unless noted in its description.
 123static cl::opt<bool>
 124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
 125 cl::desc("Hoist common instructions up to the parent block"));
 126
 128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
 129 cl::desc("Hoist loads if the target supports conditional faulting"));
 130
 132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
 133 cl::desc("Hoist stores if the target supports conditional faulting"));
 134
 136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
 137 cl::desc("Control the maximal conditional load/store that we are willing "
 138 "to speculatively execute to eliminate conditional branch "
 139 "(default = 6)"));
 140
 142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
 143 cl::init(20),
 144 cl::desc("Allow reordering across at most this many "
 145 "instructions when hoisting"));
 146
 147static cl::opt<bool>
 148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
 149 cl::desc("Sink common instructions down to the end block"));
 150
 152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
 153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
 154
 156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
 157 cl::desc("Hoist conditional stores even if an unconditional store does not "
 158 "precede - hoist multiple conditional stores into a single "
 159 "predicated store"));
 160
 162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
 163 cl::desc("When merging conditional stores, do so even if the resultant "
 164 "basic blocks are unlikely to be if-converted as a result"));
 165
 167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
 168 cl::desc("Allow exactly one expensive instruction to be speculatively "
 169 "executed"));
 170
 172 "max-speculation-depth", cl::Hidden, cl::init(10),
 173 cl::desc("Limit maximum recursion depth when calculating costs of "
 174 "speculatively executed instructions"));
 175
 176static cl::opt<int>
 177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
 178 cl::init(10),
 179 cl::desc("Max size of a block which is still considered "
 180 "small enough to thread through"));
 181
 182// Two is chosen to allow one negation and a logical combine.
 184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
 185 cl::init(2),
 186 cl::desc("Maximum cost of combining conditions when "
 187 "folding branches"));
 188
 190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
 191 cl::init(2),
 192 cl::desc("Multiplier to apply to threshold when determining whether or not "
 193 "to fold branch to common destination when vector operations are "
 194 "present"));
 195
 197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
 198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
 199
 201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
 202 cl::desc("Limit cases to analyze when converting a switch to select"));
 203
 205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
 206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
 207 "to be live in"));
 208
// NOTE(review): original line 209 (between the option above and the blank
// line below) is also missing from this listing — verify upstream.
 210
 211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
// Worker class driving all of SimplifyCFG's peephole transforms over a single
// basic block. Instantiated with the analyses/options it needs; run(BB)
// repeatedly applies simplifyOnce until no further change is requested.
270class SimplifyCFGOpt {
 271 const TargetTransformInfo &TTI;
 272 DomTreeUpdater *DTU;
 273 const DataLayout &DL;
 274 ArrayRef<WeakVH> LoopHeaders;
 275 const SimplifyCFGOptions &Options;
// Set by requestResimplify() when a transform wants another iteration over
// the block.
 276 bool Resimplify;
 277
// Value-equality (switch-like) comparison folding helpers.
 278 Value *isValueEqualityComparison(Instruction *TI);
 279 BasicBlock *getValueEqualityComparisonCases(
 280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
 281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
 282 BasicBlock *Pred,
 283 IRBuilder<> &Builder);
 284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
 285 Instruction *PTI,
 286 IRBuilder<> &Builder);
 287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
 288 IRBuilder<> &Builder);
 289
// Per-terminator-kind simplification entry points, dispatched from
// simplifyOnce based on the block's terminator.
 290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
 291 bool simplifySingleResume(ResumeInst *RI);
 292 bool simplifyCommonResume(ResumeInst *RI);
 293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
 294 bool simplifyUnreachable(UnreachableInst *UI);
 295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
 296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
 297 bool simplifyIndirectBr(IndirectBrInst *IBI);
 298 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
 299 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
 300 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
 301 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
 302
 303 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
 304 IRBuilder<> &Builder);
 305
// Hoisting/sinking/speculation helpers.
 306 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
 307 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
 308 Instruction *TI, Instruction *I1,
 309 SmallVectorImpl<Instruction *> &OtherSuccTIs);
 310 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
 311 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
 312 BasicBlock *TrueBB, BasicBlock *FalseBB,
 313 uint32_t TrueWeight, uint32_t FalseWeight);
 314 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
 315 const DataLayout &DL);
 316 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
 317 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
 318 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
 319
 320public:
 321 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
 322 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
 323 const SimplifyCFGOptions &Opts)
 324 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
 325 assert((!DTU || !DTU->hasPostDomTree()) &&
 326 "SimplifyCFG is not yet capable of maintaining validity of a "
 327 "PostDomTree, so don't ask for it.");
 328 }
 329
 330 bool simplifyOnce(BasicBlock *BB);
 331 bool run(BasicBlock *BB);
 332
 333 // Helper to set Resimplify and return change indication.
 334 bool requestResimplify() {
 335 Resimplify = true;
 336 return true;
 337 }
 338};
339
340// we synthesize a || b as select a, true, b
341// we synthesize a && b as select a, b, false
342// this function determines if SI is playing one of those roles.
343[[maybe_unused]] bool
344isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
345 return ((isa<ConstantInt>(SI->getTrueValue()) &&
346 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
347 (isa<ConstantInt>(SI->getFalseValue()) &&
348 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
349}
350
351} // end anonymous namespace
352
353/// Return true if all the PHI nodes in the basic block \p BB
354/// receive compatible (identical) incoming values when coming from
355/// all of the predecessor blocks that are specified in \p IncomingBlocks.
356///
357/// Note that if the values aren't exactly identical, but \p EquivalenceSet
358/// is provided, and *both* of the values are present in the set,
359/// then they are considered equal.
361 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
362 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
363 assert(IncomingBlocks.size() == 2 &&
364 "Only for a pair of incoming blocks at the time!");
365
366 // FIXME: it is okay if one of the incoming values is an `undef` value,
367 // iff the other incoming value is guaranteed to be a non-poison value.
368 // FIXME: it is okay if one of the incoming values is a `poison` value.
369 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
370 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
371 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
372 if (IV0 == IV1)
373 return true;
374 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
375 EquivalenceSet->contains(IV1))
376 return true;
377 return false;
378 });
379}
380
381/// Return true if it is safe to merge these two
382/// terminator instructions together.
383static bool
385 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
386 if (SI1 == SI2)
387 return false; // Can't merge with self!
388
389 // It is not safe to merge these two switch instructions if they have a common
390 // successor, and if that successor has a PHI node, and if *that* PHI node has
391 // conflicting incoming values from the two switch blocks.
392 BasicBlock *SI1BB = SI1->getParent();
393 BasicBlock *SI2BB = SI2->getParent();
394
396 bool Fail = false;
397 for (BasicBlock *Succ : successors(SI2BB)) {
398 if (!SI1Succs.count(Succ))
399 continue;
400 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
401 continue;
402 Fail = true;
403 if (FailBlocks)
404 FailBlocks->insert(Succ);
405 else
406 break;
407 }
408
409 return !Fail;
410}
411
412/// Update PHI nodes in Succ to indicate that there will now be entries in it
413/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
414/// will be the same as those coming in from ExistPred, an existing predecessor
415/// of Succ.
416static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
417 BasicBlock *ExistPred,
418 MemorySSAUpdater *MSSAU = nullptr) {
419 for (PHINode &PN : Succ->phis())
420 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
421 if (MSSAU)
422 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
423 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
424}
425
426/// Compute an abstract "cost" of speculating the given instruction,
427/// which is assumed to be safe to speculate. TCC_Free means cheap,
428/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
429/// expensive.
431 const TargetTransformInfo &TTI) {
432 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
433}
434
435/// If we have a merge point of an "if condition" as accepted above,
436/// return true if the specified value dominates the block. We don't handle
437/// the true generality of domination here, just a special case which works
438/// well enough for us.
439///
440/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
441/// see if V (which must be an instruction) and its recursive operands
442/// that do not dominate BB have a combined cost lower than Budget and
443/// are non-trapping. If both are true, the instruction is inserted into the
444/// set and true is returned.
445///
446/// The cost for most non-trapping instructions is defined as 1 except for
447/// Select whose cost is 2.
448///
449/// After this function returns, Cost is increased by the cost of
450/// V plus its non-dominating operands. If that cost is greater than
451/// Budget, false is returned and Cost is undefined.
453 Value *V, BasicBlock *BB, Instruction *InsertPt,
454 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
456 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
457 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
458 // so limit the recursion depth.
459 // TODO: While this recursion limit does prevent pathological behavior, it
460 // would be better to track visited instructions to avoid cycles.
462 return false;
463
465 if (!I) {
466 // Non-instructions dominate all instructions and can be executed
467 // unconditionally.
468 return true;
469 }
470 BasicBlock *PBB = I->getParent();
471
472 // We don't want to allow weird loops that might have the "if condition" in
473 // the bottom of this block.
474 if (PBB == BB)
475 return false;
476
477 // If this instruction is defined in a block that contains an unconditional
478 // branch to BB, then it must be in the 'conditional' part of the "if
479 // statement". If not, it definitely dominates the region.
481 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
482 return true;
483
484 // If we have seen this instruction before, don't count it again.
485 if (AggressiveInsts.count(I))
486 return true;
487
488 // Okay, it looks like the instruction IS in the "condition". Check to
489 // see if it's a cheap instruction to unconditionally compute, and if it
490 // only uses stuff defined outside of the condition. If so, hoist it out.
491 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
492 return false;
493
494 // Overflow arithmetic instruction plus extract value are usually generated
495 // when a division is being replaced. But, in this case, the zero check may
496 // still be kept in the code. In that case it would be worth to hoist these
497 // two instruction out of the basic block. Let's treat this pattern as one
498 // single cheap instruction here!
499 WithOverflowInst *OverflowInst;
500 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
501 ZeroCostInstructions.insert(OverflowInst);
502 Cost += 1;
503 } else if (!ZeroCostInstructions.contains(I))
504 Cost += computeSpeculationCost(I, TTI);
505
506 // Allow exactly one instruction to be speculated regardless of its cost
507 // (as long as it is safe to do so).
508 // This is intended to flatten the CFG even if the instruction is a division
509 // or other expensive operation. The speculation of an expensive instruction
510 // is expected to be undone in CodeGenPrepare if the speculation has not
511 // enabled further IR optimizations.
512 if (Cost > Budget &&
513 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
514 !Cost.isValid()))
515 return false;
516
517 // Okay, we can only really hoist these out if their operands do
518 // not take us over the cost threshold.
519 for (Use &Op : I->operands())
520 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
521 TTI, AC, ZeroCostInstructions, Depth + 1))
522 return false;
523 // Okay, it's safe to do this! Remember this instruction.
524 AggressiveInsts.insert(I);
525 return true;
526}
527
528/// Extract ConstantInt from value, looking through IntToPtr
529/// and PointerNullValue. Return NULL if value is not a constant int.
531 // Normal constant int.
533 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
534 return CI;
535
536 // It is not safe to look through inttoptr or ptrtoint when using unstable
537 // pointer types.
538 if (DL.hasUnstableRepresentation(V->getType()))
539 return nullptr;
540
541 // This is some kind of pointer constant. Turn it into a pointer-sized
542 // ConstantInt if possible.
543 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
544
545 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
547 return ConstantInt::get(IntPtrTy, 0);
548
549 // IntToPtr const int, we can look through this if the semantics of
550 // inttoptr for this address space are a simple (truncating) bitcast.
552 if (CE->getOpcode() == Instruction::IntToPtr)
553 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
554 // The constant is very likely to have the right type already.
555 if (CI->getType() == IntPtrTy)
556 return CI;
557 else
558 return cast<ConstantInt>(
559 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
560 }
561 return nullptr;
562}
563
564namespace {
565
566/// Given a chain of or (||) or and (&&) comparison of a value against a
567/// constant, this will try to recover the information required for a switch
568/// structure.
569/// It will depth-first traverse the chain of comparison, seeking for patterns
570/// like %a == 12 or %a < 4 and combine them to produce a set of integer
571/// representing the different cases for the switch.
572/// Note that if the chain is composed of '||' it will build the set of elements
573/// that matches the comparisons (i.e. any of this value validate the chain)
574/// while for a chain of '&&' it will build the set elements that make the test
575/// fail.
576struct ConstantComparesGatherer {
577 const DataLayout &DL;
578
579 /// Value found for the switch comparison
580 Value *CompValue = nullptr;
581
582 /// Extra clause to be checked before the switch
583 Value *Extra = nullptr;
584
585 /// Set of integers to match in switch
587
588 /// Number of comparisons matched in the and/or chain
589 unsigned UsedICmps = 0;
590
591 /// If the elements in Vals matches the comparisons
592 bool IsEq = false;
593
594 // Used to check if the first matched CompValue shall be the Extra check.
595 bool IgnoreFirstMatch = false;
596 bool MultipleMatches = false;
597
598 /// Construct and compute the result for the comparison instruction Cond
599 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
600 gather(Cond);
601 if (CompValue || !MultipleMatches)
602 return;
603 Extra = nullptr;
604 Vals.clear();
605 UsedICmps = 0;
606 IgnoreFirstMatch = true;
607 gather(Cond);
608 }
609
610 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
611 ConstantComparesGatherer &
612 operator=(const ConstantComparesGatherer &) = delete;
613
614private:
615 /// Try to set the current value used for the comparison, it succeeds only if
616 /// it wasn't set before or if the new value is the same as the old one
617 bool setValueOnce(Value *NewVal) {
618 if (IgnoreFirstMatch) {
619 IgnoreFirstMatch = false;
620 return false;
621 }
622 if (CompValue && CompValue != NewVal) {
623 MultipleMatches = true;
624 return false;
625 }
626 CompValue = NewVal;
627 return true;
628 }
629
630 /// Try to match Instruction "I" as a comparison against a constant and
631 /// populates the array Vals with the set of values that match (or do not
632 /// match depending on isEQ).
633 /// Return false on failure. On success, the Value the comparison matched
634 /// against is placed in CompValue.
635 /// If CompValue is already set, the function is expected to fail if a match
636 /// is found but the value compared to is different.
637 bool matchInstruction(Instruction *I, bool isEQ) {
638 if (match(I, m_Not(m_Instruction(I))))
639 isEQ = !isEQ;
640
641 Value *Val;
642 if (match(I, m_NUWTrunc(m_Value(Val)))) {
643 // If we already have a value for the switch, it has to match!
644 if (!setValueOnce(Val))
645 return false;
646 UsedICmps++;
647 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
648 return true;
649 }
650 // If this is an icmp against a constant, handle this as one of the cases.
651 ICmpInst *ICI;
652 ConstantInt *C;
653 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
654 (C = getConstantInt(I->getOperand(1), DL)))) {
655 return false;
656 }
657
658 Value *RHSVal;
659 const APInt *RHSC;
660
661 // Pattern match a special case
662 // (x & ~2^z) == y --> x == y || x == y|2^z
663 // This undoes a transformation done by instcombine to fuse 2 compares.
664 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
665 // It's a little bit hard to see why the following transformations are
666 // correct. Here is a CVC3 program to verify them for 64-bit values:
667
668 /*
669 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
670 x : BITVECTOR(64);
671 y : BITVECTOR(64);
672 z : BITVECTOR(64);
673 mask : BITVECTOR(64) = BVSHL(ONE, z);
674 QUERY( (y & ~mask = y) =>
675 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
676 );
677 QUERY( (y | mask = y) =>
678 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
679 );
680 */
681
682 // Please note that each pattern must be a dual implication (<--> or
683 // iff). One directional implication can create spurious matches. If the
684 // implication is only one-way, an unsatisfiable condition on the left
685 // side can imply a satisfiable condition on the right side. Dual
686 // implication ensures that satisfiable conditions are transformed to
687 // other satisfiable conditions and unsatisfiable conditions are
688 // transformed to other unsatisfiable conditions.
689
690 // Here is a concrete example of a unsatisfiable condition on the left
691 // implying a satisfiable condition on the right:
692 //
693 // mask = (1 << z)
694 // (x & ~mask) == y --> (x == y || x == (y | mask))
695 //
696 // Substituting y = 3, z = 0 yields:
697 // (x & -2) == 3 --> (x == 3 || x == 2)
698
699 // Pattern match a special case:
700 /*
701 QUERY( (y & ~mask = y) =>
702 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
703 );
704 */
705 if (match(ICI->getOperand(0),
706 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
707 APInt Mask = ~*RHSC;
708 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
709 // If we already have a value for the switch, it has to match!
710 if (!setValueOnce(RHSVal))
711 return false;
712
713 Vals.push_back(C);
714 Vals.push_back(
715 ConstantInt::get(C->getContext(),
716 C->getValue() | Mask));
717 UsedICmps++;
718 return true;
719 }
720 }
721
722 // Pattern match a special case:
723 /*
724 QUERY( (y | mask = y) =>
725 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
726 );
727 */
728 if (match(ICI->getOperand(0),
729 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
730 APInt Mask = *RHSC;
731 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
732 // If we already have a value for the switch, it has to match!
733 if (!setValueOnce(RHSVal))
734 return false;
735
736 Vals.push_back(C);
737 Vals.push_back(ConstantInt::get(C->getContext(),
738 C->getValue() & ~Mask));
739 UsedICmps++;
740 return true;
741 }
742 }
743
744 // If we already have a value for the switch, it has to match!
745 if (!setValueOnce(ICI->getOperand(0)))
746 return false;
747
748 UsedICmps++;
749 Vals.push_back(C);
750 return true;
751 }
752
753 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
754 ConstantRange Span =
756
757 // Shift the range if the compare is fed by an add. This is the range
758 // compare idiom as emitted by instcombine.
759 Value *CandidateVal = I->getOperand(0);
760 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
761 Span = Span.subtract(*RHSC);
762 CandidateVal = RHSVal;
763 }
764
765 // If this is an and/!= check, then we are looking to build the set of
766 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
767 // x != 0 && x != 1.
768 if (!isEQ)
769 Span = Span.inverse();
770
771 // If there are a ton of values, we don't want to make a ginormous switch.
772 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
773 return false;
774 }
775
776 // If we already have a value for the switch, it has to match!
777 if (!setValueOnce(CandidateVal))
778 return false;
779
780 // Add all values from the range to the set
781 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
782 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
783
784 UsedICmps++;
785 return true;
786 }
787
788 /// Given a potentially 'or'd or 'and'd together collection of icmp
789 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
790 /// the value being compared, and stick the list constants into the Vals
791 /// vector.
792 /// One "Extra" case is allowed to differ from the other.
793 void gather(Value *V) {
794 Value *Op0, *Op1;
795 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
796 IsEq = true;
797 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
798 IsEq = false;
799 else
800 return;
801 // Keep a stack (SmallVector for efficiency) for depth-first traversal
802 SmallVector<Value *, 8> DFT{Op0, Op1};
803 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
804
805 while (!DFT.empty()) {
806 V = DFT.pop_back_val();
807
808 if (Instruction *I = dyn_cast<Instruction>(V)) {
809 // If it is a || (or && depending on isEQ), process the operands.
810 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
811 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
812 if (Visited.insert(Op1).second)
813 DFT.push_back(Op1);
814 if (Visited.insert(Op0).second)
815 DFT.push_back(Op0);
816
817 continue;
818 }
819
820 // Try to match the current instruction
821 if (matchInstruction(I, IsEq))
822 // Match succeed, continue the loop
823 continue;
824 }
825
826 // One element of the sequence of || (or &&) could not be match as a
827 // comparison against the same value as the others.
828 // We allow only one "Extra" case to be checked before the switch
829 if (!Extra) {
830 Extra = V;
831 continue;
832 }
833 // Failed to parse a proper sequence, abort now
834 CompValue = nullptr;
835 break;
836 }
837 }
838};
839
840} // end anonymous namespace
841
843 MemorySSAUpdater *MSSAU = nullptr) {
844 Instruction *Cond = nullptr;
846 Cond = dyn_cast<Instruction>(SI->getCondition());
847 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
848 if (BI->isConditional())
849 Cond = dyn_cast<Instruction>(BI->getCondition());
850 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
851 Cond = dyn_cast<Instruction>(IBI->getAddress());
852 }
853
854 TI->eraseFromParent();
855 if (Cond)
857}
858
859/// Return true if the specified terminator checks
860/// to see if a value is equal to constant integer value.
861Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
862 Value *CV = nullptr;
863 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
864 // Do not permit merging of large switch instructions into their
865 // predecessors unless there is only one predecessor.
866 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
867 CV = SI->getCondition();
868 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
869 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
870 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
871 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
872 CV = ICI->getOperand(0);
873 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
874 if (Trunc->hasNoUnsignedWrap())
875 CV = Trunc->getOperand(0);
876 }
877 }
878
879 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
880 if (CV) {
881 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
882 Value *Ptr = PTII->getPointerOperand();
883 if (DL.hasUnstableRepresentation(Ptr->getType()))
884 return CV;
885 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
886 CV = Ptr;
887 }
888 }
889 return CV;
890}
891
892/// Given a value comparison instruction,
893/// decode all of the 'cases' that it represents and return the 'default' block.
894BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
895 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
896 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
897 Cases.reserve(SI->getNumCases());
898 for (auto Case : SI->cases())
899 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
900 Case.getCaseSuccessor()));
901 return SI->getDefaultDest();
902 }
903
904 BranchInst *BI = cast<BranchInst>(TI);
905 Value *Cond = BI->getCondition();
906 ICmpInst::Predicate Pred;
907 ConstantInt *C;
908 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
909 Pred = ICI->getPredicate();
910 C = getConstantInt(ICI->getOperand(1), DL);
911 } else {
912 Pred = ICmpInst::ICMP_NE;
913 auto *Trunc = cast<TruncInst>(Cond);
914 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
915 }
916 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
917 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
918 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
919}
920
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
    std::vector<ValueEqualityComparisonCase> &Cases) {
  // NOTE(review): relies on ValueEqualityComparisonCase comparing equal to a
  // BasicBlock* (presumably via its Dest member) — see the struct definition.
  llvm::erase(Cases, BB);
}
928
929/// Return true if there are any keys in C1 that exist in C2 as well.
930static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
931 std::vector<ValueEqualityComparisonCase> &C2) {
932 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
933
934 // Make V1 be smaller than V2.
935 if (V1->size() > V2->size())
936 std::swap(V1, V2);
937
938 if (V1->empty())
939 return false;
940 if (V1->size() == 1) {
941 // Just scan V2.
942 ConstantInt *TheVal = (*V1)[0].Value;
943 for (const ValueEqualityComparisonCase &VECC : *V2)
944 if (TheVal == VECC.Value)
945 return true;
946 }
947
948 // Otherwise, just sort both lists and compare element by element.
949 array_pod_sort(V1->begin(), V1->end());
950 array_pod_sort(V2->begin(), V2->end());
951 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
952 while (i1 != e1 && i2 != e2) {
953 if ((*V1)[i1].Value == (*V2)[i2].Value)
954 return true;
955 if ((*V1)[i1].Value < (*V2)[i2].Value)
956 ++i1;
957 else
958 ++i2;
959 }
960 return false;
961}
962
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");


      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Walk the cases in reverse so that removeCase() does not invalidate
    // the iteration.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    // Only edges whose last case was just removed disappear from the CFG.
    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge keeps exactly one edge
  // to TheRealDest; any duplicate edges to it also lose their PHI entries.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1116
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    // Order by the unsigned integer value, never by pointer identity.
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace
1129
    ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  if (LHS == RHS)
    return 0;
  // Returns 1 when LHS is the smaller value, so a qsort-style comparator
  // using this predicate orders constants in descending value order.
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
1138
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
    SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    // Non-icmp conditions (e.g. a trunc) need no swap; leave weights as-is.
    auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICI)
      return;

    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1161
// Clone the non-terminator ("bonus") instructions of BB into PredBlock, just
// ahead of PredBlock's terminator, remapping their operands through VMap and
// rewriting liveout (PHI) uses so the code stays in block-closed SSA form.
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(NewBonusInst, VMap,

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(PTI, VMap);
    }
  }
}
1240
/// Merge the value-equality comparison terminator \p TI (in block \p BB) into
/// its predecessor's comparison terminator \p PTI (in \p Pred), building one
/// combined switch on \p CV in \p Pred and merging branch-weight metadata
/// along the way. \p CV may be rewritten (replaced by a ptrtoint cast) when
/// it is a pointer value. Always returns true.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();


  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    getBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[Case.Value]);
        PredCases.push_back(Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  // Add one PHI entry per new edge from Pred into each successor.
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);


  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1454
1455/// The specified terminator is a value equality comparison instruction
1456/// (either a switch or a branch on "X == c").
1457/// See if any of the predecessors of the terminator block are value comparisons
1458/// on the same value. If so, and if safe to do so, fold them together.
1459bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1460 IRBuilder<> &Builder) {
1461 BasicBlock *BB = TI->getParent();
1462 Value *CV = isValueEqualityComparison(TI); // CondVal
1463 assert(CV && "Not a comparison?");
1464
1465 bool Changed = false;
1466
1467 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1468 while (!Preds.empty()) {
1469 BasicBlock *Pred = Preds.pop_back_val();
1470 Instruction *PTI = Pred->getTerminator();
1471
1472 // Don't try to fold into itself.
1473 if (Pred == BB)
1474 continue;
1475
1476 // See if the predecessor is a comparison with the same value.
1477 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1478 if (PCV != CV)
1479 continue;
1480
1481 SmallSetVector<BasicBlock *, 4> FailBlocks;
1482 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1483 for (auto *Succ : FailBlocks) {
1484 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1485 return false;
1486 }
1487 }
1488
1489 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1490 Changed = true;
1491 }
1492 return Changed;
1493}
1494
// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
    Instruction *I1, Instruction *I2) {
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      // A PHI that disagrees between the two blocks and refers to one of the
      // instructions would require a select after hoisting — reject.
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1512
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.

  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
    Flags |= SkipImplicitControlFlow;
  return Flags;
}
1534
// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block. \p Flags summarizes the characteristics
// (from skippedInstrFlags) of the instructions being reordered across.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1570
// Forward declaration (definition appears later in the file).
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1572
/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
                                         const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // Respect the target's judgement on whether hoisting pays off.
  if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1603
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
                  return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  // Any instruction lacking debug records means nothing can be hoisted in
  // lock-step, so bail out early.
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1660
// Return true if the two instructions are identical up to commutativity:
// either identical-when-defined, compares with swapped predicate and swapped
// operands, or the same commutative operation with its first two operands
// swapped (remaining operands equal).
    const Instruction *I2) {
  if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
    return true;

  // "a pred b" matches "b pred' a" when pred' is the swapped predicate.
  if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
    if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
      return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
             Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
             Cmp1->getOperand(1) == Cmp2->getOperand(0);

  if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
    return I1->getOperand(0) == I2->getOperand(1) &&
           I1->getOperand(1) == I2->getOperand(0) &&
           equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
  }

  return false;
}
1680
1681/// If the target supports conditional faulting,
1682/// we look for the following pattern:
1683/// \code
1684/// BB:
1685/// ...
1686/// %cond = icmp ult %x, %y
1687/// br i1 %cond, label %TrueBB, label %FalseBB
1688/// FalseBB:
1689/// store i32 1, ptr %q, align 4
1690/// ...
1691/// TrueBB:
1692/// %maskedloadstore = load i32, ptr %b, align 4
1693/// store i32 %maskedloadstore, ptr %p, align 4
1694/// ...
1695/// \endcode
1696///
1697/// and transform it into:
1698///
1699/// \code
1700/// BB:
1701/// ...
1702/// %cond = icmp ult %x, %y
1703/// %maskedloadstore = cload i32, ptr %b, %cond
1704/// cstore i32 %maskedloadstore, ptr %p, %cond
1705/// cstore i32 1, ptr %q, ~%cond
1706/// br i1 %cond, label %TrueBB, label %FalseBB
1707/// FalseBB:
1708/// ...
1709/// TrueBB:
1710/// ...
1711/// \endcode
1712///
1713/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1714/// e.g.
1715///
1716/// \code
1717/// %vcond = bitcast i1 %cond to <1 x i1>
1718/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1719/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1720/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
/// call void @llvm.masked.store.v1i32.p0
/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
/// %cond.not = xor i1 %cond, true
/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
/// call void @llvm.masked.store.v1i32.p0
/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
///
/// \param BI The branch instruction.
/// \param SpeculatedConditionalLoadsStores The load/store instructions that
/// will be speculated.
/// \param Invert indicates whether the FalseBB successor is the speculated
/// one (so the condition must be negated). Only used in a triangle CFG;
/// std::nullopt means both successors are speculated (diamond CFG).
static void hoistConditionalLoadsStores(
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // Scalar accesses become <1 x Ty> masked accesses guarded by a <1 x i1>.
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getOperand(0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: one mask, possibly negated, materialized near the
    // speculated instructions (before Sel if a select was created).
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    // Diamond CFG: build both the true and the negated mask up front,
    // right before the branch.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      // Diamond: pick the mask matching the successor I comes from.
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(U))) {
            // The PHI's incoming value from BB supplies the result for the
            // lane where the mask is false (the pass-through operand).
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}
1827
/// Return true if \p I is a simple (non-volatile, non-atomic) load or store
/// that the target can lower as a conditional-faulting access and that the
/// corresponding hoisting option enables.
static bool isSafeCheapLoadStore(const Instruction *I,
                                 const TargetTransformInfo &TTI) {
  // Not handle volatile or atomic.
  bool IsStore = false;
  if (auto *L = dyn_cast<LoadInst>(I)) {
    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
      return false;
  } else if (auto *S = dyn_cast<StoreInst>(I)) {
    if (!S->isSimple() || !HoistStoresWithCondFaulting)
      return false;
    IsStore = true;
  } else
    return false;

  // llvm.masked.load/store use i32 for alignment while load/store use i64.
  // That's why we have the alignment limitation.
  // FIXME: Update the prototype of the intrinsics?
  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
         getLoadStoreAlignment(I) < Value::MaximumAlignment;
}
1848
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  for (auto *Succ : successors(BB)) {
    if (Succ->hasAddressTaken())
      return false;
    if (Succ->getSinglePredecessor())
      continue;
    // If Succ has >1 predecessors, continue to check if the Succ contains only
    // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
    // can relax the condition based on the assumption that the program would
    // never enter Succ and trigger such an UB.
    if (isa<UnreachableInst>(*Succ->begin()))
      continue;
    return false;
  }
  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // PHIs at the top of a successor would need select-materialization;
    // bail out in that case.
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.
    SmallVector<BasicBlock *> Succs(successors(BB));
    // Check if sizes and terminators of all successors match.
    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
      Instruction *Term0 = Succs[0]->getTerminator();
      Instruction *Term = Succ->getTerminator();
      return !Term->isSameOperationAs(Term0) ||
             !equal(Term->operands(), Term0->operands()) ||
             Succs[0]->size() != Succ->size();
    });
    if (!AllSame)
      return false;
    // NOTE(review): AllSame is necessarily true here; the guard below is
    // redundant but kept verbatim.
    if (AllSame) {
      // Walk all successors backwards in lockstep and require every row of
      // instructions to be identical (up to operand commutativity).
      LockstepReverseIterator<true> LRI(Succs);
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(*LRI, [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I0, I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        I1->andIRFlags(I2);
        if (auto *CB = dyn_cast<CallBase>(I1)) {
          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(I1, I2, true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2043
/// Hoist the identical terminators of TI's successor blocks (\p I1 and the
/// \p OtherSuccTIs) up into TI's block: the terminator is cloned before TI,
/// and for any PHI in the new successors whose incoming values differ between
/// the old blocks, a select on the branch condition is inserted.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow then converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
            passingValueIsAlwaysUndefined(BB2V, &PN))
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  SmallVector<DebugLoc, 4> Locs;
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Builder.CreateSelectFMF(
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  eraseTerminatorAndDCECond(TI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2169
2170// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2171// into variables.
2173 int OpIdx) {
2174 // Divide/Remainder by constant is typically much cheaper than by variable.
2175 if (I->isIntDivRem())
2176 return OpIdx != 1;
2177 return !isa<IntrinsicInst>(I);
2178}
2179
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      // Operand varies across blocks: it would need a PHI. Only allow this
      // when replacing the operand with a variable is both legal and cheap.
      if ((isa<Constant>(Op) &&
           !replacingOperandWithVariableIsCheap(I0, OI)) ||
          !canReplaceOperandWithVariable(I0, OI))
        // We can't create a PHI from this GEP.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2285
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks) {
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction *, 4> Insts;
  for (auto *BB : Blocks) {
    // The instruction to sink is the one immediately before the terminator.
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2380
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //       [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \   /    |
  //   [sink.split] |
  //         \     /
  //         [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB.
  DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
  for (PHINode &PN : BB->phis()) {
    SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert({PN.getIncomingBlock(U), &U});
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(*IncomingVals[Pred]);
  }

  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  LockstepReverseIterator<true> LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(*LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert_range(*LRI);
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Check whether this is the pointer operand of a load/store.
    auto IsMemOperand = [](Use &U) {
      auto *I = cast<Instruction>(U.getUser());
      if (isa<LoadInst>(I))
        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
      if (isa<StoreInst>(I))
        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
      return false;
    };

    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(&U);
        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
              return InstructionsToSink.contains(V);
            })) {
          ++NumPHIInsts;
          // Do not separate a load/store from the gep producing the address.
          // The gep can likely be folded into the load/store as an addressing
          // mode. Additionally, a load of a gep is easier to analyze than a
          // load of a phi.
          if (IsMemOperand(U) &&
              any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
            return false;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert_range(*LRI);
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2640
2641namespace {
2642
2643struct CompatibleSets {
2644 using SetTy = SmallVector<InvokeInst *, 2>;
2645
2647
2648 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2649
2650 SetTy &getCompatibleSet(InvokeInst *II);
2651
2652 void insert(InvokeInst *II);
2653};
2654
2655CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2656 // Perform a linear scan over all the existing sets, see if the new `invoke`
2657 // is compatible with any particular set. Since we know that all the `invokes`
2658 // within a set are compatible, only check the first `invoke` in each set.
2659 // WARNING: at worst, this has quadratic complexity.
2660 for (CompatibleSets::SetTy &Set : Sets) {
2661 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2662 return Set;
2663 }
2664
2665 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2666 return Sets.emplace_back();
2667}
2668
2669void CompatibleSets::insert(InvokeInst *II) {
2670 getCompatibleSet(II).emplace_back(II);
2671}
2672
// Decides whether the two candidate `invoke`s may be merged into a single
// `invoke`: both must be mergeable at all, agree on direct-vs-indirect,
// share callee and normal/unwind destinations, have compatible incoming PHI
// values in those destinations, perform the same operation, and have
// argument differences that can be PHI-merged.
2673bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2674 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2675
2676 // Can we theoretically merge these `invoke`s?
2677 auto IsIllegalToMerge = [](InvokeInst *II) {
2678 return II->cannotMerge() || II->isInlineAsm();
2679 };
2680 if (any_of(Invokes, IsIllegalToMerge))
2681 return false;
2682
2683 // Either both `invoke`s must be direct,
2684 // or both `invoke`s must be indirect.
2685 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2686 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2687 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2688 if (HaveIndirectCalls) {
2689 if (!AllCallsAreIndirect)
2690 return false;
2691 } else {
2692 // All callees must be identical.
2693 Value *Callee = nullptr;
2694 for (InvokeInst *II : Invokes) {
2695 Value *CurrCallee = II->getCalledOperand();
2696 assert(CurrCallee && "There is always a called operand.");
2697 if (!Callee)
2698 Callee = CurrCallee;
2699 else if (Callee != CurrCallee)
2700 return false;
2701 }
2702 }
2703
2704 // Either both `invoke`s must not have a normal destination,
2705 // or both `invoke`s must have a normal destination,
2706 auto HasNormalDest = [](InvokeInst *II) {
2707 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2708 };
2709 if (any_of(Invokes, HasNormalDest)) {
2710 // Do not merge `invoke` that does not have a normal destination with one
2711 // that does have a normal destination, even though doing so would be legal.
2712 if (!all_of(Invokes, HasNormalDest))
2713 return false;
2714
2715 // All normal destinations must be identical.
2716 BasicBlock *NormalBB = nullptr;
2717 for (InvokeInst *II : Invokes) {
2718 BasicBlock *CurrNormalBB = II->getNormalDest();
2719 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2720 if (!NormalBB)
2721 NormalBB = CurrNormalBB;
2722 else if (NormalBB != CurrNormalBB)
2723 return false;
2724 }
2725
2726 // In the normal destination, the incoming values for these two `invoke`s
2727 // must be compatible.
2728 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
// NOTE(review): extraction dropped source line 2729 here; upstream appears
// to read `if (!incomingValuesAreCompatible(` — confirm against llvm-project.
2730 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2731 &EquivalenceSet))
2732 return false;
2733 }
2734
2735#ifndef NDEBUG
2736 // All unwind destinations must be identical.
2737 // We know that because we have started from said unwind destination.
2738 BasicBlock *UnwindBB = nullptr;
2739 for (InvokeInst *II : Invokes) {
2740 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2741 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2742 if (!UnwindBB)
2743 UnwindBB = CurrUnwindBB;
2744 else
2745 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2746 }
2747#endif
2748
2749 // In the unwind destination, the incoming values for these two `invoke`s
2750 // must be compatible.
// NOTE(review): extraction dropped source line 2751 here; upstream appears
// to read `if (!incomingValuesAreCompatible(` — confirm against llvm-project.
2752 Invokes.front()->getUnwindDest(),
2753 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2754 return false;
2755
2756 // Ignoring arguments, these `invoke`s must be identical,
2757 // including operand bundles.
2758 const InvokeInst *II0 = Invokes.front();
2759 for (auto *II : Invokes.drop_front())
2760 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2761 return false;
2762
2763 // Can we theoretically form the data operands for the merged `invoke`?
2764 auto IsIllegalToMergeArguments = [](auto Ops) {
2765 Use &U0 = std::get<0>(Ops);
2766 Use &U1 = std::get<1>(Ops);
// Identical operands never need a PHI, so they are always fine to merge.
2767 if (U0 == U1)
2768 return false;
// NOTE(review): extraction dropped source line 2769 here; upstream appears to
// read `return !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),`
// — confirm against llvm-project.
2770 U0.getOperandNo());
2771 };
2772 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2773 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2774 IsIllegalToMergeArguments))
2775 return false;
2776
2777 return true;
2778}
2779
2780} // namespace
2781
2782// Merge all invokes in the provided set, all of which are compatible
2783// as per the `CompatibleSets::shouldBelongToSameSet()`.
// NOTE(review): extraction dropped source line 2784 (the signature's first
// line); upstream appears to read
// `static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,`
// — confirm against llvm-project.
2785 DomTreeUpdater *DTU) {
2786 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2787
// NOTE(review): extraction dropped source line 2788 here; upstream appears to
// declare `SmallVector<DominatorTree::UpdateType, 8> Updates;` — confirm.
// Reserve: one insert+one delete per invoke's branch-over, plus successors.
2789 if (DTU)
2790 Updates.reserve(2 + 3 * Invokes.size());
2791
2792 bool HasNormalDest =
2793 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2794
2795 // Clone one of the invokes into a new basic block.
2796 // Since they are all compatible, it doesn't matter which invoke is cloned.
2797 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2798 InvokeInst *II0 = Invokes.front();
2799 BasicBlock *II0BB = II0->getParent();
2800 BasicBlock *InsertBeforeBlock =
2801 II0->getParent()->getIterator()->getNextNode();
2802 Function *Func = II0BB->getParent();
2803 LLVMContext &Ctx = II0->getContext();
2804
2805 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2806 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2807
2808 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2809 // NOTE: all invokes have the same attributes, so no handling needed.
2810 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2811
2812 if (!HasNormalDest) {
2813 // This set does not have a normal destination,
2814 // so just form a new block with unreachable terminator.
2815 BasicBlock *MergedNormalDest = BasicBlock::Create(
2816 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2817 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2818 UI->setDebugLoc(DebugLoc::getTemporary());
2819 MergedInvoke->setNormalDest(MergedNormalDest);
2820 }
2821
2822 // The unwind destination, however, remains identical for all invokes here.
2823
2824 return MergedInvoke;
2825 }();
2826
2827 if (DTU) {
2828 // Predecessor blocks that contained these invokes will now branch to
2829 // the new block that contains the merged invoke, ...
2830 for (InvokeInst *II : Invokes)
2831 Updates.push_back(
2832 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2833
2834 // ... which has the new `unreachable` block as normal destination,
2835 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2836 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2837 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2838 SuccBBOfMergedInvoke});
2839
2840 // Since predecessor blocks now unconditionally branch to a new block,
2841 // they no longer branch to their original successors.
2842 for (InvokeInst *II : Invokes)
2843 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2844 Updates.push_back(
2845 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2846 }
2847
2848 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2849
2850 // Form the merged operands for the merged invoke.
2851 for (Use &U : MergedInvoke->operands()) {
2852 // Only PHI together the indirect callees and data operands.
2853 if (MergedInvoke->isCallee(&U)) {
2854 if (!IsIndirectCall)
2855 continue;
2856 } else if (!MergedInvoke->isDataOperand(&U))
2857 continue;
2858
2859 // Don't create trivial PHI's with all-identical incoming values.
2860 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2861 return II->getOperand(U.getOperandNo()) != U.get();
2862 });
2863 if (!NeedPHI)
2864 continue;
2865
2866 // Form a PHI out of all the data ops under this index.
// NOTE(review): extraction dropped source line 2867 here; upstream appears to
// read `PHINode *PN = PHINode::Create(` — confirm against llvm-project.
2868 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2869 for (InvokeInst *II : Invokes)
2870 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2871
2872 U.set(PN);
2873 }
2874
2875 // We've ensured that each PHI node has compatible (identical) incoming values
2876 // when coming from each of the `invoke`s in the current merge set,
2877 // so update the PHI nodes accordingly.
2878 for (BasicBlock *Succ : successors(MergedInvoke))
2879 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2880 /*ExistPred=*/Invokes.front()->getParent());
2881
2882 // And finally, replace the original `invoke`s with an unconditional branch
2883 // to the block with the merged `invoke`. Also, give that merged `invoke`
2884 // the merged debugloc of all the original `invoke`s.
2885 DILocation *MergedDebugLoc = nullptr;
2886 for (InvokeInst *II : Invokes) {
2887 // Compute the debug location common to all the original `invoke`s.
2888 if (!MergedDebugLoc)
2889 MergedDebugLoc = II->getDebugLoc();
2890 else
2891 MergedDebugLoc =
2892 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2893
2894 // And replace the old `invoke` with an unconditional branch
2895 // to the block with the merged `invoke`.
2896 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2897 OrigSuccBB->removePredecessor(II->getParent());
2898 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2899 // The unconditional branch is part of the replacement for the original
2900 // invoke, so should use its DebugLoc.
2901 BI->setDebugLoc(II->getDebugLoc());
2902 bool Success = MergedInvoke->tryIntersectAttributes(II);
2903 assert(Success && "Merged invokes with incompatible attributes");
2904 // For NDEBUG Compile
2905 (void)Success;
2906 II->replaceAllUsesWith(MergedInvoke);
2907 II->eraseFromParent();
2908 ++NumInvokesMerged;
2909 }
2910 MergedInvoke->setDebugLoc(MergedDebugLoc);
2911 ++NumInvokeSetsFormed;
2912
2913 if (DTU)
2914 DTU->applyUpdates(Updates);
2915}
2916
2917/// If this block is a `landingpad` exception handling block, categorize all
2918/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2919/// being "mergeable" together, and then merge invokes in each set together.
2920///
2921/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2922/// [...] [...]
2923/// | |
2924/// [invoke0] [invoke1]
2925/// / \ / \
2926/// [cont0] [landingpad] [cont1]
2927/// to:
2928/// [...] [...]
2929/// \ /
2930/// [invoke]
2931/// / \
2932/// [cont] [landingpad]
2933///
2934/// But of course we can only do that if the invokes share the `landingpad`,
2935/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2936/// and the invoked functions are "compatible".
// NOTE(review): extraction dropped source lines 2937-2938 (the function
// signature and the option guard); upstream appears to read
// `bool SimplifyCFGOpt::mergeCompatibleInvokes(BasicBlock *BB) {` followed by
// `if (!EnableMergeCompatibleInvokes)` — confirm against llvm-project.
2939 return false;
2940
2941 bool Changed = false;
2942
2943 // FIXME: generalize to all exception handling blocks?
2944 if (!BB->isLandingPad())
2945 return Changed;
2946
2947 CompatibleSets Grouper;
2948
2949 // Record all the predecessors of this `landingpad`. As per verifier,
2950 // the only allowed predecessor is the unwind edge of an `invoke`.
2951 // We want to group "compatible" `invokes` into the same set to be merged.
2952 for (BasicBlock *PredBB : predecessors(BB))
2953 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2954
2955 // And now, merge `invoke`s that were grouped together.
// Singleton sets have nothing to merge with; only sets of two or more
// trigger the actual rewrite.
2956 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2957 if (Invokes.size() < 2)
2958 continue;
2959 Changed = true;
2960 mergeCompatibleInvokesImpl(Invokes, DTU);
2961 }
2962
2963 return Changed;
2964}
2965
2966namespace {
2967/// Track ephemeral values, which should be ignored for cost-modelling
2968/// purposes. Requires walking instructions in reverse order.
2969class EphemeralValueTracker {
2970 SmallPtrSet<const Instruction *, 32> EphValues;
2971
2972 bool isEphemeral(const Instruction *I) {
2973 if (isa<AssumeInst>(I))
2974 return true;
2975 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2976 all_of(I->users(), [&](const User *U) {
2977 return EphValues.count(cast<Instruction>(U));
2978 });
2979 }
2980
2981public:
2982 bool track(const Instruction *I) {
2983 if (isEphemeral(I)) {
2984 EphValues.insert(I);
2985 return true;
2986 }
2987 return false;
2988 }
2989
2990 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2991};
2992} // namespace
2993
2994/// Determine if we can hoist sink a sole store instruction out of a
2995/// conditional block.
2996///
2997/// We are looking for code like the following:
2998/// BrBB:
2999/// store i32 %add, i32* %arrayidx2
3000/// ... // No other stores or function calls (we could be calling a memory
3001/// ... // function).
3002/// %cmp = icmp ult %x, %y
3003/// br i1 %cmp, label %EndBB, label %ThenBB
3004/// ThenBB:
3005/// store i32 %add5, i32* %arrayidx2
3006/// br label EndBB
3007/// EndBB:
3008/// ...
3009/// We are going to transform this into:
3010/// BrBB:
3011/// store i32 %add, i32* %arrayidx2
3012/// ... //
3013/// %cmp = icmp ult %x, %y
3014/// %add.add5 = select i1 %cmp, i32 %add, %add5
3015/// store i32 %add.add5, i32* %arrayidx2
3016/// ...
3017///
3018/// \return The pointer to the value of the previous store if the store can be
3019/// hoisted into the predecessor block. 0 otherwise.
// NOTE(review): extraction dropped source line 3020 (the signature's first
// line); upstream appears to read
// `static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,`
// — confirm against llvm-project.
3021 BasicBlock *StoreBB, BasicBlock *EndBB) {
3022 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3023 if (!StoreToHoist)
3024 return nullptr;
3025
3026 // Volatile or atomic.
3027 if (!StoreToHoist->isSimple())
3028 return nullptr;
3029
3030 Value *StorePtr = StoreToHoist->getPointerOperand();
3031 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3032
3033 // Look for a store to the same pointer in BrBB.
// Only a small, fixed number of instructions are scanned to bound cost.
3034 unsigned MaxNumInstToLookAt = 9;
3035 // Skip pseudo probe intrinsic calls which are not really killing any memory
3036 // accesses.
3037 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3038 if (!MaxNumInstToLookAt)
3039 break;
3040 --MaxNumInstToLookAt;
3041
3042 // Could be calling an instruction that affects memory like free().
3043 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3044 return nullptr;
3045
3046 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3047 // Found the previous store to same location and type. Make sure it is
3048 // simple, to avoid introducing a spurious non-atomic write after an
3049 // atomic write.
3050 if (SI->getPointerOperand() == StorePtr &&
3051 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3052 SI->getAlign() >= StoreToHoist->getAlign())
3053 // Found the previous store, return its value operand.
3054 return SI->getValueOperand();
3055 return nullptr; // Unknown store.
3056 }
3057
3058 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
// A prior simple load of the same pointer/type can stand in for the stored
// value, provided the underlying object is writable and not captured.
3059 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3060 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3061 Value *Obj = getUnderlyingObject(StorePtr);
3062 bool ExplicitlyDereferenceableOnly;
3063 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
// NOTE(review): extraction dropped source lines 3064 and 3066 here; the
// surviving fragment suggests a negated PointerMayBeCaptured(...) check —
// confirm the exact condition against llvm-project.
3065 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3067 (!ExplicitlyDereferenceableOnly ||
3068 isDereferenceablePointer(StorePtr, StoreTy,
3069 LI->getDataLayout()))) {
3070 // Found a previous load, return it.
3071 return LI;
3072 }
3073 }
3074 // The load didn't work out, but we may still find a store.
3075 }
3076 }
3077
3078 return nullptr;
3079}
3080
3081/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3082/// converted to selects.
// NOTE(review): extraction dropped source line 3083 (the signature's first
// line); upstream appears to read
// `static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,`
// — confirm against llvm-project.
3084 BasicBlock *EndBB,
3085 unsigned &SpeculatedInstructions,
3086 InstructionCost &Cost,
3087 const TargetTransformInfo &TTI) {
// NOTE(review): extraction dropped source lines 3088 and 3090-3091 here; this
// fragment appears to pick a TTI cost kind based on minsize — confirm.
3089 BB->getParent()->hasMinSize()
3092
3093 bool HaveRewritablePHIs = false;
3094 for (PHINode &PN : EndBB->phis()) {
3095 Value *OrigV = PN.getIncomingValueForBlock(BB);
3096 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3097
3098 // FIXME: Try to remove some of the duplication with
3099 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3100 if (ThenV == OrigV)
3101 continue;
3102
// Each non-trivial PHI will become a select; account for its cost.
3103 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3104 CmpInst::makeCmpResultType(PN.getType()),
// NOTE(review): extraction dropped source line 3105 here (the call's trailing
// argument/paren, likely the cost kind) — confirm.
3106
3107 // Don't convert to selects if we could remove undefined behavior instead.
3108 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
// NOTE(review): extraction dropped source line 3109 here; upstream appears to
// also test `passingValueIsAlwaysUndefined(ThenV, &PN))` — confirm.
3110 return false;
3111
3112 HaveRewritablePHIs = true;
3113 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3114 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3115 if (!OrigCE && !ThenCE)
3116 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3117
3118 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3119 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3120 InstructionCost MaxCost =
// NOTE(review): extraction dropped source line 3121 here; likely
// `2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;` — confirm.
3122 if (OrigCost + ThenCost > MaxCost)
3123 return false;
3124
3125 // Account for the cost of an unfolded ConstantExpr which could end up
3126 // getting expanded into Instructions.
3127 // FIXME: This doesn't account for how many operations are combined in the
3128 // constant expression.
3129 ++SpeculatedInstructions;
3130 if (SpeculatedInstructions > 1)
3131 return false;
3132 }
3133
3134 return HaveRewritablePHIs;
3135}
3136
// Profitability check for speculating the `then` side of a conditional
// branch: speculate when the branch is marked unpredictable or carries no
// usable branch weights; otherwise require that the direct-to-end edge is
// not already the predictably-taken one.
// NOTE(review): extraction dropped source line 3137 (the signature's first
// line); upstream appears to read
// `static bool isProfitableToSpeculate(const BranchInst *BI,` — confirm.
3138 std::optional<bool> Invert,
3139 const TargetTransformInfo &TTI) {
3140 // If the branch is non-unpredictable, and is predicted to *not* branch to
3141 // the `then` block, then avoid speculating it.
// An explicitly unpredictable branch is always worth flattening.
3142 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3143 return true;
3144
// Without (nonzero) profile weights there is no evidence against speculating.
3145 uint64_t TWeight, FWeight;
3146 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3147 return true;
3148
// Callers that pass no Invert only want the weight-existence check above.
3149 if (!Invert.has_value())
3150 return false;
3151
// Probability of taking the edge that bypasses the `then` block; if that
// edge is likely (>= the TTI threshold), speculation would be a pessimization.
3152 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3153 BranchProbability BIEndProb =
3154 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3155 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3156 return BIEndProb < Likely;
3157}
3158
3159/// Speculate a conditional basic block flattening the CFG.
3160///
3161/// Note that this is a very risky transform currently. Speculating
3162/// instructions like this is most often not desirable. Instead, there is an MI
3163/// pass which can do it with full awareness of the resource constraints.
3164/// However, some cases are "obvious" and we should do directly. An example of
3165/// this is speculating a single, reasonably cheap instruction.
3166///
3167/// There is only one distinct advantage to flattening the CFG at the IR level:
3168/// it makes very common but simplistic optimizations such as are common in
3169/// instcombine and the DAG combiner more powerful by removing CFG edges and
3170/// modeling their effects with easier to reason about SSA value graphs.
3171///
3172///
3173/// An illustration of this transform is turning this IR:
3174/// \code
3175/// BB:
3176/// %cmp = icmp ult %x, %y
3177/// br i1 %cmp, label %EndBB, label %ThenBB
3178/// ThenBB:
3179/// %sub = sub %x, %y
3180/// br label BB2
3181/// EndBB:
3182/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3183/// ...
3184/// \endcode
3185///
3186/// Into this IR:
3187/// \code
3188/// BB:
3189/// %cmp = icmp ult %x, %y
3190/// %sub = sub %x, %y
3191/// %cond = select i1 %cmp, 0, %sub
3192/// ...
3193/// \endcode
3194///
3195/// \returns true if the conditional block is removed.
3196bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3197 BasicBlock *ThenBB) {
3198 if (!Options.SpeculateBlocks)
3199 return false;
3200
3201 // Be conservative for now. FP select instruction can often be expensive.
3202 Value *BrCond = BI->getCondition();
3203 if (isa<FCmpInst>(BrCond))
3204 return false;
3205
3206 BasicBlock *BB = BI->getParent();
3207 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3208 InstructionCost Budget =
// NOTE(review): extraction dropped source line 3209 here; likely
// `PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;` — confirm.
3210
3211 // If ThenBB is actually on the false edge of the conditional branch, remember
3212 // to swap the select operands later.
3213 bool Invert = false;
3214 if (ThenBB != BI->getSuccessor(0)) {
3215 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3216 Invert = true;
3217 }
3218 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3219
3220 if (!isProfitableToSpeculate(BI, Invert, TTI))
3221 return false;
3222
3223 // Keep a count of how many times instructions are used within ThenBB when
3224 // they are candidates for sinking into ThenBB. Specifically:
3225 // - They are defined in BB, and
3226 // - They have no side effects, and
3227 // - All of their uses are in ThenBB.
3228 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3229
3230 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3231
3232 unsigned SpeculatedInstructions = 0;
3233 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3234 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3235 Value *SpeculatedStoreValue = nullptr;
3236 StoreInst *SpeculatedStore = nullptr;
3237 EphemeralValueTracker EphTracker;
// Walk ThenBB bottom-up (terminator excluded) so ephemeral values are
// recognized before their operands.
3238 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3239 // Skip pseudo probes. The consequence is we lose track of the branch
3240 // probability for ThenBB, which is fine since the optimization here takes
3241 // place regardless of the branch probability.
3242 if (isa<PseudoProbeInst>(I)) {
3243 // The probe should be deleted so that it will not be over-counted when
3244 // the samples collected on the non-conditional path are counted towards
3245 // the conditional path. We leave it for the counts inference algorithm to
3246 // figure out a proper count for an unknown probe.
3247 SpeculatedPseudoProbes.push_back(&I);
3248 continue;
3249 }
3250
3251 // Ignore ephemeral values, they will be dropped by the transform.
3252 if (EphTracker.track(&I))
3253 continue;
3254
3255 // Only speculatively execute a single instruction (not counting the
3256 // terminator) for now.
3257 bool IsSafeCheapLoadStore = HoistLoadsStores &&
// NOTE(review): extraction dropped source lines 3258 and 3260 here; the
// condition appears to also require a safe-cheap load/store test and compare
// against HoistLoadsStoresWithCondFaultingThreshold — confirm.
3259 SpeculatedConditionalLoadsStores.size() <
3261 // Not count load/store into cost if target supports conditional faulting
3262 // b/c it's cheap to speculate it.
3263 if (IsSafeCheapLoadStore)
3264 SpeculatedConditionalLoadsStores.push_back(&I);
3265 else
3266 ++SpeculatedInstructions;
3267
3268 if (SpeculatedInstructions > 1)
3269 return false;
3270
3271 // Don't hoist the instruction if it's unsafe or expensive.
3272 if (!IsSafeCheapLoadStore &&
// NOTE(review): extraction dropped source line 3273 here; likely
// `!isSafeToSpeculativelyExecute(&I, BI) &&` — confirm.
3274 !(HoistCondStores && !SpeculatedStoreValue &&
3275 (SpeculatedStoreValue =
3276 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3277 return false;
3278 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
// NOTE(review): extraction dropped source lines 3279-3280 here; likely a
// `computeSpeculationCost(&I, TTI) > Budget`-style comparison — confirm.
3281 return false;
3282
3283 // Store the store speculation candidate.
3284 if (!SpeculatedStore && SpeculatedStoreValue)
3285 SpeculatedStore = cast<StoreInst>(&I);
3286
3287 // Do not hoist the instruction if any of its operands are defined but not
3288 // used in BB. The transformation will prevent the operand from
3289 // being sunk into the use block.
3290 for (Use &Op : I.operands()) {
// NOTE(review): extraction dropped source line 3291 here; likely
// `Instruction *OpI = dyn_cast<Instruction>(Op);` — confirm.
3292 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3293 continue; // Not a candidate for sinking.
3294
3295 ++SinkCandidateUseCounts[OpI];
3296 }
3297 }
3298
3299 // Consider any sink candidates which are only used in ThenBB as costs for
3300 // speculation. Note, while we iterate over a DenseMap here, we are summing
3301 // and so iteration order isn't significant.
3302 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3303 if (Inst->hasNUses(Count)) {
3304 ++SpeculatedInstructions;
3305 if (SpeculatedInstructions > 1)
3306 return false;
3307 }
3308
3309 // Check that we can insert the selects and that it's not too expensive to do
3310 // so.
3311 bool Convert =
3312 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
// NOTE(review): extraction dropped source line 3313 here; likely
// `InstructionCost Cost = 0;` — confirm.
3314 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3315 SpeculatedInstructions, Cost, TTI);
3316 if (!Convert || Cost > Budget)
3317 return false;
3318
3319 // If we get here, we can hoist the instruction and if-convert.
3320 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3321
3322 Instruction *Sel = nullptr;
3323 // Insert a select of the value of the speculated store.
3324 if (SpeculatedStoreValue) {
3325 IRBuilder<NoFolder> Builder(BI);
3326 Value *OrigV = SpeculatedStore->getValueOperand();
3327 Value *TrueV = SpeculatedStore->getValueOperand();
3328 Value *FalseV = SpeculatedStoreValue;
3329 if (Invert)
3330 std::swap(TrueV, FalseV);
3331 Value *S = Builder.CreateSelect(
3332 BrCond, TrueV, FalseV, "spec.store.select", BI);
3333 Sel = cast<Instruction>(S);
3334 SpeculatedStore->setOperand(0, S);
3335 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3336 SpeculatedStore->getDebugLoc());
3337 // The value stored is still conditional, but the store itself is now
3338 // unconditionally executed, so we must be sure that any linked dbg.assign
3339 // intrinsics are tracking the new stored value (the result of the
3340 // select). If we don't, and the store were to be removed by another pass
3341 // (e.g. DSE), then we'd eventually end up emitting a location describing
3342 // the conditional value, unconditionally.
3343 //
3344 // === Before this transformation ===
3345 // pred:
3346 // store %one, %x.dest, !DIAssignID !1
3347 // dbg.assign %one, "x", ..., !1, ...
3348 // br %cond if.then
3349 //
3350 // if.then:
3351 // store %two, %x.dest, !DIAssignID !2
3352 // dbg.assign %two, "x", ..., !2, ...
3353 //
3354 // === After this transformation ===
3355 // pred:
3356 // store %one, %x.dest, !DIAssignID !1
3357 // dbg.assign %one, "x", ..., !1
3358 /// ...
3359 // %merge = select %cond, %two, %one
3360 // store %merge, %x.dest, !DIAssignID !2
3361 // dbg.assign %merge, "x", ..., !2
3362 for (DbgVariableRecord *DbgAssign :
3363 at::getDVRAssignmentMarkers(SpeculatedStore))
3364 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3365 DbgAssign->replaceVariableLocationOp(OrigV, S);
3366 }
3367
3368 // Metadata can be dependent on the condition we are hoisting above.
3369 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3370 // to avoid making it appear as if the condition is a constant, which would
3371 // be misleading while debugging.
3372 // Similarly strip attributes that maybe dependent on condition we are
3373 // hoisting above.
3374 for (auto &I : make_early_inc_range(*ThenBB)) {
3375 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3376 I.dropLocation();
3377 }
3378 I.dropUBImplyingAttrsAndMetadata();
3379
3380 // Drop ephemeral values.
3381 if (EphTracker.contains(&I)) {
3382 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3383 I.eraseFromParent();
3384 }
3385 }
3386
3387 // Hoist the instructions.
3388 // Drop DbgVariableRecords attached to these instructions.
3389 for (auto &It : *ThenBB)
3390 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3391 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3392 // equivalent).
3393 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3394 !DVR || !DVR->isDbgAssign())
3395 It.dropOneDbgRecord(&DR);
3396 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3397 std::prev(ThenBB->end()));
3398
3399 if (!SpeculatedConditionalLoadsStores.empty())
3400 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3401 Sel);
3402
3403 // Insert selects and rewrite the PHI operands.
3404 IRBuilder<NoFolder> Builder(BI);
3405 for (PHINode &PN : EndBB->phis()) {
3406 unsigned OrigI = PN.getBasicBlockIndex(BB);
3407 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3408 Value *OrigV = PN.getIncomingValue(OrigI);
3409 Value *ThenV = PN.getIncomingValue(ThenI);
3410
3411 // Skip PHIs which are trivial.
3412 if (OrigV == ThenV)
3413 continue;
3414
3415 // Create a select whose true value is the speculatively executed value and
3416 // false value is the pre-existing value. Swap them if the branch
3417 // destinations were inverted.
3418 Value *TrueV = ThenV, *FalseV = OrigV;
3419 if (Invert)
3420 std::swap(TrueV, FalseV);
3421 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3422 PN.setIncomingValue(OrigI, V);
3423 PN.setIncomingValue(ThenI, V);
3424 }
3425
3426 // Remove speculated pseudo probes.
3427 for (Instruction *I : SpeculatedPseudoProbes)
3428 I->eraseFromParent();
3429
3430 ++NumSpeculations;
3431 return true;
3432}
3433
3435
3436// Return false if number of blocks searched is too much.
3437static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3438 BlocksSet &ReachesNonLocalUses) {
3439 if (BB == DefBB)
3440 return true;
3441 if (!ReachesNonLocalUses.insert(BB).second)
3442 return true;
3443
3444 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3445 return false;
3446 for (BasicBlock *Pred : predecessors(BB))
3447 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3448 return false;
3449 return true;
3450}
3451
3452/// Return true if we can thread a branch across this block.
// NOTE(review): extraction dropped source line 3453 (the signature's first
// line); upstream appears to read
// `static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,` — confirm.
3454 BlocksSet &NonLocalUseBlocks) {
3455 int Size = 0;
3456 EphemeralValueTracker EphTracker;
3457
3458 // Walk the loop in reverse so that we can identify ephemeral values properly
3459 // (values only feeding assumes).
3460 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3461 // Can't fold blocks that contain noduplicate or convergent calls.
3462 if (CallInst *CI = dyn_cast<CallInst>(&I))
3463 if (CI->cannotDuplicate() || CI->isConvergent())
3464 return false;
3465
3466 // Ignore ephemeral values which are deleted during codegen.
3467 // We will delete Phis while threading, so Phis should not be accounted in
3468 // block's size.
3469 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3470 if (Size++ > MaxSmallBlockSize)
3471 return false; // Don't clone large BB's.
3472 }
3473
3474 // Record blocks with non-local uses of values defined in the current basic
3475 // block.
3476 for (User *U : I.users()) {
// NOTE(review): extraction dropped source line 3477 here; likely
// `Instruction *UI = cast<Instruction>(U);` — confirm.
3478 BasicBlock *UsedInBB = UI->getParent();
// A same-block PHI user means the value feeds back into this block's PHIs,
// which threading cannot handle; bail out.
3479 if (UsedInBB == BB) {
3480 if (isa<PHINode>(UI))
3481 return false;
3482 } else
3483 NonLocalUseBlocks.insert(UsedInBB);
3484 }
3485
3486 // Looks ok, continue checking.
3487 }
3488
3489 return true;
3490}
3491
// Returns the constant value that V is known to have when control transfers
// along the edge From -> To, or nullptr if no constant is known on that edge.
BasicBlock *To) {
// Don't look past the block defining the value, we might get the value from
// a previous loop iteration.
auto *I = dyn_cast<Instruction>(V);
if (I && I->getParent() == To)
return nullptr;

// We know the value if the From block branches on it.
// Require distinct successors, otherwise the edge tells us nothing.
auto *BI = dyn_cast<BranchInst>(From->getTerminator());
if (BI && BI->isConditional() && BI->getCondition() == V &&
BI->getSuccessor(0) != BI->getSuccessor(1))
return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())

return nullptr;
}
3509
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// Returns std::nullopt to signal "an edge was threaded; re-run me", and
/// true/false once no more threading is possible (true iff anything changed).
static std::optional<bool>
const DataLayout &DL,
AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
Value *Cond = BI->getCondition();
if (PN && PN->getParent() == BB) {
// Degenerate case of a single entry PHI.
if (PN->getNumIncomingValues() == 1) {
return true;
}

// Map each known constant condition value to the predecessors that
// produce it.
for (Use &U : PN->incoming_values())
if (auto *CB = dyn_cast<ConstantInt>(U))
KnownValues[CB].insert(PN->getIncomingBlock(U));
} else {
for (BasicBlock *Pred : predecessors(BB)) {
if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
KnownValues[CB].insert(Pred);
}
}

if (KnownValues.empty())
return false;

// Now we know that this block has multiple preds and two succs.
// Check that the block is small enough and record which non-local blocks use
// values defined in the block.

BlocksSet NonLocalUseBlocks;
BlocksSet ReachesNonLocalUseBlocks;
if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
return false;

// Jump-threading can only be done to destinations where no values defined
// in BB are live.

// Quickly check if both destinations have uses. If so, jump-threading cannot
// be done.
if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
NonLocalUseBlocks.contains(BI->getSuccessor(1)))
return false;

// Search backward from NonLocalUseBlocks to find which blocks
// reach non-local uses.
for (BasicBlock *UseBB : NonLocalUseBlocks)
// Give up if too many blocks are searched.
if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
return false;

for (const auto &Pair : KnownValues) {
ConstantInt *CB = Pair.first;
ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// Known-true threads to successor 0, known-false to successor 1.
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());

// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
if (RealDest == BB)
continue; // Skip self loops.

// Skip if the predecessor's terminator is an indirect branch.
if (any_of(PredBBs, [](BasicBlock *PredBB) {
return isa<IndirectBrInst>(PredBB->getTerminator());
}))
continue;

// Only revector to RealDest if no values defined in BB are live.
if (ReachesNonLocalUseBlocks.contains(RealDest))
continue;

LLVM_DEBUG({
dbgs() << "Condition " << *Cond << " in " << BB->getName()
<< " has value " << *Pair.first << " in predecessors:\n";
for (const BasicBlock *PredBB : Pair.second)
dbgs() << " " << PredBB->getName() << "\n";
dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
});

// Split the predecessors we are threading into a new edge block. We'll
// clone the instructions into this block, and then redirect it to RealDest.
BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);

// TODO: These just exist to reduce test diff, we can drop them if we like.
EdgeBB->setName(RealDest->getName() + ".critedge");
EdgeBB->moveBefore(RealDest);

// Update PHI nodes.
addPredecessorToBlock(RealDest, EdgeBB, BB);

// BB may have instructions that are being threaded over. Clone these
// instructions into EdgeBB. We know that there will be no uses of the
// cloned instructions outside of EdgeBB.
BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
ValueToValueMapTy TranslateMap; // Track translated values.
// Within the clone, the branch condition is known to be CB.
TranslateMap[Cond] = CB;

// RemoveDIs: track instructions that we optimise away while folding, so
// that we can copy DbgVariableRecords from them later.
BasicBlock::iterator SrcDbgCursor = BB->begin();
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
// PHIs are not cloned; they resolve to the value incoming from EdgeBB.
TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
continue;
}
// Clone the instruction.
Instruction *N = BBI->clone();
// Insert the new instruction into its new home.
N->insertInto(EdgeBB, InsertPt);

if (BBI->hasName())
N->setName(BBI->getName() + ".c");

// Update operands due to translation.
// Key Instructions: Remap all the atom groups.
if (const DebugLoc &DL = BBI->getDebugLoc())
mapAtomInstance(DL, TranslateMap);
RemapInstruction(N, TranslateMap,

// Check for trivial simplification.
if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
N->eraseFromParent(); // Instruction folded away, don't need actual
// inst
N = nullptr;
}
} else {
if (!BBI->use_empty())
TranslateMap[&*BBI] = N;
}
if (N) {
// Copy all debug-info attached to instructions from the last we
// successfully clone, up to this instruction (they might have been
// folded away).
for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
N->cloneDebugInfoFrom(&*SrcDbgCursor);
SrcDbgCursor = std::next(BBI);
// Clone debug-info on this instruction too.
N->cloneDebugInfoFrom(&*BBI);

// Register the new instruction with the assumption cache if necessary.
if (auto *Assume = dyn_cast<AssumeInst>(N))
if (AC)
AC->registerAssumption(Assume);
}
}

// Copy any trailing debug records (after the last cloned instruction).
for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
InsertPt->cloneDebugInfoFrom(BI);

BB->removePredecessor(EdgeBB);
BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
EdgeBI->setSuccessor(0, RealDest);
EdgeBI->setDebugLoc(BI->getDebugLoc());

if (DTU) {
Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
DTU->applyUpdates(Updates);
}

// For simplicity, we created a separate basic block for the edge. Merge
// it back into the predecessor if possible. This not only avoids
// unnecessary SimplifyCFG iterations, but also makes sure that we don't
// bypass the check for trivial cycles above.
MergeBlockIntoPredecessor(EdgeBB, DTU);

// Signal repeat, simplifying any other constants.
return std::nullopt;
}

return false;
}
3693
// Driver: repeatedly runs the Impl above until it stops reporting
// std::nullopt ("changed; try again"); returns whether anything changed.
bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
// Note: If BB is a loop header then there is a risk that threading introduces
// a non-canonical loop by moving a back edge. So we avoid this optimization
// for loop headers if NeedCanonicalLoop is set.
if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
return false;

std::optional<bool> Result;
bool EverChanged = false;
do {
// Note that None means "we changed things, but recurse further."
Result =
EverChanged |= Result == std::nullopt || *Result;
} while (Result == std::nullopt);
return EverChanged;
}
3711
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
/// Returns true if the diamond was flattened (PHIs replaced with selects and
/// the conditional branch removed), false (or an early "Changed") otherwise.
const DataLayout &DL,
bool SpeculateUnpredictables) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
// subsequently causes this merge to happen. We really want control
// dependence information for this check, but simplifycfg can't keep it up
// to date, and this catches most of the cases we care about anyway.
BasicBlock *BB = PN->getParent();

BasicBlock *IfTrue, *IfFalse;
BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
if (!DomBI)
return false;
Value *IfCond = DomBI->getCondition();
// Don't bother if the branch will be constant folded trivially.
if (isa<ConstantInt>(IfCond))
return false;

BasicBlock *DomBlock = DomBI->getParent();
// Collect the 'then'/'else' blocks that fall through to BB unconditionally.
PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
});
assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
"Will have either one or two blocks to speculate.");

// If the branch is non-unpredictable, see if we either predictably jump to
// the merge bb (if we have only a single 'then' block), or if we predictably
// jump to one specific 'then' block (if we have two of them).
// It isn't beneficial to speculatively execute the code
// from the block that we know is predictably not entered.
bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
if (!IsUnpredictable) {
uint64_t TWeight, FWeight;
if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
(TWeight + FWeight) != 0) {
BranchProbability BITrueProb =
BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
BranchProbability Likely = TTI.getPredictableBranchThreshold();
BranchProbability BIFalseProb = BITrueProb.getCompl();
if (IfBlocks.size() == 1) {
BranchProbability BIBBProb =
DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
if (BIBBProb >= Likely)
return false;
} else {
if (BITrueProb >= Likely || BIFalseProb >= Likely)
return false;
}
}
}

// Don't try to fold an unreachable block. For example, the phi node itself
// can't be the candidate if-condition for a select that we want to form.
if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
if (IfCondPhiInst->getParent() == BB)
return false;

// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
// At some point this becomes non-profitable (particularly if the target
// doesn't support cmov's). Only do this transformation if there are two or
// fewer PHI nodes in this block.
unsigned NumPhis = 0;
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
if (NumPhis > 2)
return false;

// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
InstructionCost Cost = 0;
InstructionCost Budget =
if (SpeculateUnpredictables && IsUnpredictable)
Budget += TTI.getBranchMispredictPenalty();

bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
if (Value *V = simplifyInstruction(PN, {DL, PN})) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
Changed = true;
continue;
}

// Both incoming values must be speculatable up to the dominating branch.
if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
AggressiveInsts, Cost, Budget, TTI, AC,
ZeroCostInstructions) ||
!dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
AggressiveInsts, Cost, Budget, TTI, AC,
ZeroCostInstructions))
return Changed;
}

// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
if (!PN)
return true;

// Return true if at least one of these is a 'not', and another is either
// a 'not' too, or a constant.
auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
if (!match(V0, m_Not(m_Value())))
std::swap(V0, V1);
auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
return match(V0, m_Not(m_Value())) && match(V1, Invertible);
};

// Don't fold i1 branches on PHIs which contain binary operators or
// (possibly inverted) select form of or/ands, unless one of
// the incoming values is an 'not' and another one is freely invertible.
// These can often be turned into switches and other things.
auto IsBinOpOrAnd = [](Value *V) {
return match(
};
if (PN->getType()->isIntegerTy(1) &&
(IsBinOpOrAnd(PN->getIncomingValue(0)) ||
IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
!CanHoistNotFromBothValues(PN->getIncomingValue(0),
PN->getIncomingValue(1)))
return Changed;

// If all PHI nodes are promotable, check to make sure that all instructions
// in the predecessor blocks can be promoted as well. If not, we won't be able
// to get rid of the control flow, so it's not worth promoting to select
// instructions.
for (BasicBlock *IfBlock : IfBlocks)
for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
return Changed;
}

// If either of the blocks has it's address taken, we can't do this fold.
if (any_of(IfBlocks,
[](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
return Changed;

LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
if (IsUnpredictable) dbgs() << " (unpredictable)";
dbgs() << " T: " << IfTrue->getName()
<< " F: " << IfFalse->getName() << "\n");

// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.

// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
for (BasicBlock *IfBlock : IfBlocks)
hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);

IRBuilder<NoFolder> Builder(DomBI);
// Propagate fast-math-flags from phi nodes to replacement selects.
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);

Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
isa<FPMathOperator>(PN) ? PN : nullptr,
"", DomBI);
PN->replaceAllUsesWith(Sel);
Sel->takeName(PN);
PN->eraseFromParent();
}

// At this point, all IfBlocks are empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
Builder.CreateBr(BB);

if (DTU) {
Updates.push_back({DominatorTree::Insert, DomBlock, BB});
for (auto *Successor : successors(DomBlock))
Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
}

DomBI->eraseFromParent();
if (DTU)
DTU->applyUpdates(Updates);

return true;
}
3910
// Combines LHS and RHS with the and/or opcode Opc, preferring the plain
// binary form; falls back to the poison-safe select ("logical") form when
// RHS does not already imply LHS's poison.
Value *RHS, const Twine &Name = "") {
// Try to relax logical op to binary op.
if (impliesPoison(RHS, LHS))
return Builder.CreateBinOp(Opc, LHS, RHS, Name);
if (Opc == Instruction::And)
return Builder.CreateLogicalAnd(LHS, RHS, Name);
if (Opc == Instruction::Or)
return Builder.CreateLogicalOr(LHS, RHS, Name);
llvm_unreachable("Invalid logical opcode");
}
3923
/// Return true if either PBI or BI has branch weight available, and store
/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
/// not have branch weight, use 1:1 as its weight.
uint64_t &PredTrueWeight,
uint64_t &PredFalseWeight,
uint64_t &SuccTrueWeight,
uint64_t &SuccFalseWeight) {
bool PredHasWeights =
extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
bool SuccHasWeights =
extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
if (PredHasWeights || SuccHasWeights) {
// At least one branch carries !prof metadata; default the other to 1:1 so
// all four out-params are always meaningful when we return true.
if (!PredHasWeights)
PredTrueWeight = PredFalseWeight = 1;
if (!SuccHasWeights)
SuccTrueWeight = SuccFalseWeight = 1;
return true;
} else {
return false;
}
}
3946
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
/// Returns (common successor, opcode to combine the two conditions, whether
/// PBI's condition must be inverted first), or std::nullopt if folding is
/// impossible or judged unprofitable from PBI's branch weights.
static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
const TargetTransformInfo *TTI) {
assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
"Both blocks must end with a conditional branches.");
"PredBB must be a predecessor of BB.");

// We have the potential to fold the conditions together, but if the
// predecessor branch is predictable, we may not want to merge them.
uint64_t PTWeight, PFWeight;
BranchProbability PBITrueProb, Likely;
if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
extractBranchWeights(*PBI, PTWeight, PFWeight) &&
(PTWeight + PFWeight) != 0) {
PBITrueProb =
BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
Likely = TTI->getPredictableBranchThreshold();
}

// Four cases, one per matching (PBI successor, BI successor) pair.
if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
// Speculate the 2nd condition unless the 1st is probably true.
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
return {{BI->getSuccessor(0), Instruction::Or, false}};
} else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
// Speculate the 2nd condition unless the 1st is probably false.
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
return {{BI->getSuccessor(1), Instruction::And, false}};
} else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
// Speculate the 2nd condition unless the 1st is probably true.
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
return {{BI->getSuccessor(1), Instruction::And, true}};
} else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
// Speculate the 2nd condition unless the 1st is probably false.
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
return {{BI->getSuccessor(0), Instruction::Or, true}};
}
return std::nullopt;
}
3989
// Folds BI's conditional branch into its predecessor PBI: the two conditions
// are combined (or'd/and'd) in PBI and PBI is retargeted at BI's unique
// successor. Branch weights, debug info and CFG/MemorySSA updaters are kept
// in sync. Always returns true (the fold is unconditional once called).
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();

// Determine if the two branches share a common destination.
BasicBlock *CommonSucc;
bool InvertPredCond;
std::tie(CommonSucc, Opc, InvertPredCond) =

LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

IRBuilder<> Builder(PBI);
// The builder is used to create instructions to eliminate the branch in BB.
// If BB's terminator has !annotation metadata, add it to the new
// instructions.
Builder.CollectMetadataToCopy(BB->getTerminator(),
{LLVMContext::MD_annotation});

// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
InvertBranch(PBI, Builder);
}

// After a possible inversion, PBI's successor 0 leads into BB; pick BI's
// successor on the corresponding side.
BasicBlock *UniqueSucc =
PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);

// Before cloning instructions, notify the successor basic block that it
// is about to have a new predecessor. This will update PHI nodes,
// which will allow us to update live-out uses of bonus instructions.
addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);

// Try to update branch weights.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
SmallVector<uint64_t, 2> MDWeights;
if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
SuccTrueWeight, SuccFalseWeight)) {

if (PBI->getSuccessor(0) == BB) {
// PBI: br i1 %x, BB, FalseDest
// BI: br i1 %y, UniqueSucc, FalseDest
// TrueWeight is TrueWeight for PBI * TrueWeight for BI.
MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
// FalseWeight is FalseWeight for PBI * TotalWeight for BI +
// TrueWeight for PBI * FalseWeight for BI.
// We assume that total weights of a BranchInst can fit into 32 bits.
// Therefore, we will not have overflow using 64-bit arithmetic.
MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
PredTrueWeight * SuccFalseWeight);
} else {
// PBI: br i1 %x, TrueDest, BB
// BI: br i1 %y, TrueDest, UniqueSucc
// TrueWeight is TrueWeight for PBI * TotalWeight for BI +
// FalseWeight for PBI * TrueWeight for BI.
MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
PredFalseWeight * SuccTrueWeight);
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
}

setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
/*ElideAllZero=*/true);

// TODO: If BB is reachable from all paths through PredBlock, then we
// could replace PBI's branch probabilities with BI's.
} else
PBI->setMetadata(LLVMContext::MD_prof, nullptr);

// Now, update the CFG.
PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);

if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
{DominatorTree::Delete, PredBlock, BB}});

// If BI was a loop latch, it may have had associated loop metadata.
// We need to copy it to the new latch, that is, PBI.
if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
PBI->setMetadata(LLVMContext::MD_loop, LoopMD);

ValueToValueMapTy VMap; // maps original values to cloned values

Module *M = BB->getModule();

// Clone debug records attached to BB's terminator over to PBI and remap
// their operands through the cloned-values map.
PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
for (DbgVariableRecord &DVR :
RemapDbgRecord(M, &DVR, VMap,
}

// Now that the Cond was cloned into the predecessor basic block,
// or/and the two conditions together.
Value *BICond = VMap[BI->getCondition()];
PBI->setCondition(
createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
if (!MDWeights.empty()) {
assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
/*IsExpected=*/false, /*ElideAllZero=*/true);
}

++NumFoldBranchToCommonDest;
return true;
}
4102
4103/// Return if an instruction's type or any of its operands' types are a vector
4104/// type.
4105static bool isVectorOp(Instruction &I) {
4106 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4107 return U->getType()->isVectorTy();
4108 });
4109}
4110
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
/// Returns true if a predecessor was folded into (at most one per call).
MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) {
// If this block ends with an unconditional branch,
// let speculativelyExecuteBB() deal with it.
if (!BI->isConditional())
return false;

BasicBlock *BB = BI->getParent();

// The condition must be an instruction defined in BB with a single use.
Cond->getParent() != BB || !Cond->hasOneUse())
return false;

// Finally, don't infinitely unroll conditional loops.
if (is_contained(successors(BB), BB))
return false;

// With which predecessors will we want to deal with?
for (BasicBlock *PredBlock : predecessors(BB)) {
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());

// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
continue;

// Determine if the two branches share a common destination.
BasicBlock *CommonSucc;
bool InvertPredCond;
if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
else
continue;

// Check the cost of inserting the necessary logic before performing the
// transformation.
if (TTI) {
Type *Ty = BI->getCondition()->getType();
InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
// Inverting a reused or non-cmp condition requires an extra xor.
if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);

continue;
}

// Ok, we do want to deal with this predecessor. Record it.
Preds.emplace_back(PredBlock);
}

// If there aren't any predecessors into which we can fold,
// don't bother checking the cost.
if (Preds.empty())
return false;

// Only allow this transformation if computing the condition doesn't involve
// too many instructions and these involved instructions can be executed
// unconditionally. We denote all involved instructions except the condition
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
bool SawVectorOp = false;
const unsigned PredCount = Preds.size();
for (Instruction &I : *BB) {
// Don't check the branch condition comparison itself.
if (&I == Cond)
continue;
// Ignore the terminator.
if (isa<BranchInst>(I))
continue;
// I must be safe to execute unconditionally.
return false;
SawVectorOp |= isVectorOp(I);

// Account for the cost of duplicating this instruction into each
// predecessor. Ignore free instructions.
if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
NumBonusInsts += PredCount;

// Early exits once we reach the limit.
if (NumBonusInsts >
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
return false;
}

// A use is "block-closed-SSA" if it is a PHI incoming from BB, or a later
// instruction within BB itself.
auto IsBCSSAUse = [BB, &I](Use &U) {
auto *UI = cast<Instruction>(U.getUser());
if (auto *PN = dyn_cast<PHINode>(UI))
return PN->getIncomingBlock(U) == BB;
return UI->getParent() == BB && I.comesBefore(UI);
};

// Does this instruction require rewriting of uses?
if (!all_of(I.uses(), IsBCSSAUse))
return false;
}
// Vector ops get the stricter (multiplied) threshold.
if (NumBonusInsts >
BonusInstThreshold *
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
return false;

// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
}
return false;
}
4236
// If there is only one store in BB1 and BB2, return it, otherwise return
// nullptr. Either block may be null, in which case it is simply skipped.
StoreInst *S = nullptr;
for (auto *BB : {BB1, BB2}) {
if (!BB)
continue;
for (auto &I : *BB)
if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (S)
// Multiple stores seen.
return nullptr;
else
S = SI;
}
}
return S;
}
4255
// Make V usable in BB's single successor by reusing a matching PHI there or
// creating a new one; see the detailed contract in the comment below.
Value *AlternativeV = nullptr) {
// PHI is going to be a PHI node that allows the value V that is defined in
// BB to be referenced in BB's only successor.
//
// If AlternativeV is nullptr, the only value we care about in PHI is V. It
// doesn't matter to us what the other operand is (it'll never get used). We
// could just create a new PHI with an undef incoming value, but that could
// increase register pressure if EarlyCSE/InstCombine can't fold it with some
// other PHI. So here we directly look for some PHI in BB's successor with V
// as an incoming operand. If we find one, we use it, else we create a new
// one.
//
// If AlternativeV is not nullptr, we care about both incoming values in PHI.
// PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
// where OtherBB is the single other predecessor of BB's only successor.
PHINode *PHI = nullptr;
BasicBlock *Succ = BB->getSingleSuccessor();

for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
PHI = cast<PHINode>(I);
if (!AlternativeV)
break;

// Both incoming values must match; otherwise keep searching.
assert(Succ->hasNPredecessors(2));
auto PredI = pred_begin(Succ);
BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
break;
PHI = nullptr;
}
if (PHI)
return PHI;

// If V is not an instruction defined in BB, just return it.
if (!AlternativeV &&
(!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
return V;

// No suitable PHI found; build one, using poison for the uninteresting
// incoming edges when no AlternativeV was requested.
PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
PHI->insertBefore(Succ->begin());
PHI->addIncoming(V, BB);
for (BasicBlock *PredBB : predecessors(Succ))
if (PredBB != BB)
PHI->addIncoming(
AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
return PHI;
}
4305
// mergeConditionalStoreToAddress - given the two conditional arms of P
// (PTB/PFB) and Q (QTB/QFB) that both feed PostBB, try to sink the unique
// store to `Address` out of each diamond/triangle and emit one merged store in
// PostBB, predicated on the disjunction of the two branch conditions.
// Returns true if the IR was changed. (The function's opening signature line
// is elided in this listing.)
4307 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4308 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4309 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4310 // For every pointer, there must be exactly two stores, one coming from
4311 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4312 // store (to any address) in PTB,PFB or QTB,QFB.
4313 // FIXME: We could relax this restriction with a bit more work and performance
4314 // testing.
4315 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4316 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4317 if (!PStore || !QStore)
4318 return false;
4319
4320 // Now check the stores are compatible.
// Both stores must be unordered and must store a value of the same type,
// otherwise a single merged store cannot represent them.
4321 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4322 PStore->getValueOperand()->getType() !=
4323 QStore->getValueOperand()->getType())
4324 return false;
4325
4326 // Check that sinking the store won't cause program behavior changes. Sinking
4327 // the store out of the Q blocks won't change any behavior as we're sinking
4328 // from a block to its unconditional successor. But we're moving a store from
4329 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4330 // So we need to check that there are no aliasing loads or stores in
4331 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4332 // operations between PStore and the end of its parent block.
4333 //
4334 // The ideal way to do this is to query AliasAnalysis, but we don't
4335 // preserve AA currently so that is dangerous. Be super safe and just
4336 // check there are no other memory operations at all.
4337 for (auto &I : *QFB->getSinglePredecessor())
4338 if (I.mayReadOrWriteMemory())
4339 return false;
4340 for (auto &I : *QFB)
4341 if (&I != QStore && I.mayReadOrWriteMemory())
4342 return false;
4343 if (QTB)
4344 for (auto &I : *QTB)
4345 if (&I != QStore && I.mayReadOrWriteMemory())
4346 return false;
// Scan from PStore to the end of its block for conflicting memory operations.
4347 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4348 I != E; ++I)
4349 if (&*I != PStore && I->mayReadOrWriteMemory())
4350 return false;
4351
4352 // If we're not in aggressive mode, we only optimize if we have some
4353 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4354 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4355 if (!BB)
4356 return true;
4357 // Heuristic: if the block can be if-converted/phi-folded and the
4358 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4359 // thread this store.
4360 InstructionCost Cost = 0;
// NOTE: the initializer expression of Budget (original line 4362) is elided
// in this listing.
4361 InstructionCost Budget =
4363 for (auto &I : BB->instructionsWithoutDebug(false)) {
4364 // Consider terminator instruction to be free.
4365 if (I.isTerminator())
4366 continue;
4367 // If this is one of the stores that we want to speculate out of this BB,
4368 // then don't count its cost, consider it to be free.
4369 if (auto *S = dyn_cast<StoreInst>(&I))
4370 if (llvm::find(FreeStores, S))
4371 continue;
4372 // Else, we have a white-list of instructions that we are okay speculating.
// NOTE: the condition guarding this bailout (original line 4373) is elided
// in this listing.
4374 return false; // Not in white-list - not worthwhile folding.
4375 // And finally, if this is a non-free instruction that we are okay
4376 // speculating, ensure that we consider the speculation budget.
4377 Cost +=
4378 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4379 if (Cost > Budget)
4380 return false; // Eagerly refuse to fold as soon as we're out of budget.
4381 }
4382 assert(Cost <= Budget &&
4383 "When we run out of budget we will eagerly return from within the "
4384 "per-instruction loop.");
4385 return true;
4386 };
4387
4388 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4390 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4391 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4392 return false;
4393
4394 // If PostBB has more than two predecessors, we need to split it so we can
4395 // sink the store.
4396 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4397 // We know that QFB's only successor is PostBB. And QFB has a single
4398 // predecessor. If QTB exists, then its only successor is also PostBB.
4399 // If QTB does not exist, then QFB's only predecessor has a conditional
4400 // branch to QFB and PostBB.
4401 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4402 BasicBlock *NewBB =
4403 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4404 if (!NewBB)
4405 return false;
4406 PostBB = NewBB;
4407 }
4408
4409 // OK, we're going to sink the stores to PostBB. The store has to be
4410 // conditional though, so first create the predicate.
4411 BranchInst *PBranch =
4413 BranchInst *QBranch =
4415 Value *PCond = PBranch->getCondition();
4416 Value *QCond = QBranch->getCondition();
4417
4419 PStore->getParent());
4421 QStore->getParent(), PPHI);
4422
4423 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4424 IRBuilder<> QB(PostBB, PostBBFirst);
4425 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4426
// If the store actually lives in the "false" arm, the stored-to path is the
// inverse of the branch condition, so flip the inversion flag.
4427 InvertPCond ^= (PStore->getParent() != PTB);
4428 InvertQCond ^= (QStore->getParent() != QTB);
4429 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4430 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4431
// The merged store executes whenever either original store would have.
4432 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4433
4434 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4435 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4436 /*Unreachable=*/false,
4437 /*BranchWeights=*/nullptr, DTU);
4438 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4440 SmallVector<uint32_t, 2> PWeights, QWeights;
4441 extractBranchWeights(*PBranch, PWeights);
4442 extractBranchWeights(*QBranch, QWeights);
// Normalize so index 0 is always the "store executes" direction before
// combining the two profiles.
4443 if (InvertPCond)
4444 std::swap(PWeights[0], PWeights[1]);
4445 if (InvertQCond)
4446 std::swap(QWeights[0], QWeights[1]);
4447 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4449 {CombinedWeights[0], CombinedWeights[1]},
4450 /*IsExpected=*/false, /*ElideAllZero=*/true);
4451 }
4452
4453 QB.SetInsertPoint(T);
4454 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
// Preserve alias metadata common to both original stores.
4455 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4456 // Choose the minimum alignment. If we could prove both stores execute, we
4457 // could use biggest one. In this case, though, we only know that one of the
4458 // stores executes. And we don't know it's safe to take the alignment from a
4459 // store that doesn't execute.
4460 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4461
4462 QStore->eraseFromParent();
4463 PStore->eraseFromParent();
4464
4465 return true;
4466}
4467
// mergeConditionalStores - driver for the conditional-store merging transform:
// match the diamond/triangle pair described below, collect store addresses
// common to both halves, and try to merge the stores for each common address.
// Returns true if any store pair was merged. (The function's opening
// signature line is elided in this listing.)
4469 DomTreeUpdater *DTU, const DataLayout &DL,
4470 const TargetTransformInfo &TTI) {
4471 // The intention here is to find diamonds or triangles (see below) where each
4472 // conditional block contains a store to the same address. Both of these
4473 // stores are conditional, so they can't be unconditionally sunk. But it may
4474 // be profitable to speculatively sink the stores into one merged store at the
4475 // end, and predicate the merged store on the union of the two conditions of
4476 // PBI and QBI.
4477 //
4478 // This can reduce the number of stores executed if both of the conditions are
4479 // true, and can allow the blocks to become small enough to be if-converted.
4480 // This optimization will also chain, so that ladders of test-and-set
4481 // sequences can be if-converted away.
4482 //
4483 // We only deal with simple diamonds or triangles:
4484 //
4485 // PBI or PBI or a combination of the two
4486 // / \ | \
4487 // PTB PFB | PFB
4488 // \ / | /
4489 // QBI QBI
4490 // / \ | \
4491 // QTB QFB | QFB
4492 // \ / | /
4493 // PostBB PostBB
4494 //
4495 // We model triangles as a type of diamond with a nullptr "true" block.
4496 // Triangles are canonicalized so that the fallthrough edge is represented by
4497 // a true condition, as in the diagram above.
4498 BasicBlock *PTB = PBI->getSuccessor(0);
4499 BasicBlock *PFB = PBI->getSuccessor(1);
4500 BasicBlock *QTB = QBI->getSuccessor(0);
4501 BasicBlock *QFB = QBI->getSuccessor(1);
4502 BasicBlock *PostBB = QFB->getSingleSuccessor();
4503
4504 // Make sure we have a good guess for PostBB. If QTB's only successor is
4505 // QFB, then QFB is a better PostBB.
4506 if (QTB->getSingleSuccessor() == QFB)
4507 PostBB = QFB;
4508
4509 // If we couldn't find a good PostBB, stop.
4510 if (!PostBB)
4511 return false;
4512
4513 bool InvertPCond = false, InvertQCond = false;
4514 // Canonicalize fallthroughs to the true branches.
4515 if (PFB == QBI->getParent()) {
4516 std::swap(PFB, PTB);
4517 InvertPCond = true;
4518 }
4519 if (QFB == PostBB) {
4520 std::swap(QFB, QTB);
4521 InvertQCond = true;
4522 }
4523
4524 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4525 // and QFB may not. Model fallthroughs as a nullptr block.
4526 if (PTB == QBI->getParent())
4527 PTB = nullptr;
4528 if (QTB == PostBB)
4529 QTB = nullptr;
4530
4531 // Legality bailouts. We must have at least the non-fallthrough blocks and
4532 // the post-dominating block, and the non-fallthroughs must only have one
4533 // predecessor.
4534 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4535 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4536 };
4537 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4538 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4539 return false;
4540 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4541 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4542 return false;
// QBI's block must be referenced exactly twice (its two incoming edges),
// otherwise the shape is more complex than the diagrams above.
4543 if (!QBI->getParent()->hasNUses(2))
4544 return false;
4545
4546 // OK, this is a sequence of two diamonds or triangles.
4547 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4548 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4549 for (auto *BB : {PTB, PFB}) {
4550 if (!BB)
4551 continue;
// NOTE: the StoreInst filter guarding this insert (original line 4553) is
// elided in this listing; only store pointer operands are collected.
4552 for (auto &I : *BB)
4554 PStoreAddresses.insert(SI->getPointerOperand());
4555 }
4556 for (auto *BB : {QTB, QFB}) {
4557 if (!BB)
4558 continue;
4559 for (auto &I : *BB)
4561 QStoreAddresses.insert(SI->getPointerOperand());
4562 }
4563
4564 set_intersect(PStoreAddresses, QStoreAddresses);
4565 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4566 // clear what it contains.
4567 auto &CommonAddresses = PStoreAddresses;
4568
4569 bool Changed = false;
4570 for (auto *Address : CommonAddresses)
4571 Changed |=
4572 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4573 InvertPCond, InvertQCond, DTU, DL, TTI);
4574 return Changed;
4575}
4576
4577/// If the previous block ended with a widenable branch, determine if reusing
4578/// the target block is profitable and legal. This will have the effect of
4579/// "widening" PBI, but doesn't require us to reason about hoisting safety.
/// Returns true if one of BI's successors was redirected to IfFalseBB (the
/// widenable branch's failure block).
4581 DomTreeUpdater *DTU) {
4582 // TODO: This can be generalized in two important ways:
4583 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4584 // values from the PBI edge.
4585 // 2) We can sink side effecting instructions into BI's fallthrough
4586 // successor provided they don't contribute to computation of
4587 // BI's condition.
4588 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4589 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// Only applies when PBI is a widenable branch whose taken side leads straight
// into BI's block, and that block has no other predecessors.
4590 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4591 !BI->getParent()->getSinglePredecessor())
4592 return false;
4593 if (!IfFalseBB->phis().empty())
4594 return false; // TODO
4595 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4596 // may undo the transform done here.
4597 // TODO: There might be a more fine-grained solution to this.
4598 if (!llvm::succ_empty(IfFalseBB))
4599 return false;
4600 // Use lambda to lazily compute expensive condition after cheap ones.
4601 auto NoSideEffects = [](BasicBlock &BB) {
4602 return llvm::none_of(BB, [](const Instruction &I) {
4603 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4604 });
4605 };
// The next two blocks are symmetric: redirect whichever of BI's successors
// ends in a deoptimize call to IfFalseBB instead.
4606 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4607 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4608 NoSideEffects(*BI->getParent())) {
4609 auto *OldSuccessor = BI->getSuccessor(1);
4610 OldSuccessor->removePredecessor(BI->getParent());
4611 BI->setSuccessor(1, IfFalseBB);
4612 if (DTU)
4613 DTU->applyUpdates(
4614 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4615 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4616 return true;
4617 }
4618 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4619 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4620 NoSideEffects(*BI->getParent())) {
4621 auto *OldSuccessor = BI->getSuccessor(0);
4622 OldSuccessor->removePredecessor(BI->getParent());
4623 BI->setSuccessor(0, IfFalseBB);
4624 if (DTU)
4625 DTU->applyUpdates(
4626 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4627 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4628 return true;
4629 }
4630 return false;
4631}
4632
4633/// If we have a conditional branch as a predecessor of another block,
4634/// this function tries to simplify it. We know
4635/// that PBI and BI are both conditional branches, and BI is in one of the
4636/// successor blocks of PBI - PBI branches to BI.
/// Returns true if the CFG was changed. (The function's opening signature
/// line is elided in this listing.)
4638 DomTreeUpdater *DTU,
4639 const DataLayout &DL,
4640 const TargetTransformInfo &TTI) {
4641 assert(PBI->isConditional() && BI->isConditional());
4642 BasicBlock *BB = BI->getParent();
4643
4644 // If this block ends with a branch instruction, and if there is a
4645 // predecessor that ends on a branch of the same condition, make
4646 // this conditional branch redundant.
4647 if (PBI->getCondition() == BI->getCondition() &&
4648 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4649 // Okay, the outcome of this conditional branch is statically
4650 // knowable. If this block had a single pred, handle specially, otherwise
4651 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4652 if (BB->getSinglePredecessor()) {
4653 // Turn this into a branch on constant.
4654 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4655 BI->setCondition(
4656 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4657 return true; // Nuke the branch on constant.
4658 }
4659 }
4660
4661 // If the previous block ended with a widenable branch, determine if reusing
4662 // the target block is profitable and legal. This will have the effect of
4663 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4664 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4665 return true;
4666
4667 // If both branches are conditional and both contain stores to the same
4668 // address, remove the stores from the conditionals and create a conditional
4669 // merged store at the end.
4670 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4671 return true;
4672
4673 // If this is a conditional branch in an empty block, and if any
4674 // predecessors are a conditional branch to one of our destinations,
4675 // fold the conditions into logical ops and one cond br.
4676
4677 // Ignore dbg intrinsics.
4678 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4679 return false;
4680
// PBIOp/BIOp record which successor index of PBI/BI is the shared
// destination; bail out if the two branches share no destination.
4681 int PBIOp, BIOp;
4682 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4683 PBIOp = 0;
4684 BIOp = 0;
4685 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4686 PBIOp = 0;
4687 BIOp = 1;
4688 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4689 PBIOp = 1;
4690 BIOp = 0;
4691 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4692 PBIOp = 1;
4693 BIOp = 1;
4694 } else {
4695 return false;
4696 }
4697
4698 // Check to make sure that the other destination of this branch
4699 // isn't BB itself. If so, this is an infinite loop that will
4700 // keep getting unwound.
4701 if (PBI->getSuccessor(PBIOp) == BB)
4702 return false;
4703
4704 // If predecessor's branch probability to BB is too low don't merge branches.
4705 SmallVector<uint32_t, 2> PredWeights;
4706 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4707 extractBranchWeights(*PBI, PredWeights) &&
4708 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4709
4711 PredWeights[PBIOp],
4712 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4713
4714 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4715 if (CommonDestProb >= Likely)
4716 return false;
4717 }
4718
4719 // Do not perform this transformation if it would require
4720 // insertion of a large number of select instructions. For targets
4721 // without predication/cmovs, this is a big pessimization.
4722
4723 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4724 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4725 unsigned NumPhis = 0;
// Each PHI in CommonDest may need a select; cap how many we tolerate.
4726 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4727 ++II, ++NumPhis) {
4728 if (NumPhis > 2) // Disable this xform.
4729 return false;
4730 }
4731
4732 // Finally, if everything is ok, fold the branches to logical ops.
4733 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4734
4735 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4736 << "AND: " << *BI->getParent());
4737
4739
4740 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4741 // branch in it, where one edge (OtherDest) goes back to itself but the other
4742 // exits. We don't *know* that the program avoids the infinite loop
4743 // (even though that seems likely). If we do this xform naively, we'll end up
4744 // recursively unpeeling the loop. Since we know that (after the xform is
4745 // done) that the block *is* infinite if reached, we just make it an obviously
4746 // infinite loop with no cond branch.
4747 if (OtherDest == BB) {
4748 // Insert it at the end of the function, because it's either code,
4749 // or it won't matter if it's hot. :)
4750 BasicBlock *InfLoopBlock =
4751 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4752 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4753 if (DTU)
4754 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4755 OtherDest = InfLoopBlock;
4756 }
4757
4758 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4759
4760 // BI may have other predecessors. Because of this, we leave
4761 // it alone, but modify PBI.
4762
4763 // Make sure we get to CommonDest on True&True directions.
4764 Value *PBICond = PBI->getCondition();
4765 IRBuilder<NoFolder> Builder(PBI);
4766 if (PBIOp)
4767 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4768
4769 Value *BICond = BI->getCondition();
4770 if (BIOp)
4771 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4772
4773 // Merge the conditions.
4774 Value *Cond =
4775 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4776
4777 // Modify PBI to branch on the new condition to the new dests.
4778 PBI->setCondition(Cond);
4779 PBI->setSuccessor(0, CommonDest);
4780 PBI->setSuccessor(1, OtherDest);
4781
4782 if (DTU) {
4783 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4784 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4785
4786 DTU->applyUpdates(Updates);
4787 }
4788
4789 // Update branch weight for PBI.
4790 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4791 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4792 bool HasWeights =
4793 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4794 SuccTrueWeight, SuccFalseWeight);
4795 if (HasWeights) {
// Normalize the raw weights so "Common" is the direction that reaches
// CommonDest and "Other" is the direction that leaves it.
4796 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4797 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4798 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4799 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4800 // The weight to CommonDest should be PredCommon * SuccTotal +
4801 // PredOther * SuccCommon.
4802 // The weight to OtherDest should be PredOther * SuccOther.
4803 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4804 PredOther * SuccCommon,
4805 PredOther * SuccOther};
4806
4807 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4808 /*ElideAllZero=*/true);
4809 // Cond may be a select instruction with the first operand set to "true", or
4810 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4812 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4813 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4814 // The select is predicated on PBICond
4816 // The corresponding probabilities are what was referred to above as
4817 // PredCommon and PredOther.
4818 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4819 /*IsExpected=*/false, /*ElideAllZero=*/true);
4820 }
4821 }
4822
4823 // OtherDest may have phi nodes. If so, add an entry from PBI's
4824 // block that are identical to the entries for BI's block.
4825 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4826
4827 // We know that the CommonDest already had an edge from PBI to
4828 // it. If it has PHIs though, the PHIs may have different
4829 // entries for BB and PBI's BB. If so, insert a select to make
4830 // them agree.
4831 for (PHINode &PN : CommonDest->phis()) {
4832 Value *BIV = PN.getIncomingValueForBlock(BB);
4833 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4834 Value *PBIV = PN.getIncomingValue(PBBIdx);
4835 if (BIV != PBIV) {
4836 // Insert a select in PBI to pick the right value.
4838 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4839 PN.setIncomingValue(PBBIdx, NV);
4840 // The select has the same condition as PBI, in the same BB. The
4841 // probabilities don't change.
4842 if (HasWeights) {
4843 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4844 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4845 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4846 /*IsExpected=*/false, /*ElideAllZero=*/true);
4847 }
4848 }
4849 }
4850
4851 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4852 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4853
4854 // This basic block is probably dead. We know it has at least
4855 // one fewer predecessor.
4856 return true;
4857}
4858
4859// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4860// true or to FalseBB if Cond is false.
4861// Takes care of updating the successors and removing the old terminator.
4862// Also makes sure not to introduce new successors by assuming that edges to
4863// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight/FalseWeight are profile weights for the new conditional branch
// (ignored when an unconditional branch or unreachable is emitted instead).
// Always returns true: a new terminator is always created.
4864bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4865 Value *Cond, BasicBlock *TrueBB,
4866 BasicBlock *FalseBB,
4867 uint32_t TrueWeight,
4868 uint32_t FalseWeight) {
4869 auto *BB = OldTerm->getParent();
4870 // Remove any superfluous successor edges from the CFG.
4871 // First, figure out which successors to preserve.
4872 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4873 // successor.
4874 BasicBlock *KeepEdge1 = TrueBB;
4875 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4876
4877 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4878
4879 // Then remove the rest.
4880 for (BasicBlock *Succ : successors(OldTerm)) {
4881 // Make sure only to keep exactly one copy of each edge.
4882 if (Succ == KeepEdge1)
4883 KeepEdge1 = nullptr;
4884 else if (Succ == KeepEdge2)
4885 KeepEdge2 = nullptr;
4886 else {
4887 Succ->removePredecessor(BB,
4888 /*KeepOneInputPHIs=*/true);
4889
4890 if (Succ != TrueBB && Succ != FalseBB)
4891 RemovedSuccessors.insert(Succ);
4892 }
4893 }
4894
4895 IRBuilder<> Builder(OldTerm);
4896 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4897
4898 // Insert an appropriate new terminator.
// After the loop, KeepEdge1/KeepEdge2 are null iff the corresponding target
// was actually found among OldTerm's successors.
4899 if (!KeepEdge1 && !KeepEdge2) {
4900 if (TrueBB == FalseBB) {
4901 // We were only looking for one successor, and it was present.
4902 // Create an unconditional branch to it.
4903 Builder.CreateBr(TrueBB);
4904 } else {
4905 // We found both of the successors we were looking for.
4906 // Create a conditional branch sharing the condition of the select.
4907 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4908 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4909 /*IsExpected=*/false, /*ElideAllZero=*/true);
4910 }
4911 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4912 // Neither of the selected blocks were successors, so this
4913 // terminator must be unreachable.
4914 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4915 } else {
4916 // One of the selected values was a successor, but the other wasn't.
4917 // Insert an unconditional branch to the one that was found;
4918 // the edge to the one that wasn't must be unreachable.
4919 if (!KeepEdge1) {
4920 // Only TrueBB was found.
4921 Builder.CreateBr(TrueBB);
4922 } else {
4923 // Only FalseBB was found.
4924 Builder.CreateBr(FalseBB);
4925 }
4926 }
4927
4929
// Notify the DomTreeUpdater of every CFG edge that was actually deleted.
4930 if (DTU) {
4931 SmallVector<DominatorTree::UpdateType, 2> Updates;
4932 Updates.reserve(RemovedSuccessors.size());
4933 for (auto *RemovedSuccessor : RemovedSuccessors)
4934 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4935 DTU->applyUpdates(Updates);
4936 }
4937
4938 return true;
4939}
4940
4941// Replaces
4942// (switch (select cond, X, Y)) on constant X, Y
4943// with a branch - conditional if X and Y lead to distinct BBs,
4944// unconditional otherwise.
4945bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4946 SelectInst *Select) {
4947 // Check for constant integer values in the select.
4948 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4949 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4950 if (!TrueVal || !FalseVal)
4951 return false;
4952
4953 // Find the relevant condition and destinations.
4954 Value *Condition = Select->getCondition();
4955 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4956 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4957
4958 // Get weight for TrueBB and FalseBB.
4959 uint32_t TrueWeight = 0, FalseWeight = 0;
4960 SmallVector<uint64_t, 8> Weights;
4961 bool HasWeights = hasBranchWeightMD(*SI);
4962 if (HasWeights) {
4963 getBranchWeights(SI, Weights);
4964 if (Weights.size() == 1 + SI->getNumCases()) {
4965 TrueWeight =
4966 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4967 FalseWeight =
4968 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4969 }
4970 }
4971
4972 // Perform the actual simplification.
4973 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4974 FalseWeight);
4975}
4976
4977// Replaces
4978// (indirectbr (select cond, blockaddress(@fn, BlockA),
4979// blockaddress(@fn, BlockB)))
4980// with
4981// (br cond, BlockA, BlockB).
// Returns false (no change) unless both select operands are blockaddress
// constants.
4982bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4983 SelectInst *SI) {
4984 // Check that both operands of the select are block addresses.
4985 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4986 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4987 if (!TBA || !FBA)
4988 return false;
4989
4990 // Extract the actual blocks.
4991 BasicBlock *TrueBB = TBA->getBasicBlock();
4992 BasicBlock *FalseBB = FBA->getBasicBlock();
4993
4994 // The select's profile becomes the profile of the conditional branch that
4995 // replaces the indirect branch.
// The two elements are value-initialized to zero, so the weights passed below
// are 0/0 when no branch-weight metadata is extracted.
4996 SmallVector<uint32_t> SelectBranchWeights(2);
4998 extractBranchWeights(*SI, SelectBranchWeights);
4999 // Perform the actual simplification.
5000 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
5001 SelectBranchWeights[0],
5002 SelectBranchWeights[1]);
5003}
5004
5005/// This is called when we find an icmp instruction
5006/// (a seteq/setne with a constant) as the only instruction in a
5007/// block that ends with an uncond branch. We are looking for a very specific
5008/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5009/// this case, we merge the first two "or's of icmp" into a switch, but then the
5010/// default value goes to an uncond block with a seteq in it, we get something
5011/// like:
5012///
5013/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5014/// DEFAULT:
5015/// %tmp = icmp eq i8 %A, 92
5016/// br label %end
5017/// end:
5018/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5019///
5020/// We prefer to split the edge to 'end' so that there is a true/false entry to
5021/// the PHI, merging the third icmp into the switch.
/// Returns true if the CFG was changed.
5022bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5023 ICmpInst *ICI, IRBuilder<> &Builder) {
5024 BasicBlock *BB = ICI->getParent();
5025
5026 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5027 // complex.
5028 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5029 return false;
5030
5031 Value *V = ICI->getOperand(0);
5032 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5033
5034 // The pattern we're looking for is where our only predecessor is a switch on
5035 // 'V' and this block is the default case for the switch. In this case we can
5036 // fold the compared value into the switch to simplify things.
5037 BasicBlock *Pred = BB->getSinglePredecessor();
5038 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5039 return false;
5040
5041 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5042 if (SI->getCondition() != V)
5043 return false;
5044
5045 // If BB is reachable on a non-default case, then we simply know the value of
5046 // V in this block. Substitute it and constant fold the icmp instruction
5047 // away.
5048 if (SI->getDefaultDest() != BB) {
5049 ConstantInt *VVal = SI->findCaseDest(BB);
5050 assert(VVal && "Should have a unique destination value");
5051 ICI->setOperand(0, VVal);
5052
5053 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5054 ICI->replaceAllUsesWith(V);
5055 ICI->eraseFromParent();
5056 }
5057 // BB is now empty, so it is likely to simplify away.
5058 return requestResimplify();
5059 }
5060
5061 // Ok, the block is reachable from the default dest. If the constant we're
5062 // comparing exists in one of the other edges, then we can constant fold ICI
5063 // and zap it.
5064 if (SI->findCaseValue(Cst) != SI->case_default()) {
5065 Value *V;
// NOTE: the two assignments to V (original lines 5067 and 5069) are elided
// in this listing; only the predicate dispatch around them is visible.
5066 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5068 else
5070
5071 ICI->replaceAllUsesWith(V);
5072 ICI->eraseFromParent();
5073 // BB is now empty, so it is likely to simplify away.
5074 return requestResimplify();
5075 }
5076
5077 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5078 // the block.
5079 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5080 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
// NOTE: the final clause of this condition (original line 5082) is elided in
// this listing.
5081 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5083 return false;
5084
5085 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5086 // true in the PHI.
5087 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5088 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5089
5090 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5091 std::swap(DefaultCst, NewCst);
5092
5093 // Replace ICI (which is used by the PHI for the default value) with true or
5094 // false depending on if it is EQ or NE.
5095 ICI->replaceAllUsesWith(DefaultCst);
5096 ICI->eraseFromParent();
5097
5098 SmallVector<DominatorTree::UpdateType, 2> Updates;
5099
5100 // Okay, the switch goes to this block on a default value. Add an edge from
5101 // the switch to the merge point on the compared value.
5102 BasicBlock *NewBB =
5103 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5104 {
5105 SwitchInstProfUpdateWrapper SIW(*SI);
5106 auto W0 = SIW.getSuccessorWeight(0);
// Split the default successor's weight roughly in half (rounding up) between
// the remaining default edge and the newly added case.
5108 if (W0) {
5109 NewW = ((uint64_t(*W0) + 1) >> 1);
5110 SIW.setSuccessorWeight(0, *NewW);
5111 }
5112 SIW.addCase(Cst, NewBB, NewW);
5113 if (DTU)
5114 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5115 }
5116
5117 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5118 Builder.SetInsertPoint(NewBB);
5119 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5120 Builder.CreateBr(SuccBlock);
5121 PHIUse->addIncoming(NewCst, NewBB);
5122 if (DTU) {
5123 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5124 DTU->applyUpdates(Updates);
5125 }
5126 return true;
5127}
5128
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  // For a 'setne' chain the branch is taken when the value matches *none* of
  // the constants, so the matched-constant edge is the false successor; swap
  // so that EdgeBB is always the destination of the matched constants.
  if (!TrueWhenEqual) {
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH.  BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
                                                    DEBUG_TYPE);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  // After sorting+deduplication, front() and back() are the extreme values;
  // their difference equals size()-1 exactly when the values form one
  // contiguous run.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      // One incoming entry for BB already exists; the switch contributes
      // Values.size() edges from BB in total, so add size()-1 more copies.
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5297
5298bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5299 if (isa<PHINode>(RI->getValue()))
5300 return simplifyCommonResume(RI);
5301 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5302 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5303 // The resume must unwind the exception that caused control to branch here.
5304 return simplifySingleResume(RI);
5305
5306 return false;
5307}
5308
// Check if cleanup block is empty: the instruction range R is considered
// "empty" when it contains only debug-info intrinsics and lifetime.end
// markers, i.e. nothing that would execute at run time.
  for (Instruction &I : R) {
    // Anything that is not an intrinsic call disqualifies the range.
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break;
    default:
      return false;
    }
  }
  return true;
}
5329
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // TrivialUnwindBlocks is known non-empty here, so we changed something.
  return !TrivialUnwindBlocks.empty();
}
5399
// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5422
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // Translate through BB's PHI only when the incoming value is a PHI
      // defined in BB itself; otherwise the value flows through unchanged.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush pending updates before removeUnwindEdge mutates the CFG.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5538
// Try to merge two cleanuppads together.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5571
5572bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5573 // It is possible to transiantly have an undef cleanuppad operand because we
5574 // have deleted some, but not all, dead blocks.
5575 // Eventually, this block will be deleted.
5576 if (isa<UndefValue>(RI->getOperand(0)))
5577 return false;
5578
5579 if (mergeCleanupPad(RI))
5580 return true;
5581
5582 if (removeEmptyCleanup(RI, DTU))
5583 return true;
5584
5585 return false;
5586}
5587
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // The edge into BB is never taken; record that fact as an llvm.assume
        // on the branch condition and branch directly to the live successor.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        // removeCase invalidates iterators; re-fetch the end iterator.
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5771
5780
static std::optional<ContiguousCasesResult>
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  // Max - Min == size()-1 holds exactly when the (deduplicated) case values
  // form a single contiguous run [Min, Max].
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Locate the single "gap" in the case list: adjacent entries whose values
    // do not differ by exactly one.
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    // Both halves around the gap must themselves be contiguous.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5833
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  // Detach the old default block from BB first (unless the caller wants its
  // PHI entries preserved).
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only report the BB->OrigDefaultBlock edge deleted if no case edge to
    // the old default remains.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5857
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);

  if (!ContiguousCases)
    return false;

  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.

  Constant *NumCases = ConstantInt::get(Offset->getType(),
                                        Max->getValue() - Min->getValue() + 1);
  BranchInst *NewBI;
  if (NumCases->isOneValue()) {
    // Exactly one case value: a single equality test suffices.
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    // General case: branch on (Cond + Offset) u< NumCases, which selects
    // exactly the contiguous range of case values.
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Halve both weights together until each fits in 32 bits so the ratio
      // is preserved.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto &PHI : make_early_inc_range(Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    // The new branch supplies exactly one edge; drop the rest.
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (NewBI->isUnconditional())
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
6015
6016/// Compute masked bits for the condition of a switch
6017/// and use it to remove dead cases.
6019 AssumptionCache *AC,
6020 const DataLayout &DL) {
6021 Value *Cond = SI->getCondition();
6022 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
6023
6024 // We can also eliminate cases by determining that their values are outside of
6025 // the limited range of the condition based on how many significant (non-sign)
6026 // bits are in the condition value.
6027 unsigned MaxSignificantBitsInCond =
6029
6030 // Gather dead cases.
6032 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6033 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6034 for (const auto &Case : SI->cases()) {
6035 auto *Successor = Case.getCaseSuccessor();
6036 if (DTU) {
6037 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6038 if (Inserted)
6039 UniqueSuccessors.push_back(Successor);
6040 ++It->second;
6041 }
6042 const APInt &CaseVal = Case.getCaseValue()->getValue();
6043 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6044 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
6045 DeadCases.push_back(Case.getCaseValue());
6046 if (DTU)
6047 --NumPerSuccessorCases[Successor];
6048 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6049 << " is dead.\n");
6050 }
6051 }
6052
6053 // If we can prove that the cases must cover all possible values, the
6054 // default destination becomes dead and we can remove it. If we know some
6055 // of the bits in the value, we can use that to more precisely compute the
6056 // number of possible unique case values.
6057 bool HasDefault = !SI->defaultDestUnreachable();
6058 const unsigned NumUnknownBits =
6059 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6060 assert(NumUnknownBits <= Known.getBitWidth());
6061 if (HasDefault && DeadCases.empty() &&
6062 NumUnknownBits < 64 /* avoid overflow */) {
6063 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6064 if (SI->getNumCases() == AllNumCases) {
6066 return true;
6067 }
6068 // When only one case value is missing, replace default with that case.
6069 // Eliminating the default branch will provide more opportunities for
6070 // optimization, such as lookup tables.
6071 if (SI->getNumCases() == AllNumCases - 1) {
6072 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6073 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6074 if (CondTy->getIntegerBitWidth() > 64 ||
6075 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6076 return false;
6077
6078 uint64_t MissingCaseVal = 0;
6079 for (const auto &Case : SI->cases())
6080 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6081 auto *MissingCase =
6082 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
6084 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
6085 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
6086 SIW.setSuccessorWeight(0, 0);
6087 return true;
6088 }
6089 }
6090
6091 if (DeadCases.empty())
6092 return false;
6093
6095 for (ConstantInt *DeadCase : DeadCases) {
6096 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6097 assert(CaseI != SI->case_default() &&
6098 "Case was not found. Probably mistake in DeadCases forming.");
6099 // Prune unused values from PHI nodes.
6100 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6101 SIW.removeCase(CaseI);
6102 }
6103
6104 if (DTU) {
6105 std::vector<DominatorTree::UpdateType> Updates;
6106 for (auto *Successor : UniqueSuccessors)
6107 if (NumPerSuccessorCases[Successor] == 0)
6108 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6109 DTU->applyUpdates(Updates);
6110 }
6111
6112 return true;
6113}
6114
6115/// If BB would be eligible for simplification by
6116/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6117/// by an unconditional branch), look at the phi node for BB in the successor
6118/// block and see if the incoming value is equal to CaseValue. If so, return
6119/// the phi node, and set PhiIndex to BB's index in the phi node.
6121 BasicBlock *BB, int *PhiIndex) {
6122 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6123 return nullptr; // BB must be empty to be a candidate for simplification.
6124 if (!BB->getSinglePredecessor())
6125 return nullptr; // BB must be dominated by the switch.
6126
6128 if (!Branch || !Branch->isUnconditional())
6129 return nullptr; // Terminator must be unconditional branch.
6130
6131 BasicBlock *Succ = Branch->getSuccessor(0);
6132
6133 for (PHINode &PHI : Succ->phis()) {
6134 int Idx = PHI.getBasicBlockIndex(BB);
6135 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6136
6137 Value *InValue = PHI.getIncomingValue(Idx);
6138 if (InValue != CaseValue)
6139 continue;
6140
6141 *PhiIndex = Idx;
6142 return &PHI;
6143 }
6144
6145 return nullptr;
6146}
6147
6148/// Try to forward the condition of a switch instruction to a phi node
6149/// dominated by the switch, if that would mean that some of the destination
6150/// blocks of the switch can be folded away. Return true if a change is made.
6152 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6153
6154 ForwardingNodesMap ForwardingNodes;
6155 BasicBlock *SwitchBlock = SI->getParent();
6156 bool Changed = false;
6157 for (const auto &Case : SI->cases()) {
6158 ConstantInt *CaseValue = Case.getCaseValue();
6159 BasicBlock *CaseDest = Case.getCaseSuccessor();
6160
6161 // Replace phi operands in successor blocks that are using the constant case
6162 // value rather than the switch condition variable:
6163 // switchbb:
6164 // switch i32 %x, label %default [
6165 // i32 17, label %succ
6166 // ...
6167 // succ:
6168 // %r = phi i32 ... [ 17, %switchbb ] ...
6169 // -->
6170 // %r = phi i32 ... [ %x, %switchbb ] ...
6171
6172 for (PHINode &Phi : CaseDest->phis()) {
6173 // This only works if there is exactly 1 incoming edge from the switch to
6174 // a phi. If there is >1, that means multiple cases of the switch map to 1
6175 // value in the phi, and that phi value is not the switch condition. Thus,
6176 // this transform would not make sense (the phi would be invalid because
6177 // a phi can't have different incoming values from the same block).
6178 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6179 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6180 count(Phi.blocks(), SwitchBlock) == 1) {
6181 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6182 Changed = true;
6183 }
6184 }
6185
6186 // Collect phi nodes that are indirectly using this switch's case constants.
6187 int PhiIdx;
6188 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6189 ForwardingNodes[Phi].push_back(PhiIdx);
6190 }
6191
6192 for (auto &ForwardingNode : ForwardingNodes) {
6193 PHINode *Phi = ForwardingNode.first;
6194 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6195 // Check if it helps to fold PHI.
6196 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6197 continue;
6198
6199 for (int Index : Indexes)
6200 Phi->setIncomingValue(Index, SI->getCondition());
6201 Changed = true;
6202 }
6203
6204 return Changed;
6205}
6206
6207/// Return true if the backend will be able to handle
6208/// initializing an array of constants like C.
6210 if (C->isThreadDependent())
6211 return false;
6212 if (C->isDLLImportDependent())
6213 return false;
6214
6215 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6218 return false;
6219
6221 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6222 // materializing the array of constants.
6223 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6224 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6225 return false;
6226 }
6227
6228 if (!TTI.shouldBuildLookupTablesForConstant(C))
6229 return false;
6230
6231 return true;
6232}
6233
6234/// If V is a Constant, return it. Otherwise, try to look up
6235/// its constant value in ConstantPool, returning 0 if it's not there.
6236static Constant *
6239 if (Constant *C = dyn_cast<Constant>(V))
6240 return C;
6241 return ConstantPool.lookup(V);
6242}
6243
6244/// Try to fold instruction I into a constant. This works for
6245/// simple instructions such as binary operations where both operands are
6246/// constant or can be replaced by constants from the ConstantPool. Returns the
6247/// resulting constant on success, 0 otherwise.
6248static Constant *
6252 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6253 if (!A)
6254 return nullptr;
6255 if (A->isAllOnesValue())
6256 return lookupConstant(Select->getTrueValue(), ConstantPool);
6257 if (A->isNullValue())
6258 return lookupConstant(Select->getFalseValue(), ConstantPool);
6259 return nullptr;
6260 }
6261
6263 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6264 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6265 COps.push_back(A);
6266 else
6267 return nullptr;
6268 }
6269
6270 return ConstantFoldInstOperands(I, COps, DL);
6271}
6272
6273/// Try to determine the resulting constant values in phi nodes
6274/// at the common destination basic block, *CommonDest, for one of the case
6275/// destionations CaseDest corresponding to value CaseVal (0 for the default
6276/// case), of a switch instruction SI.
6277static bool
6279 BasicBlock **CommonDest,
6280 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6281 const DataLayout &DL, const TargetTransformInfo &TTI) {
6282 // The block from which we enter the common destination.
6283 BasicBlock *Pred = SI->getParent();
6284
6285 // If CaseDest is empty except for some side-effect free instructions through
6286 // which we can constant-propagate the CaseVal, continue to its successor.
6288 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6289 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6290 if (I.isTerminator()) {
6291 // If the terminator is a simple branch, continue to the next block.
6292 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6293 return false;
6294 Pred = CaseDest;
6295 CaseDest = I.getSuccessor(0);
6296 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6297 // Instruction is side-effect free and constant.
6298
6299 // If the instruction has uses outside this block or a phi node slot for
6300 // the block, it is not safe to bypass the instruction since it would then
6301 // no longer dominate all its uses.
6302 for (auto &Use : I.uses()) {
6303 User *User = Use.getUser();
6305 if (I->getParent() == CaseDest)
6306 continue;
6307 if (PHINode *Phi = dyn_cast<PHINode>(User))
6308 if (Phi->getIncomingBlock(Use) == CaseDest)
6309 continue;
6310 return false;
6311 }
6312
6313 ConstantPool.insert(std::make_pair(&I, C));
6314 } else {
6315 break;
6316 }
6317 }
6318
6319 // If we did not have a CommonDest before, use the current one.
6320 if (!*CommonDest)
6321 *CommonDest = CaseDest;
6322 // If the destination isn't the common one, abort.
6323 if (CaseDest != *CommonDest)
6324 return false;
6325
6326 // Get the values for this case from phi nodes in the destination block.
6327 for (PHINode &PHI : (*CommonDest)->phis()) {
6328 int Idx = PHI.getBasicBlockIndex(Pred);
6329 if (Idx == -1)
6330 continue;
6331
6332 Constant *ConstVal =
6333 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6334 if (!ConstVal)
6335 return false;
6336
6337 // Be conservative about which kinds of constants we support.
6338 if (!validLookupTableConstant(ConstVal, TTI))
6339 return false;
6340
6341 Res.push_back(std::make_pair(&PHI, ConstVal));
6342 }
6343
6344 return Res.size() > 0;
6345}
6346
6347// Helper function used to add CaseVal to the list of cases that generate
6348// Result. Returns the updated number of cases that generate this result.
6349static size_t mapCaseToResult(ConstantInt *CaseVal,
6350 SwitchCaseResultVectorTy &UniqueResults,
6351 Constant *Result) {
6352 for (auto &I : UniqueResults) {
6353 if (I.first == Result) {
6354 I.second.push_back(CaseVal);
6355 return I.second.size();
6356 }
6357 }
6358 UniqueResults.push_back(
6359 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6360 return 1;
6361}
6362
6363// Helper function that initializes a map containing
6364// results for the PHI node of the common destination block for a switch
6365// instruction. Returns false if multiple PHI nodes have been found or if
6366// there is not a common destination block for the switch.
6368 BasicBlock *&CommonDest,
6369 SwitchCaseResultVectorTy &UniqueResults,
6370 Constant *&DefaultResult,
6371 const DataLayout &DL,
6372 const TargetTransformInfo &TTI,
6373 uintptr_t MaxUniqueResults) {
6374 for (const auto &I : SI->cases()) {
6375 ConstantInt *CaseVal = I.getCaseValue();
6376
6377 // Resulting value at phi nodes for this case value.
6378 SwitchCaseResultsTy Results;
6379 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6380 DL, TTI))
6381 return false;
6382
6383 // Only one value per case is permitted.
6384 if (Results.size() > 1)
6385 return false;
6386
6387 // Add the case->result mapping to UniqueResults.
6388 const size_t NumCasesForResult =
6389 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6390
6391 // Early out if there are too many cases for this result.
6392 if (NumCasesForResult > MaxSwitchCasesPerResult)
6393 return false;
6394
6395 // Early out if there are too many unique results.
6396 if (UniqueResults.size() > MaxUniqueResults)
6397 return false;
6398
6399 // Check the PHI consistency.
6400 if (!PHI)
6401 PHI = Results[0].first;
6402 else if (PHI != Results[0].first)
6403 return false;
6404 }
6405 // Find the default result value.
6407 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6408 DL, TTI);
6409 // If the default value is not found abort unless the default destination
6410 // is unreachable.
6411 DefaultResult =
6412 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6413
6414 return DefaultResult || SI->defaultDestUnreachable();
6415}
6416
6417// Helper function that checks if it is possible to transform a switch with only
6418// two cases (or two cases + default) that produces a result into a select.
6419// TODO: Handle switches with more than 2 cases that map to the same result.
6420// The branch weights correspond to the provided Condition (i.e. if Condition is
6421// modified from the original SwitchInst, the caller must adjust the weights)
6422static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6423 Constant *DefaultResult, Value *Condition,
6424 IRBuilder<> &Builder, const DataLayout &DL,
6425 ArrayRef<uint32_t> BranchWeights) {
6426 // If we are selecting between only two cases transform into a simple
6427 // select or a two-way select if default is possible.
6428 // Example:
6429 // switch (a) { %0 = icmp eq i32 %a, 10
6430 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6431 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6432 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6433 // }
6434
6435 const bool HasBranchWeights =
6436 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6437
6438 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6439 ResultVector[1].second.size() == 1) {
6440 ConstantInt *FirstCase = ResultVector[0].second[0];
6441 ConstantInt *SecondCase = ResultVector[1].second[0];
6442 Value *SelectValue = ResultVector[1].first;
6443 if (DefaultResult) {
6444 Value *ValueCompare =
6445 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6446 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6447 DefaultResult, "switch.select");
6448 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6449 SI && HasBranchWeights) {
6450 // We start with 3 probabilities, where the numerator is the
6451 // corresponding BranchWeights[i], and the denominator is the sum over
6452 // BranchWeights. We want the probability and negative probability of
6453 // Condition == SecondCase.
6454 assert(BranchWeights.size() == 3);
6456 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6457 /*IsExpected=*/false, /*ElideAllZero=*/true);
6458 }
6459 }
6460 Value *ValueCompare =
6461 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6462 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6463 SelectValue, "switch.select");
6464 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6465 // We may have had a DefaultResult. Base the position of the first and
6466 // second's branch weights accordingly. Also the proability that Condition
6467 // != FirstCase needs to take that into account.
6468 assert(BranchWeights.size() >= 2);
6469 size_t FirstCasePos = (Condition != nullptr);
6470 size_t SecondCasePos = FirstCasePos + 1;
6471 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6473 {BranchWeights[FirstCasePos],
6474 DefaultCase + BranchWeights[SecondCasePos]},
6475 /*IsExpected=*/false, /*ElideAllZero=*/true);
6476 }
6477 return Ret;
6478 }
6479
6480 // Handle the degenerate case where two cases have the same result value.
6481 if (ResultVector.size() == 1 && DefaultResult) {
6482 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6483 unsigned CaseCount = CaseValues.size();
6484 // n bits group cases map to the same result:
6485 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6486 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6487 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6488 if (isPowerOf2_32(CaseCount)) {
6489 ConstantInt *MinCaseVal = CaseValues[0];
6490 // If there are bits that are set exclusively by CaseValues, we
6491 // can transform the switch into a select if the conjunction of
6492 // all the values uniquely identify CaseValues.
6493 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6494
6495 // Find the minimum value and compute the and of all the case values.
6496 for (auto *Case : CaseValues) {
6497 if (Case->getValue().slt(MinCaseVal->getValue()))
6498 MinCaseVal = Case;
6499 AndMask &= Case->getValue();
6500 }
6501 KnownBits Known = computeKnownBits(Condition, DL);
6502
6503 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6504 // Compute the number of bits that are free to vary.
6505 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6506
6507 // Check if the number of values covered by the mask is equal
6508 // to the number of cases.
6509 if (FreeBits == Log2_32(CaseCount)) {
6510 Value *And = Builder.CreateAnd(Condition, AndMask);
6511 Value *Cmp = Builder.CreateICmpEQ(
6512 And, Constant::getIntegerValue(And->getType(), AndMask));
6513 Value *Ret =
6514 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6515 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6516 // We know there's a Default case. We base the resulting branch
6517 // weights off its probability.
6518 assert(BranchWeights.size() >= 2);
6520 *SI,
6521 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6522 /*IsExpected=*/false, /*ElideAllZero=*/true);
6523 }
6524 return Ret;
6525 }
6526 }
6527
6528 // Mark the bits case number touched.
6529 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6530 for (auto *Case : CaseValues)
6531 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6532
6533 // Check if cases with the same result can cover all number
6534 // in touched bits.
6535 if (BitMask.popcount() == Log2_32(CaseCount)) {
6536 if (!MinCaseVal->isNullValue())
6537 Condition = Builder.CreateSub(Condition, MinCaseVal);
6538 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6539 Value *Cmp = Builder.CreateICmpEQ(
6540 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6541 Value *Ret =
6542 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6543 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6544 assert(BranchWeights.size() >= 2);
6546 *SI,
6547 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6548 /*IsExpected=*/false, /*ElideAllZero=*/true);
6549 }
6550 return Ret;
6551 }
6552 }
6553
6554 // Handle the degenerate case where two cases have the same value.
6555 if (CaseValues.size() == 2) {
6556 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6557 "switch.selectcmp.case1");
6558 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6559 "switch.selectcmp.case2");
6560 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6561 Value *Ret =
6562 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6563 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6564 assert(BranchWeights.size() >= 2);
6566 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6567 /*IsExpected=*/false, /*ElideAllZero=*/true);
6568 }
6569 return Ret;
6570 }
6571 }
6572
6573 return nullptr;
6574}
6575
6576// Helper function to cleanup a switch instruction that has been converted into
6577// a select, fixing up PHI nodes and basic blocks.
6579 Value *SelectValue,
6580 IRBuilder<> &Builder,
6581 DomTreeUpdater *DTU) {
6582 std::vector<DominatorTree::UpdateType> Updates;
6583
6584 BasicBlock *SelectBB = SI->getParent();
6585 BasicBlock *DestBB = PHI->getParent();
6586
6587 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6588 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6589 Builder.CreateBr(DestBB);
6590
6591 // Remove the switch.
6592
6593 PHI->removeIncomingValueIf(
6594 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6595 PHI->addIncoming(SelectValue, SelectBB);
6596
6597 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6598 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6599 BasicBlock *Succ = SI->getSuccessor(i);
6600
6601 if (Succ == DestBB)
6602 continue;
6603 Succ->removePredecessor(SelectBB);
6604 if (DTU && RemovedSuccessors.insert(Succ).second)
6605 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6606 }
6607 SI->eraseFromParent();
6608 if (DTU)
6609 DTU->applyUpdates(Updates);
6610}
6611
6612/// If a switch is only used to initialize one or more phi nodes in a common
6613/// successor block with only two different constant values, try to replace the
6614/// switch with a select. Returns true if the fold was made.
6616 DomTreeUpdater *DTU, const DataLayout &DL,
6617 const TargetTransformInfo &TTI) {
6618 Value *const Cond = SI->getCondition();
6619 PHINode *PHI = nullptr;
6620 BasicBlock *CommonDest = nullptr;
6621 Constant *DefaultResult;
6622 SwitchCaseResultVectorTy UniqueResults;
6623 // Collect all the cases that will deliver the same value from the switch.
6624 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6625 DL, TTI, /*MaxUniqueResults*/ 2))
6626 return false;
6627
6628 assert(PHI != nullptr && "PHI for value select not found");
6629 Builder.SetInsertPoint(SI);
6630 SmallVector<uint32_t, 4> BranchWeights;
6632 [[maybe_unused]] auto HasWeights =
6634 assert(!HasWeights == (BranchWeights.empty()));
6635 }
6636 assert(BranchWeights.empty() ||
6637 (BranchWeights.size() >=
6638 UniqueResults.size() + (DefaultResult != nullptr)));
6639
6640 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6641 Builder, DL, BranchWeights);
6642 if (!SelectValue)
6643 return false;
6644
6645 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6646 return true;
6647}
6648
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind; // Chosen once by the constructor; read by replaceSwitch().

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True if the linear mapping may wrap; suppresses nsw on the emitted ops.
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6727
6728SwitchReplacement::SwitchReplacement(
6729 Module &M, uint64_t TableSize, ConstantInt *Offset,
6730 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6731 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6732 : DefaultValue(DefaultValue) {
6733 assert(Values.size() && "Can't build lookup table without values!");
6734 assert(TableSize >= Values.size() && "Can't fit values in table!");
6735
6736 // If all values in the table are equal, this is that value.
6737 SingleValue = Values.begin()->second;
6738
6739 ValueType = Values.begin()->second->getType();
6740
6741 // Build up the table contents.
6742 SmallVector<Constant *, 64> TableContents(TableSize);
6743 for (const auto &[CaseVal, CaseRes] : Values) {
6744 assert(CaseRes->getType() == ValueType);
6745
6746 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6747 TableContents[Idx] = CaseRes;
6748
6749 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6750 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6751 }
6752
6753 // Fill in any holes in the table with the default result.
6754 if (Values.size() < TableSize) {
6755 assert(DefaultValue &&
6756 "Need a default value to fill the lookup table holes.");
6757 assert(DefaultValue->getType() == ValueType);
6758 for (uint64_t I = 0; I < TableSize; ++I) {
6759 if (!TableContents[I])
6760 TableContents[I] = DefaultValue;
6761 }
6762
6763 // If the default value is poison, all the holes are poison.
6764 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6765
6766 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6767 SingleValue = nullptr;
6768 }
6769
6770 // If each element in the table contains the same value, we only need to store
6771 // that single value.
6772 if (SingleValue) {
6773 Kind = SingleValueKind;
6774 return;
6775 }
6776
6777 // Check if we can derive the value with a linear transformation from the
6778 // table index.
6780 bool LinearMappingPossible = true;
6781 APInt PrevVal;
6782 APInt DistToPrev;
6783 // When linear map is monotonic and signed overflow doesn't happen on
6784 // maximum index, we can attach nsw on Add and Mul.
6785 bool NonMonotonic = false;
6786 assert(TableSize >= 2 && "Should be a SingleValue table.");
6787 // Check if there is the same distance between two consecutive values.
6788 for (uint64_t I = 0; I < TableSize; ++I) {
6789 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6790
6791 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6792 // This is an poison, so it's (probably) a lookup table hole.
6793 // To prevent any regressions from before we switched to using poison as
6794 // the default value, holes will fall back to using the first value.
6795 // This can be removed once we add proper handling for poisons in lookup
6796 // tables.
6797 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6798 }
6799
6800 if (!ConstVal) {
6801 // This is an undef. We could deal with it, but undefs in lookup tables
6802 // are very seldom. It's probably not worth the additional complexity.
6803 LinearMappingPossible = false;
6804 break;
6805 }
6806 const APInt &Val = ConstVal->getValue();
6807 if (I != 0) {
6808 APInt Dist = Val - PrevVal;
6809 if (I == 1) {
6810 DistToPrev = Dist;
6811 } else if (Dist != DistToPrev) {
6812 LinearMappingPossible = false;
6813 break;
6814 }
6815 NonMonotonic |=
6816 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6817 }
6818 PrevVal = Val;
6819 }
6820 if (LinearMappingPossible) {
6821 LinearOffset = cast<ConstantInt>(TableContents[0]);
6822 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6823 APInt M = LinearMultiplier->getValue();
6824 bool MayWrap = true;
6825 if (isIntN(M.getBitWidth(), TableSize - 1))
6826 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6827 LinearMapValWrapped = NonMonotonic || MayWrap;
6828 Kind = LinearMapKind;
6829 return;
6830 }
6831 }
6832
6833 // If the type is integer and the table fits in a register, build a bitmap.
6834 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6836 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6837 for (uint64_t I = TableSize; I > 0; --I) {
6838 TableInt <<= IT->getBitWidth();
6839 // Insert values into the bitmap. Undef values are set to zero.
6840 if (!isa<UndefValue>(TableContents[I - 1])) {
6841 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6842 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6843 }
6844 }
6845 BitMap = ConstantInt::get(M.getContext(), TableInt);
6846 BitMapElementTy = IT;
6847 Kind = BitMapKind;
6848 return;
6849 }
6850
6851 // Store the table in an array.
6852 auto *TableTy = ArrayType::get(ValueType, TableSize);
6853 Initializer = ConstantArray::get(TableTy, TableContents);
6854
6855 Kind = LookupTableKind;
6856}
6857
/// Emit IR that computes, for the given (already offset-adjusted) table
/// \p Index, the value the original switch would have fed into the phi,
/// using whichever representation was selected when this SwitchReplacement
/// was constructed (single value, linear map, in-register bitmap, or an
/// in-memory lookup table).
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every case produced the same constant, so no computation is needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array items. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // If every valid table index already fits in the narrower original
      // type's non-negative range, the zext can be marked non-negative.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6931
6932bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6933 uint64_t TableSize,
6934 Type *ElementType) {
6935 auto *IT = dyn_cast<IntegerType>(ElementType);
6936 if (!IT)
6937 return false;
6938 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6939 // are <= 15, we could try to narrow the type.
6940
6941 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6942 if (TableSize >= UINT_MAX / IT->getBitWidth())
6943 return false;
6944 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6945}
6946
6948 const DataLayout &DL) {
6949 // Allow any legal type.
6950 if (TTI.isTypeLegal(Ty))
6951 return true;
6952
6953 auto *IT = dyn_cast<IntegerType>(Ty);
6954 if (!IT)
6955 return false;
6956
6957 // Also allow power of 2 integer types that have at least 8 bits and fit in
6958 // a register. These types are common in frontend languages and targets
6959 // usually support loads of these types.
6960 // TODO: We could relax this to any integer that fits in a register and rely
6961 // on ABI alignment and padding in the table to allow the load to be widened.
6962 // Or we could widen the constants and truncate the load.
6963 unsigned BitWidth = IT->getBitWidth();
6964 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6965 DL.fitsInLegalInteger(IT->getBitWidth());
6966}
6967
6968Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6969
6970bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6971
6972bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
6973
/// Return true if a switch with \p NumCases cases spread over an inclusive
/// range of \p CaseRange values is dense enough to be worth lowering as a
/// table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Refuse ranges large enough that either product below could wrap.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  const uint64_t CoverageScaled = NumCases * 100;
  return CoverageScaled >= CaseRange * MinDensity;
}
6985
6987 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6988 uint64_t Range = Diff + 1;
6989 if (Range < Diff)
6990 return false; // Overflow.
6991
6992 return isSwitchDense(Values.size(), Range);
6993}
6994
6995/// Determine whether a lookup table should be built for this switch, based on
6996/// the number of cases, size of the table, and the types of the results.
6997// TODO: We could support larger than legal types by limiting based on the
6998// number of loads required and/or table size. If the constants are small we
6999// could use smaller table entries and extend after the load.
7001 const TargetTransformInfo &TTI,
7002 const DataLayout &DL,
7003 const SmallVector<Type *> &ResultTypes) {
7004 if (SI->getNumCases() > TableSize)
7005 return false; // TableSize overflowed.
7006
7007 bool AllTablesFitInRegister = true;
7008 bool HasIllegalType = false;
7009 for (const auto &Ty : ResultTypes) {
7010 // Saturate this flag to true.
7011 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7012
7013 // Saturate this flag to false.
7014 AllTablesFitInRegister =
7015 AllTablesFitInRegister &&
7016 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7017
7018 // If both flags saturate, we're done. NOTE: This *only* works with
7019 // saturating flags, and all flags have to saturate first due to the
7020 // non-deterministic behavior of iterating over a dense map.
7021 if (HasIllegalType && !AllTablesFitInRegister)
7022 break;
7023 }
7024
7025 // If each table would fit in a register, we should build it anyway.
7026 if (AllTablesFitInRegister)
7027 return true;
7028
7029 // Don't build a table that doesn't fit in-register if it has illegal types.
7030 if (HasIllegalType)
7031 return false;
7032
7033 return isSwitchDense(SI->getNumCases(), TableSize);
7034}
7035
7037 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7038 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7039 const DataLayout &DL, const TargetTransformInfo &TTI) {
7040 if (MinCaseVal.isNullValue())
7041 return true;
7042 if (MinCaseVal.isNegative() ||
7043 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7044 !HasDefaultResults)
7045 return false;
7046 return all_of(ResultTypes, [&](const auto &ResultType) {
7047 return SwitchReplacement::wouldFitInRegister(
7048 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7049 });
7050}
7051
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
/// r = table[idx]; // table does not contain default_value
/// else
/// r = default_value;
/// if (r != default_value)
/// ...
/// \endcode
/// Is optimized to:
/// \code
/// cond = idx < tablesize;
/// if (cond)
/// r = table[idx];
/// else
/// r = default_value;
/// if (cond)
/// ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // The compared-against operand must be a constant so the folds below can
  // evaluate each case's compare result at compile time.
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
      CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
      CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7133
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
                                 DomTreeUpdater *DTU, const DataLayout &DL,
                                 const TargetTransformInfo &TTI,
                                 bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;

  SmallVector<Type *> ResultTypes;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Track signed min/max so the table range can be computed afterwards.
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // Index with the raw condition: the table spans [0, MaxCaseVal].
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    // Index with (condition - MinCaseVal): the table spans the case range.
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exist.
      ConstantRange CR =
          computeConstantRange(TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon. (Shifting by >= 64 would be UB, so saturate for
  // conditions wider than 63 bits.)
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  BranchInst *RangeCheckBranch = nullptr;
  BranchInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removeProdecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7485
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  // Conditions wider than 64 bits or illegal for the target are not handled.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  // fshl(x, x, W - Shift) is a rotate-right of x by Shift bits.
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Remap each case value into the rebased, shifted domain.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7572
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
                                        DomTreeUpdater *DTU,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Condition->getType());

  // Conditions wider than 64 bits or illegal for the target are not handled.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(CaseValue))
      Values.push_back(CaseValue);
    else
      return false;
  }

  // isSwichDense requires case values to be sorted.
  llvm::sort(Values);
  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
                                        llvm::countr_zero(Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable.
    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));

    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    SmallVector<uint32_t> Weights;
    auto HasWeights =
    auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
    if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
      // IsPow2 covers a subset of the cases in which we'd go to the default
      // label. The other is those powers of 2 that don't appear in the case
      // statement. We don't know the distribution of the values coming in, so
      // the safest is to split 50-50 the original probability to `default`.
      uint64_t OrigDenominator = sum_of(map_range(
          Weights, [](const auto &V) { return static_cast<uint64_t>(V); }));
      SmallVector<uint64_t> NewWeights(2);
      NewWeights[1] = Weights[0] / 2;
      NewWeights[0] = OrigDenominator - NewWeights[1];
      setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);

      // For the original switch, we reduce the weight of the default by the
      // amount by which the previous branch contributes to getting to default,
      // and then make sure the remaining weights have the same relative ratio
      // wrt eachother.
      uint64_t CasesDenominator = OrigDenominator - Weights[0];
      Weights[0] /= 2;
      for (auto &W : drop_begin(Weights))
        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;

      setBranchWeights(*SI, Weights, /*IsExpected=*/false);
    }
    // BI is handling the default case for SI, and so should share its DebugLoc.
    BI->setDebugLoc(SI->getDebugLoc());
    It->eraseFromParent();

    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                   OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  // NOTE(review): the 'true' operand marks a zero input as poison; zero
  // appears to be routed to the default above (or unreachable) — confirm.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7687
7688/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7689/// the same destination.
/// The three possible cmp results (-1, 0, 1) are normalized to the form
/// "cmp == Res ? Succ : OtherSucc", which is then lowered to a single icmp
/// plus a conditional branch. Branch weights and DomTree edges are preserved.
/// Returns true if the switch (and the cmp intrinsic) were replaced.
7691 DomTreeUpdater *DTU) {
7692 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7693 if (!Cmp || !Cmp->hasOneUse())
7694 return false;
7695
7697 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7698 if (!HasWeights)
7699 Weights.resize(4); // Avoid checking HasWeights everywhere.
7700
7701 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7702 int64_t Res;
7703 BasicBlock *Succ, *OtherSucc;
7704 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7705 BasicBlock *Unreachable = nullptr;
7706
7707 if (SI->getNumCases() == 2) {
7708 // Find which of 1, 0 or -1 is missing (handled by default dest).
7709 SmallSet<int64_t, 3> Missing;
7710 Missing.insert(1);
7711 Missing.insert(0);
7712 Missing.insert(-1);
7713
// The default destination handles the single value without an explicit case;
// Weights[0] is the default destination's weight.
7714 Succ = SI->getDefaultDest();
7715 SuccWeight = Weights[0];
7716 OtherSucc = nullptr;
7717 for (auto &Case : SI->cases()) {
7718 std::optional<int64_t> Val =
7719 Case.getCaseValue()->getValue().trySExtValue();
7720 if (!Val)
7721 return false;
7722 if (!Missing.erase(*Val))
7723 return false;
// Both explicit cases must share one destination for the fold to apply.
7724 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7725 return false;
7726 OtherSucc = Case.getCaseSuccessor();
7727 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7728 }
7729
7730 assert(Missing.size() == 1 && "Should have one case left");
7731 Res = *Missing.begin();
7732 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7733 // Normalize so that Succ is taken once and OtherSucc twice.
7734 Unreachable = SI->getDefaultDest();
7735 Succ = OtherSucc = nullptr;
7736 for (auto &Case : SI->cases()) {
7737 BasicBlock *NewSucc = Case.getCaseSuccessor();
7738 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7739 if (!OtherSucc || OtherSucc == NewSucc) {
7740 OtherSucc = NewSucc;
7741 OtherSuccWeight += Weight;
7742 } else if (!Succ) {
7743 Succ = NewSucc;
7744 SuccWeight = Weight;
7745 } else if (Succ == NewSucc) {
// The third case matched the once-taken block: swap roles so Succ stays
// the destination reached by exactly one case value.
7746 std::swap(Succ, OtherSucc);
7747 std::swap(SuccWeight, OtherSuccWeight);
7748 } else
7749 return false;
7750 }
// Reject case values outside {-1, 0, 1} and recover the unique value that
// selects Succ.
7751 for (auto &Case : SI->cases()) {
7752 std::optional<int64_t> Val =
7753 Case.getCaseValue()->getValue().trySExtValue();
7754 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7755 return false;
7756 if (Case.getCaseSuccessor() == Succ) {
7757 Res = *Val;
7758 break;
7759 }
7760 }
7761 } else {
7762 return false;
7763 }
7764
7765 // Determine predicate for the missing case.
7767 switch (Res) {
7768 case 1:
7769 Pred = ICmpInst::ICMP_UGT;
7770 break;
7771 case 0:
7772 Pred = ICmpInst::ICMP_EQ;
7773 break;
7774 case -1:
7775 Pred = ICmpInst::ICMP_ULT;
7776 break;
7777 }
// ucmp and scmp differ only in the signedness of the ordering predicate.
7778 if (Cmp->isSigned())
7779 Pred = ICmpInst::getSignedPredicate(Pred);
7780
7781 MDNode *NewWeights = nullptr;
7782 if (HasWeights)
7783 NewWeights = MDBuilder(SI->getContext())
7784 .createBranchWeights(SuccWeight, OtherSuccWeight);
7785
// Materialize the icmp + condbr, then delete the switch and the now-unused
// cmp intrinsic (guaranteed single-use by the check at the top).
7786 BasicBlock *BB = SI->getParent();
7787 Builder.SetInsertPoint(SI->getIterator());
7788 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7789 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7790 SI->getMetadata(LLVMContext::MD_unpredictable));
7791 OtherSucc->removePredecessor(BB);
7792 if (Unreachable)
7793 Unreachable->removePredecessor(BB);
7794 SI->eraseFromParent();
7795 Cmp->eraseFromParent();
7796 if (DTU && Unreachable)
7797 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7798 return true;
7799}
7800
7801/// Checking whether two cases of SI are equal depends on the contents of the
7802/// BasicBlock and the incoming values of their successor PHINodes.
7803/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7804/// calling this function on each BasicBlock every time isEqual is called,
7805/// especially since the same BasicBlock may be passed as an argument multiple
7806/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7807/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7808/// of the incoming values.
7813
// Sentinel keys required by the DenseMap trait machinery.
7816 return static_cast<SwitchSuccWrapper *>(
7818 }
7820 return static_cast<SwitchSuccWrapper *>(
7822 }
// Hash a case destination by its unique successor plus the incoming values
// that successor's PHIs receive along this edge.
7823 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7824 BasicBlock *Succ = SSW->Dest;
7826 assert(BI->isUnconditional() &&
7827 "Only supporting unconditional branches for now");
7828 assert(BI->getNumSuccessors() == 1 &&
7829 "Expected unconditional branches to have one successor");
7830 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7831
7832 // Since we assume the BB is just a single BranchInst with a single
7833 // successor, we hash as the BB and the incoming Values of its successor
7834 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7835 // including the incoming PHI values leads to better performance.
7836 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7837 // time and passing it in SwitchSuccWrapper, but this slowed down the
7838 // average compile time without having any impact on the worst case compile
7839 // time.
7840 BasicBlock *BB = BI->getSuccessor(0);
7841 SmallVector<Value *> PhiValsForBB;
7842 for (PHINode &Phi : BB->phis())
7843 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7844
7845 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7846 }
// Two wrappers are equal when both blocks branch to the same successor and
// feed identical values into every PHI of that successor.
7847 static bool isEqual(const SwitchSuccWrapper *LHS,
7848 const SwitchSuccWrapper *RHS) {
// Empty/tombstone sentinels compare equal only to themselves.
7851 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7852 return LHS == RHS;
7853
7854 BasicBlock *A = LHS->Dest;
7855 BasicBlock *B = RHS->Dest;
7856
7857 // FIXME: we checked that the size of A and B are both 1 in
7858 // simplifyDuplicateSwitchArms to make the Case list smaller to
7859 // improve performance. If we decide to support BasicBlocks with more
7860 // than just a single instruction, we need to check that A.size() ==
7861 // B.size() here, and we need to check more than just the BranchInsts
7862 // for equality.
7863
7864 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7865 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7866 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7867 "Only supporting unconditional branches for now");
7868 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7869 return false;
7870
7871 // Need to check that PHIs in successor have matching values
7872 BasicBlock *Succ = ABI->getSuccessor(0);
7873 for (PHINode &Phi : Succ->phis()) {
// O(1) lookups thanks to the precomputed PhiPredIVs map (see doc above).
7874 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7875 if (PredIVs[A] != PredIVs[B])
7876 return false;
7877 }
7878
7879 return true;
7880 }
7881};
7882
7883bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7884 DomTreeUpdater *DTU) {
// Deduplicate switch arms: if two case destinations are single-instruction
// blocks with identical unconditional branches (and identical PHI
// contributions in the shared successor), redirect the switch so only one
// survives. Returns true if any successor was rewired.
7885 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7886 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7887 // an entire PHI at once after the loop, opposed to calling
7888 // getIncomingValueForBlock inside this loop, since each call to
7889 // getIncomingValueForBlock is O(|Preds|).
7895 Cases.reserve(SI->getNumSuccessors());
7896
7897 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7898 BasicBlock *BB = SI->getSuccessor(I);
7899
7900 // FIXME: Support more than just a single BranchInst. One way we could do
7901 // this is by taking a hashing approach of all insts in BB.
7902 if (BB->size() != 1)
7903 continue;
7904
7905 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7906 // on other kinds of terminators. We decide to only support unconditional
7907 // branches for now for compile time reasons.
7908 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7909 if (!BI || BI->isConditional())
7910 continue;
7911
// Already wrapped this block for an earlier case value: just record the
// extra successor index so it gets rewired too.
7912 if (!Seen.insert(BB).second) {
7913 auto It = BBToSuccessorIndexes.find(BB);
7914 if (It != BBToSuccessorIndexes.end())
7915 It->second.emplace_back(I);
7916 continue;
7917 }
7918
7919 // FIXME: This case needs some extra care because the terminators other than
7920 // SI need to be updated. For now, consider only backedges to the SI.
7921 if (BB->getUniquePredecessor() != SI->getParent())
7922 continue;
7923
7924 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7925 for (BasicBlock *Succ : BI->successors())
7927
7928 // Add the successor only if not previously visited.
7929 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7930 BBToSuccessorIndexes[BB].emplace_back(I);
7931 }
7932
7933 // Precompute a data structure to improve performance of isEqual for
7934 // SwitchSuccWrapper.
7935 PhiPredIVs.reserve(Phis.size());
7936 for (PHINode *Phi : Phis) {
7937 auto &IVs =
7938 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7939 for (auto &IV : Phi->incoming_values())
7940 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7941 }
7942
7943 // Build a set such that if the SwitchSuccWrapper exists in the set and
7944 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7945 // which is not in the set should be replaced with the one in the set. If the
7946 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7947 // other SwitchSuccWrappers can check against it in the same manner. We use
7948 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7949 // around information to isEquality, getHashValue, and when doing the
7950 // replacement with better performance.
7951 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7952 ReplaceWith.reserve(Cases.size());
7953
// NOTE(review): ReplaceWith is empty at this point, so this reserve is a
// no-op; Cases.size() was presumably intended — confirm upstream.
7955 Updates.reserve(ReplaceWith.size());
7956 bool MadeChange = false;
7957 for (auto &SSW : Cases) {
7958 // SSW is a candidate for simplification. If we find a duplicate BB,
7959 // replace it.
7960 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7961 if (!Inserted) {
7962 // We know that SI's parent BB no longer dominates the old case successor
7963 // since we are making it dead.
7964 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7965 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7966 for (unsigned Idx : Successors)
7967 SI->setSuccessor(Idx, (*It)->Dest);
7968 MadeChange = true;
7969 }
7970 }
7971
7972 if (DTU)
7973 DTU->applyUpdates(Updates);
7974
7975 return MadeChange;
7976}
7977
7978bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7979 BasicBlock *BB = SI->getParent();
7980
7981 if (isValueEqualityComparison(SI)) {
7982 // If we only have one predecessor, and if it is a branch on this value,
7983 // see if that predecessor totally determines the outcome of this switch.
7984 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7985 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7986 return requestResimplify();
7987
7988 Value *Cond = SI->getCondition();
7989 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7990 if (simplifySwitchOnSelect(SI, Select))
7991 return requestResimplify();
7992
7993 // If the block only contains the switch, see if we can fold the block
7994 // away into any preds.
7995 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7996 if (foldValueComparisonIntoPredecessors(SI, Builder))
7997 return requestResimplify();
7998 }
7999
8000 // Try to transform the switch into an icmp and a branch.
8001 // The conversion from switch to comparison may lose information on
8002 // impossible switch values, so disable it early in the pipeline.
8003 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8004 return requestResimplify();
8005
8006 // Remove unreachable cases.
8007 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8008 return requestResimplify();
8009
8010 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8011 return requestResimplify();
8012
8013 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8014 return requestResimplify();
8015
8016 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8017 return requestResimplify();
8018
8019 // The conversion of switches to arithmetic or lookup table is disabled in
8020 // the early optimization pipeline, as it may lose information or make the
8021 // resulting code harder to analyze.
8022 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8023 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8024 Options.ConvertSwitchToLookupTable))
8025 return requestResimplify();
8026
8027 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8028 return requestResimplify();
8029
8030 if (reduceSwitchRange(SI, Builder, DL, TTI))
8031 return requestResimplify();
8032
8033 if (HoistCommon &&
8034 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8035 return requestResimplify();
8036
8037 if (simplifyDuplicateSwitchArms(SI, DTU))
8038 return requestResimplify();
8039
8040 return false;
8041}
8042
8043bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Simplify an indirectbr: drop duplicate/non-address-taken destinations,
// degenerate to unreachable (0 dests) or a direct branch (1 dest), refit
// branch weights, and handle an address that is a select.
8044 BasicBlock *BB = IBI->getParent();
8045 bool Changed = false;
8046 SmallVector<uint32_t> BranchWeights;
8047 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8048 extractBranchWeights(*IBI, BranchWeights);
8049
// Aggregate per-target weights up front, since duplicate destinations are
// merged below and their weights must be combined.
8050 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8051 if (HasBranchWeights)
8052 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8053 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8054
8055 // Eliminate redundant destinations.
8056 SmallPtrSet<Value *, 8> Succs;
8057 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8058 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8059 BasicBlock *Dest = IBI->getDestination(I);
// A destination is removable if its address is never taken (it can never be
// the indirectbr target) or it already appears earlier in the list.
8060 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8061 if (!Dest->hasAddressTaken())
8062 RemovedSuccs.insert(Dest);
8063 Dest->removePredecessor(BB);
8064 IBI->removeDestination(I);
// Re-examine this index after the in-place removal.
8065 --I;
8066 --E;
8067 Changed = true;
8068 }
8069 }
8070
8071 if (DTU) {
8072 std::vector<DominatorTree::UpdateType> Updates;
8073 Updates.reserve(RemovedSuccs.size());
8074 for (auto *RemovedSucc : RemovedSuccs)
8075 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8076 DTU->applyUpdates(Updates);
8077 }
8078
8079 if (IBI->getNumDestinations() == 0) {
8080 // If the indirectbr has no successors, change it to unreachable.
8081 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8083 return true;
8084 }
8085
8086 if (IBI->getNumDestinations() == 1) {
8087 // If the indirectbr has one successor, change it to a direct branch.
8090 return true;
8091 }
// Re-emit weights for the surviving destinations from the aggregated map.
8092 if (HasBranchWeights) {
8093 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8094 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8095 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8096 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8097 }
8098 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8099 if (simplifyIndirectBrOnSelect(IBI, SI))
8100 return requestResimplify();
8101 }
8102 return Changed;
8103}
8104
8105/// Given an block with only a single landing pad and a unconditional branch
8106/// try to find another basic block which this one can be merged with. This
8107/// handles cases where we have multiple invokes with unique landing pads, but
8108/// a shared handler.
8109///
8110/// We specifically choose to not worry about merging non-empty blocks
8111/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8112/// practice, the optimizer produces empty landing pad blocks quite frequently
8113/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8114/// sinking in this file)
8115///
8116/// This is primarily a code size optimization. We need to avoid performing
8117/// any transform which might inhibit optimization (such as our ability to
8118/// specialize a particular handler via tail commoning). We do this by not
8119/// merging any blocks which require us to introduce a phi. Since the same
8120/// values are flowing through both blocks, we don't lose any ability to
8121/// specialize. If anything, we make such specialization more likely.
8122///
8123/// TODO - This transformation could remove entries from a phi in the target
8124/// block when the inputs in the phi are the same for the two blocks being
8125/// merged. In some cases, this could result in removal of the PHI entirely.
8127 BasicBlock *BB, DomTreeUpdater *DTU) {
8128 auto Succ = BB->getUniqueSuccessor();
8129 assert(Succ);
8130 // If there's a phi in the successor block, we'd likely have to introduce
8131 // a phi into the merged landing pad block.
8132 if (isa<PHINode>(*Succ->begin()))
8133 return false;
8134
// Scan the other predecessors of our successor for a block that is an exact
// duplicate of ours: identical landing pad followed by an identical branch.
8135 for (BasicBlock *OtherPred : predecessors(Succ)) {
8136 if (BB == OtherPred)
8137 continue;
8138 BasicBlock::iterator I = OtherPred->begin();
8140 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8141 continue;
8142 ++I;
8144 if (!BI2 || !BI2->isIdenticalTo(BI))
8145 continue;
8146
8147 std::vector<DominatorTree::UpdateType> Updates;
8148
8149 // We've found an identical block. Update our predecessors to take that
8150 // path instead and make ourselves dead.
8152 for (BasicBlock *Pred : UniquePreds) {
// Landing pad predecessors are invokes whose unwind edge targets BB.
8153 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8154 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8155 "unexpected successor");
8156 II->setUnwindDest(OtherPred);
8157 if (DTU) {
8158 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8159 Updates.push_back({DominatorTree::Delete, Pred, BB});
8160 }
8161 }
8162
// BB is now dead: detach it from its successors' PHIs and CFG edges.
8164 for (BasicBlock *Succ : UniqueSuccs) {
8165 Succ->removePredecessor(BB);
8166 if (DTU)
8167 Updates.push_back({DominatorTree::Delete, BB, Succ});
8168 }
8169
// Replace our branch with unreachable; later cleanup deletes the block.
8170 IRBuilder<> Builder(BI);
8171 Builder.CreateUnreachable();
8172 BI->eraseFromParent();
8173 if (DTU)
8174 DTU->applyUpdates(Updates);
8175 return true;
8176 }
8177 return false;
8178}
8179
8180bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8181 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8182 : simplifyCondBranch(Branch, Builder);
8183}
8184
8185bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
8186 IRBuilder<> &Builder) {
// Simplify a block ending in an unconditional branch: fold empty blocks into
// successors, merge trivial icmp-only blocks, merge duplicate landing pads,
// and fold the branch into a common destination. Returns true on change.
8187 BasicBlock *BB = BI->getParent();
8188 BasicBlock *Succ = BI->getSuccessor(0);
8189
8190 // If the Terminator is the only non-phi instruction, simplify the block.
8191 // If LoopHeader is provided, check if the block or its successor is a loop
8192 // header. (This is for early invocations before loop simplify and
8193 // vectorization to keep canonical loop forms for nested loops. These blocks
8194 // can be eliminated when the pass is invoked later in the back-end.)
8195 // Note that if BB has only one predecessor then we do not introduce new
8196 // backedge, so we can eliminate BB.
8197 bool NeedCanonicalLoop =
8198 Options.NeedCanonicalLoop &&
8199 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8200 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
8202 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8203 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8204 return true;
8205
8206 // If the only instruction in the block is a seteq/setne comparison against a
8207 // constant, try to simplify the block.
8208 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
8209 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8210 ++I;
// The icmp must be immediately followed by the terminator (no other code).
8211 if (I->isTerminator() &&
8212 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8213 return true;
8214 }
8215
8216 // See if we can merge an empty landing pad block with another which is
8217 // equivalent.
8218 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8219 ++I;
8220 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8221 return true;
8222 }
8223
8224 // If this basic block is ONLY a compare and a branch, and if a predecessor
8225 // branches to us and our successor, fold the comparison into the
8226 // predecessor and use logical operations to update the incoming value
8227 // for PHI nodes in common successor.
8228 if (Options.SpeculateBlocks &&
8229 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8230 Options.BonusInstThreshold))
8231 return requestResimplify();
8232 return false;
8233}
8234
// Returns the block that is the single predecessor of every predecessor of
// BB, or nullptr if no such unique "grandparent" exists (including when some
// predecessor has multiple or no predecessors of its own). Used to detect
// diamond CFG patterns rooted at a single source block.
8236 BasicBlock *PredPred = nullptr;
8237 for (auto *P : predecessors(BB)) {
8238 BasicBlock *PPred = P->getSinglePredecessor();
// Bail out if P has no single predecessor, or it disagrees with a
// previously seen one.
8239 if (!PPred || (PredPred && PredPred != PPred))
8240 return nullptr;
8241 PredPred = PPred;
8242 }
8243 return PredPred;
8244}
8245
8246/// Fold the following pattern:
8247/// bb0:
8248/// br i1 %cond1, label %bb1, label %bb2
8249/// bb1:
8250/// br i1 %cond2, label %bb3, label %bb4
8251/// bb2:
8252/// br i1 %cond2, label %bb4, label %bb3
8253/// bb3:
8254/// ...
8255/// bb4:
8256/// ...
8257/// into
8258/// bb0:
8259/// %cond = xor i1 %cond1, %cond2
8260/// br i1 %cond, label %bb4, label %bb3
8261/// bb3:
8262/// ...
8263/// bb4:
8264/// ...
8265/// NOTE: %cond2 always dominates the terminator of bb0.
8267 BasicBlock *BB = BI->getParent();
8268 BasicBlock *BB1 = BI->getSuccessor(0);
8269 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor is a block containing nothing but a conditional
// branch, with no self-loops back into the pattern and no PHIs in its
// targets (PHIs would need incoming-value surgery).
8270 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8271 if (Succ == BB)
8272 return false;
8273 if (&Succ->front() != Succ->getTerminator())
8274 return false;
8275 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8276 if (!SuccBI || !SuccBI->isConditional())
8277 return false;
8278 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8279 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8280 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8281 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8282 };
8283 BranchInst *BB1BI, *BB2BI;
8284 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8285 return false;
8286
// Both inner branches must test the same condition with mirrored targets,
// exactly matching the pattern in the header comment.
8287 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8288 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8289 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8290 return false;
8291
8292 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8293 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8294 IRBuilder<> Builder(BI);
// cond1 != cond2 selects bb4; equality selects bb3 — hence the xor.
8295 BI->setCondition(
8296 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8297 BB1->removePredecessor(BB);
8298 BI->setSuccessor(0, BB4);
8299 BB2->removePredecessor(BB);
8300 BI->setSuccessor(1, BB3);
8301 if (DTU) {
8303 Updates.push_back({DominatorTree::Delete, BB, BB1});
8304 Updates.push_back({DominatorTree::Insert, BB, BB4});
8305 Updates.push_back({DominatorTree::Delete, BB, BB2});
8306 Updates.push_back({DominatorTree::Insert, BB, BB3});
8307
8308 DTU->applyUpdates(Updates);
8309 }
// Recompute branch weights for the merged branch. Missing metadata on any of
// the three branches defaults to 1:1; if at least one branch had real
// weights, the combined product weights are emitted.
8310 bool HasWeight = false;
8311 uint64_t BBTWeight, BBFWeight;
8312 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8313 HasWeight = true;
8314 else
8315 BBTWeight = BBFWeight = 1;
8316 uint64_t BB1TWeight, BB1FWeight;
8317 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8318 HasWeight = true;
8319 else
8320 BB1TWeight = BB1FWeight = 1;
8321 uint64_t BB2TWeight, BB2FWeight;
8322 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8323 HasWeight = true;
8324 else
8325 BB2TWeight = BB2FWeight = 1;
8326 if (HasWeight) {
// P(bb4) = P(bb1)*P(bb1->bb4) + P(bb2)*P(bb2->bb4); similarly for bb3.
8327 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8328 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8329 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8330 /*ElideAllZero=*/true);
8331 }
8332 return true;
8333}
8334
8335bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// Driver for all conditional-branch simplifications: equality-comparison
// folding, icmp-chain -> switch, dominating-condition implication, common
// code hoisting, speculation, value threading, condbr-to-condbr folding,
// diamond store merging, and nested condbr merging. Returns true on change.
8336 assert(
8338 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8339 "Tautological conditional branch should have been eliminated already.");
8340
8341 BasicBlock *BB = BI->getParent();
8342 if (!Options.SimplifyCondBranch ||
8343 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8344 return false;
8345
8346 // Conditional branch
8347 if (isValueEqualityComparison(BI)) {
8348 // If we only have one predecessor, and if it is a branch on this value,
8349 // see if that predecessor totally determines the outcome of this
8350 // switch.
8351 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8352 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8353 return requestResimplify();
8354
8355 // This block must be empty, except for the setcond inst, if it exists.
8356 // Ignore dbg and pseudo intrinsics.
8357 auto I = BB->instructionsWithoutDebug(true).begin();
8358 if (&*I == BI) {
8359 if (foldValueComparisonIntoPredecessors(BI, Builder))
8360 return requestResimplify();
8361 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8362 ++I;
8363 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8364 return requestResimplify();
8365 }
8366 }
8367
8368 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8369 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8370 return true;
8371
8372 // If this basic block has dominating predecessor blocks and the dominating
8373 // blocks' conditions imply BI's condition, we know the direction of BI.
8374 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8375 if (Imp) {
8376 // Turn this into a branch on constant.
8377 auto *OldCond = BI->getCondition();
8378 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8379 : ConstantInt::getFalse(BB->getContext());
8380 BI->setCondition(TorF);
8382 return requestResimplify();
8383 }
8384
8385 // If this basic block is ONLY a compare and a branch, and if a predecessor
8386 // branches to us and one of our successors, fold the comparison into the
8387 // predecessor and use logical operations to pick the right destination.
8388 if (Options.SpeculateBlocks &&
8389 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8390 Options.BonusInstThreshold))
8391 return requestResimplify();
8392
8393 // We have a conditional branch to two blocks that are only reachable
8394 // from BI. We know that the condbr dominates the two blocks, so see if
8395 // there is any identical code in the "then" and "else" blocks. If so, we
8396 // can hoist it up to the branching block.
8397 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8398 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8399 if (HoistCommon &&
8400 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8401 return requestResimplify();
8402
// Both successors are exclusively ours: consider hoisting cheap loads and
// stores as conditional-faulting accesses when profitable.
8403 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8404 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8405 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8406 auto CanSpeculateConditionalLoadsStores = [&]() {
8407 for (auto *Succ : successors(BB)) {
8408 for (Instruction &I : *Succ) {
8409 if (I.isTerminator()) {
8410 if (I.getNumSuccessors() > 1)
8411 return false;
8412 continue;
8413 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8414 SpeculatedConditionalLoadsStores.size() ==
8416 return false;
8417 }
8418 SpeculatedConditionalLoadsStores.push_back(&I);
8419 }
8420 }
8421 return !SpeculatedConditionalLoadsStores.empty();
8422 };
8423
8424 if (CanSpeculateConditionalLoadsStores()) {
8425 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8426 std::nullopt, nullptr);
8427 return requestResimplify();
8428 }
8429 }
8430 } else {
8431 // If Successor #1 has multiple preds, we may be able to conditionally
8432 // execute Successor #0 if it branches to Successor #1.
8433 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8434 if (Succ0TI->getNumSuccessors() == 1 &&
8435 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8436 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8437 return requestResimplify();
8438 }
8439 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8440 // If Successor #0 has multiple preds, we may be able to conditionally
8441 // execute Successor #1 if it branches to Successor #0.
8442 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8443 if (Succ1TI->getNumSuccessors() == 1 &&
8444 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8445 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8446 return requestResimplify();
8447 }
8448
8449 // If this is a branch on something for which we know the constant value in
8450 // predecessors (e.g. a phi node in the current block), thread control
8451 // through this block.
8452 if (foldCondBranchOnValueKnownInPredecessor(BI))
8453 return requestResimplify();
8454
8455 // Scan predecessor blocks for conditional branches.
8456 for (BasicBlock *Pred : predecessors(BB))
8457 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8458 if (PBI != BI && PBI->isConditional())
8459 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8460 return requestResimplify();
8461
8462 // Look for diamond patterns.
8463 if (MergeCondStores)
8464 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8465 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8466 if (PBI != BI && PBI->isConditional())
8467 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8468 return requestResimplify();
8469
8470 // Look for nested conditional branches.
8471 if (mergeNestedCondBranch(BI, DTU))
8472 return requestResimplify();
8473
8474 return false;
8475}
8476
8477/// Check if passing a value to an instruction will cause undefined behavior.
8478static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8479 assert(V->getType() == I->getType() && "Mismatched types");
8481 if (!C)
8482 return false;
8483
8484 if (I->use_empty())
8485 return false;
8486
8487 if (C->isNullValue() || isa<UndefValue>(C)) {
8488 // Only look at the first use we can handle, avoid hurting compile time with
8489 // long uselists
8490 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8491 auto *Use = cast<Instruction>(U.getUser());
8492 // Change this list when we want to add new instructions.
8493 switch (Use->getOpcode()) {
8494 default:
8495 return false;
8496 case Instruction::GetElementPtr:
8497 case Instruction::Ret:
8498 case Instruction::BitCast:
8499 case Instruction::Load:
8500 case Instruction::Store:
8501 case Instruction::Call:
8502 case Instruction::CallBr:
8503 case Instruction::Invoke:
8504 case Instruction::UDiv:
8505 case Instruction::URem:
8506 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8507 // implemented to avoid code complexity as it is unclear how useful such
8508 // logic is.
8509 case Instruction::SDiv:
8510 case Instruction::SRem:
8511 return true;
8512 }
8513 });
8514 if (FindUse == I->use_end())
8515 return false;
8516 auto &Use = *FindUse;
8517 auto *User = cast<Instruction>(Use.getUser());
8518 // Bail out if User is not in the same BB as I or User == I or User comes
8519 // before I in the block. The latter two can be the case if User is a
8520 // PHI node.
8521 if (User->getParent() != I->getParent() || User == I ||
8522 User->comesBefore(I))
8523 return false;
8524
8525 // Now make sure that there are no instructions in between that can alter
8526 // control flow (eg. calls)
8527 auto InstrRange =
8528 make_range(std::next(I->getIterator()), User->getIterator());
8529 if (any_of(InstrRange, [](Instruction &I) {
8531 }))
8532 return false;
8533
8534 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8536 if (GEP->getPointerOperand() == I) {
8537 // The type of GEP may differ from the type of base pointer.
8538 // Bail out on vector GEPs, as they are not handled by other checks.
8539 if (GEP->getType()->isVectorTy())
8540 return false;
8541 // The current base address is null, there are four cases to consider:
8542 // getelementptr (TY, null, 0) -> null
8543 // getelementptr (TY, null, not zero) -> may be modified
8544 // getelementptr inbounds (TY, null, 0) -> null
8545 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8546 // undefined?
8547 if (!GEP->hasAllZeroIndices() &&
8548 (!GEP->isInBounds() ||
8549 NullPointerIsDefined(GEP->getFunction(),
8550 GEP->getPointerAddressSpace())))
8551 PtrValueMayBeModified = true;
8552 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8553 }
8554
8555 // Look through return.
8556 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8557 bool HasNoUndefAttr =
8558 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8559 // Return undefined to a noundef return value is undefined.
8560 if (isa<UndefValue>(C) && HasNoUndefAttr)
8561 return true;
8562 // Return null to a nonnull+noundef return value is undefined.
8563 if (C->isNullValue() && HasNoUndefAttr &&
8564 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8565 return !PtrValueMayBeModified;
8566 }
8567 }
8568
8569 // Load from null is undefined.
8570 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8571 if (!LI->isVolatile())
8572 return !NullPointerIsDefined(LI->getFunction(),
8573 LI->getPointerAddressSpace());
8574
8575 // Store to null is undefined.
8577 if (!SI->isVolatile())
8578 return (!NullPointerIsDefined(SI->getFunction(),
8579 SI->getPointerAddressSpace())) &&
8580 SI->getPointerOperand() == I;
8581
8582 // llvm.assume(false/undef) always triggers immediate UB.
8583 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8584 // Ignore assume operand bundles.
8585 if (I == Assume->getArgOperand(0))
8586 return true;
8587 }
8588
8589 if (auto *CB = dyn_cast<CallBase>(User)) {
8590 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8591 return false;
8592 // A call to null is undefined.
8593 if (CB->getCalledOperand() == I)
8594 return true;
8595
8596 if (CB->isArgOperand(&Use)) {
8597 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8598 // Passing null to a nonnnull+noundef argument is undefined.
8600 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8601 return !PtrValueMayBeModified;
8602 // Passing undef to a noundef argument is undefined.
8603 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8604 return true;
8605 }
8606 }
8607 // Div/Rem by zero is immediate UB
8608 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8609 return true;
8610 }
8611 return false;
8612}
8613
8614/// If BB has an incoming value that will always trigger undefined behavior
8615/// (eg. null pointer dereference), remove the branch leading here.
8617 DomTreeUpdater *DTU,
8618 AssumptionCache *AC) {
8619 for (PHINode &PHI : BB->phis())
8620 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8621 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8622 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8623 Instruction *T = Predecessor->getTerminator();
8624 IRBuilder<> Builder(T);
8625 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8626 BB->removePredecessor(Predecessor);
8627 // Turn unconditional branches into unreachables and remove the dead
8628 // destination from conditional branches.
8629 if (BI->isUnconditional())
8630 Builder.CreateUnreachable();
8631 else {
8632 // Preserve guarding condition in assume, because it might not be
8633 // inferrable from any dominating condition.
8634 Value *Cond = BI->getCondition();
8635 CallInst *Assumption;
8636 if (BI->getSuccessor(0) == BB)
8637 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8638 else
8639 Assumption = Builder.CreateAssumption(Cond);
8640 if (AC)
8641 AC->registerAssumption(cast<AssumeInst>(Assumption));
8642 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8643 : BI->getSuccessor(0));
8644 }
8645 BI->eraseFromParent();
8646 if (DTU)
8647 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8648 return true;
8649 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8650 // Redirect all branches leading to UB into
8651 // a newly created unreachable block.
8652 BasicBlock *Unreachable = BasicBlock::Create(
8653 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8654 Builder.SetInsertPoint(Unreachable);
8655 // The new block contains only one instruction: Unreachable
8656 Builder.CreateUnreachable();
8657 for (const auto &Case : SI->cases())
8658 if (Case.getCaseSuccessor() == BB) {
8659 BB->removePredecessor(Predecessor);
8660 Case.setSuccessor(Unreachable);
8661 }
8662 if (SI->getDefaultDest() == BB) {
8663 BB->removePredecessor(Predecessor);
8664 SI->setDefaultDest(Unreachable);
8665 }
8666
8667 if (DTU)
8668 DTU->applyUpdates(
8669 { { DominatorTree::Insert, Predecessor, Unreachable },
8670 { DominatorTree::Delete, Predecessor, BB } });
8671 return true;
8672 }
8673 }
8674
8675 return false;
8676}
8677
8678bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8679 bool Changed = false;
8680
8681 assert(BB && BB->getParent() && "Block not embedded in function!");
8682 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8683
8684 // Remove basic blocks that have no predecessors (except the entry block)...
8685 // or that just have themself as a predecessor. These are unreachable.
8686 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8687 BB->getSinglePredecessor() == BB) {
8688 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8689 DeleteDeadBlock(BB, DTU);
8690 return true;
8691 }
8692
8693 // Check to see if we can constant propagate this terminator instruction
8694 // away...
8695 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8696 /*TLI=*/nullptr, DTU);
8697
8698 // Check for and eliminate duplicate PHI nodes in this block.
8700
8701 // Check for and remove branches that will always cause undefined behavior.
8703 return requestResimplify();
8704
8705 // Merge basic blocks into their predecessor if there is only one distinct
8706 // pred, and if there is only one distinct successor of the predecessor, and
8707 // if there are no PHI nodes.
8708 if (MergeBlockIntoPredecessor(BB, DTU))
8709 return true;
8710
8711 if (SinkCommon && Options.SinkCommonInsts)
8712 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8713 mergeCompatibleInvokes(BB, DTU)) {
8714 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8715 // so we may now how duplicate PHI's.
8716 // Let's rerun EliminateDuplicatePHINodes() first,
8717 // before foldTwoEntryPHINode() potentially converts them into select's,
8718 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8719 return true;
8720 }
8721
8722 IRBuilder<> Builder(BB);
8723
8724 if (Options.SpeculateBlocks &&
8725 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8726 // If there is a trivial two-entry PHI node in this basic block, and we can
8727 // eliminate it, do so now.
8728 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8729 if (PN->getNumIncomingValues() == 2)
8730 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8731 Options.SpeculateUnpredictables))
8732 return true;
8733 }
8734
8736 Builder.SetInsertPoint(Terminator);
8737 switch (Terminator->getOpcode()) {
8738 case Instruction::Br:
8739 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8740 break;
8741 case Instruction::Resume:
8742 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8743 break;
8744 case Instruction::CleanupRet:
8745 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8746 break;
8747 case Instruction::Switch:
8748 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8749 break;
8750 case Instruction::Unreachable:
8751 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8752 break;
8753 case Instruction::IndirectBr:
8754 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8755 break;
8756 }
8757
8758 return Changed;
8759}
8760
8761bool SimplifyCFGOpt::run(BasicBlock *BB) {
8762 bool Changed = false;
8763
8764 // Repeated simplify BB as long as resimplification is requested.
8765 do {
8766 Resimplify = false;
8767
8768 // Perform one round of simplifcation. Resimplify flag will be set if
8769 // another iteration is requested.
8770 Changed |= simplifyOnce(BB);
8771 } while (Resimplify);
8772
8773 return Changed;
8774}
8775
8778 ArrayRef<WeakVH> LoopHeaders) {
8779 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8780 Options)
8781 .run(BB);
8782}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:152
const T & front() const
front - Get the first element.
Definition ArrayRef.h:146
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:138
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1934
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1850
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:193
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, StringRef PassName)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1688
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2076
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1777
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2128
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3094
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3368
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3871
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1703
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1582
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2088
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257