LLVM 23.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
299 bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
300 bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);
301
302 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303 IRBuilder<> &Builder);
304 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
305 SelectInst *Select,
306 IRBuilder<> &Builder);
307 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
308 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
309 Instruction *TI, Instruction *I1,
310 SmallVectorImpl<Instruction *> &OtherSuccTIs,
311 ArrayRef<BasicBlock *> UniqueSuccessors);
312 bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
313 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
314 BasicBlock *TrueBB, BasicBlock *FalseBB,
315 uint32_t TrueWeight, uint32_t FalseWeight);
316 bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
317 const DataLayout &DL);
318 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
319 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
320 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321 bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);
322
323public:
324 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
325 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
326 const SimplifyCFGOptions &Opts)
327 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
328 assert((!DTU || !DTU->hasPostDomTree()) &&
329 "SimplifyCFG is not yet capable of maintaining validity of a "
330 "PostDomTree, so don't ask for it.");
331 }
332
333 bool simplifyOnce(BasicBlock *BB);
334 bool run(BasicBlock *BB);
335
336 // Helper to set Resimplify and return change indication.
337 bool requestResimplify() {
338 Resimplify = true;
339 return true;
340 }
341};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
378 EquivalenceSet->contains(IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(SI2BB)) {
401 if (!SI1Succs.count(Succ))
402 continue;
403 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
426 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
465 return false;
466
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
484 if (!BI || BI->getSuccessor() != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
495 return false;
496
497 // Overflow arithmetic instruction plus extract value are usually generated
498 // when a division is being replaced. But, in this case, the zero check may
499 // still be kept in the code. In that case it would be worth to hoist these
500 // two instruction out of the basic block. Let's treat this pattern as one
501 // single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
504 ZeroCostInstructions.insert(OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
534 // Normal constant int.
536 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
550 return ConstantInt::get(IntPtrTy, 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(I, m_Not(m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(I, m_NUWTrunc(m_Value(Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
657 (C = getConstantInt(I->getOperand(1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of an unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(ICI->getOperand(0),
709 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(C);
717 Vals.push_back(
718 ConstantInt::get(C->getContext(),
719 C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(ICI->getOperand(0),
732 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(C);
740 Vals.push_back(ConstantInt::get(C->getContext(),
741 C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(0);
763 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
764 Span = Span.subtract(*RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
801 IsEq = true;
802 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
816 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
817 if (Visited.insert(Op1).second)
818 DFT.push_back(Op1);
819 if (Visited.insert(Op0).second)
820 DFT.push_back(Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be match as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
851 Cond = dyn_cast<Instruction>(SI->getCondition());
852 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
853 Cond = dyn_cast<Instruction>(BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
855 Cond = dyn_cast<Instruction>(IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
861}
862
863/// Return true if the specified terminator checks
864/// to see if a value is equal to constant integer value.
865Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
866 Value *CV = nullptr;
867 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
868 // Do not permit merging of large switch instructions into their
869 // predecessors unless there is only one predecessor.
870 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
871 CV = SI->getCondition();
872 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI))
873 if (BI->getCondition()->hasOneUse()) {
874 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
875 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
876 CV = ICI->getOperand(0);
877 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
878 if (Trunc->hasNoUnsignedWrap())
879 CV = Trunc->getOperand(0);
880 }
881 }
882
883 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
884 if (CV) {
885 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
886 Value *Ptr = PTII->getPointerOperand();
887 if (DL.hasUnstableRepresentation(Ptr->getType()))
888 return CV;
889 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890 CV = Ptr;
891 }
892 }
893 return CV;
894}
895
896/// Given a value comparison instruction,
897/// decode all of the 'cases' that it represents and return the 'default' block.
/// Decoded (constant, destination) pairs are appended to \p Cases. \p TI must
/// be a switch or a conditional branch whose condition is an icmp or a trunc;
/// anything else trips the cast<> calls below.
898BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
 // A switch maps directly: one entry per case, default is the switch default.
901 Cases.reserve(SI->getNumCases());
902 for (auto Case : SI->cases())
903 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
904 Case.getCaseSuccessor()));
905 return SI->getDefaultDest();
906 }
907
908 CondBrInst *BI = cast<CondBrInst>(TI);
909 Value *Cond = BI->getCondition();
910 ICmpInst::Predicate Pred;
911 ConstantInt *C;
912 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
913 Pred = ICI->getPredicate();
914 C = getConstantInt(ICI->getOperand(1), DL);
915 } else {
 // A branch on a trunc is modelled as "operand != 0".
916 Pred = ICmpInst::ICMP_NE;
917 auto *Trunc = cast<TruncInst>(Cond);
918 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
919 }
 // The single case is taken when the value equals C: successor 0 for an EQ
 // predicate, successor 1 for NE. The remaining successor is the default.
920 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
921 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
922 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
923}
924
925/// Given a vector of bb/value pairs, remove any entries
926/// in the list that match the specified block.
// NOTE(review): listing line 928 (the function name and its first parameter)
// is absent from this copy; callers below invoke this as
// eliminateBlockCases(Block, Cases). Restore the line from the upstream file.
927static void
929 std::vector<ValueEqualityComparisonCase> &Cases) {
 // Relies on ValueEqualityComparisonCase being comparable against a block;
 // that comparison is defined outside this part of the file.
930 llvm::erase(Cases, BB);
931}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
/// Side effect: unless the smaller list is empty or a single-element match is
/// found first, both \p C1 and \p C2 are sorted in place.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(V1, V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
 // No match: control falls through to the sort-and-merge below, which
 // will also report no overlap.
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
 // The merge compares the ConstantInt pointers themselves; this presumably
 // matches the ordering array_pod_sort applies via the element's operator<
 // (defined outside this view) -- TODO confirm.
953 array_pod_sort(V1->begin(), V1->end());
954 array_pod_sort(V2->begin(), V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
967/// If TI is known to be a terminator instruction and its block is known to
968/// only have a single predecessor block, check to see if that predecessor is
969/// also a value comparison with the same value, and if that comparison
970/// determines the outcome of this comparison. If so, simplify TI. This does a
971/// very limited form of jump threading.
/// Returns true if a replacement branch was created or switch cases were
/// pruned; false if nothing could be simplified.
972bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
973 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
974 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
975 if (!PredVal)
976 return false; // Not a value comparison in predecessor.
977
978 Value *ThisVal = isValueEqualityComparison(TI);
979 assert(ThisVal && "This isn't a value comparison!!");
980 if (ThisVal != PredVal)
981 return false; // Different predicates.
982
983 // TODO: Preserve branch weight metadata, similarly to how
984 // foldValueComparisonIntoPredecessors preserves it.
985
986 // Find out information about when control will move from Pred to TI's block.
987 std::vector<ValueEqualityComparisonCase> PredCases;
988 BasicBlock *PredDef =
989 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
990 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
991
992 // Find information about how control leaves this block.
993 std::vector<ValueEqualityComparisonCase> ThisCases;
994 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
995 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
996
997 // If TI's block is the default block from Pred's comparison, potentially
998 // simplify TI based on this knowledge.
999 if (PredDef == TI->getParent()) {
1000 // If we are here, we know that the value is none of those cases listed in
1001 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1002 // can simplify TI.
1003 if (!valuesOverlap(PredCases, ThisCases))
1004 return false;
1005
1006 if (isa<CondBrInst>(TI)) {
1007 // Okay, one of the successors of this condbr is dead. Convert it to a
1008 // uncond br.
1009 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1010 // Insert the new branch.
1011 Instruction *NI = Builder.CreateBr(ThisDef);
1012 (void)NI;
1013
1014 // Remove PHI node entries for the dead edge.
 // PredDef equals TI's own block in this branch (checked above).
1015 ThisCases[0].Dest->removePredecessor(PredDef);
1016
1017 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1018 << "Through successor TI: " << *TI << "Leaving: " << *NI
1019 << "\n");
1020
 // NOTE(review): listing line 1021 is absent from this copy. The visible
 // text never removes the old terminator TI in this branch -- restore the
 // missing statement from the upstream file.
1022
1023 if (DTU)
1024 DTU->applyUpdates(
1025 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1026
1027 return true;
1028 }
1029
1030 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1031 // Okay, TI has cases that are statically dead, prune them away.
1032 SmallPtrSet<Constant *, 16> DeadCases;
1033 for (const ValueEqualityComparisonCase &Case : PredCases)
1034 DeadCases.insert(Case.Value);
1035
1036 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1037 << "Through successor TI: " << *TI);
1038
 // Per-successor count of cases that survive pruning; only maintained when
 // a DomTreeUpdater is present. Cases are walked from last to first.
1039 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1040 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1041 --i;
1042 auto *Successor = i->getCaseSuccessor();
1043 if (DTU)
1044 ++NumPerSuccessorCases[Successor];
1045 if (DeadCases.count(i->getCaseValue())) {
1046 Successor->removePredecessor(PredDef);
1047 SI.removeCase(i);
1048 if (DTU)
1049 --NumPerSuccessorCases[Successor];
1050 }
1051 }
1052
1053 if (DTU) {
 // A successor whose surviving case count dropped to zero gets an edge
 // deletion queued for the dominator tree.
1054 std::vector<DominatorTree::UpdateType> Updates;
1055 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1056 if (I.second == 0)
1057 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1058 DTU->applyUpdates(Updates);
1059 }
1060
1061 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1062 return true;
1063 }
1064
1065 // Otherwise, TI's block must correspond to some matched value. Find out
1066 // which value (or set of values) this is.
1067 ConstantInt *TIV = nullptr;
1068 BasicBlock *TIBB = TI->getParent();
1069 for (const auto &[Value, Dest] : PredCases)
1070 if (Dest == TIBB) {
1071 if (TIV)
1072 return false; // Cannot handle multiple values coming to this block.
1073 TIV = Value;
1074 }
1075 assert(TIV && "No edge from pred to succ?");
1076
1077 // Okay, we found the one constant that our value can be if we get into TI's
1078 // BB. Find out which successor will unconditionally be branched to.
1079 BasicBlock *TheRealDest = nullptr;
1080 for (const auto &[Value, Dest] : ThisCases)
1081 if (Value == TIV) {
1082 TheRealDest = Dest;
1083 break;
1084 }
1085
1086 // If not handled by any explicit cases, it is handled by the default case.
1087 if (!TheRealDest)
1088 TheRealDest = ThisDef;
1089
1090 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1091
1092 // Remove PHI node entries for dead edges.
 // CheckEdge spares exactly one edge to TheRealDest: the first time it is
 // seen it is kept (and CheckEdge cleared); any further duplicate edges to
 // TheRealDest have their PHI entries removed like the dead ones.
1093 BasicBlock *CheckEdge = TheRealDest;
1094 for (BasicBlock *Succ : successors(TIBB))
1095 if (Succ != CheckEdge) {
1096 if (Succ != TheRealDest)
1097 RemovedSuccs.insert(Succ);
1098 Succ->removePredecessor(TIBB);
1099 } else
1100 CheckEdge = nullptr;
1101
1102 // Insert the new branch.
1103 Instruction *NI = Builder.CreateBr(TheRealDest);
1104 (void)NI;
1105
1106 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1107 << "Through successor TI: " << *TI << "Leaving: " << *NI
1108 << "\n");
1109
 // NOTE(review): listing line 1110 is absent from this copy; as above, the
 // visible text does not remove TI here -- restore from the upstream file.
1111 if (DTU) {
1112 SmallVector<DominatorTree::UpdateType, 2> Updates;
1113 Updates.reserve(RemovedSuccs.size());
1114 for (auto *RemovedSucc : RemovedSuccs)
1115 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1116 DTU->applyUpdates(Updates);
1117 }
1118 return true;
1119}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
/// The ordering is an unsigned less-than (`ult`) on the constants' values.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
// NOTE(review): listing line 1134 (return type, function name and the first
// parameter, which the body refers to as P1) is absent from this copy.
// Three-way comparator over pointers to ConstantInt pointers: yields 0 when
// both refer to the same ConstantInt object, 1 when the left value is
// unsigned-less than the right, and -1 otherwise. Note the sign: a smaller
// left-hand value produces a positive result.
1135 ConstantInt *const *P2) {
1136 const ConstantInt *LHS = *P1;
1137 const ConstantInt *RHS = *P2;
1138 if (LHS == RHS)
1139 return 0;
1140 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1141}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
/// TI must carry !prof metadata (asserted below).
// NOTE(review): listing line 1146 (the function name and the TI parameter) is
// absent from this copy; callers below invoke this as getBranchWeights(TI, W).
1147 SmallVectorImpl<uint64_t> &Weights) {
1148 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1149 assert(MD && "Invalid branch-weight metadata");
1150 extractFromBranchWeightMD64(MD, Weights);
1151
1152 // If TI is a conditional eq, the default case is the false case,
1153 // and the corresponding branch-weight data is at index 2. We swap the
1154 // default weight to be the first entry.
1155 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
1156 assert(Weights.size() == 2);
1157 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
 // Conditions that are not an icmp keep the extracted order.
1158 if (!ICI)
1159 return;
1160
1161 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162 std::swap(Weights.front(), Weights.back());
1163 }
1164}
1165
// NOTE(review): listing line 1166 (return type and function name) is absent
// from this copy.
// Clones every non-terminator instruction of BB in front of PredBlock's
// terminator, records original -> clone in VMap, and redirects PHI uses that
// flow in from PredBlock to the clones.
1167 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1168 Instruction *PTI = PredBlock->getTerminator();
1169
1170 // If we have bonus instructions, clone them into the predecessor block.
1171 // Note that there may be multiple predecessor blocks, so we cannot move
1172 // bonus instructions to a predecessor block.
1173 for (Instruction &BonusInst : *BB) {
1174 if (BonusInst.isTerminator())
1175 continue;
1176
1177 Instruction *NewBonusInst = BonusInst.clone();
1178
1179 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1180 // Unless the instruction has the same !dbg location as the original
1181 // branch, drop it. When we fold the bonus instructions we want to make
1182 // sure we reset their debug locations in order to avoid stepping on
1183 // dead code caused by folding dead branches.
1184 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1185 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1186 mapAtomInstance(DL, VMap);
1187 }
1188
 // NOTE(review): the call below is cut off -- listing line 1190 (its
 // remaining arguments) is absent from this copy.
1189 RemapInstruction(NewBonusInst, VMap,
1191
1192 // If we speculated an instruction, we need to drop any metadata that may
1193 // result in undefined behavior, as the metadata might have been valid
1194 // only given the branch precondition.
1195 // Similarly strip attributes on call parameters that may cause UB in
1196 // location the call is moved to.
1197 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1198
1199 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1200 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
 // NOTE(review): the call below is cut off -- listing line 1202 (its
 // remaining arguments) is absent from this copy.
1201 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1203
 // The clone takes over the original's name; the original is renamed with
 // an ".old" suffix.
1204 NewBonusInst->takeName(&BonusInst);
1205 BonusInst.setName(NewBonusInst->getName() + ".old");
1206 VMap[&BonusInst] = NewBonusInst;
1207
1208 // Update (liveout) uses of bonus instructions,
1209 // now that the bonus instruction has been cloned into predecessor.
1210 // Note that we expect to be in a block-closed SSA form for this to work!
1211 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1212 auto *UI = cast<Instruction>(U.getUser());
1213 auto *PN = dyn_cast<PHINode>(UI);
1214 if (!PN) {
1215 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1216 "If the user is not a PHI node, then it should be in the same "
1217 "block as, and come after, the original bonus instruction.");
1218 continue; // Keep using the original bonus instruction.
1219 }
1220 // Is this the block-closed SSA form PHI node?
1221 if (PN->getIncomingBlock(U) == BB)
1222 continue; // Great, keep using the original bonus instruction.
1223 // The only other alternative is an "use" when coming from
1224 // the predecessor block - here we should refer to the cloned bonus instr.
1225 assert(PN->getIncomingBlock(U) == PredBlock &&
1226 "Not in block-closed SSA form?");
1227 U.set(NewBonusInst);
1228 }
1229 }
1230
1231 // Key Instructions: We may have propagated atom info into the pred. If the
1232 // pred's terminator already has atom info do nothing as merging would drop
1233 // one atom group anyway. If it doesn't, propagate the remapped atom group
1234 // from BB's terminator.
1235 if (auto &PredDL = PTI->getDebugLoc()) {
1236 auto &DL = BB->getTerminator()->getDebugLoc();
1237 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1238 PredDL.isSameSourceLocation(DL)) {
1239 PTI->setDebugLoc(DL);
1240 RemapSourceAtom(PTI, VMap);
1241 }
1242 }
1243}
1244
// Folds the value comparison TI (terminator of BB) into the value comparison
// PTI (terminator of a predecessor Pred): a single switch on CV is built in
// front of PTI whose cases combine both comparisons, PHI entries of new
// successors are updated, branch weights are merged when either terminator
// carries them, and dominator-tree updates are queued when DTU is set.
// CV is taken by reference and is replaced by a ptrtoint of itself when it
// has pointer type. The only return visible here yields true.
1245bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1246 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1247 BasicBlock *BB = TI->getParent();
1248 BasicBlock *Pred = PTI->getParent();
1249
 // NOTE(review): listing line 1250 is absent from this copy. `Updates` is
 // used below without a visible declaration, so it is presumably declared on
 // the missing line -- restore from the upstream file.
1251
1252 // Figure out which 'cases' to copy from SI to PSI.
1253 std::vector<ValueEqualityComparisonCase> BBCases;
1254 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1255
1256 std::vector<ValueEqualityComparisonCase> PredCases;
1257 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1258
1259 // Based on whether the default edge from PTI goes to BB or not, fill in
1260 // PredCases and PredDefault with the new switch cases we would like to
1261 // build.
 // NewSuccessors counts, per block, how many new edges Pred gains to it.
1262 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1263
1264 // Update the branch weight metadata along the way
 // Layout of Weights/SuccWeights: index 0 is the default destination, index
 // i + 1 belongs to case i of the corresponding case list.
1265 SmallVector<uint64_t, 8> Weights;
1266 bool PredHasWeights = hasBranchWeightMD(*PTI);
1267 bool SuccHasWeights = hasBranchWeightMD(*TI);
1268
1269 if (PredHasWeights) {
1270 getBranchWeights(PTI, Weights);
1271 // branch-weight metadata is inconsistent here.
1272 if (Weights.size() != 1 + PredCases.size())
1273 PredHasWeights = SuccHasWeights = false;
1274 } else if (SuccHasWeights)
1275 // If there are no predecessor weights but there are successor weights,
1276 // populate Weights with 1, which will later be scaled to the sum of
1277 // successor's weights
1278 Weights.assign(1 + PredCases.size(), 1);
1279
1280 SmallVector<uint64_t, 8> SuccWeights;
1281 if (SuccHasWeights) {
1282 getBranchWeights(TI, SuccWeights);
1283 // branch-weight metadata is inconsistent here.
1284 if (SuccWeights.size() != 1 + BBCases.size())
1285 PredHasWeights = SuccHasWeights = false;
1286 } else if (PredHasWeights)
1287 SuccWeights.assign(1 + BBCases.size(), 1);
1288
1289 if (PredDefault == BB) {
1290 // If this is the default destination from PTI, only the edges in TI
1291 // that don't occur in PTI, or that branch to BB will be activated.
1292 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
 // Removal below is swap-with-last + pop_back, so i and e are both stepped
 // back to revisit the element swapped into slot i.
1293 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1294 if (PredCases[i].Dest != BB)
1295 PTIHandled.insert(PredCases[i].Value);
1296 else {
1297 // The default destination is BB, we don't need explicit targets.
1298 std::swap(PredCases[i], PredCases.back());
1299
1300 if (PredHasWeights || SuccHasWeights) {
1301 // Increase weight for the default case.
1302 Weights[0] += Weights[i + 1];
1303 std::swap(Weights[i + 1], Weights.back());
1304 Weights.pop_back();
1305 }
1306
1307 PredCases.pop_back();
1308 --i;
1309 --e;
1310 }
1311
1312 // Reconstruct the new switch statement we will be building.
1313 if (PredDefault != BBDefault) {
1314 PredDefault->removePredecessor(Pred);
1315 if (DTU && PredDefault != BB)
1316 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1317 PredDefault = BBDefault;
1318 ++NewSuccessors[BBDefault];
1319 }
1320
 // Entries of Weights below index CasesFromPred originate from PTI.
1321 unsigned CasesFromPred = Weights.size();
1322 uint64_t ValidTotalSuccWeight = 0;
1323 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1324 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1325 PredCases.push_back(BBCases[i]);
1326 ++NewSuccessors[BBCases[i].Dest];
1327 if (SuccHasWeights || PredHasWeights) {
1328 // The default weight is at index 0, so weight for the ith case
1329 // should be at index i+1. Scale the cases from successor by
1330 // PredDefaultWeight (Weights[0]).
1331 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1332 ValidTotalSuccWeight += SuccWeights[i + 1];
1333 }
1334 }
1335
1336 if (SuccHasWeights || PredHasWeights) {
1337 ValidTotalSuccWeight += SuccWeights[0];
1338 // Scale the cases from predecessor by ValidTotalSuccWeight.
1339 for (unsigned i = 1; i < CasesFromPred; ++i)
1340 Weights[i] *= ValidTotalSuccWeight;
1341 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1342 Weights[0] *= SuccWeights[0];
1343 }
1344 } else {
1345 // If this is not the default destination from PSI, only the edges
1346 // in SI that occur in PSI with a destination of BB will be
1347 // activated.
1348 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
 // Remembers the weight each removed PTI case carried, keyed by constant.
1349 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1350 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1351 if (PredCases[i].Dest == BB) {
1352 PTIHandled.insert(PredCases[i].Value);
1353
1354 if (PredHasWeights || SuccHasWeights) {
1355 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1356 std::swap(Weights[i + 1], Weights.back());
1357 Weights.pop_back();
1358 }
1359
1360 std::swap(PredCases[i], PredCases.back());
1361 PredCases.pop_back();
1362 --i;
1363 --e;
1364 }
1365
1366 // Okay, now we know which constants were sent to BB from the
1367 // predecessor. Figure out where they will all go now.
1368 for (const ValueEqualityComparisonCase &Case : BBCases)
1369 if (PTIHandled.count(Case.Value)) {
1370 // If this is one we are capable of getting...
1371 if (PredHasWeights || SuccHasWeights)
1372 Weights.push_back(WeightsForHandled[Case.Value]);
1373 PredCases.push_back(Case);
1374 ++NewSuccessors[Case.Dest];
1375 PTIHandled.erase(Case.Value); // This constant is taken care of
1376 }
1377
1378 // If there are any constants vectored to BB that TI doesn't handle,
1379 // they must go to the default destination of TI.
1380 for (ConstantInt *I : PTIHandled) {
1381 if (PredHasWeights || SuccHasWeights)
1382 Weights.push_back(WeightsForHandled[I]);
1383 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1384 ++NewSuccessors[BBDefault];
1385 }
1386 }
1387
1388 // Okay, at this point, we know which new successor Pred will get. Make
1389 // sure we update the number of entries in the PHI nodes for these
1390 // successors.
1391 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1392 if (DTU) {
 // Snapshot Pred's current successors so an Insert update is only queued
 // for edges that do not exist yet.
1393 SuccsOfPred = {llvm::from_range, successors(Pred)};
1394 Updates.reserve(Updates.size() + NewSuccessors.size());
1395 }
1396 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1397 NewSuccessors) {
1398 for (auto I : seq(NewSuccessor.second)) {
1399 (void)I;
1400 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1401 }
1402 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1403 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1404 }
1405
1406 Builder.SetInsertPoint(PTI);
1407 // Convert pointer to int before we switch.
1408 if (CV->getType()->isPointerTy()) {
1409 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1410 "Should not end up here with unstable pointers");
1411 CV =
1412 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1413 }
1414
1415 // Now that the successors are updated, create the new Switch instruction.
1416 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1417 NewSI->setDebugLoc(PTI->getDebugLoc());
1418 for (ValueEqualityComparisonCase &V : PredCases)
1419 NewSI->addCase(V.Value, V.Dest);
1420
1421 if (PredHasWeights || SuccHasWeights)
1422 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1423 /*ElideAllZero=*/true);
1424
 // NOTE(review): listing line 1425 is absent from this copy. The visible text
 // never removes the old terminator PTI after NewSI is created -- restore the
 // missing statement from the upstream file.
1426
1427 // Okay, last check. If BB is still a successor of PSI, then we must
1428 // have an infinite loop case. If so, add an infinitely looping block
1429 // to handle the case to preserve the behavior of the code.
1430 BasicBlock *InfLoopBlock = nullptr;
1431 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1432 if (NewSI->getSuccessor(i) == BB) {
1433 if (!InfLoopBlock) {
1434 // Insert it at the end of the function, because it's either code,
1435 // or it won't matter if it's hot. :)
1436 InfLoopBlock =
1437 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1438 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
1439 if (DTU)
1440 Updates.push_back(
1441 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1442 }
1443 NewSI->setSuccessor(i, InfLoopBlock);
1444 }
1445
1446 if (DTU) {
1447 if (InfLoopBlock)
1448 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1449
 // Every edge to BB was either absent or redirected above, so Pred no
 // longer branches to BB.
1450 Updates.push_back({DominatorTree::Delete, Pred, BB});
1451
1452 DTU->applyUpdates(Updates);
1453 }
1454
1455 ++NumFoldValueComparisonIntoPredecessors;
1456 return true;
1457}
1458
1459/// The specified terminator is a value equality comparison instruction
1460/// (either a switch or a branch on "X == c").
1461/// See if any of the predecessors of the terminator block are value comparisons
1462/// on the same value. If so, and if safe to do so, fold them together.
/// Returns true if at least one predecessor was folded and no later block
/// split failed (see the review note in the loop).
1463bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1464 IRBuilder<> &Builder) {
1465 BasicBlock *BB = TI->getParent();
1466 Value *CV = isValueEqualityComparison(TI); // CondVal
1467 assert(CV && "Not a comparison?");
1468
1469 bool Changed = false;
1470
 // Work on a snapshot of the predecessors taken before any folding.
1471 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1472 while (!Preds.empty()) {
1473 BasicBlock *Pred = Preds.pop_back_val();
1474 Instruction *PTI = Pred->getTerminator();
1475
1476 // Don't try to fold into itself.
1477 if (Pred == BB)
1478 continue;
1479
1480 // See if the predecessor is a comparison with the same value.
1481 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1482 if (PCV != CV)
1483 continue;
1484
 // When the terminators cannot be merged as-is, split the predecessor edge
 // from TI's block into each reported block, then fold anyway.
1485 SmallSetVector<BasicBlock *, 4> FailBlocks;
1486 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1487 for (auto *Succ : FailBlocks) {
1488 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
 // NOTE(review): this reports "no change" even if an earlier iteration
 // already folded a predecessor (Changed == true) or an earlier split
 // in this loop succeeded -- confirm whether `return Changed` is meant.
1489 return false;
1490 }
1491 }
1492
1493 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1494 Changed = true;
1495 }
1496 return Changed;
1497}
1498
1499// If we would need to insert a select that uses the value of this invoke
1500// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1501// need to do this), we can't hoist the invoke, as there is nowhere to put the
1502// select in this case.
// Returns false if some PHI in a successor of BB1 receives different values
// from BB1 and BB2 and one of those values is I1 (from BB1) or I2 (from BB2).
// NOTE(review): listing line 1503 (return type, function name and the BB1/BB2
// parameters) is absent from this copy.
1504 Instruction *I1, Instruction *I2) {
1505 for (BasicBlock *Succ : successors(BB1)) {
1506 for (const PHINode &PN : Succ->phis()) {
1507 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1508 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1509 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1510 return false;
1511 }
1512 }
1513 }
1514 return true;
1515}
1516
1517// Get interesting characteristics of instructions that
1518// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1519// instructions can be reordered across.
// NOTE(review): listing lines 1520-1524 and 1526 are absent from this copy.
// They presumably hold the declarations of the Skip* flag constants used
// below and the function header (taking an instruction I) -- restore them
// from the upstream file.
1525
1527 unsigned Flags = 0;
1528 if (I->mayReadFromMemory())
1529 Flags |= SkipReadMem;
1530 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1531 // inalloca) across stacksave/stackrestore boundaries.
1532 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1533 Flags |= SkipSideEffect;
 // NOTE(review): listing line 1534 (the condition guarding the next
 // statement) is absent from this copy.
1535 Flags |= SkipImplicitControlFlow;
1536 return Flags;
1537}
1538
1539// Returns true if it is safe to reorder an instruction across preceding
1540// instructions in a basic block.
// Flags is a bitmask of Skip* bits describing the instructions that would be
// skipped over.
1541static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1542 // Don't reorder a store over a load.
1543 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1544 return false;
1545
1546 // If we have seen an instruction with side effects, it's unsafe to reorder an
1547 // instruction which reads memory or itself has side effects.
1548 if ((Flags & SkipSideEffect) &&
1549 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1550 return false;
1551
1552 // Reordering across an instruction which does not necessarily transfer
1553 // control to the next instruction is speculation.
 // NOTE(review): listing line 1554 (the condition guarding the next return)
 // is absent from this copy -- restore from the upstream file.
1555 return false;
1556
1557 // Hoisting of llvm.deoptimize is only legal together with the next return
1558 // instruction, which this pass is not always able to do.
1559 if (auto *CB = dyn_cast<CallBase>(I))
1560 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1561 return false;
1562
1563 // It's also unsafe/illegal to hoist an instruction above its instruction
1564 // operands
 // Only operands defined in I's own block are rejected here.
1565 BasicBlock *BB = I->getParent();
1566 for (Value *Op : I->operands()) {
1567 if (auto *J = dyn_cast<Instruction>(Op))
1568 if (J->getParent() == BB)
1569 return false;
1570 }
1571
1572 return true;
1573}
1574
1575static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); // Forward declaration; the definition is not in this part of the file.
1576
1577/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1578/// instructions \p I1 and \p I2 can and should be hoisted.
// NOTE(review): listing line 1579 (return type, function name and the I1/I2
// parameters) is absent from this copy.
1580 const TargetTransformInfo &TTI) {
1581 // If we're going to hoist a call, make sure that the two instructions
1582 // we're commoning/hoisting are both marked with musttail, or neither of
1583 // them is marked as such. Otherwise, we might end up in a situation where
1584 // we hoist from a block where the terminator is a `ret` to a block where
1585 // the terminator is a `br`, and `musttail` calls expect to be followed by
1586 // a return.
1587 auto *C1 = dyn_cast<CallInst>(I1);
1588 auto *C2 = dyn_cast<CallInst>(I2);
1589 if (C1 && C2)
1590 if (C1->isMustTailCall() != C2->isMustTailCall())
1591 return false;
1592
 // The target gets a veto on either instruction.
1593 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1594 return false;
1595
1596 // If any of the two call sites has nomerge or convergent attribute, stop
1597 // hoisting.
1598 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1599 if (CB1->cannotMerge() || CB1->isConvergent())
1600 return false;
1601 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1602 if (CB2->cannotMerge() || CB2->isConvergent())
1603 return false;
1604
1605 return true;
1606}
1607
1608/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1609/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
1610/// hoistCommonCodeFromSuccessors. e.g. The input:
1611/// I1 DVRs: { x, z },
1612/// OtherInsts: { I2 DVRs: { x, y, z } }
1613/// would result in hoisting only DbgVariableRecord x.
// NOTE(review): listing line 1614 (return type and function name) is absent
// from this copy.
1615 Instruction *TI, Instruction *I1,
1616 SmallVectorImpl<Instruction *> &OtherInsts) {
 // Nothing to do unless every instruction involved has debug records (the
 // same check is applied to each of OtherInsts below).
1617 if (!I1->hasDbgRecords())
1618 return;
1619 using CurrentAndEndIt =
1620 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1621 // Vector of {Current, End} iterators.
 // NOTE(review): listing line 1622 (presumably the declaration of `Itrs`) is
 // absent from this copy.
1623 Itrs.reserve(OtherInsts.size() + 1);
1624 // Helper lambdas for lock-step checks:
1625 // Return true if this Current == End.
1626 auto atEnd = [](const CurrentAndEndIt &Pair) {
1627 return Pair.first == Pair.second;
1628 };
1629 // Return true if all Current are identical.
 // Every Current after the first is compared against the first one.
1630 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1631 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
 // NOTE(review): listing line 1632 (the header of the predicate lambda,
 // which binds `I`) is absent from this copy.
1633 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1634 });
1635 };
1636
1637 // Collect the iterators.
1638 Itrs.push_back(
1639 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1640 for (Instruction *Other : OtherInsts) {
1641 if (!Other->hasDbgRecords())
1642 return;
1643 Itrs.push_back(
1644 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1645 }
1646
1647 // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
1648 // the lock-step DbgRecord are identical, hoist all of them to TI.
1649 // This replicates the dbg.* intrinsic behaviour in
1650 // hoistCommonCodeFromSuccessors.
1651 while (none_of(Itrs, atEnd)) {
1652 bool HoistDVRs = allIdentical(Itrs);
1653 for (CurrentAndEndIt &Pair : Itrs) {
1654 // Increment Current iterator now as we may be about to move the
1655 // DbgRecord.
1656 DbgRecord &DR = *Pair.first++;
1657 if (HoistDVRs) {
1658 DR.removeFromParent();
1659 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1660 }
1661 }
1662 }
1663}
1664
// NOTE(review): listing line 1665 (return type, function name and the I1
// parameter) is absent from this copy.
// Returns true if I1 and I2 are identical (ignoring flags that only matter
// when defined), or if they differ only by a swap of their first two
// operands: for compares, with the predicate swapped accordingly; for
// commutative operations, with all remaining operands equal.
1666 const Instruction *I2) {
1667 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1668 return true;
1669
 // Once both are compares, the result is decided here; the commutative
 // check below is not reached.
1670 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1671 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1672 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1673 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1674 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1675
1676 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1677 return I1->getOperand(0) == I2->getOperand(1) &&
1678 I1->getOperand(1) == I2->getOperand(0) &&
1679 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1680 }
1681
1682 return false;
1683}
1684
1685/// If the target supports conditional faulting,
1686/// we look for the following pattern:
1687/// \code
1688/// BB:
1689/// ...
1690/// %cond = icmp ult %x, %y
1691/// br i1 %cond, label %TrueBB, label %FalseBB
1692/// FalseBB:
1693/// store i32 1, ptr %q, align 4
1694/// ...
1695/// TrueBB:
1696/// %maskedloadstore = load i32, ptr %b, align 4
1697/// store i32 %maskedloadstore, ptr %p, align 4
1698/// ...
1699/// \endcode
1700///
1701/// and transform it into:
1702///
1703/// \code
1704/// BB:
1705/// ...
1706/// %cond = icmp ult %x, %y
1707/// %maskedloadstore = cload i32, ptr %b, %cond
1708/// cstore i32 %maskedloadstore, ptr %p, %cond
1709/// cstore i32 1, ptr %q, ~%cond
1710/// br i1 %cond, label %TrueBB, label %FalseBB
1711/// FalseBB:
1712/// ...
1713/// TrueBB:
1714/// ...
1715/// \endcode
1716///
1717/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1718/// e.g.
1719///
1720/// \code
1721/// %vcond = bitcast i1 %cond to <1 x i1>
1722/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1723/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1724/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1725/// call void @llvm.masked.store.v1i32.p0
1726/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1727/// %cond.not = xor i1 %cond, true
1728/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1729/// call void @llvm.masked.store.v1i32.p0
1730/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1731/// \endcode
1732///
1733/// So we need to turn hoisted load/store into cload/cstore.
1734///
1735/// \param BI The branch instruction.
1736/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1737/// will be speculated.
1738/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1740 CondBrInst *BI,
1741 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1742 std::optional<bool> Invert, Instruction *Sel) {
1743 auto &Context = BI->getParent()->getContext();
1744 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1745 auto *Cond = BI->getCondition();
1746 // Construct the condition if needed.
1747 BasicBlock *BB = BI->getParent();
1748 Value *Mask = nullptr;
1749 Value *MaskFalse = nullptr;
1750 Value *MaskTrue = nullptr;
1751 if (Invert.has_value()) {
1752 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1753 Mask = Builder.CreateBitCast(
1754 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1755 VCondTy);
1756 } else {
1757 IRBuilder<> Builder(BI);
1758 MaskFalse = Builder.CreateBitCast(
1759 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1760 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1761 }
1762 auto PeekThroughBitcasts = [](Value *V) {
1763 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1764 V = BitCast->getOperand(0);
1765 return V;
1766 };
1767 for (auto *I : SpeculatedConditionalLoadsStores) {
1768 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1769 if (!Invert.has_value())
1770 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1771 // We currently assume conditional faulting load/store is supported for
1772 // scalar types only when creating new instructions. This can be easily
1773 // extended for vector types in the future.
1774 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1775 auto *Op0 = I->getOperand(0);
1776 CallInst *MaskedLoadStore = nullptr;
1777 if (auto *LI = dyn_cast<LoadInst>(I)) {
1778 // Handle Load.
1779 auto *Ty = I->getType();
1780 PHINode *PN = nullptr;
1781 Value *PassThru = nullptr;
1782 if (Invert.has_value())
1783 for (User *U : I->users()) {
1784 if ((PN = dyn_cast<PHINode>(U))) {
1785 PassThru = Builder.CreateBitCast(
1786 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1787 FixedVectorType::get(Ty, 1));
1788 } else if (auto *Ins = cast<Instruction>(U);
1789 Sel && Ins->getParent() == BB) {
1790 // This happens when store or/and a speculative instruction between
1791 // load and store were hoisted to the BB. Make sure the masked load
1792 // inserted before its use.
1793 // We assume there's one of such use.
1794 Builder.SetInsertPoint(Ins);
1795 }
1796 }
1797 MaskedLoadStore = Builder.CreateMaskedLoad(
1798 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1799 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1800 if (PN)
1801 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1802 I->replaceAllUsesWith(NewLoadStore);
1803 } else {
1804 // Handle Store.
1805 auto *StoredVal = Builder.CreateBitCast(
1806 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1807 MaskedLoadStore = Builder.CreateMaskedStore(
1808 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1809 }
1810 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1811 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1812 //
1813 // !nonnull, !align : Not support pointer type, no need to keep.
1814 // !range: Load type is changed from scalar to vector, but the metadata on
1815 // vector specifies a per-element range, so the semantics stay the
1816 // same. Keep it.
1817 // !annotation: Not impact semantics. Keep it.
1818 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1819 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1820 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1821 // FIXME: DIAssignID is not supported for masked store yet.
1822 // (Verifier::visitDIAssignIDMetadata)
1824 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1825 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1826 });
1827 MaskedLoadStore->copyMetadata(*I);
1828 I->eraseFromParent();
1829 }
1830}
1831
1833 const TargetTransformInfo &TTI) {
1834 // Not handle volatile or atomic.
1835 bool IsStore = false;
1836 if (auto *L = dyn_cast<LoadInst>(I)) {
1837 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1838 return false;
1839 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1840 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1841 return false;
1842 IsStore = true;
1843 } else
1844 return false;
1845
1846 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1847 // That's why we have the alignment limitation.
1848 // FIXME: Update the prototype of the intrinsics?
1849 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1851}
1852
1853/// Hoist any common code in the successor blocks up into the block. This
1854/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1855/// given, only perform hoisting in case all successors blocks contain matching
1856/// instructions only. In that case, all instructions can be hoisted and the
1857/// original branch will be replaced and selects for PHIs are added.
1858bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1859 bool AllInstsEqOnly) {
1860 // This does very trivial matching, with limited scanning, to find identical
1861 // instructions in the two blocks. In particular, we don't want to get into
1862 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1863 // such, we currently just scan for obviously identical instructions in an
1864 // identical order, possibly separated by the same number of non-identical
1865 // instructions.
1866 BasicBlock *BB = TI->getParent();
1867 unsigned int SuccSize = succ_size(BB);
1868 if (SuccSize < 2)
1869 return false;
1870
1871 // If either of the blocks has it's address taken, then we can't do this fold,
1872 // because the code we'd hoist would no longer run when we jump into the block
1873 // by it's address.
1874 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1875 for (auto *Succ : UniqueSuccessors) {
1876 if (Succ->hasAddressTaken())
1877 return false;
1878 // Use getUniquePredecessor instead of getSinglePredecessor to support
1879 // multi-cases successors in switch.
1880 if (Succ->getUniquePredecessor())
1881 continue;
1882 // If Succ has >1 predecessors, continue to check if the Succ contains only
1883 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1884 // can relax the condition based on the assumptiom that the program would
1885 // never enter Succ and trigger such an UB.
1886 if (isa<UnreachableInst>(*Succ->begin()))
1887 continue;
1888 return false;
1889 }
1890 // The second of pair is a SkipFlags bitmask.
1891 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1892 SmallVector<SuccIterPair, 8> SuccIterPairs;
1893 for (auto *Succ : UniqueSuccessors) {
1894 BasicBlock::iterator SuccItr = Succ->begin();
1895 if (isa<PHINode>(*SuccItr))
1896 return false;
1897 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1898 }
1899
1900 if (AllInstsEqOnly) {
1901 // Check if all instructions in the successor blocks match. This allows
1902 // hoisting all instructions and removing the blocks we are hoisting from,
1903 // so does not add any new instructions.
1904
1905 // Check if sizes and terminators of all successors match.
1906 unsigned Size0 = UniqueSuccessors[0]->size();
1907 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1908 bool AllSame =
1909 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1910 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1911 Succ->size() == Size0;
1912 });
1913 if (!AllSame)
1914 return false;
1915 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1916 while (LRI.isValid()) {
1917 Instruction *I0 = (*LRI)[0];
1918 if (any_of(*LRI, [I0](Instruction *I) {
1919 return !areIdenticalUpToCommutativity(I0, I);
1920 })) {
1921 return false;
1922 }
1923 --LRI;
1924 }
1925 // Now we know that all instructions in all successors can be hoisted. Let
1926 // the loop below handle the hoisting.
1927 }
1928
1929 // Count how many instructions were not hoisted so far. There's a limit on how
1930 // many instructions we skip, serving as a compilation time control as well as
1931 // preventing excessive increase of life ranges.
1932 unsigned NumSkipped = 0;
1933 // If we find an unreachable instruction at the beginning of a basic block, we
1934 // can still hoist instructions from the rest of the basic blocks.
1935 if (SuccIterPairs.size() > 2) {
1936 erase_if(SuccIterPairs,
1937 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1938 if (SuccIterPairs.size() < 2)
1939 return false;
1940 }
1941
1942 bool Changed = false;
1943
1944 for (;;) {
1945 auto *SuccIterPairBegin = SuccIterPairs.begin();
1946 auto &BB1ItrPair = *SuccIterPairBegin++;
1947 auto OtherSuccIterPairRange =
1948 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1949 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1950
1951 Instruction *I1 = &*BB1ItrPair.first;
1952
1953 bool AllInstsAreIdentical = true;
1954 bool HasTerminator = I1->isTerminator();
1955 for (auto &SuccIter : OtherSuccIterRange) {
1956 Instruction *I2 = &*SuccIter;
1957 HasTerminator |= I2->isTerminator();
1958 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1959 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1960 AllInstsAreIdentical = false;
1961 }
1962
1963 SmallVector<Instruction *, 8> OtherInsts;
1964 for (auto &SuccIter : OtherSuccIterRange)
1965 OtherInsts.push_back(&*SuccIter);
1966
1967 // If we are hoisting the terminator instruction, don't move one (making a
1968 // broken BB), instead clone it, and remove BI.
1969 if (HasTerminator) {
1970 // Even if BB, which contains only one unreachable instruction, is ignored
1971 // at the beginning of the loop, we can hoist the terminator instruction.
1972 // If any instructions remain in the block, we cannot hoist terminators.
1973 if (NumSkipped || !AllInstsAreIdentical) {
1974 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1975 return Changed;
1976 }
1977
1978 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1979 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1980 Changed;
1981 }
1982
1983 if (AllInstsAreIdentical) {
1984 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1985 AllInstsAreIdentical =
1986 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1987 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1988 Instruction *I2 = &*Pair.first;
1989 unsigned SkipFlagsBB2 = Pair.second;
1990 // Even if the instructions are identical, it may not
1991 // be safe to hoist them if we have skipped over
1992 // instructions with side effects or their operands
1993 // weren't hoisted.
1994 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1996 });
1997 }
1998
1999 if (AllInstsAreIdentical) {
2000 BB1ItrPair.first++;
2001 // For a normal instruction, we just move one to right before the
2002 // branch, then replace all uses of the other with the first. Finally,
2003 // we remove the now redundant second instruction.
2004 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2005 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2006 // and leave any that were not hoisted behind (by calling moveBefore
2007 // rather than moveBeforePreserving).
2008 I1->moveBefore(TI->getIterator());
2009 for (auto &SuccIter : OtherSuccIterRange) {
2010 Instruction *I2 = &*SuccIter++;
2011 assert(I2 != I1);
2012 if (!I2->use_empty())
2013 I2->replaceAllUsesWith(I1);
2014 I1->andIRFlags(I2);
2015 if (auto *CB = dyn_cast<CallBase>(I1)) {
2016 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2017 assert(Success && "We should not be trying to hoist callbases "
2018 "with non-intersectable attributes");
2019 // For NDEBUG Compile.
2020 (void)Success;
2021 }
2022
2023 combineMetadataForCSE(I1, I2, true);
2024 // I1 and I2 are being combined into a single instruction. Its debug
2025 // location is the merged locations of the original instructions.
2026 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2027 I2->eraseFromParent();
2028 }
2029 if (!Changed)
2030 NumHoistCommonCode += SuccIterPairs.size();
2031 Changed = true;
2032 NumHoistCommonInstrs += SuccIterPairs.size();
2033 } else {
2034 if (NumSkipped >= HoistCommonSkipLimit) {
2035 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2036 return Changed;
2037 }
2038 // We are about to skip over a pair of non-identical instructions. Record
2039 // if any have characteristics that would prevent reordering instructions
2040 // across them.
2041 for (auto &SuccIterPair : SuccIterPairs) {
2042 Instruction *I = &*SuccIterPair.first++;
2043 SuccIterPair.second |= skippedInstrFlags(I);
2044 }
2045 ++NumSkipped;
2046 }
2047 }
2048}
2049
2050bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2051 Instruction *TI, Instruction *I1,
2052 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2053 ArrayRef<BasicBlock *> UniqueSuccessors) {
2054
2055 auto *BI = dyn_cast<CondBrInst>(TI);
2056
2057 bool Changed = false;
2058 BasicBlock *TIParent = TI->getParent();
2059 BasicBlock *BB1 = I1->getParent();
2060
2061 // Use only for an if statement.
2062 auto *I2 = *OtherSuccTIs.begin();
2063 auto *BB2 = I2->getParent();
2064 if (BI) {
2065 assert(OtherSuccTIs.size() == 1);
2066 assert(BI->getSuccessor(0) == I1->getParent());
2067 assert(BI->getSuccessor(1) == I2->getParent());
2068 }
2069
2070 // In the case of an if statement, we try to hoist an invoke.
2071 // FIXME: Can we define a safety predicate for CallBr?
2072 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2073 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2074 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2075 return false;
2076
2077 // TODO: callbr hoisting currently disabled pending further study.
2078 if (isa<CallBrInst>(I1))
2079 return false;
2080
2081 for (BasicBlock *Succ : successors(BB1)) {
2082 for (PHINode &PN : Succ->phis()) {
2083 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2084 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2085 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2086 if (BB1V == BB2V)
2087 continue;
2088
2089 // In the case of an if statement, check for
2090 // passingValueIsAlwaysUndefined here because we would rather eliminate
2091 // undefined control flow then converting it to a select.
2092 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2094 return false;
2095 }
2096 }
2097 }
2098
2099 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2100 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2101 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2102 // Clone the terminator and hoist it into the pred, without any debug info.
2103 Instruction *NT = I1->clone();
2104 NT->insertInto(TIParent, TI->getIterator());
2105 if (!NT->getType()->isVoidTy()) {
2106 I1->replaceAllUsesWith(NT);
2107 for (Instruction *OtherSuccTI : OtherSuccTIs)
2108 OtherSuccTI->replaceAllUsesWith(NT);
2109 NT->takeName(I1);
2110 }
2111 Changed = true;
2112 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2113
2114 // Ensure terminator gets a debug location, even an unknown one, in case
2115 // it involves inlinable calls.
2117 Locs.push_back(I1->getDebugLoc());
2118 for (auto *OtherSuccTI : OtherSuccTIs)
2119 Locs.push_back(OtherSuccTI->getDebugLoc());
2120 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2121
2122 // PHIs created below will adopt NT's merged DebugLoc.
2123 IRBuilder<NoFolder> Builder(NT);
2124
2125 // In the case of an if statement, hoisting one of the terminators from our
2126 // successor is a great thing. Unfortunately, the successors of the if/else
2127 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2128 // must agree for all PHI nodes, so we insert select instruction to compute
2129 // the final result.
2130 if (BI) {
2131 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2132 for (BasicBlock *Succ : successors(BB1)) {
2133 for (PHINode &PN : Succ->phis()) {
2134 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2135 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2136 if (BB1V == BB2V)
2137 continue;
2138
2139 // These values do not agree. Insert a select instruction before NT
2140 // that determines the right value.
2141 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2142 if (!SI) {
2143 // Propagate fast-math-flags from phi node to its replacement select.
2145 BI->getCondition(), BB1V, BB2V,
2146 isa<FPMathOperator>(PN) ? &PN : nullptr,
2147 BB1V->getName() + "." + BB2V->getName(), BI));
2148 }
2149
2150 // Make the PHI node use the select for all incoming values for BB1/BB2
2151 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2152 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2153 PN.setIncomingValue(i, SI);
2154 }
2155 }
2156 }
2157
2159
2160 // Update any PHI nodes in our new successors.
2161 for (BasicBlock *Succ : successors(BB1)) {
2162 addPredecessorToBlock(Succ, TIParent, BB1);
2163 if (DTU)
2164 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2165 }
2166
2167 if (DTU) {
2168 // TI might be a switch with multi-cases destination, so we need to care for
2169 // the duplication of successors.
2170 for (BasicBlock *Succ : UniqueSuccessors)
2171 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2172 }
2173
2175 if (DTU)
2176 DTU->applyUpdates(Updates);
2177 return Changed;
2178}
2179
2180// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2181// into variables.
2183 int OpIdx) {
2184 // Divide/Remainder by constant is typically much cheaper than by variable.
2185 if (I->isIntDivRem())
2186 return OpIdx != 1;
2187 return !isa<IntrinsicInst>(I);
2188}
2189
2190// All instructions in Insts belong to different blocks that all unconditionally
2191// branch to a common successor. Analyze each instruction and return true if it
2192// would be possible to sink them into their successor, creating one common
2193// instruction instead. For every value that would be required to be provided by
2194// PHI node (because an operand varies in each input block), add to PHIOperands.
2197 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2198 // Prune out obviously bad instructions to move. Each instruction must have
2199 // the same number of uses, and we check later that the uses are consistent.
2200 std::optional<unsigned> NumUses;
2201 for (auto *I : Insts) {
2202 // These instructions may change or break semantics if moved.
2203 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2204 I->getType()->isTokenTy())
2205 return false;
2206
2207 // Do not try to sink an instruction in an infinite loop - it can cause
2208 // this algorithm to infinite loop.
2209 if (I->getParent()->getSingleSuccessor() == I->getParent())
2210 return false;
2211
2212 // Conservatively return false if I is an inline-asm instruction. Sinking
2213 // and merging inline-asm instructions can potentially create arguments
2214 // that cannot satisfy the inline-asm constraints.
2215 // If the instruction has nomerge or convergent attribute, return false.
2216 if (const auto *C = dyn_cast<CallBase>(I))
2217 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2218 return false;
2219
2220 if (!NumUses)
2221 NumUses = I->getNumUses();
2222 else if (NumUses != I->getNumUses())
2223 return false;
2224 }
2225
2226 const Instruction *I0 = Insts.front();
2227 const auto I0MMRA = MMRAMetadata(*I0);
2228 for (auto *I : Insts) {
2229 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2230 return false;
2231
2232 // Treat MMRAs conservatively. This pass can be quite aggressive and
2233 // could drop a lot of MMRAs otherwise.
2234 if (MMRAMetadata(*I) != I0MMRA)
2235 return false;
2236 }
2237
2238 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2239 // then the other phi operands must match the instructions from Insts. This
2240 // also has to hold true for any phi nodes that would be created as a result
2241 // of sinking. Both of these cases are represented by PhiOperands.
2242 for (const Use &U : I0->uses()) {
2243 auto It = PHIOperands.find(&U);
2244 if (It == PHIOperands.end())
2245 // There may be uses in other blocks when sinking into a loop header.
2246 return false;
2247 if (!equal(Insts, It->second))
2248 return false;
2249 }
2250
2251 // For calls to be sinkable, they must all be indirect, or have same callee.
2252 // I.e. if we have two direct calls to different callees, we don't want to
2253 // turn that into an indirect call. Likewise, if we have an indirect call,
2254 // and a direct call, we don't actually want to have a single indirect call.
2255 if (isa<CallBase>(I0)) {
2256 auto IsIndirectCall = [](const Instruction *I) {
2257 return cast<CallBase>(I)->isIndirectCall();
2258 };
2259 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2260 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2261 if (HaveIndirectCalls) {
2262 if (!AllCallsAreIndirect)
2263 return false;
2264 } else {
2265 // All callees must be identical.
2266 Value *Callee = nullptr;
2267 for (const Instruction *I : Insts) {
2268 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2269 if (!Callee)
2270 Callee = CurrCallee;
2271 else if (Callee != CurrCallee)
2272 return false;
2273 }
2274 }
2275 }
2276
2277 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2278 Value *Op = I0->getOperand(OI);
2279 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2280 assert(I->getNumOperands() == I0->getNumOperands());
2281 return I->getOperand(OI) == I0->getOperand(OI);
2282 };
2283 if (!all_of(Insts, SameAsI0)) {
2286 // We can't create a PHI from this GEP.
2287 return false;
2288 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2289 for (auto *I : Insts)
2290 Ops.push_back(I->getOperand(OI));
2291 }
2292 }
2293 return true;
2294}
2295
2296// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2297// instruction of every block in Blocks to their common successor, commoning
2298// into one instruction.
2300 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2301
2302 // canSinkInstructions returning true guarantees that every block has at
2303 // least one non-terminator instruction.
2305 for (auto *BB : Blocks) {
2306 Instruction *I = BB->getTerminator();
2307 I = I->getPrevNode();
2308 Insts.push_back(I);
2309 }
2310
2311 // We don't need to do any more checking here; canSinkInstructions should
2312 // have done it all for us.
2313 SmallVector<Value*, 4> NewOperands;
2314 Instruction *I0 = Insts.front();
2315 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2316 // This check is different to that in canSinkInstructions. There, we
2317 // cared about the global view once simplifycfg (and instcombine) have
2318 // completed - it takes into account PHIs that become trivially
2319 // simplifiable. However here we need a more local view; if an operand
2320 // differs we create a PHI and rely on instcombine to clean up the very
2321 // small mess we may make.
2322 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2323 return I->getOperand(O) != I0->getOperand(O);
2324 });
2325 if (!NeedPHI) {
2326 NewOperands.push_back(I0->getOperand(O));
2327 continue;
2328 }
2329
2330 // Create a new PHI in the successor block and populate it.
2331 auto *Op = I0->getOperand(O);
2332 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2333 auto *PN =
2334 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2335 PN->insertBefore(BBEnd->begin());
2336 for (auto *I : Insts)
2337 PN->addIncoming(I->getOperand(O), I->getParent());
2338 NewOperands.push_back(PN);
2339 }
2340
2341 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2342 // and move it to the start of the successor block.
2343 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2344 I0->getOperandUse(O).set(NewOperands[O]);
2345
2346 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2347
2348 // Update metadata and IR flags, and merge debug locations.
2349 for (auto *I : Insts)
2350 if (I != I0) {
2351 // The debug location for the "common" instruction is the merged locations
2352 // of all the commoned instructions. We start with the original location
2353 // of the "common" instruction and iteratively merge each location in the
2354 // loop below.
2355 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2356 // However, as N-way merge for CallInst is rare, so we use simplified API
2357 // instead of using complex API for N-way merge.
2358 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2359 combineMetadataForCSE(I0, I, true);
2360 I0->andIRFlags(I);
2361 if (auto *CB = dyn_cast<CallBase>(I0)) {
2362 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2363 assert(Success && "We should not be trying to sink callbases "
2364 "with non-intersectable attributes");
2365 // For NDEBUG Compile.
2366 (void)Success;
2367 }
2368 }
2369
2370 for (User *U : make_early_inc_range(I0->users())) {
2371 // canSinkLastInstruction checked that all instructions are only used by
2372 // phi nodes in a way that allows replacing the phi node with the common
2373 // instruction.
2374 auto *PN = cast<PHINode>(U);
2375 PN->replaceAllUsesWith(I0);
2376 PN->eraseFromParent();
2377 }
2378
2379 // Finally nuke all instructions apart from the common instruction.
2380 for (auto *I : Insts) {
2381 if (I == I0)
2382 continue;
2383 // The remaining uses are debug users, replace those with the common inst.
2384 // In most (all?) cases this just introduces a use-before-def.
2385 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2386 I->replaceAllUsesWith(I0);
2387 I->eraseFromParent();
2388 }
2389}
2390
2391/// Check whether BB's predecessors end with unconditional branches. If it is
2392/// true, sink any common code from the predecessors to BB.
2394 DomTreeUpdater *DTU) {
2395 // We support two situations:
2396 // (1) all incoming arcs are unconditional
2397 // (2) there are non-unconditional incoming arcs
2398 //
2399 // (2) is very common in switch defaults and
2400 // else-if patterns;
2401 //
2402 // if (a) f(1);
2403 // else if (b) f(2);
2404 //
2405 // produces:
2406 //
2407 // [if]
2408 // / \
2409 // [f(1)] [if]
2410 // | | \
2411 // | | |
2412 // | [f(2)]|
2413 // \ | /
2414 // [ end ]
2415 //
2416 // [end] has two unconditional predecessor arcs and one conditional. The
2417 // conditional refers to the implicit empty 'else' arc. This conditional
2418 // arc can also be caused by an empty default block in a switch.
2419 //
2420 // In this case, we attempt to sink code from all *unconditional* arcs.
2421 // If we can sink instructions from these arcs (determined during the scan
2422 // phase below) we insert a common successor for all unconditional arcs and
2423 // connect that to [end], to enable sinking:
2424 //
2425 // [if]
2426 // / \
2427 // [x(1)] [if]
2428 // | | \
2429 // | | \
2430 // | [x(2)] |
2431 // \ / |
2432 // [sink.split] |
2433 // \ /
2434 // [ end ]
2435 //
2436 SmallVector<BasicBlock*,4> UnconditionalPreds;
2437 bool HaveNonUnconditionalPredecessors = false;
2438 for (auto *PredBB : predecessors(BB)) {
2439 auto *PredBr = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2440 if (PredBr)
2441 UnconditionalPreds.push_back(PredBB);
2442 else
2443 HaveNonUnconditionalPredecessors = true;
2444 }
2445 if (UnconditionalPreds.size() < 2)
2446 return false;
2447
2448 // We take a two-step approach to tail sinking. First we scan from the end of
2449 // each block upwards in lockstep. If the n'th instruction from the end of each
2450 // block can be sunk, those instructions are added to ValuesToSink and we
2451 // carry on. If we can sink an instruction but need to PHI-merge some operands
2452 // (because they're not identical in each instruction) we add these to
2453 // PHIOperands.
2454 // We prepopulate PHIOperands with the phis that already exist in BB.
2456 for (PHINode &PN : BB->phis()) {
2458 for (const Use &U : PN.incoming_values())
2459 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2460 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2461 for (BasicBlock *Pred : UnconditionalPreds)
2462 Ops.push_back(*IncomingVals[Pred]);
2463 }
2464
2465 int ScanIdx = 0;
2466 SmallPtrSet<Value*,4> InstructionsToSink;
2467 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2468 while (LRI.isValid() &&
2469 canSinkInstructions(*LRI, PHIOperands)) {
2470 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2471 << "\n");
2472 InstructionsToSink.insert_range(*LRI);
2473 ++ScanIdx;
2474 --LRI;
2475 }
2476
2477 // If no instructions can be sunk, early-return.
2478 if (ScanIdx == 0)
2479 return false;
2480
2481 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2482
2483 if (!followedByDeoptOrUnreachable) {
2484 // Check whether this is the pointer operand of a load/store.
2485 auto IsMemOperand = [](Use &U) {
2486 auto *I = cast<Instruction>(U.getUser());
2487 if (isa<LoadInst>(I))
2488 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2489 if (isa<StoreInst>(I))
2490 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2491 return false;
2492 };
2493
2494 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2495 // actually sink before encountering instruction that is unprofitable to
2496 // sink?
2497 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2498 unsigned NumPHIInsts = 0;
2499 for (Use &U : (*LRI)[0]->operands()) {
2500 auto It = PHIOperands.find(&U);
2501 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2502 return InstructionsToSink.contains(V);
2503 })) {
2504 ++NumPHIInsts;
2505 // Do not separate a load/store from the gep producing the address.
2506 // The gep can likely be folded into the load/store as an addressing
2507 // mode. Additionally, a load of a gep is easier to analyze than a
2508 // load of a phi.
2509 if (IsMemOperand(U) &&
2510 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2511 return false;
2512 // FIXME: this check is overly optimistic. We may end up not sinking
2513 // said instruction, due to the very same profitability check.
2514 // See @creating_too_many_phis in sink-common-code.ll.
2515 }
2516 }
2517 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2518 return NumPHIInsts <= 1;
2519 };
2520
2521 // We've determined that we are going to sink last ScanIdx instructions,
2522 // and recorded them in InstructionsToSink. Now, some instructions may be
2523 // unprofitable to sink. But that determination depends on the instructions
2524 // that we are going to sink.
2525
2526 // First, forward scan: find the first instruction unprofitable to sink,
2527 // recording all the ones that are profitable to sink.
2528 // FIXME: would it be better, after we detect that not all are profitable.
2529 // to either record the profitable ones, or erase the unprofitable ones?
2530 // Maybe we need to choose (at runtime) the one that will touch least
2531 // instrs?
2532 LRI.reset();
2533 int Idx = 0;
2534 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2535 while (Idx < ScanIdx) {
2536 if (!ProfitableToSinkInstruction(LRI)) {
2537 // Too many PHIs would be created.
2538 LLVM_DEBUG(
2539 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2540 break;
2541 }
2542 InstructionsProfitableToSink.insert_range(*LRI);
2543 --LRI;
2544 ++Idx;
2545 }
2546
2547 // If no instructions can be sunk, early-return.
2548 if (Idx == 0)
2549 return false;
2550
2551 // Did we determine that (only) some instructions are unprofitable to sink?
2552 if (Idx < ScanIdx) {
2553 // Okay, some instructions are unprofitable.
2554 ScanIdx = Idx;
2555 InstructionsToSink = InstructionsProfitableToSink;
2556
2557 // But, that may make other instructions unprofitable, too.
2558 // So, do a backward scan, do any earlier instructions become
2559 // unprofitable?
2560 assert(
2561 !ProfitableToSinkInstruction(LRI) &&
2562 "We already know that the last instruction is unprofitable to sink");
2563 ++LRI;
2564 --Idx;
2565 while (Idx >= 0) {
2566 // If we detect that an instruction becomes unprofitable to sink,
2567 // all earlier instructions won't be sunk either,
2568 // so preemptively keep InstructionsProfitableToSink in sync.
2569 // FIXME: is this the most performant approach?
2570 for (auto *I : *LRI)
2571 InstructionsProfitableToSink.erase(I);
2572 if (!ProfitableToSinkInstruction(LRI)) {
2573 // Everything starting with this instruction won't be sunk.
2574 ScanIdx = Idx;
2575 InstructionsToSink = InstructionsProfitableToSink;
2576 }
2577 ++LRI;
2578 --Idx;
2579 }
2580 }
2581
2582 // If no instructions can be sunk, early-return.
2583 if (ScanIdx == 0)
2584 return false;
2585 }
2586
2587 bool Changed = false;
2588
2589 if (HaveNonUnconditionalPredecessors) {
2590 if (!followedByDeoptOrUnreachable) {
2591 // It is always legal to sink common instructions from unconditional
2592 // predecessors. However, if not all predecessors are unconditional,
2593 // this transformation might be pessimizing. So as a rule of thumb,
2594 // don't do it unless we'd sink at least one non-speculatable instruction.
2595 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2596 LRI.reset();
2597 int Idx = 0;
2598 bool Profitable = false;
2599 while (Idx < ScanIdx) {
2600 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2601 Profitable = true;
2602 break;
2603 }
2604 --LRI;
2605 ++Idx;
2606 }
2607 if (!Profitable)
2608 return false;
2609 }
2610
2611 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2612 // We have a conditional edge and we're going to sink some instructions.
2613 // Insert a new block postdominating all blocks we're going to sink from.
2614 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2615 // Edges couldn't be split.
2616 return false;
2617 Changed = true;
2618 }
2619
2620 // Now that we've analyzed all potential sinking candidates, perform the
2621 // actual sink. We iteratively sink the last non-terminator of the source
2622 // blocks into their common successor unless doing so would require too
2623 // many PHI instructions to be generated (currently only one PHI is allowed
2624 // per sunk instruction).
2625 //
2626 // We can use InstructionsToSink to discount values needing PHI-merging that will
2627 // actually be sunk in a later iteration. This allows us to be more
2628 // aggressive in what we sink. This does allow a false positive where we
2629 // sink presuming a later value will also be sunk, but stop half way through
2630 // and never actually sink it which means we produce more PHIs than intended.
2631 // This is unlikely in practice though.
2632 int SinkIdx = 0;
2633 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2634 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2635 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2636 << "\n");
2637
2638 // Because we've sunk every instruction in turn, the current instruction to
2639 // sink is always at index 0.
2640 LRI.reset();
2641
2642 sinkLastInstruction(UnconditionalPreds);
2643 NumSinkCommonInstrs++;
2644 Changed = true;
2645 }
2646 if (SinkIdx != 0)
2647 ++NumSinkCommonCode;
2648 return Changed;
2649}
2650
2651namespace {
2652
2653struct CompatibleSets {
2654 using SetTy = SmallVector<InvokeInst *, 2>;
2655
2657
2658 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2659
2660 SetTy &getCompatibleSet(InvokeInst *II);
2661
2662 void insert(InvokeInst *II);
2663};
2664
2665CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2666 // Perform a linear scan over all the existing sets, see if the new `invoke`
2667 // is compatible with any particular set. Since we know that all the `invokes`
2668 // within a set are compatible, only check the first `invoke` in each set.
2669 // WARNING: at worst, this has quadratic complexity.
2670 for (CompatibleSets::SetTy &Set : Sets) {
2671 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2672 return Set;
2673 }
2674
2675 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2676 return Sets.emplace_back();
2677}
2678
2679void CompatibleSets::insert(InvokeInst *II) {
2680 getCompatibleSet(II).emplace_back(II);
2681}
2682
2683bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2684 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2685
2686 // Can we theoretically merge these `invoke`s?
2687 auto IsIllegalToMerge = [](InvokeInst *II) {
2688 return II->cannotMerge() || II->isInlineAsm();
2689 };
2690 if (any_of(Invokes, IsIllegalToMerge))
2691 return false;
2692
2693 // Either both `invoke`s must be direct,
2694 // or both `invoke`s must be indirect.
2695 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2696 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2697 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2698 if (HaveIndirectCalls) {
2699 if (!AllCallsAreIndirect)
2700 return false;
2701 } else {
2702 // All callees must be identical.
2703 Value *Callee = nullptr;
2704 for (InvokeInst *II : Invokes) {
2705 Value *CurrCallee = II->getCalledOperand();
2706 assert(CurrCallee && "There is always a called operand.");
2707 if (!Callee)
2708 Callee = CurrCallee;
2709 else if (Callee != CurrCallee)
2710 return false;
2711 }
2712 }
2713
2714 // Either both `invoke`s must not have a normal destination,
2715 // or both `invoke`s must have a normal destination,
2716 auto HasNormalDest = [](InvokeInst *II) {
2717 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2718 };
2719 if (any_of(Invokes, HasNormalDest)) {
2720 // Do not merge `invoke` that does not have a normal destination with one
2721 // that does have a normal destination, even though doing so would be legal.
2722 if (!all_of(Invokes, HasNormalDest))
2723 return false;
2724
2725 // All normal destinations must be identical.
2726 BasicBlock *NormalBB = nullptr;
2727 for (InvokeInst *II : Invokes) {
2728 BasicBlock *CurrNormalBB = II->getNormalDest();
2729 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2730 if (!NormalBB)
2731 NormalBB = CurrNormalBB;
2732 else if (NormalBB != CurrNormalBB)
2733 return false;
2734 }
2735
2736 // In the normal destination, the incoming values for these two `invoke`s
2737 // must be compatible.
2738 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2740 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2741 &EquivalenceSet))
2742 return false;
2743 }
2744
2745#ifndef NDEBUG
2746 // All unwind destinations must be identical.
2747 // We know that because we have started from said unwind destination.
2748 BasicBlock *UnwindBB = nullptr;
2749 for (InvokeInst *II : Invokes) {
2750 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2751 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2752 if (!UnwindBB)
2753 UnwindBB = CurrUnwindBB;
2754 else
2755 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2756 }
2757#endif
2758
2759 // In the unwind destination, the incoming values for these two `invoke`s
2760 // must be compatible.
2762 Invokes.front()->getUnwindDest(),
2763 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2764 return false;
2765
2766 // Ignoring arguments, these `invoke`s must be identical,
2767 // including operand bundles.
2768 const InvokeInst *II0 = Invokes.front();
2769 for (auto *II : Invokes.drop_front())
2770 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2771 return false;
2772
2773 // Can we theoretically form the data operands for the merged `invoke`?
2774 auto IsIllegalToMergeArguments = [](auto Ops) {
2775 Use &U0 = std::get<0>(Ops);
2776 Use &U1 = std::get<1>(Ops);
2777 if (U0 == U1)
2778 return false;
2780 U0.getOperandNo());
2781 };
2782 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2783 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2784 IsIllegalToMergeArguments))
2785 return false;
2786
2787 return true;
2788}
2789
2790} // namespace
2791
2792// Merge all invokes in the provided set, all of which are compatible
2793// as per the `CompatibleSets::shouldBelongToSameSet()`.
2795 DomTreeUpdater *DTU) {
2796 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2797
2799 if (DTU)
2800 Updates.reserve(2 + 3 * Invokes.size());
2801
2802 bool HasNormalDest =
2803 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2804
2805 // Clone one of the invokes into a new basic block.
2806 // Since they are all compatible, it doesn't matter which invoke is cloned.
2807 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2808 InvokeInst *II0 = Invokes.front();
2809 BasicBlock *II0BB = II0->getParent();
2810 BasicBlock *InsertBeforeBlock =
2811 II0->getParent()->getIterator()->getNextNode();
2812 Function *Func = II0BB->getParent();
2813 LLVMContext &Ctx = II0->getContext();
2814
2815 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2816 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2817
2818 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2819 // NOTE: all invokes have the same attributes, so no handling needed.
2820 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2821
2822 if (!HasNormalDest) {
2823 // This set does not have a normal destination,
2824 // so just form a new block with unreachable terminator.
2825 BasicBlock *MergedNormalDest = BasicBlock::Create(
2826 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2827 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2828 UI->setDebugLoc(DebugLoc::getTemporary());
2829 MergedInvoke->setNormalDest(MergedNormalDest);
2830 }
2831
2832 // The unwind destination, however, remains identical for all invokes here.
2833
2834 return MergedInvoke;
2835 }();
2836
2837 if (DTU) {
2838 // Predecessor blocks that contained these invokes will now branch to
2839 // the new block that contains the merged invoke, ...
2840 for (InvokeInst *II : Invokes)
2841 Updates.push_back(
2842 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2843
2844 // ... which has the new `unreachable` block as normal destination,
2845 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2846 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2847 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2848 SuccBBOfMergedInvoke});
2849
2850 // Since predecessor blocks now unconditionally branch to a new block,
2851 // they no longer branch to their original successors.
2852 for (InvokeInst *II : Invokes)
2853 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2854 Updates.push_back(
2855 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2856 }
2857
2858 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2859
2860 // Form the merged operands for the merged invoke.
2861 for (Use &U : MergedInvoke->operands()) {
2862 // Only PHI together the indirect callees and data operands.
2863 if (MergedInvoke->isCallee(&U)) {
2864 if (!IsIndirectCall)
2865 continue;
2866 } else if (!MergedInvoke->isDataOperand(&U))
2867 continue;
2868
2869 // Don't create trivial PHI's with all-identical incoming values.
2870 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2871 return II->getOperand(U.getOperandNo()) != U.get();
2872 });
2873 if (!NeedPHI)
2874 continue;
2875
2876 // Form a PHI out of all the data ops under this index.
2878 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2879 for (InvokeInst *II : Invokes)
2880 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2881
2882 U.set(PN);
2883 }
2884
2885 // We've ensured that each PHI node has compatible (identical) incoming values
2886 // when coming from each of the `invoke`s in the current merge set,
2887 // so update the PHI nodes accordingly.
2888 for (BasicBlock *Succ : successors(MergedInvoke))
2889 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2890 /*ExistPred=*/Invokes.front()->getParent());
2891
2892 // And finally, replace the original `invoke`s with an unconditional branch
2893 // to the block with the merged `invoke`. Also, give that merged `invoke`
2894 // the merged debugloc of all the original `invoke`s.
2895 DILocation *MergedDebugLoc = nullptr;
2896 for (InvokeInst *II : Invokes) {
2897 // Compute the debug location common to all the original `invoke`s.
2898 if (!MergedDebugLoc)
2899 MergedDebugLoc = II->getDebugLoc();
2900 else
2901 MergedDebugLoc =
2902 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2903
2904 // And replace the old `invoke` with an unconditional branch
2905 // to the block with the merged `invoke`.
2906 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2907 OrigSuccBB->removePredecessor(II->getParent());
2908 auto *BI = UncondBrInst::Create(MergedInvoke->getParent(), II->getParent());
2909 // The unconditional branch is part of the replacement for the original
2910 // invoke, so should use its DebugLoc.
2911 BI->setDebugLoc(II->getDebugLoc());
2912 bool Success = MergedInvoke->tryIntersectAttributes(II);
2913 assert(Success && "Merged invokes with incompatible attributes");
2914 // For NDEBUG Compile
2915 (void)Success;
2916 II->replaceAllUsesWith(MergedInvoke);
2917 II->eraseFromParent();
2918 ++NumInvokesMerged;
2919 }
2920 MergedInvoke->setDebugLoc(MergedDebugLoc);
2921 ++NumInvokeSetsFormed;
2922
2923 if (DTU)
2924 DTU->applyUpdates(Updates);
2925}
2926
2927/// If this block is a `landingpad` exception handling block, categorize all
2928/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2929/// being "mergeable" together, and then merge invokes in each set together.
2930///
2931/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2932/// [...] [...]
2933/// | |
2934/// [invoke0] [invoke1]
2935/// / \ / \
2936/// [cont0] [landingpad] [cont1]
2937/// to:
2938/// [...] [...]
2939/// \ /
2940/// [invoke]
2941/// / \
2942/// [cont] [landingpad]
2943///
2944/// But of course we can only do that if the invokes share the `landingpad`,
2945/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2946/// and the invoked functions are "compatible".
2949 return false;
2950
2951 bool Changed = false;
2952
2953 // FIXME: generalize to all exception handling blocks?
2954 if (!BB->isLandingPad())
2955 return Changed;
2956
2957 CompatibleSets Grouper;
2958
2959 // Record all the predecessors of this `landingpad`. As per verifier,
2960 // the only allowed predecessor is the unwind edge of an `invoke`.
2961 // We want to group "compatible" `invokes` into the same set to be merged.
2962 for (BasicBlock *PredBB : predecessors(BB))
2963 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2964
2965 // And now, merge `invoke`s that were grouped together.
2966 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2967 if (Invokes.size() < 2)
2968 continue;
2969 Changed = true;
2970 mergeCompatibleInvokesImpl(Invokes, DTU);
2971 }
2972
2973 return Changed;
2974}
2975
2976namespace {
2977/// Track ephemeral values, which should be ignored for cost-modelling
2978/// purposes. Requires walking instructions in reverse order.
2979class EphemeralValueTracker {
2980 SmallPtrSet<const Instruction *, 32> EphValues;
2981
2982 bool isEphemeral(const Instruction *I) {
2983 if (isa<AssumeInst>(I))
2984 return true;
2985 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2986 all_of(I->users(), [&](const User *U) {
2987 return EphValues.count(cast<Instruction>(U));
2988 });
2989 }
2990
2991public:
2992 bool track(const Instruction *I) {
2993 if (isEphemeral(I)) {
2994 EphValues.insert(I);
2995 return true;
2996 }
2997 return false;
2998 }
2999
3000 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3001};
3002} // namespace
3003
3004 /// Determine if we can hoist a sole store instruction out of a
3005/// conditional block.
3006///
3007/// We are looking for code like the following:
3008/// BrBB:
3009/// store i32 %add, i32* %arrayidx2
3010/// ... // No other stores or function calls (we could be calling a memory
3011/// ... // function).
3012/// %cmp = icmp ult %x, %y
3013/// br i1 %cmp, label %EndBB, label %ThenBB
3014/// ThenBB:
3015/// store i32 %add5, i32* %arrayidx2
3016/// br label EndBB
3017/// EndBB:
3018/// ...
3019/// We are going to transform this into:
3020/// BrBB:
3021/// store i32 %add, i32* %arrayidx2
3022/// ... //
3023/// %cmp = icmp ult %x, %y
3024/// %add.add5 = select i1 %cmp, i32 %add, %add5
3025/// store i32 %add.add5, i32* %arrayidx2
3026/// ...
3027///
3028 /// \return The previously stored (or loaded) value if the store can be
3029 /// hoisted into the predecessor block, nullptr otherwise.
3031 BasicBlock *StoreBB, BasicBlock *EndBB) {
3032 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3033 if (!StoreToHoist)
3034 return nullptr;
3035
3036 // Volatile or atomic.
3037 if (!StoreToHoist->isSimple())
3038 return nullptr;
3039
3040 Value *StorePtr = StoreToHoist->getPointerOperand();
3041 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3042
3043 // Look for a store to the same pointer in BrBB.
3044 unsigned MaxNumInstToLookAt = 9;
3045 // Skip pseudo probe intrinsic calls which are not really killing any memory
3046 // accesses.
3047 for (Instruction &CurI : reverse(*BrBB)) {
3048 if (!MaxNumInstToLookAt)
3049 break;
3050 --MaxNumInstToLookAt;
3051
3052 if (isa<PseudoProbeInst>(CurI))
3053 continue;
3054
3055 // Could be calling an instruction that affects memory like free().
3056 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3057 return nullptr;
3058
3059 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3060 // Found the previous store to same location and type. Make sure it is
3061 // simple, to avoid introducing a spurious non-atomic write after an
3062 // atomic write.
3063 if (SI->getPointerOperand() == StorePtr &&
3064 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3065 SI->getAlign() >= StoreToHoist->getAlign())
3066 // Found the previous store, return its value operand.
3067 return SI->getValueOperand();
3068 return nullptr; // Unknown store.
3069 }
3070
3071 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3072 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3073 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3074 Value *Obj = getUnderlyingObject(StorePtr);
3075 bool ExplicitlyDereferenceableOnly;
3076 // The dereferenceability query here is only required to satisfy the
3077 // writable contract, actual dereferenceability is proven by the
3078 // presence of an access. As such, we can ignore frees.
3079 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3082 .WithoutRet) &&
3083 (!ExplicitlyDereferenceableOnly ||
3084 isDereferenceablePointer(StorePtr, StoreTy, LI->getDataLayout(),
3085 /*IgnoreFree=*/true))) {
3086 // Found a previous load, return it.
3087 return LI;
3088 }
3089 }
3090 // The load didn't work out, but we may still find a store.
3091 }
3092 }
3093
3094 return nullptr;
3095}
3096
3097/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3098/// converted to selects.
3100 BasicBlock *EndBB,
3101 unsigned &SpeculatedInstructions,
3102 InstructionCost &Cost,
3103 const TargetTransformInfo &TTI) {
3105 BB->getParent()->hasMinSize()
3108
3109 bool HaveRewritablePHIs = false;
3110 for (PHINode &PN : EndBB->phis()) {
3111 Value *OrigV = PN.getIncomingValueForBlock(BB);
3112 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3113
3114 // FIXME: Try to remove some of the duplication with
3115 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3116 if (ThenV == OrigV)
3117 continue;
3118
3119 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3120 CmpInst::makeCmpResultType(PN.getType()),
3122
3123 // Don't convert to selects if we could remove undefined behavior instead.
3124 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3126 return false;
3127
3128 HaveRewritablePHIs = true;
3129 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3130 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3131 if (!OrigCE && !ThenCE)
3132 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3133
3134 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3135 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3136 InstructionCost MaxCost =
3138 if (OrigCost + ThenCost > MaxCost)
3139 return false;
3140
3141 // Account for the cost of an unfolded ConstantExpr which could end up
3142 // getting expanded into Instructions.
3143 // FIXME: This doesn't account for how many operations are combined in the
3144 // constant expression.
3145 ++SpeculatedInstructions;
3146 if (SpeculatedInstructions > 1)
3147 return false;
3148 }
3149
3150 return HaveRewritablePHIs;
3151}
3152
3154 std::optional<bool> Invert,
3155 const TargetTransformInfo &TTI) {
3156 // If the branch is non-unpredictable, and is predicted to *not* branch to
3157 // the `then` block, then avoid speculating it.
3158 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3159 return true;
3160
3161 uint64_t TWeight, FWeight;
3162 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3163 return true;
3164
3165 if (!Invert.has_value())
3166 return false;
3167
3168 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3169 BranchProbability BIEndProb =
3170 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3171 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3172 return BIEndProb < Likely;
3173}
3174
3175/// Speculate a conditional basic block flattening the CFG.
3176///
3177/// Note that this is a very risky transform currently. Speculating
3178/// instructions like this is most often not desirable. Instead, there is an MI
3179/// pass which can do it with full awareness of the resource constraints.
3180/// However, some cases are "obvious" and we should do directly. An example of
3181/// this is speculating a single, reasonably cheap instruction.
3182///
3183/// There is only one distinct advantage to flattening the CFG at the IR level:
3184/// it makes very common but simplistic optimizations such as are common in
3185/// instcombine and the DAG combiner more powerful by removing CFG edges and
3186/// modeling their effects with easier to reason about SSA value graphs.
3187///
3188///
3189/// An illustration of this transform is turning this IR:
3190/// \code
3191/// BB:
3192/// %cmp = icmp ult %x, %y
3193/// br i1 %cmp, label %EndBB, label %ThenBB
3194/// ThenBB:
3195/// %sub = sub %x, %y
3196 /// br label %EndBB
3197/// EndBB:
3198/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3199/// ...
3200/// \endcode
3201///
3202/// Into this IR:
3203/// \code
3204/// BB:
3205/// %cmp = icmp ult %x, %y
3206/// %sub = sub %x, %y
3207/// %cond = select i1 %cmp, 0, %sub
3208/// ...
3209/// \endcode
3210///
3211/// \returns true if the conditional block is removed.
3212bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
3213 BasicBlock *ThenBB) {
3214 if (!Options.SpeculateBlocks)
3215 return false;
3216
3217 // Be conservative for now. FP select instruction can often be expensive.
3218 Value *BrCond = BI->getCondition();
3219 if (isa<FCmpInst>(BrCond))
3220 return false;
3221
3222 BasicBlock *BB = BI->getParent();
3223 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3224 InstructionCost Budget =
3226
3227 // If ThenBB is actually on the false edge of the conditional branch, remember
3228 // to swap the select operands later.
3229 bool Invert = false;
3230 if (ThenBB != BI->getSuccessor(0)) {
3231 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3232 Invert = true;
3233 }
3234 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3235
3236 if (!isProfitableToSpeculate(BI, Invert, TTI))
3237 return false;
3238
3239 // Keep a count of how many times instructions are used within ThenBB when
3240 // they are candidates for sinking into ThenBB. Specifically:
3241 // - They are defined in BB, and
3242 // - They have no side effects, and
3243 // - All of their uses are in ThenBB.
3244 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3245
3246 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3247
3248 unsigned SpeculatedInstructions = 0;
3249 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3250 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3251 Value *SpeculatedStoreValue = nullptr;
3252 StoreInst *SpeculatedStore = nullptr;
3253 EphemeralValueTracker EphTracker;
3254 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3255 // Skip pseudo probes. The consequence is we lose track of the branch
3256 // probability for ThenBB, which is fine since the optimization here takes
3257 // place regardless of the branch probability.
3258 if (isa<PseudoProbeInst>(I)) {
3259 // The probe should be deleted so that it will not be over-counted when
3260 // the samples collected on the non-conditional path are counted towards
3261 // the conditional path. We leave it for the counts inference algorithm to
3262 // figure out a proper count for an unknown probe.
3263 SpeculatedPseudoProbes.push_back(&I);
3264 continue;
3265 }
3266
3267 // Ignore ephemeral values, they will be dropped by the transform.
3268 if (EphTracker.track(&I))
3269 continue;
3270
3271 // Only speculatively execute a single instruction (not counting the
3272 // terminator) for now.
3273 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3275 SpeculatedConditionalLoadsStores.size() <
3277 // Do not count the load/store toward the cost if the target supports
3278 // conditional faulting, because it is cheap to speculate.
3279 if (IsSafeCheapLoadStore)
3280 SpeculatedConditionalLoadsStores.push_back(&I);
3281 else
3282 ++SpeculatedInstructions;
3283
3284 if (SpeculatedInstructions > 1)
3285 return false;
3286
3287 // Don't hoist the instruction if it's unsafe or expensive.
3288 if (!IsSafeCheapLoadStore &&
3290 !(HoistCondStores && !SpeculatedStoreValue &&
3291 (SpeculatedStoreValue =
3292 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3293 return false;
3294 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3297 return false;
3298
3299 // Store the store speculation candidate.
3300 if (!SpeculatedStore && SpeculatedStoreValue)
3301 SpeculatedStore = cast<StoreInst>(&I);
3302
3303 // Do not hoist the instruction if any of its operands are defined but not
3304 // used in BB. The transformation will prevent the operand from
3305 // being sunk into the use block.
3306 for (Use &Op : I.operands()) {
3308 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3309 continue; // Not a candidate for sinking.
3310
3311 ++SinkCandidateUseCounts[OpI];
3312 }
3313 }
3314
3315 // Consider any sink candidates which are only used in ThenBB as costs for
3316 // speculation. Note, while we iterate over a DenseMap here, we are summing
3317 // and so iteration order isn't significant.
3318 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3319 if (Inst->hasNUses(Count)) {
3320 ++SpeculatedInstructions;
3321 if (SpeculatedInstructions > 1)
3322 return false;
3323 }
3324
3325 // Check that we can insert the selects and that it's not too expensive to do
3326 // so.
3327 bool Convert =
3328 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3330 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3331 SpeculatedInstructions, Cost, TTI);
3332 if (!Convert || Cost > Budget)
3333 return false;
3334
3335 // If we get here, we can hoist the instruction and if-convert.
3336 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3337
3338 Instruction *Sel = nullptr;
3339 // Insert a select of the value of the speculated store.
3340 if (SpeculatedStoreValue) {
3341 IRBuilder<NoFolder> Builder(BI);
3342 Value *OrigV = SpeculatedStore->getValueOperand();
3343 Value *TrueV = SpeculatedStore->getValueOperand();
3344 Value *FalseV = SpeculatedStoreValue;
3345 if (Invert)
3346 std::swap(TrueV, FalseV);
3347 Value *S = Builder.CreateSelect(
3348 BrCond, TrueV, FalseV, "spec.store.select", BI);
3349 Sel = cast<Instruction>(S);
3350 SpeculatedStore->setOperand(0, S);
3351 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3352 SpeculatedStore->getDebugLoc());
3353 // The value stored is still conditional, but the store itself is now
3354 // unconditionally executed, so we must be sure that any linked dbg.assign
3355 // intrinsics are tracking the new stored value (the result of the
3356 // select). If we don't, and the store were to be removed by another pass
3357 // (e.g. DSE), then we'd eventually end up emitting a location describing
3358 // the conditional value, unconditionally.
3359 //
3360 // === Before this transformation ===
3361 // pred:
3362 // store %one, %x.dest, !DIAssignID !1
3363 // dbg.assign %one, "x", ..., !1, ...
3364 // br %cond if.then
3365 //
3366 // if.then:
3367 // store %two, %x.dest, !DIAssignID !2
3368 // dbg.assign %two, "x", ..., !2, ...
3369 //
3370 // === After this transformation ===
3371 // pred:
3372 // store %one, %x.dest, !DIAssignID !1
3373 // dbg.assign %one, "x", ..., !1
3374 // ...
3375 // %merge = select %cond, %two, %one
3376 // store %merge, %x.dest, !DIAssignID !2
3377 // dbg.assign %merge, "x", ..., !2
3378 for (DbgVariableRecord *DbgAssign :
3379 at::getDVRAssignmentMarkers(SpeculatedStore))
3380 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3381 DbgAssign->replaceVariableLocationOp(OrigV, S);
3382 }
3383
3384 // Metadata can be dependent on the condition we are hoisting above.
3385 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3386 // to avoid making it appear as if the condition is a constant, which would
3387 // be misleading while debugging.
3388 // Similarly strip attributes that may be dependent on the condition we are
3389 // hoisting above.
3390 for (auto &I : make_early_inc_range(*ThenBB)) {
3391 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3392 I.dropLocation();
3393 }
3394 I.dropUBImplyingAttrsAndMetadata();
3395
3396 // Drop ephemeral values.
3397 if (EphTracker.contains(&I)) {
3398 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3399 I.eraseFromParent();
3400 }
3401 }
3402
3403 // Hoist the instructions.
3404 // Drop DbgVariableRecords attached to these instructions.
3405 for (auto &It : *ThenBB)
3406 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3407 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3408 // equivalent).
3409 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3410 !DVR || !DVR->isDbgAssign())
3411 It.dropOneDbgRecord(&DR);
3412 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3413 std::prev(ThenBB->end()));
3414
3415 if (!SpeculatedConditionalLoadsStores.empty())
3416 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3417 Sel);
3418
3419 // Insert selects and rewrite the PHI operands.
3420 IRBuilder<NoFolder> Builder(BI);
3421 for (PHINode &PN : EndBB->phis()) {
3422 unsigned OrigI = PN.getBasicBlockIndex(BB);
3423 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3424 Value *OrigV = PN.getIncomingValue(OrigI);
3425 Value *ThenV = PN.getIncomingValue(ThenI);
3426
3427 // Skip PHIs which are trivial.
3428 if (OrigV == ThenV)
3429 continue;
3430
3431 // Create a select whose true value is the speculatively executed value and
3432 // false value is the pre-existing value. Swap them if the branch
3433 // destinations were inverted.
3434 Value *TrueV = ThenV, *FalseV = OrigV;
3435 if (Invert)
3436 std::swap(TrueV, FalseV);
3437 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3438 PN.setIncomingValue(OrigI, V);
3439 PN.setIncomingValue(ThenI, V);
3440 }
3441
3442 // Remove speculated pseudo probes.
3443 for (Instruction *I : SpeculatedPseudoProbes)
3444 I->eraseFromParent();
3445
3446 ++NumSpeculations;
3447 return true;
3448}
3449
3451
3452// Return false if number of blocks searched is too much.
3453static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3454 BlocksSet &ReachesNonLocalUses) {
3455 if (BB == DefBB)
3456 return true;
3457 if (!ReachesNonLocalUses.insert(BB).second)
3458 return true;
3459
3460 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3461 return false;
3462 for (BasicBlock *Pred : predecessors(BB))
3463 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3464 return false;
3465 return true;
3466}
3467
3468/// Return true if we can thread a branch across this block.
///
/// Returns false when the block contains a call that cannot be duplicated or
/// is convergent, when it has too many non-ephemeral, non-PHI instructions
/// (see MaxSmallBlockSize), or when a value defined in the block has a user
/// that is a PHI node in this same block. Every other block containing a user
/// of a value defined here is inserted into \p NonLocalUseBlocks.
3470 BlocksSet &NonLocalUseBlocks) {
3471 int Size = 0;
3472 EphemeralValueTracker EphTracker;
3473
3474 // Walk the block in reverse so that we can identify ephemeral values properly
3475 // (values only feeding assumes).
3476 for (Instruction &I : reverse(*BB)) {
3477 // Can't fold blocks that contain noduplicate or convergent calls.
3478 if (CallInst *CI = dyn_cast<CallInst>(&I))
3479 if (CI->cannotDuplicate() || CI->isConvergent())
3480 return false;
3481
3482 // Ignore ephemeral values which are deleted during codegen.
3483 // We will delete Phis while threading, so Phis should not be accounted in
3484 // block's size.
3485 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
// Size is compared before it is incremented.
3486 if (Size++ > MaxSmallBlockSize)
3487 return false; // Don't clone large BB's.
3488 }
3489
3490 // Record blocks with non-local uses of values defined in the current basic
3491 // block.
3492 for (User *U : I.users()) {
// NOTE(review): UI is declared on a line missing from this listing;
// presumably it is U viewed as an Instruction -- confirm against the file.
3494 BasicBlock *UsedInBB = UI->getParent();
3495 if (UsedInBB == BB) {
// A PHI in this block that uses a value defined in this block: give up.
3496 if (isa<PHINode>(UI))
3497 return false;
3498 } else
3499 NonLocalUseBlocks.insert(UsedInBB);
3500 }
3501
3502 // Looks ok, continue checking.
3503 }
3504
3505 return true;
3506}
3507
3509 BasicBlock *To) {
// Returns the constant boolean that V is known to have when control flows
// along the edge From -> To, or nullptr if nothing can be concluded here.

3510 // Don't look past the block defining the value, we might get the value from
3511 // a previous loop iteration.
3512 auto *I = dyn_cast<Instruction>(V);
3513 if (I && I->getParent() == To)
3514 return nullptr;
3515
3516 // We know the value if the From block branches on it.
// If both successors are the same block, the edge taken tells us nothing
// about the condition, so that case is excluded.
3517 auto *BI = dyn_cast<CondBrInst>(From->getTerminator());
3518 if (BI && BI->getCondition() == V &&
3519 BI->getSuccessor(0) != BI->getSuccessor(1))
// Reaching To through successor 0 means the condition was true; otherwise
// it was false.
3520 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3521 : ConstantInt::getFalse(BI->getContext());
3522
3523 return nullptr;
3524}
3525
3526/// If we have a conditional branch on something for which we know the constant
3527/// value in predecessors (e.g. a phi node in the current block), thread edges
3528/// from the predecessor to their ultimate destination.
///
/// \returns true when the condition is a single-entry PHI in this block (the
/// degenerate case), false when no edge was threaded, and std::nullopt after
/// one group of predecessor edges has been threaded, which signals the caller
/// to try again.
3529static std::optional<bool>
3531 const DataLayout &DL,
3532 AssumptionCache *AC) {
3534 BasicBlock *BB = BI->getParent();
3535 Value *Cond = BI->getCondition();
// KnownValues (declared on a line missing from this listing) groups the
// predecessors of BB by the constant the condition takes when coming from
// them.
3537 if (PN && PN->getParent() == BB) {
3538 // Degenerate case of a single entry PHI.
3539 if (PN->getNumIncomingValues() == 1) {
3541 return true;
3542 }
3543
3544 for (Use &U : PN->incoming_values())
3545 if (auto *CB = dyn_cast<ConstantInt>(U))
3546 KnownValues[CB].insert(PN->getIncomingBlock(U));
3547 } else {
3548 for (BasicBlock *Pred : predecessors(BB)) {
3549 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3550 KnownValues[CB].insert(Pred);
3551 }
3552 }
3553
3554 if (KnownValues.empty())
3555 return false;
3556
3557 // Now we know that this block has multiple preds and two succs.
3558 // Check that the block is small enough and record which non-local blocks use
3559 // values defined in the block.
3560
3561 BlocksSet NonLocalUseBlocks;
3562 BlocksSet ReachesNonLocalUseBlocks;
3563 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3564 return false;
3565
3566 // Jump-threading can only be done to destinations where no values defined
3567 // in BB are live.
3568
3569 // Quickly check if both destinations have uses. If so, jump-threading cannot
3570 // be done.
3571 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3572 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3573 return false;
3574
3575 // Search backward from NonLocalUseBlocks to find which blocks
3576 // reach non-local uses.
3577 for (BasicBlock *UseBB : NonLocalUseBlocks)
3578 // Give up if too many blocks are searched.
3579 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3580 return false;
3581
3582 for (const auto &Pair : KnownValues) {
3583 ConstantInt *CB = Pair.first;
3584 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// A non-zero (true) constant selects successor 0, zero (false) selects
// successor 1.
3585 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3586
3587 // Okay, we now know that all edges from PredBB should be revectored to
3588 // branch to RealDest.
3589 if (RealDest == BB)
3590 continue; // Skip self loops.
3591
3592 // Skip if the predecessor's terminator is an indirect branch.
3593 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3594 return isa<IndirectBrInst>(PredBB->getTerminator());
3595 }))
3596 continue;
3597
3598 // Only revector to RealDest if no values defined in BB are live.
3599 if (ReachesNonLocalUseBlocks.contains(RealDest))
3600 continue;
3601
3602 LLVM_DEBUG({
3603 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3604 << " has value " << *Pair.first << " in predecessors:\n";
3605 for (const BasicBlock *PredBB : Pair.second)
3606 dbgs() << " " << PredBB->getName() << "\n";
3607 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3608 });
3609
3610 // Split the predecessors we are threading into a new edge block. We'll
3611 // clone the instructions into this block, and then redirect it to RealDest.
3612 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3613 if (!EdgeBB)
3614 continue;
3615
3616 // TODO: These just exist to reduce test diff, we can drop them if we like.
3617 EdgeBB->setName(RealDest->getName() + ".critedge");
3618 EdgeBB->moveBefore(RealDest);
3619
3620 // Update PHI nodes.
3621 addPredecessorToBlock(RealDest, EdgeBB, BB);
3622
3623 // BB may have instructions that are being threaded over. Clone these
3624 // instructions into EdgeBB. We know that there will be no uses of the
3625 // cloned instructions outside of EdgeBB.
3626 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3627 ValueToValueMapTy TranslateMap; // Track translated values.
// On the threaded edges the branch condition is the known constant.
3628 TranslateMap[Cond] = CB;
3629
3630 // RemoveDIs: track instructions that we optimise away while folding, so
3631 // that we can copy DbgVariableRecords from them later.
3632 BasicBlock::iterator SrcDbgCursor = BB->begin();
3633 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned; they are translated to their incoming value from
// EdgeBB.
3634 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3635 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3636 continue;
3637 }
3638 // Clone the instruction.
3639 Instruction *N = BBI->clone();
3640 // Insert the new instruction into its new home.
3641 N->insertInto(EdgeBB, InsertPt);
3642
3643 if (BBI->hasName())
3644 N->setName(BBI->getName() + ".c");
3645
3646 // Update operands due to translation.
3647 // Key Instructions: Remap all the atom groups.
3648 if (const DebugLoc &DL = BBI->getDebugLoc())
3649 mapAtomInstance(DL, TranslateMap);
3650 RemapInstruction(N, TranslateMap,
3652
3653 // Check for trivial simplification.
3654 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3655 if (!BBI->use_empty())
3656 TranslateMap[&*BBI] = V;
// The clone is only dropped if it has no side effects; otherwise it stays
// even though its value was simplified.
3657 if (!N->mayHaveSideEffects()) {
3658 N->eraseFromParent(); // Instruction folded away, don't need actual
3659 // inst
3660 N = nullptr;
3661 }
3662 } else {
3663 if (!BBI->use_empty())
3664 TranslateMap[&*BBI] = N;
3665 }
3666 if (N) {
3667 // Copy all debug-info attached to instructions from the last we
3668 // successfully clone, up to this instruction (they might have been
3669 // folded away).
3670 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3671 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3672 SrcDbgCursor = std::next(BBI);
3673 // Clone debug-info on this instruction too.
3674 N->cloneDebugInfoFrom(&*BBI);
3675
3676 // Register the new instruction with the assumption cache if necessary.
3677 if (auto *Assume = dyn_cast<AssumeInst>(N))
3678 if (AC)
3679 AC->registerAssumption(Assume);
3680 }
3681 }
3682
// Attach whatever debug info is left (from instructions after the last
// surviving clone, and from the branch itself) at the insertion point.
3683 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3684 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3685 InsertPt->cloneDebugInfoFrom(BI);
3686
// Redirect the edge block from BB to RealDest.
3687 BB->removePredecessor(EdgeBB);
3688 UncondBrInst *EdgeBI = cast<UncondBrInst>(EdgeBB->getTerminator());
3689 EdgeBI->setSuccessor(0, RealDest);
3690 EdgeBI->setDebugLoc(BI->getDebugLoc());
3691
3692 if (DTU) {
3694 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3695 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3696 DTU->applyUpdates(Updates);
3697 }
3698
3699 // For simplicity, we created a separate basic block for the edge. Merge
3700 // it back into the predecessor if possible. This not only avoids
3701 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3702 // bypass the check for trivial cycles above.
3703 MergeBlockIntoPredecessor(EdgeBB, DTU);
3704
3705 // Signal repeat, simplifying any other constants.
3706 return std::nullopt;
3707 }
3708
3709 return false;
3710}
3711
/// Driver that repeats the fold for as long as an attempt reports
/// std::nullopt ("changed, try again").
///
/// \returns true if any attempt changed something.
3712bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3713 // Note: If BB is a loop header then there is a risk that threading introduces
3714 // a non-canonical loop by moving a back edge. So we avoid this optimization
3715 // for loop headers if NeedCanonicalLoop is set.
3716 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3717 return false;
3718
3719 std::optional<bool> Result;
3720 bool EverChanged = false;
3721 do {
3722 // Note that std::nullopt means "we changed things, but recurse further."
// NOTE(review): the right-hand side of this assignment is on a line missing
// from this listing; presumably it calls the implementation function --
// confirm against the file.
3723 Result =
// A std::nullopt result counts as a change; otherwise the returned bool says
// whether that attempt changed anything.
3725 EverChanged |= Result == std::nullopt || *Result;
3726 } while (Result == std::nullopt);
3727 return EverChanged;
3728}
3729
3730/// Given a BB that starts with the specified two-entry PHI node,
3731/// see if we can eliminate it.
///
/// On success every PHI in the block is replaced by a select placed before
/// the dominating conditional branch, and that branch is replaced by an
/// unconditional branch to the block.
///
/// \returns true if anything was changed (including the case where PHIs were
/// merely simplified away), false otherwise.
3734 const DataLayout &DL,
3735 bool SpeculateUnpredictables) {
3736 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3737 // statement", which has a very simple dominance structure. Basically, we
3738 // are trying to find the condition that is being branched on, which
3739 // subsequently causes this merge to happen. We really want control
3740 // dependence information for this check, but simplifycfg can't keep it up
3741 // to date, and this catches most of the cases we care about anyway.
3742 BasicBlock *BB = PN->getParent();
3743
3744 BasicBlock *IfTrue, *IfFalse;
3745 CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3746 if (!DomBI)
3747 return false;
3748 Value *IfCond = DomBI->getCondition();
3749 // Don't bother if the branch will be constant folded trivially.
3750 if (isa<ConstantInt>(IfCond))
3751 return false;
3752
3753 BasicBlock *DomBlock = DomBI->getParent();
// IfBlocks collects the PHI's incoming blocks that end in an unconditional
// branch; these are the blocks whose contents would have to be speculated.
3755 llvm::copy_if(PN->blocks(), std::back_inserter(IfBlocks),
3756 [](BasicBlock *IfBlock) {
3757 return isa<UncondBrInst>(IfBlock->getTerminator());
3758 });
3759 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3760 "Will have either one or two blocks to speculate.");
3761
3762 // If the branch is non-unpredictable, see if we either predictably jump to
3763 // the merge bb (if we have only a single 'then' block), or if we predictably
3764 // jump to one specific 'then' block (if we have two of them).
3765 // It isn't beneficial to speculatively execute the code
3766 // from the block that we know is predictably not entered.
3767 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3768 if (!IsUnpredictable) {
3769 uint64_t TWeight, FWeight;
3770 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3771 (TWeight + FWeight) != 0) {
3772 BranchProbability BITrueProb =
3773 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3774 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3775 BranchProbability BIFalseProb = BITrueProb.getCompl();
3776 if (IfBlocks.size() == 1) {
3777 BranchProbability BIBBProb =
3778 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3779 if (BIBBProb >= Likely)
3780 return false;
3781 } else {
3782 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3783 return false;
3784 }
3785 }
3786 }
3787
3788 // Don't try to fold an unreachable block. For example, the phi node itself
3789 // can't be the candidate if-condition for a select that we want to form.
3790 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3791 if (IfCondPhiInst->getParent() == BB)
3792 return false;
3793
3794 // Okay, we found that we can merge this two-entry phi node into a select.
3795 // Doing so would require us to fold *all* two entry phi nodes in this block.
3796 // At some point this becomes non-profitable (particularly if the target
3797 // doesn't support cmov's). Only do this transformation if there are two or
3798 // fewer PHI nodes in this block.
// NOTE(review): NumPhis is tested before it is incremented for the current
// PHI, so this loop only bails out on reaching a fourth PHI; a block with
// three PHIs passes, which does not match "two or fewer" above -- confirm
// which is intended.
3799 unsigned NumPhis = 0;
3800 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3801 if (NumPhis > 2)
3802 return false;
3803
3804 // Loop over the PHI's seeing if we can promote them all to select
3805 // instructions. While we are at it, keep track of the instructions
3806 // that need to be moved to the dominating block.
3807 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3808 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3809 InstructionCost Cost = 0;
3810 InstructionCost Budget =
// Unpredictable branches get extra budget when the caller asked for it.
3812 if (SpeculateUnpredictables && IsUnpredictable)
3813 Budget += TTI.getBranchMispredictPenalty();
3814
3815 bool Changed = false;
3816 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
// The iterator is advanced before PN may be erased below.
3817 PHINode *PN = cast<PHINode>(II++);
3818 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3819 PN->replaceAllUsesWith(V);
3820 PN->eraseFromParent();
3821 Changed = true;
3822 continue;
3823 }
3824
3825 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3826 AggressiveInsts, Cost, Budget, TTI, AC,
3827 ZeroCostInstructions) ||
3828 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3829 AggressiveInsts, Cost, Budget, TTI, AC,
3830 ZeroCostInstructions))
3831 return Changed;
3832 }
3833
3834 // If we folded the first phi, PN dangles at this point. Refresh it. If
3835 // we ran out of PHIs then we simplified them all.
3836 PN = dyn_cast<PHINode>(BB->begin());
3837 if (!PN)
3838 return true;
3839
3840 // Don't fold i1 branches on PHIs which contain binary operators or
3841 // (possibly inverted) select form of or/ands if their parameters are
3842 // an equality test.
3843 auto IsBinOpOrAndEq = [](Value *V) {
3844 CmpPredicate Pred;
3845 if (match(V, m_CombineOr(
3847 m_BinOp(m_Cmp(Pred, m_Value(), m_Value()), m_Value()),
3848 m_BinOp(m_Value(), m_Cmp(Pred, m_Value(), m_Value()))),
3850 m_Cmp(Pred, m_Value(), m_Value()))))) {
3851 return CmpInst::isEquality(Pred);
3852 }
3853 return false;
3854 };
3855 if (PN->getType()->isIntegerTy(1) &&
3856 (IsBinOpOrAndEq(PN->getIncomingValue(0)) ||
3857 IsBinOpOrAndEq(PN->getIncomingValue(1)) || IsBinOpOrAndEq(IfCond)))
3858 return Changed;
3859
3860 // If all PHI nodes are promotable, check to make sure that all instructions
3861 // in the predecessor blocks can be promoted as well. If not, we won't be able
3862 // to get rid of the control flow, so it's not worth promoting to select
3863 // instructions.
3864 for (BasicBlock *IfBlock : IfBlocks)
3865 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3866 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3867 // This is not an aggressive instruction that we can promote.
3868 // Because of this, we won't be able to get rid of the control flow, so
3869 // the xform is not worth it.
3870 return Changed;
3871 }
3872
3873 // If either of the blocks has its address taken, we can't do this fold.
3874 if (any_of(IfBlocks,
3875 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3876 return Changed;
3877
3878 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3879 if (IsUnpredictable) dbgs() << " (unpredictable)";
3880 dbgs() << " T: " << IfTrue->getName()
3881 << " F: " << IfFalse->getName() << "\n");
3882
3883 // If we can still promote the PHI nodes after this gauntlet of tests,
3884 // do all of the PHI's now.
3885
3886 // Move all 'aggressive' instructions, which are defined in the
3887 // conditional parts of the if's up to the dominating block.
3888 for (BasicBlock *IfBlock : IfBlocks)
3889 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3890
3891 IRBuilder<NoFolder> Builder(DomBI);
3892 // Propagate fast-math-flags from phi nodes to replacement selects.
3893 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3894 // Change the PHI node into a select instruction.
3895 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3896 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3897
3898 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3899 isa<FPMathOperator>(PN) ? PN : nullptr,
3900 "", DomBI);
3901 PN->replaceAllUsesWith(Sel);
3902 Sel->takeName(PN);
3903 PN->eraseFromParent();
3904 }
3905
3906 // At this point, all IfBlocks are empty, so our if statement
3907 // has been flattened. Change DomBlock to jump directly to our new block to
3908 // avoid other simplifycfg's kicking in on the diamond.
3909 Builder.CreateBr(BB);
3910
// The dominator-tree updates are collected while DomBI (and therefore
// DomBlock's old successor list) still exists, and applied after it is erased.
3912 if (DTU) {
3913 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3914 for (auto *Successor : successors(DomBlock))
3915 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3916 }
3917
3918 DomBI->eraseFromParent();
3919 if (DTU)
3920 DTU->applyUpdates(Updates);
3921
3922 return true;
3923}
3924
3927 Value *RHS, const Twine &Name = "") {
// Combines LHS and RHS with an 'and' or 'or'. A plain binary operator is
// emitted when impliesPoison(RHS, LHS) holds; otherwise the builder's
// "logical" and/or form is used. Any opcode other than And/Or is a
// programming error.

3928 // Try to relax logical op to binary op.
3929 if (impliesPoison(RHS, LHS))
3930 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3931 if (Opc == Instruction::And)
3932 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3933 if (Opc == Instruction::Or)
3934 return Builder.CreateLogicalOr(LHS, RHS, Name);
3935 llvm_unreachable("Invalid logical opcode");
3936}
3937
3938/// Return true if either PBI or BI has branch weight available, and store
3939/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3940/// not have branch weight, use 1:1 as its weight.
///
/// When neither branch has weights the function returns false and assigns no
/// fallback values to the out-parameters, so callers should not read them in
/// that case.
3942 uint64_t &PredTrueWeight,
3943 uint64_t &PredFalseWeight,
3944 uint64_t &SuccTrueWeight,
3945 uint64_t &SuccFalseWeight) {
3946 bool PredHasWeights =
3947 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3948 bool SuccHasWeights =
3949 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3950 if (PredHasWeights || SuccHasWeights) {
// At least one side has real weights; give the other side an even 1:1 split.
3951 if (!PredHasWeights)
3952 PredTrueWeight = PredFalseWeight = 1;
3953 if (!SuccHasWeights)
3954 SuccTrueWeight = SuccFalseWeight = 1;
3955 return true;
3956 } else {
3957 return false;
3958 }
3959}
3960
3961/// Determine if the two branches share a common destination and deduce a glue
3962/// that joins the branches' conditions to arrive at the common destination if
3963/// that would be profitable.
///
/// The returned tuple holds, in order: the common successor, the opcode
/// (Or/And) used to join the two conditions, and whether the predecessor's
/// condition has to be inverted first. std::nullopt is returned when no
/// successor is shared or when the fold is judged unprofitable.
3964static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3966 const TargetTransformInfo *TTI) {
3967 assert(BI && PBI && "Both blocks must end with a conditional branches.");
3968 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3969 "PredBB must be a predecessor of BB.");
3970
3971 // We have the potential to fold the conditions together, but if the
3972 // predecessor branch is predictable, we may not want to merge them.
// PBITrueProb is only assigned when TTI is available, PBI is not marked
// unpredictable, and PBI carries branch weights with a non-zero sum;
// otherwise the isUnknown() checks below let the fold go ahead.
3973 uint64_t PTWeight, PFWeight;
3974 BranchProbability PBITrueProb, Likely;
3975 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3976 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3977 (PTWeight + PFWeight) != 0) {
3978 PBITrueProb =
3979 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3980 Likely = TTI->getPredictableBranchThreshold();
3981 }
3982
// Only the first successor pairing that matches is considered: if it matches
// but its probability test fails, the remaining pairings are not tried.
3983 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3984 // Speculate the 2nd condition unless the 1st is probably true.
3985 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3986 return {{BI->getSuccessor(0), Instruction::Or, false}};
3987 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3988 // Speculate the 2nd condition unless the 1st is probably false.
3989 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3990 return {{BI->getSuccessor(1), Instruction::And, false}};
3991 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3992 // Speculate the 2nd condition unless the 1st is probably true.
3993 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3994 return {{BI->getSuccessor(1), Instruction::And, true}};
3995 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3996 // Speculate the 2nd condition unless the 1st is probably false.
3997 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3998 return {{BI->getSuccessor(0), Instruction::Or, true}};
3999 }
4000 return std::nullopt;
4001}
4002
4004 DomTreeUpdater *DTU,
4005 MemorySSAUpdater *MSSAU,
4006 const TargetTransformInfo *TTI) {
// Performs the fold of BI's block into the predecessor ending in PBI: PBI is
// retargeted past BB and its condition becomes the and/or of both branch
// conditions. This function has a single exit and always returns true.
4007 BasicBlock *BB = BI->getParent();
4008 BasicBlock *PredBlock = PBI->getParent();
4009
4010 // Determine if the two branches share a common destination.
4011 BasicBlock *CommonSucc;
4013 bool InvertPredCond;
4014 std::tie(CommonSucc, Opc, InvertPredCond) =
4016
4017 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4018
4019 IRBuilder<> Builder(PBI);
4020 // The builder is used to create instructions to eliminate the branch in BB.
4021 // If BB's terminator has !annotation metadata, add it to the new
4022 // instructions.
4023 Builder.CollectMetadataToCopy(BB->getTerminator(),
4024 {LLVMContext::MD_annotation});
4025
4026 // If we need to invert the condition in the pred block to match, do so now.
4027 if (InvertPredCond) {
4028 InvertBranch(PBI, Builder);
4029 }
4030
// UniqueSucc is the successor of BI on the same side (true/false) as the
// side on which PBI branches to BB.
4031 BasicBlock *UniqueSucc =
4032 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4033
4034 // Before cloning instructions, notify the successor basic block that it
4035 // is about to have a new predecessor. This will update PHI nodes,
4036 // which will allow us to update live-out uses of bonus instructions.
4037 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4038
4039 // Try to update branch weights.
4040 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4041 SmallVector<uint64_t, 2> MDWeights;
4042 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4043 SuccTrueWeight, SuccFalseWeight)) {
4044
4045 if (PBI->getSuccessor(0) == BB) {
4046 // PBI: br i1 %x, BB, FalseDest
4047 // BI: br i1 %y, UniqueSucc, FalseDest
4048 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4049 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4050 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4051 // TrueWeight for PBI * FalseWeight for BI.
4052 // We assume that total weights of a CondBrInst can fit into 32 bits.
4053 // Therefore, we will not have overflow using 64-bit arithmetic.
4054 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4055 PredTrueWeight * SuccFalseWeight);
4056 } else {
4057 // PBI: br i1 %x, TrueDest, BB
4058 // BI: br i1 %y, TrueDest, UniqueSucc
4059 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4060 // FalseWeight for PBI * TrueWeight for BI.
4061 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4062 PredFalseWeight * SuccTrueWeight);
4063 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4064 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4065 }
4066
4067 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4068 /*ElideAllZero=*/true);
4069
4070 // TODO: If BB is reachable from all paths through PredBlock, then we
4071 // could replace PBI's branch probabilities with BI's.
4072 } else
// Neither branch had weights: drop any profile metadata from PBI.
4073 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4074
4075 // Now, update the CFG.
// Replace whichever successor slot of PBI pointed at BB.
4076 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4077
4078 if (DTU)
4079 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4080 {DominatorTree::Delete, PredBlock, BB}});
4081
4082 // If BI was a loop latch, it may have had associated loop metadata.
4083 // We need to copy it to the new latch, that is, PBI.
4084 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4085 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4086
4087 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): the statement that fills VMap (cloning BB's instructions into
// PredBlock) is on a line missing from this listing -- confirm against the
// file.
4089
4090 Module *M = BB->getModule();
4091
4092 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4093 for (DbgVariableRecord &DVR :
4095 RemapDbgRecord(M, &DVR, VMap,
4097 }
4098
4099 // Now that the Cond was cloned into the predecessor basic block,
4100 // or/and the two conditions together.
4101 Value *BICond = VMap[BI->getCondition()];
4102 PBI->setCondition(
4103 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
// If the combined condition came out as a select and weights were computed
// above, attach the same weights to the select.
4105 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4106 if (!MDWeights.empty()) {
4107 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4108 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4109 /*IsExpected=*/false, /*ElideAllZero=*/true);
4110 }
4111
4112 ++NumFoldBranchToCommonDest;
4113 return true;
4114}
4115
4116/// Return if an instruction's type or any of its operands' types are a vector
4117/// type.
4118static bool isVectorOp(Instruction &I) {
4119 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4120 return U->getType()->isVectorTy();
4121 });
4122}
4123
4124/// If this basic block is simple enough, and if a predecessor branches to us
4125/// and one of our successors, fold the block into the predecessor and use
4126/// logical operations to pick the right destination.
///
/// \returns true if a fold was performed. At most one predecessor is folded
/// per call.
4128 MemorySSAUpdater *MSSAU,
4129 const TargetTransformInfo *TTI,
4130 AssumptionCache *AC,
4131 unsigned BonusInstThreshold) {
4132 BasicBlock *BB = BI->getParent();
4136
4138
4140 Cond->getParent() != BB || !Cond->hasOneUse())
4141 return false;
4142
4143 // Finally, don't infinitely unroll conditional loops.
4144 if (is_contained(successors(BB), BB))
4145 return false;
4146
4147 // With which predecessors will we want to deal with?
4149 for (BasicBlock *PredBlock : predecessors(BB)) {
4150 CondBrInst *PBI = dyn_cast<CondBrInst>(PredBlock->getTerminator());
4151
4152 // Check that we have two conditional branches. If there is a PHI node in
4153 // the common successor, verify that the same value flows in from both
4154 // blocks.
4155 if (!PBI || !safeToMergeTerminators(BI, PBI))
4156 continue;
4157
4158 // Determine if the two branches share a common destination.
4159 BasicBlock *CommonSucc;
4161 bool InvertPredCond;
4162 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4163 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4164 else
4165 continue;
4166
4167 // Check the cost of inserting the necessary logic before performing the
4168 // transformation.
4169 if (TTI) {
4170 Type *Ty = BI->getCondition()->getType();
4171 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
// Inverting the predecessor's condition is charged as an extra xor unless
// that condition is a compare with a single use.
4172 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4173 !isa<CmpInst>(PBI->getCondition())))
4174 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4175
4177 continue;
4178 }
4179
4180 // Ok, we do want to deal with this predecessor. Record it.
4181 Preds.emplace_back(PredBlock);
4182 }
4183
4184 // If there aren't any predecessors into which we can fold,
4185 // don't bother checking the cost.
4186 if (Preds.empty())
4187 return false;
4188
4189 // Only allow this transformation if computing the condition doesn't involve
4190 // too many instructions and these involved instructions can be executed
4191 // unconditionally. We denote all involved instructions except the condition
4192 // as "bonus instructions", and only allow this transformation when the
4193 // number of the bonus instructions we'll need to create when cloning into
4194 // each predecessor does not exceed a certain threshold.
4195 unsigned NumBonusInsts = 0;
4196 bool SawVectorOp = false;
4197 const unsigned PredCount = Preds.size();
4198 // Speculated instructions will be inserted before the terminator of the
4199 // predecessor. Only handle the simple case of one predecessor.
4200 const Instruction *CxtI =
4201 PredCount == 1 ? Preds[0]->getTerminator() : nullptr;
4202 for (Instruction &I : *BB) {
4203 // Don't check the branch condition comparison itself.
4204 if (&I == Cond)
4205 continue;
4206 // Ignore the terminator.
4208 continue;
4209 // I must be safe to execute unconditionally.
4210 if (!isSafeToSpeculativelyExecute(&I, CxtI, AC))
4211 return false;
4212 SawVectorOp |= isVectorOp(I);
4213
4214 // Account for the cost of duplicating this instruction into each
4215 // predecessor. Ignore free instructions.
4216 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4218 NumBonusInsts += PredCount;
4219
4220 // Early exits once we reach the limit.
// While scanning, the larger (vector-multiplied) limit is used because it is
// not yet known whether a vector op will be seen; the exact limit is applied
// after the loop.
4221 if (NumBonusInsts >
4222 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4223 return false;
4224 }
4225
// A use is acceptable if it is a PHI operand incoming from BB, or a non-PHI
// user inside BB that comes after I.
4226 auto IsBCSSAUse = [BB, &I](Use &U) {
4227 auto *UI = cast<Instruction>(U.getUser());
4228 if (auto *PN = dyn_cast<PHINode>(UI))
4229 return PN->getIncomingBlock(U) == BB;
4230 return UI->getParent() == BB && I.comesBefore(UI);
4231 };
4232
4233 // Does this instruction require rewriting of uses?
4234 if (!all_of(I.uses(), IsBCSSAUse))
4235 return false;
4236 }
4237 if (NumBonusInsts >
4238 BonusInstThreshold *
4239 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4240 return false;
4241
4242 // Ok, we have the budget. Perform the transformation.
// The loop body returns unconditionally, so only the first recorded
// predecessor is folded in this call.
4243 for (BasicBlock *PredBlock : Preds) {
4244 auto *PBI = cast<CondBrInst>(PredBlock->getTerminator());
4245 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4246 }
4247 return false;
4248}
4249
4250// If there is only one store in BB1 and BB2, return it, otherwise return
4251// nullptr.
// The count is taken across both blocks combined: nullptr is returned both
// when no store is found and when more than one is found. Either block may be
// null, in which case it is skipped.
4253 StoreInst *S = nullptr;
4254 for (auto *BB : {BB1, BB2}) {
4255 if (!BB)
4256 continue;
4257 for (auto &I : *BB)
4258 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4259 if (S)
4260 // Multiple stores seen.
4261 return nullptr;
4262 else
4263 S = SI;
4264 }
4265 }
4266 return S;
4267}
4268
4270 Value *AlternativeV = nullptr) {
4271 // PHI is going to be a PHI node that allows the value V that is defined in
4272 // BB to be referenced in BB's only successor.
4273 //
4274 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4275 // doesn't matter to us what the other operand is (it'll never get used). We
4276 // could just create a new PHI with a poison incoming value, but that could
4277 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4278 // other PHI. So here we directly look for some PHI in BB's successor with V
4279 // as an incoming operand. If we find one, we use it, else we create a new
4280 // one.
4281 //
4282 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4283 // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
4284 // where OtherBB is the single other predecessor of BB's only successor.
4285 PHINode *PHI = nullptr;
4286 BasicBlock *Succ = BB->getSingleSuccessor();
4287
// Look for an existing PHI in the successor that already has the required
// incoming value(s).
4288 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4289 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4290 PHI = cast<PHINode>(I);
4291 if (!AlternativeV)
4292 break;
4293
4294 assert(Succ->hasNPredecessors(2));
4295 auto PredI = pred_begin(Succ);
4296 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4297 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4298 break;
// V matched but the other incoming value did not; keep searching.
4299 PHI = nullptr;
4300 }
4301 if (PHI)
4302 return PHI;
4303
4304 // If V is not an instruction defined in BB, just return it.
4305 if (!AlternativeV &&
4306 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4307 return V;
4308
// No suitable PHI exists: create one at the start of the successor. Every
// predecessor other than BB gets AlternativeV if one was given, otherwise
// poison.
4309 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4310 PHI->insertBefore(Succ->begin());
4311 PHI->addIncoming(V, BB);
4312 for (BasicBlock *PredBB : predecessors(Succ))
4313 if (PredBB != BB)
4314 PHI->addIncoming(
4315 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4316 return PHI;
4317}
4318
// Merges one conditional store from the P blocks (PTB/PFB) and one from the Q
// blocks (QTB/QFB) into a single store to Address. The merged store is placed
// in a new conditional block split off from PostBB and is guarded by the OR of
// the two (possibly inverted) branch conditions.
//
// PTB and QTB may be nullptr: IsWorthwhile and the QTB scan both guard for it.
// InvertPCond / InvertQCond are flipped again below when the store lives in
// the block other than the "true" one.
//
// Returns false without sinking anything when a bail-out check fails; returns
// true after the merged store is created and both original stores are erased.
//
// NOTE(review): this listing omits several source lines (numbering skips 4319,
// 4377, 4388, 4404, 4427, 4429, 4433, 4435, 4454 and 4463). The function-name
// line and parts of some statements below are therefore not visible here.
// NOTE(review): nothing visible checks that PStore/QStore actually write to
// Address; presumably the caller guarantees it - confirm.
4320 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4321 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4322 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4323 // For every pointer, there must be exactly two stores, one coming from
4324 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4325 // store (to any address) in PTB,PFB or QTB,QFB.
4326 // FIXME: We could relax this restriction with a bit more work and performance
4327 // testing.
4328 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4329 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4330 if (!PStore || !QStore)
4331 return false;
4332
4333 // Now check the stores are compatible.
// Both stores must be unordered and agree on ordering, sync scope and the
// type of the stored value.
4334 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4335 PStore->getOrdering() != QStore->getOrdering() ||
4336 PStore->getSyncScopeID() != QStore->getSyncScopeID() ||
4337 PStore->getValueOperand()->getType() !=
4338 QStore->getValueOperand()->getType())
4339 return false;
4340
4341 // Check that sinking the store won't cause program behavior changes. Sinking
4342 // the store out of the Q blocks won't change any behavior as we're sinking
4343 // from a block to its unconditional successor. But we're moving a store from
4344 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4345 // So we need to check that there are no aliasing loads or stores in
4346 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4347 // operations between PStore and the end of its parent block.
4348 //
4349 // The ideal way to do this is to query AliasAnalysis, but we don't
4350 // preserve AA currently so that is dangerous. Be super safe and just
4351 // check there are no other memory operations at all.
// QFB's single predecessor is the middle block referred to as QBI above.
4352 for (auto &I : *QFB->getSinglePredecessor())
4353 if (I.mayReadOrWriteMemory())
4354 return false;
4355 for (auto &I : *QFB)
4356 if (&I != QStore && I.mayReadOrWriteMemory())
4357 return false;
4358 if (QTB)
4359 for (auto &I : *QTB)
4360 if (&I != QStore && I.mayReadOrWriteMemory())
4361 return false;
// Scan from PStore to the end of its own block.
4362 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4363 I != E; ++I)
4364 if (&*I != PStore && I->mayReadOrWriteMemory())
4365 return false;
4366
4367 // If we're not in aggressive mode, we only optimize if we have some
4368 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4369 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
// A null block stands for a fallthrough edge and is always acceptable.
4370 if (!BB)
4371 return true;
4372 // Heuristic: if the block can be if-converted/phi-folded and the
4373 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4374 // thread this store.
4375 InstructionCost Cost = 0;
// NOTE(review): the initializer of Budget is on a line missing from this
// listing (4377).
4376 InstructionCost Budget =
4378 for (auto &I : *BB) {
4379 // Consider terminator instruction to be free.
4380 if (I.isTerminator())
4381 continue;
4382 // If this is one of the stores that we want to speculate out of this BB,
4383 // then don't count its cost, consider it to be free.
// NOTE(review): llvm::find returns an iterator, not a bool. Used as a
// condition it does not test membership (an ArrayRef iterator is a pointer,
// non-null for a non-empty array even when nothing matches).
// llvm::is_contained(FreeStores, S), already used further down in this
// function, looks like the intended call - confirm.
4384 if (auto *S = dyn_cast<StoreInst>(&I))
4385 if (llvm::find(FreeStores, S))
4386 continue;
4387 // Else, we have a white-list of instructions that we are ok speculating.
// NOTE(review): the white-list condition itself is on a line missing from
// this listing (4388).
4389 return false; // Not in white-list - not worthwhile folding.
4390 // And finally, if this is a non-free instruction that we are okay
4391 // speculating, ensure that we consider the speculation budget.
4392 Cost +=
4393 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4394 if (Cost > Budget)
4395 return false; // Eagerly refuse to fold as soon as we're out of budget.
4396 }
4397 assert(Cost <= Budget &&
4398 "When we run out of budget we will eagerly return from within the "
4399 "per-instruction loop.");
4400 return true;
4401 };
4402
4403 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): the opening of this `if` is on a line missing from this
// listing (4404); per the comment above it presumably tests the
// "aggressive mode" setting - confirm.
4405 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4406 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4407 return false;
4408
4409 // If PostBB has more than two predecessors, we need to split it so we can
4410 // sink the store.
4411 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4412 // We know that QFB's only successor is PostBB. And QFB has a single
4413 // predecessor. If QTB exists, then its only successor is also PostBB.
4414 // If QTB does not exist, then QFB's only predecessor has a conditional
4415 // branch to QFB and PostBB.
4416 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4417 BasicBlock *NewBB =
4418 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4419 if (!NewBB)
4420 return false;
// From here on the merged store is built in the newly split block.
4421 PostBB = NewBB;
4422 }
4423
4424 // OK, we're going to sink the stores to PostBB. The store has to be
4425 // conditional though, so first create the predicate.
// NOTE(review): the initializers of PBranch/QBranch and the declarations of
// PPHI/QPHI are on lines missing from this listing (4427, 4429, 4433, 4435).
4426 CondBrInst *PBranch =
4428 CondBrInst *QBranch =
4430 Value *PCond = PBranch->getCondition();
4431 Value *QCond = QBranch->getCondition();
4432
4434 PStore->getParent());
4436 QStore->getParent(), PPHI);
4437
4438 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4439 IRBuilder<> QB(PostBB, PostBBFirst);
4440 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4441
// If a store sits in the block other than the "true" one, flip the
// corresponding inversion flag.
4442 InvertPCond ^= (PStore->getParent() != PTB);
4443 InvertQCond ^= (QStore->getParent() != QTB);
4444 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4445 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4446
// The merged store is guarded by the OR of the two predicates.
4447 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4448
4449 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4450 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4451 /*Unreachable=*/false,
4452 /*BranchWeights=*/nullptr, DTU);
// Derive branch weights for the new conditional from the weights of both
// original branches, swapping each pair when its condition was inverted.
// NOTE(review): the end of this condition (4454) and the start of the call
// that consumes CombinedWeights (4463) are missing from this listing.
4453 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4455 SmallVector<uint32_t, 2> PWeights, QWeights;
4456 extractBranchWeights(*PBranch, PWeights);
4457 extractBranchWeights(*QBranch, QWeights);
4458 if (InvertPCond)
4459 std::swap(PWeights[0], PWeights[1]);
4460 if (InvertQCond)
4461 std::swap(QWeights[0], QWeights[1]);
4462 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4464 {CombinedWeights[0], CombinedWeights[1]},
4465 /*IsExpected=*/false, /*ElideAllZero=*/true);
4466 }
4467
// Emit the merged store immediately before T, the value returned by
// SplitBlockAndInsertIfThen.
4468 QB.SetInsertPoint(T);
4469 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
// Combine the metadata of the two stores on QStore, then copy it to SI.
4470 combineMetadataForCSE(QStore, PStore, true);
4471 SI->copyMetadata(*QStore);
4472 // Update any dbg.assign intrinsics to track the merged value (QPHI) instead
4473 // of the original constant values, likely making these identical.
4474 for (auto *DbgAssign : at::getDVRAssignmentMarkers(SI)) {
4475 if (llvm::is_contained(DbgAssign->location_ops(),
4476 PStore->getValueOperand()))
4477 DbgAssign->replaceVariableLocationOp(PStore->getValueOperand(), QPHI);
4478 if (llvm::is_contained(DbgAssign->location_ops(),
4479 QStore->getValueOperand()))
4480 DbgAssign->replaceVariableLocationOp(QStore->getValueOperand(), QPHI);
4481 }
4482
4483 // Choose the minimum alignment. If we could prove both stores execute, we
4484 // could use biggest one. In this case, though, we only know that one of the
4485 // stores executes. And we don't know it's safe to take the alignment from a
4486 // store that doesn't execute.
4487 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4488
// Ordering and sync scope were checked to be equal above, so QStore's are
// representative of both stores.
4489 if (QStore->isAtomic())
4490 SI->setAtomic(QStore->getOrdering(), QStore->getSyncScopeID());
4491
4492 QStore->eraseFromParent();
4493 PStore->eraseFromParent();
4494
4495 return true;
4496}
4497
// Looks for two consecutive diamonds/triangles headed by the conditional
// branches PBI and QBI whose conditional blocks store to a common address, and
// calls mergeConditionalStoreToAddress once per common address.
// Returns true if any of those calls reported a change.
//
// NOTE(review): the line carrying the function name and leading parameters
// (listing line 4498) is missing here; PBI and QBI are used in the body as the
// two branch instructions.
4499 DomTreeUpdater *DTU, const DataLayout &DL,
4500 const TargetTransformInfo &TTI) {
4501 // The intention here is to find diamonds or triangles (see below) where each
4502 // conditional block contains a store to the same address. Both of these
4503 // stores are conditional, so they can't be unconditionally sunk. But it may
4504 // be profitable to speculatively sink the stores into one merged store at the
4505 // end, and predicate the merged store on the union of the two conditions of
4506 // PBI and QBI.
4507 //
4508 // This can reduce the number of stores executed if both of the conditions are
4509 // true, and can allow the blocks to become small enough to be if-converted.
4510 // This optimization will also chain, so that ladders of test-and-set
4511 // sequences can be if-converted away.
4512 //
4513 // We only deal with simple diamonds or triangles:
4514 //
4515 //     PBI       or      PBI        or a combination of the two
4516 //    /   \               | \
4517 //   PTB  PFB             |  PFB
4518 //    \   /               | /
4519 //     QBI                QBI
4520 //    /  \                | \
4521 //   QTB  QFB             |  QFB
4522 //    \  /                | /
4523 //    PostBB            PostBB
4524 //
4525 // We model triangles as a type of diamond with a nullptr "true" block.
4526 // Triangles are canonicalized so that the fallthrough edge is represented by
4527 // a true condition, as in the diagram above.
4528 BasicBlock *PTB = PBI->getSuccessor(0);
4529 BasicBlock *PFB = PBI->getSuccessor(1);
4530 BasicBlock *QTB = QBI->getSuccessor(0);
4531 BasicBlock *QFB = QBI->getSuccessor(1);
4532 BasicBlock *PostBB = QFB->getSingleSuccessor();
4533
4534 // Make sure we have a good guess for PostBB. If QTB's only successor is
4535 // QFB, then QFB is a better PostBB.
4536 if (QTB->getSingleSuccessor() == QFB)
4537 PostBB = QFB;
4538
4539 // If we couldn't find a good PostBB, stop.
4540 if (!PostBB)
4541 return false;
4542
4543 bool InvertPCond = false, InvertQCond = false;
4544 // Canonicalize fallthroughs to the true branches.
// Each swap is recorded in the Invert flag handed to the callee.
4545 if (PFB == QBI->getParent()) {
4546 std::swap(PFB, PTB);
4547 InvertPCond = true;
4548 }
4549 if (QFB == PostBB) {
4550 std::swap(QFB, QTB);
4551 InvertQCond = true;
4552 }
4553
4554 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4555 // and QFB may not. Model fallthroughs as a nullptr block.
4556 if (PTB == QBI->getParent())
4557 PTB = nullptr;
4558 if (QTB == PostBB)
4559 QTB = nullptr;
4560
4561 // Legality bailouts. We must have at least the non-fallthrough blocks and
4562 // the post-dominating block, and the non-fallthroughs must only have one
4563 // predecessor.
// True when BB's single predecessor is P and its single successor is S.
4564 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4565 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4566 };
4567 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4568 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4569 return false;
4570 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4571 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4572 return false;
// The middle block must have exactly two uses.
4573 if (!QBI->getParent()->hasNUses(2))
4574 return false;
4575
4576 // OK, this is a sequence of two diamonds or triangles.
4577 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
// NOTE(review): in both loops below the line that introduces SI (listing
// lines 4583 and 4590) is missing; presumably it filters instructions down to
// stores - confirm.
4578 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4579 for (auto *BB : {PTB, PFB}) {
4580 if (!BB)
4581 continue;
4582 for (auto &I : *BB)
4584 PStoreAddresses.insert(SI->getPointerOperand());
4585 }
4586 for (auto *BB : {QTB, QFB}) {
4587 if (!BB)
4588 continue;
4589 for (auto &I : *BB)
4591 QStoreAddresses.insert(SI->getPointerOperand());
4592 }
4593
4594 set_intersect(PStoreAddresses, QStoreAddresses);
4595 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4596 // clear what it contains.
4597 auto &CommonAddresses = PStoreAddresses;
4598
// Attempt the merge for every common address; any success counts as a change.
4599 bool Changed = false;
4600 for (auto *Address : CommonAddresses)
4601 Changed |=
4602 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4603 InvertPCond, InvertQCond, DTU, DL, TTI);
4604 return Changed;
4605}
4606
4607/// If the previous block ended with a widenable branch, determine if reusing
4608/// the target block is profitable and legal. This will have the effect of
4609/// "widening" PBI, but doesn't require us to reason about hoisting safety.
///
/// On success one successor of BI is redirected to PBI's false successor, the
/// dominator tree updates are applied when DTU is non-null, and true is
/// returned. Otherwise nothing is changed and false is returned.
///
/// NOTE(review): the line carrying the function name and the PBI/BI
/// parameters (listing line 4610) is missing from this listing.
4611 DomTreeUpdater *DTU) {
4612 // TODO: This can be generalized in two important ways:
4613 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4614 // values from the PBI edge.
4615 // 2) We can sink side effecting instructions into BI's fallthrough
4616 // successor provided they don't contribute to computation of
4617 // BI's condition.
4618 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4619 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// PBI must be a widenable branch whose true successor is BI's block, and that
// block must have a single predecessor.
4620 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4621 !BI->getParent()->getSinglePredecessor())
4622 return false;
4623 if (!IfFalseBB->phis().empty())
4624 return false; // TODO
4625 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4626 // may undo the transform done here.
4627 // TODO: There might be a more fine-grained solution to this.
4628 if (!llvm::succ_empty(IfFalseBB))
4629 return false;
4630 // Use lambda to lazily compute expensive condition after cheap ones.
4631 auto NoSideEffects = [](BasicBlock &BB) {
4632 return llvm::none_of(BB, [](const Instruction &I) {
4633 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4634 });
4635 };
// Case 1: BI's false successor ends in a deoptimize call; retarget that edge
// to IfFalseBB.
4636 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4637 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4638 NoSideEffects(*BI->getParent())) {
4639 auto *OldSuccessor = BI->getSuccessor(1);
4640 OldSuccessor->removePredecessor(BI->getParent());
4641 BI->setSuccessor(1, IfFalseBB);
4642 if (DTU)
4643 DTU->applyUpdates(
4644 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4645 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4646 return true;
4647 }
// Case 2: the same transform applied to BI's true successor.
4648 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4649 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4650 NoSideEffects(*BI->getParent())) {
4651 auto *OldSuccessor = BI->getSuccessor(0);
4652 OldSuccessor->removePredecessor(BI->getParent());
4653 BI->setSuccessor(0, IfFalseBB);
4654 if (DTU)
4655 DTU->applyUpdates(
4656 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4657 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4658 return true;
4659 }
4660 return false;
4661}
4662
4663/// If we have a conditional branch as a predecessor of another block,
4664/// this function tries to simplify it. We know
4665/// that PBI and BI are both conditional branches, and BI is in one of the
4666/// successor blocks of PBI - PBI branches to BI.
///
/// Tried in order: (1) turn BI into a branch on a constant when it tests the
/// same condition as PBI and BI's block has a single predecessor, (2) widen
/// PBI, (3) merge conditional stores (when MergeCondStores is set), (4) fold
/// both branches into one branch in PBI on a combined condition.
/// Returns true on every path that modified the IR, false otherwise.
///
/// NOTE(review): this listing omits several source lines (numbering skips
/// 4667, 4739, 4767, 4840 and 4866), including the line with the function name
/// and the PBI/BI parameters and the declarations of `CommonDestProb`,
/// `Updates` and `NV`, which are used below.
4668 DomTreeUpdater *DTU,
4669 const DataLayout &DL,
4670 const TargetTransformInfo &TTI) {
4671 BasicBlock *BB = BI->getParent();
4672
4673 // If this block ends with a branch instruction, and if there is a
4674 // predecessor that ends on a branch of the same condition, make
4675 // this conditional branch redundant.
4676 if (PBI->getCondition() == BI->getCondition() &&
4677 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4678 // Okay, the outcome of this conditional branch is statically
4679 // knowable. If this block had a single pred, handle specially, otherwise
4680 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4681 if (BB->getSinglePredecessor()) {
4682 // Turn this into a branch on constant.
4683 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4684 BI->setCondition(
4685 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4686 return true; // Nuke the branch on constant.
4687 }
4688 }
4689
4690 // If the previous block ended with a widenable branch, determine if reusing
4691 // the target block is profitable and legal. This will have the effect of
4692 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4693 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4694 return true;
4695
4696 // If both branches are conditional and both contain stores to the same
4697 // address, remove the stores from the conditionals and create a conditional
4698 // merged store at the end.
4699 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4700 return true;
4701
4702 // If this is a conditional branch in an empty block, and if any
4703 // predecessors are a conditional branch to one of our destinations,
4704 // fold the conditions into logical ops and one cond br.
4705
4706 // Ignore dbg intrinsics.
// BI must be the first instruction of BB.
4707 if (&*BB->begin() != BI)
4708 return false;
4709
// Find a destination shared by PBI and BI: PBIOp / BIOp are the successor
// indices through which PBI and BI reach it.
4710 int PBIOp, BIOp;
4711 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4712 PBIOp = 0;
4713 BIOp = 0;
4714 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4715 PBIOp = 0;
4716 BIOp = 1;
4717 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4718 PBIOp = 1;
4719 BIOp = 0;
4720 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4721 PBIOp = 1;
4722 BIOp = 1;
4723 } else {
4724 return false;
4725 }
4726
4727 // Check to make sure that the other destination of this branch
4728 // isn't BB itself. If so, this is an infinite loop that will
4729 // keep getting unwound.
4730 if (PBI->getSuccessor(PBIOp) == BB)
4731 return false;
4732
4733 // If predecessor's branch probability to BB is too low don't merge branches.
4734 SmallVector<uint32_t, 2> PredWeights;
4735 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4736 extractBranchWeights(*PBI, PredWeights) &&
4737 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4738
// NOTE(review): the start of the CommonDestProb declaration (listing line
// 4739) is missing; the two lines below are its trailing arguments: the
// weight of the edge to the common destination and the sum of both weights.
4740 PredWeights[PBIOp],
4741 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4742
4743 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4744 if (CommonDestProb >= Likely)
4745 return false;
4746 }
4747
4748 // Do not perform this transformation if it would require
4749 // insertion of a large number of select instructions. For targets
4750 // without predication/cmovs, this is a big pessimization.
4751
4752 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4753 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
// NumPhis is tested before it is incremented, so the bail-out triggers on the
// fourth PHI: up to three PHIs in CommonDest are accepted.
4754 unsigned NumPhis = 0;
4755 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4756 ++II, ++NumPhis) {
4757 if (NumPhis > 2) // Disable this xform.
4758 return false;
4759 }
4760
4761 // Finally, if everything is ok, fold the branches to logical ops.
4762 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4763
4764 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4765 << "AND: " << *BI->getParent());
4766
4768
4769 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4770 // branch in it, where one edge (OtherDest) goes back to itself but the other
4771 // exits. We don't *know* that the program avoids the infinite loop
4772 // (even though that seems likely). If we do this xform naively, we'll end up
4773 // recursively unpeeling the loop. Since we know that (after the xform is
4774 // done) that the block *is* infinite if reached, we just make it an obviously
4775 // infinite loop with no cond branch.
4776 if (OtherDest == BB) {
4777 // Insert it at the end of the function, because it's either cold,
4778 // or it won't matter if it's hot. :)
4779 BasicBlock *InfLoopBlock =
4780 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4781 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
4782 if (DTU)
4783 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4784 OtherDest = InfLoopBlock;
4785 }
4786
4787 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4788
4789 // BI may have other predecessors. Because of this, we leave
4790 // it alone, but modify PBI.
4791
4792 // Make sure we get to CommonDest on True&True directions.
// Negate a condition when the common destination is reached through its
// false edge (successor index 1).
4793 Value *PBICond = PBI->getCondition();
4794 IRBuilder<NoFolder> Builder(PBI);
4795 if (PBIOp)
4796 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4797
4798 Value *BICond = BI->getCondition();
4799 if (BIOp)
4800 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4801
4802 // Merge the conditions.
4803 Value *Cond =
4804 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4805
4806 // Modify PBI to branch on the new condition to the new dests.
4807 PBI->setCondition(Cond);
4808 PBI->setSuccessor(0, CommonDest);
4809 PBI->setSuccessor(1, OtherDest);
4810
4811 if (DTU) {
4812 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4813 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4814
4815 DTU->applyUpdates(Updates);
4816 }
4817
4818 // Update branch weight for PBI.
4819 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4820 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4821 bool HasWeights =
4822 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4823 SuccTrueWeight, SuccFalseWeight);
4824 if (HasWeights) {
4825 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4826 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4827 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4828 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4829 // The weight to CommonDest should be PredCommon * SuccTotal +
4830 // PredOther * SuccCommon.
4831 // The weight to OtherDest should be PredOther * SuccOther.
4832 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4833 PredOther * SuccCommon,
4834 PredOther * SuccOther};
4835
4836 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4837 /*ElideAllZero=*/true);
4838 // Cond may be a select instruction with the first operand set to "true", or
4839 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4841 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4842 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4843 // The select is predicated on PBICond
4844 assert(SI->getCondition() == PBICond);
4845 // The corresponding probabilities are what was referred to above as
4846 // PredCommon and PredOther.
4847 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4848 /*IsExpected=*/false, /*ElideAllZero=*/true);
4849 }
4850 }
4851
4852 // OtherDest may have phi nodes. If so, add an entry from PBI's
4853 // block that are identical to the entries for BI's block.
4854 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4855
4856 // We know that the CommonDest already had an edge from PBI to
4857 // it. If it has PHIs though, the PHIs may have different
4858 // entries for BB and PBI's BB. If so, insert a select to make
4859 // them agree.
4860 for (PHINode &PN : CommonDest->phis()) {
4861 Value *BIV = PN.getIncomingValueForBlock(BB);
4862 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4863 Value *PBIV = PN.getIncomingValue(PBBIdx);
4864 if (BIV != PBIV) {
4865 // Insert a select in PBI to pick the right value.
// NOTE(review): the declaration of NV (listing line 4866) is missing; the
// line below is the select it is initialised from.
4867 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4868 PN.setIncomingValue(PBBIdx, NV);
4869 // The select has the same condition as PBI, in the same BB. The
4870 // probabilities don't change.
4871 if (HasWeights) {
4872 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4873 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4874 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4875 /*IsExpected=*/false, /*ElideAllZero=*/true);
4876 }
4877 }
4878 }
4879
4880 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4881 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4882
4883 // This basic block is probably dead. We know it has at least
4884 // one fewer predecessor.
4885 return true;
4886}
4887
4888// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4889// true or to FalseBB if Cond is false.
4890// Takes care of updating the successors and removing the old terminator.
4891// Also makes sure not to introduce new successors by assuming that edges to
4892// non-successor TrueBBs and FalseBBs aren't reachable.
//
// TrueWeight / FalseWeight are attached as branch weights only when a
// conditional branch is created. Every visible path returns true.
4893bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4894 Value *Cond, BasicBlock *TrueBB,
4895 BasicBlock *FalseBB,
4896 uint32_t TrueWeight,
4897 uint32_t FalseWeight) {
4898 auto *BB = OldTerm->getParent();
4899 // Remove any superfluous successor edges from the CFG.
4900 // First, figure out which successors to preserve.
4901 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4902 // successor.
// KeepEdge1/KeepEdge2 are reset to nullptr once the matching successor has
// been seen; a value still non-null after the loop means that block was not a
// successor of OldTerm.
4903 BasicBlock *KeepEdge1 = TrueBB;
4904 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4905
4906 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4907
4908 // Then remove the rest.
4909 for (BasicBlock *Succ : successors(OldTerm)) {
4910 // Make sure only to keep exactly one copy of each edge.
4911 if (Succ == KeepEdge1)
4912 KeepEdge1 = nullptr;
4913 else if (Succ == KeepEdge2)
4914 KeepEdge2 = nullptr;
4915 else {
4916 Succ->removePredecessor(BB,
4917 /*KeepOneInputPHIs=*/true);
4918
// Duplicate edges to TrueBB/FalseBB are dropped from the PHIs above but are
// not recorded as removed successors, since one edge to them is kept.
4919 if (Succ != TrueBB && Succ != FalseBB)
4920 RemovedSuccessors.insert(Succ);
4921 }
4922 }
4923
4924 IRBuilder<> Builder(OldTerm);
4925 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4926
4927 // Insert an appropriate new terminator.
4928 if (!KeepEdge1 && !KeepEdge2) {
4929 if (TrueBB == FalseBB) {
4930 // We were only looking for one successor, and it was present.
4931 // Create an unconditional branch to it.
4932 Builder.CreateBr(TrueBB);
4933 } else {
4934 // We found both of the successors we were looking for.
4935 // Create a conditional branch sharing the condition of the select.
4936 CondBrInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4937 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4938 /*IsExpected=*/false, /*ElideAllZero=*/true);
4939 }
4940 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4941 // Neither of the selected blocks were successors, so this
4942 // terminator must be unreachable.
4943 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4944 } else {
4945 // One of the selected values was a successor, but the other wasn't.
4946 // Insert an unconditional branch to the one that was found;
4947 // the edge to the one that wasn't must be unreachable.
4948 if (!KeepEdge1) {
4949 // Only TrueBB was found.
4950 Builder.CreateBr(TrueBB);
4951 } else {
4952 // Only FalseBB was found.
4953 Builder.CreateBr(FalseBB);
4954 }
4955 }
4956
// NOTE(review): listing line 4957 is missing here; per the header comment it
// presumably removes the old terminator - confirm.
4958
// Report every fully removed successor edge to the dominator tree updater.
4959 if (DTU) {
4960 SmallVector<DominatorTree::UpdateType, 2> Updates;
4961 Updates.reserve(RemovedSuccessors.size());
4962 for (auto *RemovedSuccessor : RemovedSuccessors)
4963 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4964 DTU->applyUpdates(Updates);
4965 }
4966
4967 return true;
4968}
4969
4970// Replaces
4971// (switch (select cond, X, Y)) on constant X, Y
4972// with a branch - conditional if X and Y lead to distinct BBs,
4973// unconditional otherwise.
4974bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4975 SelectInst *Select) {
4976 // Check for constant integer values in the select.
4977 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4978 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4979 if (!TrueVal || !FalseVal)
4980 return false;
4981
4982 // Find the relevant condition and destinations.
4983 Value *Condition = Select->getCondition();
4984 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4985 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4986
4987 // Get weight for TrueBB and FalseBB.
4988 uint32_t TrueWeight = 0, FalseWeight = 0;
4989 SmallVector<uint64_t, 8> Weights;
4990 bool HasWeights = hasBranchWeightMD(*SI);
4991 if (HasWeights) {
4992 getBranchWeights(SI, Weights);
4993 if (Weights.size() == 1 + SI->getNumCases()) {
4994 TrueWeight =
4995 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4996 FalseWeight =
4997 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4998 }
4999 }
5000
5001 // Perform the actual simplification.
5002 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
5003 FalseWeight);
5004}
5005
5006// Replaces
5007// (indirectbr (select cond, blockaddress(@fn, BlockA),
5008// blockaddress(@fn, BlockB)))
5009// with
5010// (br cond, BlockA, BlockB).
//
// Returns false, leaving the IR untouched, unless both select operands are
// BlockAddress constants; otherwise returns the result of
// simplifyTerminatorOnSelect.
5011bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5012 SelectInst *SI) {
5013 // Check that both operands of the select are block addresses.
5014 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
5015 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
5016 if (!TBA || !FBA)
5017 return false;
5018
5019 // Extract the actual blocks.
5020 BasicBlock *TrueBB = TBA->getBasicBlock();
5021 BasicBlock *FalseBB = FBA->getBasicBlock();
5022
5023 // The select's profile becomes the profile of the conditional branch that
5024 // replaces the indirect branch.
// The vector is created with two value-initialised (zero) entries, which are
// what gets passed on if extractBranchWeights writes nothing.
// NOTE(review): listing line 5026 is missing here; the call below may be
// guarded by a condition on that line - confirm.
5025 SmallVector<uint32_t> SelectBranchWeights(2);
5027 extractBranchWeights(*SI, SelectBranchWeights);
5028 // Perform the actual simplification.
5029 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
5030 SelectBranchWeights[0],
5031 SelectBranchWeights[1]);
5032}
5033
5034/// This is called when we find an icmp instruction
5035/// (a seteq/setne with a constant) as the only instruction in a
5036/// block that ends with an uncond branch. We are looking for a very specific
5037/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5038/// this case, we merge the first two "or's of icmp" into a switch, but then the
5039/// default value goes to an uncond block with a seteq in it, we get something
5040/// like:
5041///
5042/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5043/// DEFAULT:
5044/// %tmp = icmp eq i8 %A, 92
5045/// br label %end
5046/// end:
5047/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5048///
5049/// We prefer to split the edge to 'end' so that there is a true/false entry to
5050/// the PHI, merging the third icmp into the switch.
5051bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5052 ICmpInst *ICI, IRBuilder<> &Builder) {
5053 // Select == nullptr means we assume that there is a hidden no-op select
5054 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5055 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5056}
5057
5058/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5059/// case. This is called when we find an icmp instruction (a seteq/setne with a
5060/// constant) and its following select instruction as the only TWO instructions
5061/// in a block that ends with an uncond branch. We are looking for a very
5062/// specific pattern that occurs when "
5063/// if (A == 1) return C1;
5064/// if (A == 2) return C2;
5065/// if (A < 3) return C3;
5066/// return C4;
5067/// " gets simplified. In this case, we merge the first two "branches of icmp"
5068/// into a switch, but then the default value goes to an uncond block with a lt
5069/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5070/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5071/// get something like:
5072///
5073/// case1:
5074/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5075/// case2:
5076/// br label %end
5077/// DEFAULT:
5078/// %tmp = icmp eq i8 %A, 2
5079/// %val = select i1 %tmp, i8 C3, i8 C4
5080/// br label %end
5081/// end:
5082/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5083///
5084/// We prefer to split the edge to 'end' so that there are TWO entries of C3/C4
5085/// to the PHI, merging the icmp & select into the switch, as follows:
5086///
5087/// case1:
5088/// switch i8 %A, label %DEFAULT [
5089/// i8 0, label %end
5090/// i8 1, label %case2
5091/// i8 2, label %case3
5092/// ]
5093/// case2:
5094/// br label %end
5095/// case3:
5096/// br label %end
5097/// DEFAULT:
5098/// br label %end
5099/// end:
5100/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case3 ], [ C4, %DEFAULT]
5101bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5102 ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
 // Every `return false` in this function is reached before any IR is
 // modified, so a false result means nothing was changed.
5103 BasicBlock *BB = ICI->getParent();
5104
5105 // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5106 // too complex.
5107 /// TODO: support multi-phis in succ BB of select's BB.
5108 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5109 (Select && !Select->hasOneUse()))
5110 return false;
5111
5112 // The pattern we're looking for is where our only predecessor is a switch on
5113 // 'V' and this block is the default case for the switch. In this case we can
5114 // fold the compared value into the switch to simplify things.
5115 BasicBlock *Pred = BB->getSinglePredecessor();
5116 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5117 return false;
5118
5119 Value *IcmpCond;
5120 ConstantInt *NewCaseVal;
5121 CmpPredicate Predicate;
5122
5123 // Match icmp X, C
5124 if (!match(ICI,
5125 m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5126 return false;
5127
5128 Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
 // NOTE(review): `User` is assigned in both branches below, but its
 // declaration is not present in this listing (embedded line 5129 is
 // missing) -- restore it from the upstream file.
5130 if (!Select) {
5131 // If Select == nullptr, we can assume that there is a hidden no-op select
5132 // just after icmp
5133 SelectCond = ICI;
5134 SelectTrueVal = Builder.getTrue();
5135 SelectFalseVal = Builder.getFalse();
5136 User = ICI->user_back();
5137 } else {
5138 SelectCond = Select->getCondition();
5139 // Check if the select condition is the same as the icmp condition.
5140 if (SelectCond != ICI)
5141 return false;
5142 SelectTrueVal = Select->getTrueValue();
5143 SelectFalseVal = Select->getFalseValue();
5144 User = Select->user_back();
5145 }
5146
 // The switch in the predecessor must be switching on the same value that the
 // icmp compares against the constant.
5147 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5148 if (SI->getCondition() != IcmpCond)
5149 return false;
5150
5151 // If BB is reachable on a non-default case, then we simply know the value of
5152 // V in this block. Substitute it and constant fold the icmp instruction
5153 // away.
5154 if (SI->getDefaultDest() != BB) {
5155 ConstantInt *VVal = SI->findCaseDest(BB);
5156 assert(VVal && "Should have a unique destination value");
5157 ICI->setOperand(0, VVal);
5158
5159 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5160 ICI->replaceAllUsesWith(V);
5161 ICI->eraseFromParent();
5162 }
5163 // BB is now empty, so it is likely to simplify away.
5164 return requestResimplify();
5165 }
5166
5167 // Ok, the block is reachable from the default dest. If the constant we're
5168 // comparing exists in one of the other edges, then we can constant fold ICI
5169 // and zap it.
5170 if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
5171 Value *V;
 // NOTE(review): the statements that assign V in each arm of this if/else
 // are not present in this listing (embedded lines 5173 and 5175 are
 // missing) -- restore them from the upstream file.
5172 if (Predicate == ICmpInst::ICMP_EQ)
5174 else
5176
5177 ICI->replaceAllUsesWith(V);
5178 ICI->eraseFromParent();
5179 // BB is now empty, so it is likely to simplify away.
5180 return requestResimplify();
5181 }
5182
5183 // The use of the select has to be in the 'end' block, by the only PHI node in
5184 // the block.
5185 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5186 PHINode *PHIUse = dyn_cast<PHINode>(User);
 // NOTE(review): the final operand of this condition is not present in this
 // listing (embedded line 5188 is missing).
5187 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5189 return false;
5190
5191 // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5192 // edge gets SelectTrueVal in the PHI.
5193 Value *DefaultCst = SelectFalseVal;
5194 Value *NewCst = SelectTrueVal;
5195
5196 if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5197 std::swap(DefaultCst, NewCst);
5198
5199 // Replace Select (which is used by the PHI for the default value) with
5200 // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5201 if (Select) {
5202 Select->replaceAllUsesWith(DefaultCst);
5203 Select->eraseFromParent();
5204 } else {
5205 ICI->replaceAllUsesWith(DefaultCst);
5206 }
5207 ICI->eraseFromParent();
5208
5209 SmallVector<DominatorTree::UpdateType, 2> Updates;
5210
5211 // Okay, the switch goes to this block on a default value. Add an edge from
5212 // the switch to the merge point on the compared value.
5213 BasicBlock *NewBB =
5214 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5215 {
5216 SwitchInstProfUpdateWrapper SIW(*SI);
5217 auto W0 = SIW.getSuccessorWeight(0);
 // NOTE(review): the declaration of `NewW` is not present in this listing
 // (embedded line 5218 is missing).
 // When successor 0 carries a weight, half of it (rounded up) is kept on
 // successor 0 and the same value is passed to addCase for the new case.
5219 if (W0) {
5220 NewW = ((uint64_t(*W0) + 1) >> 1);
5221 SIW.setSuccessorWeight(0, *NewW);
5222 }
5223 SIW.addCase(NewCaseVal, NewBB, NewW);
5224 if (DTU)
5225 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5226 }
5227
5228 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5229 Builder.SetInsertPoint(NewBB);
5230 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5231 Builder.CreateBr(SuccBlock);
5232 PHIUse->addIncoming(NewCst, NewBB);
5233 if (DTU) {
5234 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5235 DTU->applyUpdates(Updates);
5236 }
5237 return true;
5238}
5239
5240/// Check to see if it is branching on an or/and chain of icmp instructions, and
5241/// fold it into a switch instruction if so.
5242bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
5243 IRBuilder<> &Builder,
5244 const DataLayout &DL) {
 // NOTE(review): the declaration/initialisation of `Cond` is not present in
 // this listing (embedded line 5245 is missing).
5246 if (!Cond)
5247 return false;
5248
5249 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5250 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5251 // 'setne's and'ed together, collect them.
5252
5253 // Try to gather values from a chain of and/or to be turned into a switch
5254 ConstantComparesGatherer ConstantCompare(Cond, DL);
5255 // Unpack the result
5256 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5257 Value *CompVal = ConstantCompare.CompValue;
5258 unsigned UsedICmps = ConstantCompare.UsedICmps;
5259 Value *ExtraCase = ConstantCompare.Extra;
5260 bool TrueWhenEqual = ConstantCompare.IsEq;
5261
5262 // If we didn't find a value that is compared multiple times, fail.
5263 if (!CompVal)
5264 return false;
5265
5266 // Avoid turning single icmps into a switch.
5267 if (UsedICmps <= 1)
5268 return false;
5269
5270 // There might be duplicate constants in the list, which the switch
5271 // instruction can't handle, remove them now.
5272 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5273 Values.erase(llvm::unique(Values), Values.end());
5274
5275 // If Extra was used, we require at least two switch values to do the
5276 // transformation. A switch with one value is just a conditional branch.
5277 if (ExtraCase && Values.size() < 2)
5278 return false;
5279
5280 SmallVector<uint32_t> BranchWeights;
5281 const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5282 extractBranchWeights(*BI, BranchWeights);
5283
5284 // Figure out which block is which destination.
 // After the optional swap below, BranchWeights[0] belongs to EdgeBB and
 // BranchWeights[1] to DefaultBB.
5285 BasicBlock *DefaultBB = BI->getSuccessor(1);
5286 BasicBlock *EdgeBB = BI->getSuccessor(0);
5287 if (!TrueWhenEqual) {
5288 std::swap(DefaultBB, EdgeBB);
5289 if (HasProfile)
5290 std::swap(BranchWeights[0], BranchWeights[1]);
5291 }
5292
5293 BasicBlock *BB = BI->getParent();
5294
5295 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5296 << " cases into SWITCH. BB is:\n"
5297 << *BB);
5298
5299 SmallVector<DominatorTree::UpdateType, 2> Updates;
5300
5301 // If there are any extra values that couldn't be folded into the switch
5302 // then we evaluate them with an explicit branch first. Split the block
5303 // right before the condbr to handle it.
5304 if (ExtraCase) {
5305 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5306 /*MSSAU=*/nullptr, "switch.early.test");
5307
5308 // Remove the uncond branch added to the old block.
5309 Instruction *OldTI = BB->getTerminator();
5310 Builder.SetInsertPoint(OldTI);
5311
5312 // There can be an unintended UB if extra values are Poison. Before the
5313 // transformation, extra values may not be evaluated according to the
5314 // condition, and it will not raise UB. But after transformation, we are
5315 // evaluating extra values before checking the condition, and it will raise
5316 // UB. It can be solved by adding freeze instruction to extra values.
5317 AssumptionCache *AC = Options.AC;
5318
5319 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5320 ExtraCase = Builder.CreateFreeze(ExtraCase);
5321
5322 // We don't have any info about this condition.
5323 auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
5324 : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
 // NOTE(review): the statement that followed the branch creation is not
 // present in this listing (embedded line 5325 is missing); `Br` has no
 // visible use here.
5326
5327 OldTI->eraseFromParent();
5328
5329 if (DTU)
5330 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5331
5332 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5333 // for the edge we just added.
5334 addPredecessorToBlock(EdgeBB, BB, NewBB);
5335
5336 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5337 << "\nEXTRABB = " << *BB);
5338 BB = NewBB;
5339 }
5340
5341 Builder.SetInsertPoint(BI);
5342 // Convert pointer to int before we switch.
5343 if (CompVal->getType()->isPointerTy()) {
5344 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5345 "Should not end up here with unstable pointers");
5346 CompVal = Builder.CreatePtrToInt(
5347 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5348 }
5349
5350 // Check if we can represent the values as a contiguous range. If so, we use a
5351 // range check + conditional branch instead of a switch.
 // The range is built as [back(), front() + 1), i.e. front() is used as the
 // largest value and back() as the smallest -- presumably because
 // constantIntSortPredicate orders the values descending (TODO confirm).
5352 if (Values.front()->getValue() - Values.back()->getValue() ==
5353 Values.size() - 1) {
5354 ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5355 Values.back()->getValue(), Values.front()->getValue() + 1);
5356 APInt Offset, RHS;
5357 ICmpInst::Predicate Pred;
5358 RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5359 Value *X = CompVal;
5360 if (!Offset.isZero())
5361 X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5362 Value *Cond =
5363 Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5364 CondBrInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5365 if (HasProfile)
5366 setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5367 // We don't need to update PHI nodes since we don't add any new edges.
5368 } else {
5369 // Create the new switch instruction now.
5370 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5371 if (HasProfile) {
5372 // We know the weight of the default case. We don't know the weight of the
5373 // other cases, but rather than completely lose profiling info, we split
5374 // the remaining probability equally over them.
5375 SmallVector<uint32_t> NewWeights(Values.size() + 1);
5376 NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5377 // if !TrueWhenEqual.
5378 for (auto &V : drop_begin(NewWeights))
5379 V = BranchWeights[0] / Values.size();
5380 setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5381 }
5382
5383 // Add all of the 'cases' to the switch instruction.
5384 for (ConstantInt *Val : Values)
5385 New->addCase(Val, EdgeBB);
5386
5387 // We added edges from BB to EdgeBB. As such, if there were any
5388 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5389 // the number of edges added.
5390 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5391 PHINode *PN = cast<PHINode>(BBI);
5392 Value *InVal = PN->getIncomingValueForBlock(BB);
 // One entry for BB already exists (it was just read above), so only
 // Values.size() - 1 further copies are appended.
5393 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5394 PN->addIncoming(InVal, BB);
5395 }
5396 }
5397
5398 // Erase the old branch instruction.
 // NOTE(review): the statement that performs the erase is not present in this
 // listing (embedded line 5399 is missing).
5400 if (DTU)
5401 DTU->applyUpdates(Updates);
5402
5403 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5404 return true;
5405}
5406
5407bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5408 if (isa<PHINode>(RI->getValue()))
5409 return simplifyCommonResume(RI);
5410 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5411 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5412 // The resume must unwind the exception that caused control to branch here.
5413 return simplifySingleResume(RI);
5414
5415 return false;
5416}
5417
5418// Check if cleanup block is empty: returns true only when every instruction
// in the range R is a call to one of the dbg_declare, dbg_value, dbg_label or
// lifetime_end intrinsics. Any other instruction (including any other
// intrinsic) makes the result false. An empty range yields true.
// NOTE(review): the function's signature is not present in this listing
// (embedded line 5419 is missing); the body iterates a range named `R`.
5420 for (Instruction &I : R) {
5421 auto *II = dyn_cast<IntrinsicInst>(&I);
5422 if (!II)
5423 return false;
5424
5425 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5426 switch (IntrinsicID) {
5427 case Intrinsic::dbg_declare:
5428 case Intrinsic::dbg_value:
5429 case Intrinsic::dbg_label:
5430 case Intrinsic::lifetime_end:
5431 break;
5432 default:
5433 return false;
5434 }
5435 }
5436 return true;
5437}
5438
5439// Simplify resume that is shared by several landing pads (phi of landing pad).
// Returns true if at least one incoming landing-pad block was found to be
// trivial and rewritten; returns false without changes otherwise.
5440bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5441 BasicBlock *BB = RI->getParent();
5442
5443 // Check that there are no other instructions except for debug and lifetime
5444 // intrinsics between the phi's and resume instruction.
5445 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5446 BB->getTerminator()->getIterator())))
5447 return false;
5448
5449 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5450 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5451
5452 // Check incoming blocks to see if any of them are trivial.
5453 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5454 Idx++) {
5455 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5456 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5457
5458 // If the block has other successors, we can not delete it because
5459 // it has other dependents.
5460 if (IncomingBB->getUniqueSuccessor() != BB)
5461 continue;
5462
5463 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5464 // Not the landing pad that caused the control to branch here.
5465 if (IncomingValue != LandingPad)
5466 continue;
5467
 // NOTE(review): the opening of this condition is not present in this
 // listing (embedded line 5468 is missing); only its range argument and the
 // guarded insert are visible.
5469 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5470 TrivialUnwindBlocks.insert(IncomingBB);
5471 }
5472
5473 // If no trivial unwind blocks, don't do any simplifications.
5474 if (TrivialUnwindBlocks.empty())
5475 return false;
5476
5477 // Turn all invokes that unwind here into calls.
5478 for (auto *TrivialBB : TrivialUnwindBlocks) {
5479 // Blocks that will be simplified should be removed from the phi node.
5480 // Note there could be multiple edges to the resume block, and we need
5481 // to remove them all.
5482 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5483 BB->removePredecessor(TrivialBB, true);
5484
 // NOTE(review): the range expression of this loop is not present in this
 // listing (embedded line 5486 is missing).
5485 for (BasicBlock *Pred :
5487 removeUnwindEdge(Pred, DTU);
5488 ++NumInvokes;
5489 }
5490
5491 // In each SimplifyCFG run, only the current processed block can be erased.
5492 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5493 // of erasing TrivialBB, we only remove the branch to the common resume
5494 // block so that we can later erase the resume block since it has no
5495 // predecessors.
5496 TrivialBB->getTerminator()->eraseFromParent();
5497 new UnreachableInst(RI->getContext(), TrivialBB);
5498 if (DTU)
5499 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5500 }
5501
5502 // Delete the resume block if all its predecessors have been removed.
5503 if (pred_empty(BB))
5504 DeleteDeadBlock(BB, DTU);
5505
 // The set is non-empty here (the empty case returned above and nothing has
 // been removed from it since), so this evaluates to true.
5506 return !TrivialUnwindBlocks.empty();
5507}
5508
5509// Simplify resume that is only used by a single (non-phi) landing pad.
// On success every predecessor's unwind edge into this block is removed and
// the block itself is deleted.
5510bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5511 BasicBlock *BB = RI->getParent();
5512 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5513 assert(RI->getValue() == LPInst &&
5514 "Resume must unwind the exception that caused control to here");
5515
5516 // Check that there are no other instructions except for debug intrinsics.
 // NOTE(review): the opening of this condition is not present in this listing
 // (embedded line 5517 is missing); only its range argument, covering the
 // instructions between the landing pad and the resume, is visible.
5518 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5519 return false;
5520
5521 // Turn all invokes that unwind here into calls and delete the basic block.
 // Iterating with make_early_inc_range because removeUnwindEdge changes the
 // predecessor list being walked.
5522 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5523 removeUnwindEdge(Pred, DTU);
5524 ++NumInvokes;
5525 }
5526
5527 // The landingpad is now unreachable. Zap it.
5528 DeleteDeadBlock(BB, DTU);
5529 return true;
5530}
5531
5533 // If this is a trivial cleanup pad that executes no instructions, it can be
5534 // eliminated. If the cleanup pad continues to the caller, any predecessor
5535 // that is an EH pad will be updated to continue to the caller and any
5536 // predecessor that terminates with an invoke instruction will have its invoke
5537 // instruction converted to a call instruction. If the cleanup pad being
5538 // simplified does not continue to the caller, each predecessor will be
5539 // updated to continue to the unwind destination of the cleanup pad being
5540 // simplified.
 // NOTE(review): the function's signature is not present in this listing
 // (embedded line 5532 is missing); the body uses a cleanupret `RI` and a
 // dominator-tree updater `DTU`, which may be null.
5541 BasicBlock *BB = RI->getParent();
5542 CleanupPadInst *CPInst = RI->getCleanupPad();
5543 if (CPInst->getParent() != BB)
5544 // This isn't an empty cleanup.
5545 return false;
5546
5547 // We cannot kill the pad if it has multiple uses. This typically arises
5548 // from unreachable basic blocks.
5549 if (!CPInst->hasOneUse())
5550 return false;
5551
5552 // Check that there are no other instructions except for benign intrinsics.
 // NOTE(review): the opening of this condition is not present in this listing
 // (embedded line 5553 is missing); only its range argument is visible.
5554 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5555 return false;
5556
5557 // If the cleanup return we are simplifying unwinds to the caller, this will
5558 // set UnwindDest to nullptr.
5559 BasicBlock *UnwindDest = RI->getUnwindDest();
5560
5561 // We're about to remove BB from the control flow. Before we do, sink any
5562 // PHINodes into the unwind destination. Doing this before changing the
5563 // control flow avoids some potentially slow checks, since we can currently
5564 // be certain that UnwindDest and BB have no common predecessors (since they
5565 // are both EH pads).
5566 if (UnwindDest) {
5567 // First, go through the PHI nodes in UnwindDest and update any nodes that
5568 // reference the block we are removing
5569 for (PHINode &DestPN : UnwindDest->phis()) {
5570 int Idx = DestPN.getBasicBlockIndex(BB);
5571 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5572 assert(Idx != -1);
5573 // This PHI node has an incoming value that corresponds to a control
5574 // path through the cleanup pad we are removing. If the incoming
5575 // value is in the cleanup pad, it must be a PHINode (because we
5576 // verified above that the block is otherwise empty). Otherwise, the
5577 // value is either a constant or a value that dominates the cleanup
5578 // pad being removed.
5579 //
5580 // Because BB and UnwindDest are both EH pads, all of their
5581 // predecessors must unwind to these blocks, and since no instruction
5582 // can have multiple unwind destinations, there will be no overlap in
5583 // incoming blocks between SrcPN and DestPN.
5584 Value *SrcVal = DestPN.getIncomingValue(Idx);
5585 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5586
 // Only a PHI that lives in BB itself needs translating per predecessor;
 // any other value is forwarded unchanged for every predecessor of BB.
5587 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5588 for (auto *Pred : predecessors(BB)) {
5589 Value *Incoming =
5590 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5591 DestPN.addIncoming(Incoming, Pred);
5592 }
5593 }
5594
5595 // Sink any remaining PHI nodes directly into UnwindDest.
5596 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5597 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5598 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5599 // If the PHI node has no uses or all of its uses are in this basic
5600 // block (meaning they are debug or lifetime intrinsics), just leave
5601 // it. It will be erased when we erase BB below.
5602 continue;
5603
5604 // Otherwise, sink this PHI node into UnwindDest.
5605 // Any predecessors to UnwindDest which are not already represented
5606 // must be back edges which inherit the value from the path through
5607 // BB. In this case, the PHI value must reference itself.
5608 for (auto *pred : predecessors(UnwindDest))
5609 if (pred != BB)
5610 PN.addIncoming(&PN, pred);
5611 PN.moveBefore(InsertPt);
5612 // Also, add a dummy incoming value for the original BB itself,
5613 // so that the PHI is well-formed until we drop said predecessor.
5614 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5615 }
5616 }
5617
5618 std::vector<DominatorTree::UpdateType> Updates;
5619
5620 // We use make_early_inc_range here because we will remove all predecessors.
 // NOTE(review): the loop header that introduces `PredBB` is not present in
 // this listing (embedded line 5621 is missing).
5622 if (UnwindDest == nullptr) {
 // Pending updates are applied and cleared before removeUnwindEdge is
 // called, since DTU is also handed to that call.
5623 if (DTU) {
5624 DTU->applyUpdates(Updates);
5625 Updates.clear();
5626 }
5627 removeUnwindEdge(PredBB, DTU);
5628 ++NumInvokes;
5629 } else {
5630 BB->removePredecessor(PredBB);
5631 Instruction *TI = PredBB->getTerminator();
5632 TI->replaceUsesOfWith(BB, UnwindDest);
5633 if (DTU) {
5634 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5635 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5636 }
5637 }
5638 }
5639
5640 if (DTU)
5641 DTU->applyUpdates(Updates);
5642
5643 DeleteDeadBlock(BB, DTU);
5644
5645 return true;
5646}
5647
5648// Try to merge two cleanuppads together.
// Returns true if the successor cleanuppad was folded into the one used by
// the cleanupret `RI` and the cleanupret was replaced by an unconditional
// branch; returns false without changes otherwise.
// NOTE(review): the function's signature is not present in this listing
// (embedded line 5649 is missing).
5650 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5651 // with.
5652 BasicBlock *UnwindDest = RI->getUnwindDest();
5653 if (!UnwindDest)
5654 return false;
5655
5656 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5657 // be safe to merge without code duplication.
5658 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5659 return false;
5660
5661 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5662 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5663 if (!SuccessorCleanupPad)
5664 return false;
5665
5666 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5667 // Replace any uses of the successor cleanuppad with the predecessor pad.
5668 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5669 // funclet bundle operands.
5670 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5671 // Remove the old cleanuppad.
5672 SuccessorCleanupPad->eraseFromParent();
5673 // Now, we simply replace the cleanupret with a branch to the unwind
5674 // destination.
5675 UncondBrInst::Create(UnwindDest, RI->getParent());
5676 RI->eraseFromParent();
5677
5678 return true;
5679}
5680
5681bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5682 // It is possible to transiantly have an undef cleanuppad operand because we
5683 // have deleted some, but not all, dead blocks.
5684 // Eventually, this block will be deleted.
5685 if (isa<UndefValue>(RI->getOperand(0)))
5686 return false;
5687
5688 if (mergeCleanupPad(RI))
5689 return true;
5690
5691 if (removeEmptyCleanup(RI, DTU))
5692 return true;
5693
5694 return false;
5695}
5696
5697// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Works in two phases: first erase removable instructions that immediately
// precede the `unreachable`; then, if the `unreachable` is the first
// instruction of its block, rewrite each predecessor's terminator so that it
// no longer targets this block, and delete the block once it has no
// predecessors. Returns true if anything was changed.
5698bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5699 BasicBlock *BB = UI->getParent();
5700
5701 bool Changed = false;
5702
5703 // Ensure that any debug-info records that used to occur after the Unreachable
5704 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5705 // the block.
 // NOTE(review): the statement that performs this move is not present in this
 // listing (embedded line 5706 is missing).
5707
5708 // Debug-info records on the unreachable inst itself should be deleted, as
5709 // below we delete everything past the final executable instruction.
5710 UI->dropDbgRecords();
5711
5712 // If there are any instructions immediately before the unreachable that can
5713 // be removed, do so.
5714 while (UI->getIterator() != BB->begin()) {
 // NOTE(review): the declaration of `BBI` (embedded line 5715) and the
 // condition guarding the `break` below (embedded line 5718) are not
 // present in this listing.
5716 --BBI;
5717
5719 break; // Can not drop any more instructions. We're done here.
5720 // Otherwise, this instruction can be freely erased,
5721 // even if it is not side-effect free.
5722
5723 // Note that deleting EH's here is in fact okay, although it involves a bit
5724 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5725 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5726 // and we can therefore guarantee this block will be erased.
5727
5728 // If we're deleting this, we're deleting any subsequent debug info, so
5729 // delete DbgRecords.
5730 BBI->dropDbgRecords();
5731
5732 // Delete this instruction (any uses are guaranteed to be dead)
5733 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5734 BBI->eraseFromParent();
5735 Changed = true;
5736 }
5737
5738 // If the unreachable instruction is the first in the block, take a gander
5739 // at all of the predecessors of this instruction, and simplify them.
5740 if (&BB->front() != UI)
5741 return Changed;
5742
5743 std::vector<DominatorTree::UpdateType> Updates;
5744
 // Take a de-duplicated snapshot of the predecessors up front: the loop below
 // rewrites their terminators, which changes BB's predecessor list.
5745 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5746 for (BasicBlock *Predecessor : Preds) {
5747 Instruction *TI = Predecessor->getTerminator();
5748 IRBuilder<> Builder(TI);
5749 if (isa<UncondBrInst>(TI)) {
5750 new UnreachableInst(TI->getContext(), TI->getIterator());
5751 TI->eraseFromParent();
5752 Changed = true;
5753 if (DTU)
5754 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5755 } else if (auto *BI = dyn_cast<CondBrInst>(TI)) {
5756 // We could either have a proper unconditional branch,
5757 // or a degenerate conditional branch with matching destinations.
5758 if (BI->getSuccessor(0) == BI->getSuccessor(1)) {
5759 new UnreachableInst(TI->getContext(), TI->getIterator());
5760 TI->eraseFromParent();
5761 Changed = true;
5762 } else {
5763 Value* Cond = BI->getCondition();
5764 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5765 "The destinations are guaranteed to be different here.");
 // Record that the edge into BB is never taken as an assumption on the
 // branch condition, then branch unconditionally to the other successor.
5766 CallInst *Assumption;
5767 if (BI->getSuccessor(0) == BB) {
5768 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5769 Builder.CreateBr(BI->getSuccessor(1));
5770 } else {
5771 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5772 Assumption = Builder.CreateAssumption(Cond);
5773 Builder.CreateBr(BI->getSuccessor(0));
5774 }
5775 if (Options.AC)
5776 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5777
 // NOTE(review): a statement is not present in this listing here
 // (embedded line 5778 is missing) -- presumably the removal of the old
 // conditional branch; confirm against the upstream file.
5779 Changed = true;
5780 }
5781 if (DTU)
5782 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5783 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5784 SwitchInstProfUpdateWrapper SU(*SI);
 // The end iterator is refreshed after each removeCase call.
5785 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5786 if (i->getCaseSuccessor() != BB) {
5787 ++i;
5788 continue;
5789 }
5790 BB->removePredecessor(SU->getParent());
5791 i = SU.removeCase(i);
5792 e = SU->case_end();
5793 Changed = true;
5794 }
5795 // Note that the default destination can't be removed!
5796 if (DTU && SI->getDefaultDest() != BB)
5797 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5798 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5799 if (II->getUnwindDest() == BB) {
 // Pending updates are applied and cleared first, since DTU is also
 // handed to removeUnwindEdge.
5800 if (DTU) {
5801 DTU->applyUpdates(Updates);
5802 Updates.clear();
5803 }
5804 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5805 if (!CI->doesNotThrow())
5806 CI->setDoesNotThrow();
5807 Changed = true;
5808 }
5809 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5810 if (CSI->getUnwindDest() == BB) {
5811 if (DTU) {
5812 DTU->applyUpdates(Updates);
5813 Updates.clear();
5814 }
5815 removeUnwindEdge(TI->getParent(), DTU);
5816 Changed = true;
5817 continue;
5818 }
5819
5820 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5821 E = CSI->handler_end();
5822 I != E; ++I) {
5823 if (*I == BB) {
5824 CSI->removeHandler(I);
 // Step both iterators back after the removal so that the loop's ++I
 // lands on the slot that followed the removed handler (presumably
 // removeHandler shifts later handlers down -- TODO confirm).
5825 --I;
5826 --E;
5827 Changed = true;
5828 }
5829 }
5830 if (DTU)
5831 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5832 if (CSI->getNumHandlers() == 0) {
5833 if (CSI->hasUnwindDest()) {
5834 // Redirect all predecessors of the block containing CatchSwitchInst
5835 // to instead branch to the CatchSwitchInst's unwind destination.
5836 if (DTU) {
5837 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5838 Updates.push_back({DominatorTree::Insert,
5839 PredecessorOfPredecessor,
5840 CSI->getUnwindDest()});
5841 Updates.push_back({DominatorTree::Delete,
5842 PredecessorOfPredecessor, Predecessor});
5843 }
5844 }
5845 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5846 } else {
5847 // Rewrite all preds to unwind to caller (or from invoke to call).
5848 if (DTU) {
5849 DTU->applyUpdates(Updates);
5850 Updates.clear();
5851 }
5852 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5853 for (BasicBlock *EHPred : EHPreds)
5854 removeUnwindEdge(EHPred, DTU);
5855 }
5856 // The catchswitch is no longer reachable.
5857 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5858 CSI->eraseFromParent();
5859 Changed = true;
5860 }
5861 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
 // CRI is only read inside the assert; the void cast keeps it "used" when
 // asserts are compiled out.
5862 (void)CRI;
5863 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5864 "Expected to always have an unwind to BB.");
5865 if (DTU)
5866 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5867 new UnreachableInst(TI->getContext(), TI->getIterator());
5868 TI->eraseFromParent();
5869 Changed = true;
5870 }
5871 }
5872
5873 if (DTU)
5874 DTU->applyUpdates(Updates);
5875
5876 // If this block is now dead, remove it.
5877 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5878 DeleteDeadBlock(BB, DTU);
5879 return true;
5880 }
5881
5882 return Changed;
5883}
5884
5893
5894static std::optional<ContiguousCasesResult>
// NOTE(review): the function name and its leading parameters are not present
// in this listing (embedded lines 5895-5896 are missing); the body uses
// `Condition`, `Cases` and `OtherCases` in addition to the two destinations
// visible below. The definition of ContiguousCasesResult is also not visible
// here.
// Returns a result describing a contiguous value range and the destination it
// maps to, or std::nullopt if neither form checked below applies.
5897 BasicBlock *Dest, BasicBlock *OtherDest) {
5898 assert(Cases.size() >= 1);
5899
 // NOTE(review): a statement is not present in this listing here (embedded
 // line 5900 is missing). The code below reads Cases.back() as the minimum
 // and Cases.front() as the maximum, so it presumably sorts Cases in
 // descending order -- confirm against the upstream file.
5901 const APInt &Min = Cases.back()->getValue();
5902 const APInt &Max = Cases.front()->getValue();
5903 APInt Offset = Max - Min;
5904 size_t ContiguousOffset = Cases.size() - 1;
 // Max - Min equal to Cases.size() - 1 is taken to mean Cases fills
 // [Min, Max] with no gaps.
5905 if (Offset == ContiguousOffset) {
5906 return ContiguousCasesResult{
5907 /*Min=*/Cases.back(),
5908 /*Max=*/Cases.front(),
5909 /*Dest=*/Dest,
5910 /*OtherDest=*/OtherDest,
5911 /*Cases=*/&Cases,
5912 /*OtherCases=*/&OtherCases,
5913 };
5914 }
5915 ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false,
5916 SimplifyQuery(Dest->getDataLayout()));
5917 // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5918 // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5919 // contiguous range for the other destination. N.B. If CR is not a full range,
5920 // Max+1 is not equal to Min. It's not continuous in arithmetic.
5921 if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5922 assert(Cases.size() >= 2);
 // Find the first adjacent pair whose values are not consecutive, i.e. the
 // gap between the two runs.
5923 auto *It =
5924 std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
5925 return L->getValue() != R->getValue() + 1;
5926 });
5927 if (It == Cases.end())
5928 return std::nullopt;
5929 auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
 // The lengths of the two runs on either side of the gap must account for
 // every case; otherwise there is more than one gap.
5930 if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5931 Cases.size() - 2) {
5932 return ContiguousCasesResult{
5933 /*Min=*/cast<ConstantInt>(
5934 ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
5935 /*Max=*/
 // NOTE(review): the opening of this initializer expression is not
 // present in this listing (embedded line 5936 is missing).
5937 ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
5938 /*Dest=*/OtherDest,
5939 /*OtherDest=*/Dest,
5940 /*Cases=*/&OtherCases,
5941 /*OtherCases=*/&Cases,
5942 };
5943 }
5944 }
5945 return std::nullopt;
5946}
5947
5949 DomTreeUpdater *DTU,
5950 bool RemoveOrigDefaultBlock = true) {
 // NOTE(review): the first line of this function's signature (its name and
 // leading parameter) is not present in this listing (embedded line 5948 is
 // missing); the body uses a switch instruction named `Switch`.
 //
 // Replaces the switch's default destination with a freshly created block
 // that contains only an `unreachable`. When RemoveOrigDefaultBlock is true
 // the switch's block is also removed as a predecessor of the original
 // default block. DTU may be null, in which case no dominator-tree updates
 // are recorded.
5951 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5952 auto *BB = Switch->getParent();
5953 auto *OrigDefaultBlock = Switch->getDefaultDest();
5954 if (RemoveOrigDefaultBlock)
5955 OrigDefaultBlock->removePredecessor(BB);
5956 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5957 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5958 OrigDefaultBlock);
5959 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
 // NOTE(review): a statement is not present in this listing here (embedded
 // line 5960 is missing); `UI` has no visible use.
5961 Switch->setDefaultDest(&*NewDefaultBlock);
5962 if (DTU) {
 // NOTE(review): the declaration of `Updates` is not present in this listing
 // (embedded line 5963 is missing).
5964 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
 // The edge to the original default block is only reported as deleted when
 // no remaining successor of BB (e.g. a case) still targets that block.
5965 if (RemoveOrigDefaultBlock &&
5966 !is_contained(successors(BB), OrigDefaultBlock))
5967 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5968 DTU->applyUpdates(Updates);
5969 }
5970}
5971
5972 /// Turn a switch into an integer range comparison and branch.
5973 /// Switches with more than 2 destinations are ignored.
5974 /// Switches with 1 destination are also ignored.
     ///
     /// \param SI      The switch to rewrite; it is erased on success.
     /// \param Builder Used to emit the replacement compare and branch.
     /// \returns true if the switch was replaced by a branch (and erased), false
     /// if it was left untouched. Every `return false` below happens before any
     /// instruction is created or removed.
5975bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5976 IRBuilder<> &Builder) {
5977 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5978
     // A default destination that is unreachable does not count as a real
     // destination for the partitioning below.
5979 bool HasDefault = !SI->defaultDestUnreachable();
5980
5981 auto *BB = SI->getParent();
5982 // Partition the cases into two sets with different destinations.
     // When the default is reachable it seeds DestA, so cases that share the
     // default's destination are collected in CasesA.
     // NOTE(review): CasesA and CasesB are declared on lines that are not
     // visible in this listing (5985-5986).
5983 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5984 BasicBlock *DestB = nullptr;
5987
5988 for (auto Case : SI->cases()) {
5989 BasicBlock *Dest = Case.getCaseSuccessor();
     // The first destination seen becomes DestA if the default did not seed it.
5990 if (!DestA)
5991 DestA = Dest;
5992 if (Dest == DestA) {
5993 CasesA.push_back(Case.getCaseValue());
5994 continue;
5995 }
     // The first destination that differs from DestA becomes DestB.
5996 if (!DestB)
5997 DestB = Dest;
5998 if (Dest == DestB) {
5999 CasesB.push_back(Case.getCaseValue());
6000 continue;
6001 }
6002 return false; // More than two destinations.
6003 }
6004 if (!DestB)
6005 return false; // All destinations are the same and the default is unreachable
6006
6007 assert(DestA && DestB &&
6008 "Single-destination switch should have been folded.");
6009 assert(DestA != DestB);
6010 assert(DestB != SI->getDefaultDest());
6011 assert(!CasesB.empty() && "There must be non-default cases.");
6012 assert(!CasesA.empty() || HasDefault);
6013
6014 // Figure out if one of the sets of cases form a contiguous range.
6015 std::optional<ContiguousCasesResult> ContiguousCases;
6016
6017 // Only one icmp is needed when there is only one case.
     // A single case value is trivially a contiguous range with Min == Max.
6018 if (!HasDefault && CasesA.size() == 1)
6019 ContiguousCases = ContiguousCasesResult{
6020 /*Min=*/CasesA[0],
6021 /*Max=*/CasesA[0],
6022 /*Dest=*/DestA,
6023 /*OtherDest=*/DestB,
6024 /*Cases=*/&CasesA,
6025 /*OtherCases=*/&CasesB,
6026 };
6027 else if (CasesB.size() == 1)
6028 ContiguousCases = ContiguousCasesResult{
6029 /*Min=*/CasesB[0],
6030 /*Max=*/CasesB[0],
6031 /*Dest=*/DestB,
6032 /*OtherDest=*/DestA,
6033 /*Cases=*/&CasesB,
6034 /*OtherCases=*/&CasesA,
6035 };
6036 // Correctness: Cases to the default destination cannot be contiguous cases.
6037 else if (!HasDefault)
6038 ContiguousCases =
6039 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6040
     // Fall back to trying the B set as the range, with A as the "other" side.
6041 if (!ContiguousCases)
6042 ContiguousCases =
6043 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6044
6045 if (!ContiguousCases)
6046 return false;
6047
     // From here on: values in [Min, Max] go to Dest, everything else goes to
     // OtherDest. Cases/OtherCases point at the case lists for each side.
6048 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6049
6050 // Start building the compare and branch.
     // NOTE(review): Offset is defined on a line that is not visible in this
     // listing (6052).
6051
     // NumCases = Max - Min + 1, computed in the case value's bit width, so it
     // wraps to zero when the range spans every value of the type.
6053 Constant *NumCases = ConstantInt::get(Offset->getType(),
6054 Max->getValue() - Min->getValue() + 1);
6055 Instruction *NewBI;
6056 if (NumCases->isOneValue()) {
     // Exactly one value in the range: a plain equality test suffices.
6057 assert(Max->getValue() == Min->getValue());
6058 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6059 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6060 }
6061 // If NumCases overflowed, then all possible values jump to the successor.
6062 else if (NumCases->isNullValue() && !Cases->empty()) {
6063 NewBI = Builder.CreateBr(Dest);
6064 } else {
     // General case: add Offset to the condition (skipped when Offset is
     // zero) and do a single unsigned less-than against the range size.
6065 Value *Sub = SI->getCondition();
6066 if (!Offset->isNullValue())
6067 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6068 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6069 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6070 }
6071
6072 // Update weight for the newly-created conditional branch.
     // Only done when the weight list has one entry per switch successor
     // (default plus every case). Weights of successors equal to Dest are
     // summed into the true edge, all others into the false edge.
6073 if (hasBranchWeightMD(*SI) && isa<CondBrInst>(NewBI)) {
6074 SmallVector<uint64_t, 8> Weights;
6075 getBranchWeights(SI, Weights);
6076 if (Weights.size() == 1 + SI->getNumCases()) {
6077 uint64_t TrueWeight = 0;
6078 uint64_t FalseWeight = 0;
6079 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6080 if (SI->getSuccessor(I) == Dest)
6081 TrueWeight += Weights[I];
6082 else
6083 FalseWeight += Weights[I];
6084 }
     // Halve both sums together until each fits in 32 bits.
6085 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6086 TrueWeight /= 2;
6087 FalseWeight /= 2;
6088 }
6089 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6090 /*IsExpected=*/false, /*ElideAllZero=*/true);
6091 }
6092 }
6093
6094 // Prune obsolete incoming values off the successors' PHI nodes.
     // The code counts one PHI entry per former switch edge into the block
     // (one per case, plus one if the block is also the default destination)
     // and removes all but one of them.
6095 for (auto &PHI : make_early_inc_range(Dest->phis())) {
6096 unsigned PreviousEdges = Cases->size();
6097 if (Dest == SI->getDefaultDest())
6098 ++PreviousEdges;
6099 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6100 PHI.removeIncomingValue(SI->getParent());
6101 }
6102 for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6103 unsigned PreviousEdges = OtherCases->size();
6104 if (OtherDest == SI->getDefaultDest())
6105 ++PreviousEdges;
6106 unsigned E = PreviousEdges - 1;
6107 // Remove all incoming values from OtherDest if OtherDest is unreachable.
     // That is the case when the new terminator is an unconditional branch
     // to Dest, so no edge to OtherDest remains.
6108 if (isa<UncondBrInst>(NewBI))
6109 ++E;
6110 for (unsigned I = 0; I != E; ++I)
6111 PHI.removeIncomingValue(SI->getParent());
6112 }
6113
6114 // Clean up the default block - it may have phis or other instructions before
6115 // the unreachable terminator.
     // NOTE(review): the statement controlled by this `if` is on a line that
     // is not visible in this listing (6117).
6116 if (!HasDefault)
6118
     // Capture the default destination before the switch is erased; it is
     // needed for the dominator-tree update below.
6119 auto *UnreachableDefault = SI->getDefaultDest();
6120
6121 // Drop the switch.
6122 SI->eraseFromParent();
6123
     // With an unreachable default, the edge from BB to that block is gone.
6124 if (!HasDefault && DTU)
6125 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6126
6127 return true;
6128}
6129
6130/// Compute masked bits for the condition of a switch
6131/// and use it to remove dead cases.
6133 AssumptionCache *AC,
6134 const DataLayout &DL) {
6135 Value *Cond = SI->getCondition();
6136 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
6138 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6139
6140 // We can also eliminate cases by determining that their values are outside of
6141 // the limited range of the condition based on how many significant (non-sign)
6142 // bits are in the condition value.
6143 unsigned MaxSignificantBitsInCond =
6145
6146 // Gather dead cases.
6148 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6149 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6150 for (const auto &Case : SI->cases()) {
6151 auto *Successor = Case.getCaseSuccessor();
6152 if (DTU) {
6153 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6154 if (Inserted)
6155 UniqueSuccessors.push_back(Successor);
6156 ++It->second;
6157 }
6158 ConstantInt *CaseC = Case.getCaseValue();
6159 const APInt &CaseVal = CaseC->getValue();
6160 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6161 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6162 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6163 DeadCases.push_back(CaseC);
6164 if (DTU)
6165 --NumPerSuccessorCases[Successor];
6166 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6167 << " is dead.\n");
6168 } else if (IsKnownValuesValid)
6169 KnownValues.erase(CaseC);
6170 }
6171
6172 // If we can prove that the cases must cover all possible values, the
6173 // default destination becomes dead and we can remove it. If we know some
6174 // of the bits in the value, we can use that to more precisely compute the
6175 // number of possible unique case values.
6176 bool HasDefault = !SI->defaultDestUnreachable();
6177 const unsigned NumUnknownBits =
6178 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6179 assert(NumUnknownBits <= Known.getBitWidth());
6180 if (HasDefault && DeadCases.empty()) {
6181 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
6183 return true;
6184 }
6185
6186 if (NumUnknownBits < 64 /* avoid overflow */) {
6187 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6188 if (SI->getNumCases() == AllNumCases) {
6190 return true;
6191 }
6192 // When only one case value is missing, replace default with that case.
6193 // Eliminating the default branch will provide more opportunities for
6194 // optimization, such as lookup tables.
6195 if (SI->getNumCases() == AllNumCases - 1) {
6196 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6197 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6198 if (CondTy->getIntegerBitWidth() > 64 ||
6199 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6200 return false;
6201
6202 uint64_t MissingCaseVal = 0;
6203 for (const auto &Case : SI->cases())
6204 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6205 auto *MissingCase = cast<ConstantInt>(
6206 ConstantInt::get(Cond->getType(), MissingCaseVal));
6208 SIW.addCase(MissingCase, SI->getDefaultDest(),
6209 SIW.getSuccessorWeight(0));
6211 /*RemoveOrigDefaultBlock*/ false);
6212 SIW.setSuccessorWeight(0, 0);
6213 return true;
6214 }
6215 }
6216 }
6217
6218 if (DeadCases.empty())
6219 return false;
6220
6222 for (ConstantInt *DeadCase : DeadCases) {
6223 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6224 assert(CaseI != SI->case_default() &&
6225 "Case was not found. Probably mistake in DeadCases forming.");
6226 // Prune unused values from PHI nodes.
6227 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6228 SIW.removeCase(CaseI);
6229 }
6230
6231 if (DTU) {
6232 std::vector<DominatorTree::UpdateType> Updates;
6233 for (auto *Successor : UniqueSuccessors)
6234 if (NumPerSuccessorCases[Successor] == 0)
6235 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6236 DTU->applyUpdates(Updates);
6237 }
6238
6239 return true;
6240}
6241
6242/// If BB would be eligible for simplification by
6243/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6244/// by an unconditional branch), look at the phi node for BB in the successor
6245/// block and see if the incoming value is equal to CaseValue. If so, return
6246/// the phi node, and set PhiIndex to BB's index in the phi node.
6248 BasicBlock *BB, int *PhiIndex) {
6249 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6250 return nullptr; // BB must be empty to be a candidate for simplification.
6251 if (!BB->getSinglePredecessor())
6252 return nullptr; // BB must be dominated by the switch.
6253
6255 if (!Branch)
6256 return nullptr; // Terminator must be unconditional branch.
6257
6258 BasicBlock *Succ = Branch->getSuccessor();
6259
6260 for (PHINode &PHI : Succ->phis()) {
6261 int Idx = PHI.getBasicBlockIndex(BB);
6262 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6263
6264 Value *InValue = PHI.getIncomingValue(Idx);
6265 if (InValue != CaseValue)
6266 continue;
6267
6268 *PhiIndex = Idx;
6269 return &PHI;
6270 }
6271
6272 return nullptr;
6273}
6274
6275/// Try to forward the condition of a switch instruction to a phi node
6276/// dominated by the switch, if that would mean that some of the destination
6277/// blocks of the switch can be folded away. Return true if a change is made.
6279 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6280
6281 ForwardingNodesMap ForwardingNodes;
6282 BasicBlock *SwitchBlock = SI->getParent();
6283 bool Changed = false;
6284 for (const auto &Case : SI->cases()) {
6285 ConstantInt *CaseValue = Case.getCaseValue();
6286 BasicBlock *CaseDest = Case.getCaseSuccessor();
6287
6288 // Replace phi operands in successor blocks that are using the constant case
6289 // value rather than the switch condition variable:
6290 // switchbb:
6291 // switch i32 %x, label %default [
6292 // i32 17, label %succ
6293 // ...
6294 // succ:
6295 // %r = phi i32 ... [ 17, %switchbb ] ...
6296 // -->
6297 // %r = phi i32 ... [ %x, %switchbb ] ...
6298
6299 for (PHINode &Phi : CaseDest->phis()) {
6300 // This only works if there is exactly 1 incoming edge from the switch to
6301 // a phi. If there is >1, that means multiple cases of the switch map to 1
6302 // value in the phi, and that phi value is not the switch condition. Thus,
6303 // this transform would not make sense (the phi would be invalid because
6304 // a phi can't have different incoming values from the same block).
6305 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6306 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6307 count(Phi.blocks(), SwitchBlock) == 1) {
6308 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6309 Changed = true;
6310 }
6311 }
6312
6313 // Collect phi nodes that are indirectly using this switch's case constants.
6314 int PhiIdx;
6315 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6316 ForwardingNodes[Phi].push_back(PhiIdx);
6317 }
6318
6319 for (auto &ForwardingNode : ForwardingNodes) {
6320 PHINode *Phi = ForwardingNode.first;
6321 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6322 // Check if it helps to fold PHI.
6323 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6324 continue;
6325
6326 for (int Index : Indexes)
6327 Phi->setIncomingValue(Index, SI->getCondition());
6328 Changed = true;
6329 }
6330
6331 return Changed;
6332}
6333
6334/// Return true if the backend will be able to handle
6335/// initializing an array of constants like C.
6337 if (C->isThreadDependent())
6338 return false;
6339 if (C->isDLLImportDependent())
6340 return false;
6341
6344 return false;
6345
6346 // Globals cannot contain scalable types.
6347 if (C->getType()->isScalableTy())
6348 return false;
6349
6351 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6352 // materializing the array of constants.
6353 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6354 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6355 return false;
6356 }
6357
6358 if (!TTI.shouldBuildLookupTablesForConstant(C))
6359 return false;
6360
6361 return true;
6362}
6363
6364/// If V is a Constant, return it. Otherwise, try to look up
6365 /// its constant value in ConstantPool, returning nullptr if it's not there.
6366static Constant *
6369 if (Constant *C = dyn_cast<Constant>(V))
6370 return C;
6371 return ConstantPool.lookup(V);
6372}
6373
6374/// Try to fold instruction I into a constant. This works for
6375/// simple instructions such as binary operations where both operands are
6376/// constant or can be replaced by constants from the ConstantPool. Returns the
6377 /// resulting constant on success, nullptr otherwise.
6378static Constant *
6382 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6383 if (!A)
6384 return nullptr;
6385 if (A->isAllOnesValue())
6386 return lookupConstant(Select->getTrueValue(), ConstantPool);
6387 if (A->isNullValue())
6388 return lookupConstant(Select->getFalseValue(), ConstantPool);
6389 return nullptr;
6390 }
6391
6393 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6394 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6395 COps.push_back(A);
6396 else
6397 return nullptr;
6398 }
6399
6400 return ConstantFoldInstOperands(I, COps, DL);
6401}
6402
6403/// Try to determine the resulting constant values in phi nodes
6404/// at the common destination basic block, *CommonDest, for one of the case
6405/// destinations CaseDest corresponding to value CaseVal (nullptr for the
6406/// default case), of a switch instruction SI.
6407static bool
6409 BasicBlock **CommonDest,
6410 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6411 const DataLayout &DL, const TargetTransformInfo &TTI) {
6412 // The block from which we enter the common destination.
6413 BasicBlock *Pred = SI->getParent();
6414
6415 // If CaseDest is empty except for some side-effect free instructions through
6416 // which we can constant-propagate the CaseVal, continue to its successor.
6418 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6419 for (Instruction &I : *CaseDest) {
6420 if (I.isTerminator()) {
6421 // If the terminator is a simple branch, continue to the next block.
6422 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6423 return false;
6424 Pred = CaseDest;
6425 CaseDest = I.getSuccessor(0);
6426 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6427 // Instruction is side-effect free and constant.
6428
6429 // If the instruction has uses outside this block or a phi node slot for
6430 // the block, it is not safe to bypass the instruction since it would then
6431 // no longer dominate all its uses.
6432 for (auto &Use : I.uses()) {
6433 User *User = Use.getUser();
6435 if (I->getParent() == CaseDest)
6436 continue;
6437 if (PHINode *Phi = dyn_cast<PHINode>(User))
6438 if (Phi->getIncomingBlock(Use) == CaseDest)
6439 continue;
6440 return false;
6441 }
6442
6443 ConstantPool.insert(std::make_pair(&I, C));
6444 } else {
6445 break;
6446 }
6447 }
6448
6449 // If we did not have a CommonDest before, use the current one.
6450 if (!*CommonDest)
6451 *CommonDest = CaseDest;
6452 // If the destination isn't the common one, abort.
6453 if (CaseDest != *CommonDest)
6454 return false;
6455
6456 // Get the values for this case from phi nodes in the destination block.
6457 for (PHINode &PHI : (*CommonDest)->phis()) {
6458 int Idx = PHI.getBasicBlockIndex(Pred);
6459 if (Idx == -1)
6460 continue;
6461
6462 Constant *ConstVal =
6463 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6464 if (!ConstVal)
6465 return false;
6466
6467 // Be conservative about which kinds of constants we support.
6468 if (!validLookupTableConstant(ConstVal, TTI))
6469 return false;
6470
6471 Res.push_back(std::make_pair(&PHI, ConstVal));
6472 }
6473
6474 return Res.size() > 0;
6475}
6476
6477// Helper function used to add CaseVal to the list of cases that generate
6478// Result. Returns the updated number of cases that generate this result.
6479static size_t mapCaseToResult(ConstantInt *CaseVal,
6480 SwitchCaseResultVectorTy &UniqueResults,
6481 Constant *Result) {
6482 for (auto &I : UniqueResults) {
6483 if (I.first == Result) {
6484 I.second.push_back(CaseVal);
6485 return I.second.size();
6486 }
6487 }
6488 UniqueResults.push_back(
6489 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6490 return 1;
6491}
6492
6493// Helper function that initializes a map containing
6494// results for the PHI node of the common destination block for a switch
6495// instruction. Returns false if multiple PHI nodes have been found or if
6496// there is not a common destination block for the switch.
6498 BasicBlock *&CommonDest,
6499 SwitchCaseResultVectorTy &UniqueResults,
6500 Constant *&DefaultResult,
6501 const DataLayout &DL,
6502 const TargetTransformInfo &TTI,
6503 uintptr_t MaxUniqueResults) {
6504 for (const auto &I : SI->cases()) {
6505 ConstantInt *CaseVal = I.getCaseValue();
6506
6507 // Resulting value at phi nodes for this case value.
6508 SwitchCaseResultsTy Results;
6509 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6510 DL, TTI))
6511 return false;
6512
6513 // Only one value per case is permitted.
6514 if (Results.size() > 1)
6515 return false;
6516
6517 // Add the case->result mapping to UniqueResults.
6518 const size_t NumCasesForResult =
6519 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6520
6521 // Early out if there are too many cases for this result.
6522 if (NumCasesForResult > MaxSwitchCasesPerResult)
6523 return false;
6524
6525 // Early out if there are too many unique results.
6526 if (UniqueResults.size() > MaxUniqueResults)
6527 return false;
6528
6529 // Check the PHI consistency.
6530 if (!PHI)
6531 PHI = Results[0].first;
6532 else if (PHI != Results[0].first)
6533 return false;
6534 }
6535 // Find the default result value.
6537 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6538 DL, TTI);
6539 // If the default value is not found abort unless the default destination
6540 // is unreachable.
6541 DefaultResult =
6542 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6543
6544 return DefaultResult || SI->defaultDestUnreachable();
6545}
6546
6547 // Helper function that checks if it is possible to transform a switch with only
6548 // two cases (or two cases + default) that produces a result into a select.
6549 // TODO: Handle switches with more than 2 cases that map to the same result.
6550 // The branch weights correspond to the provided Condition (i.e. if Condition is
6551 // modified from the original SwitchInst, the caller must adjust the weights)
     //
     // ResultVector pairs each distinct result with the case values producing it.
     // DefaultResult is the value for the default destination, or null if there
     // is none. Returns the value built for the select, or nullptr when none of
     // the patterns below match.
6552static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6553 Constant *DefaultResult, Value *Condition,
6554 IRBuilder<> &Builder, const DataLayout &DL,
6555 ArrayRef<uint32_t> BranchWeights) {
6556 // If we are selecting between only two cases transform into a simple
6557 // select or a two-way select if default is possible.
6558 // Example:
6559 // switch (a) { %0 = icmp eq i32 %a, 10
6560 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6561 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6562 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6563 // }
6564
     // Profile metadata is only attached when weights were supplied and the
     // ProfcheckDisableMetadataFixes flag is off.
6565 const bool HasBranchWeights =
6566 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6567
     // Pattern 1: exactly two distinct results, each produced by one case.
6568 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6569 ResultVector[1].second.size() == 1) {
6570 ConstantInt *FirstCase = ResultVector[0].second[0];
6571 ConstantInt *SecondCase = ResultVector[1].second[0];
     // Without a default the second result is the unconditional fallback;
     // with one, it is first guarded by its own compare against SecondCase.
6572 Value *SelectValue = ResultVector[1].first;
6573 if (DefaultResult) {
6574 Value *ValueCompare =
6575 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6576 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6577 DefaultResult, "switch.select");
     // The dyn_cast guards against the builder returning something other
     // than a SelectInst.
6578 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6579 SI && HasBranchWeights) {
6580 // We start with 3 probabilities, where the numerator is the
6581 // corresponding BranchWeights[i], and the denominator is the sum over
6582 // BranchWeights. We want the probability and negative probability of
6583 // Condition == SecondCase.
6584 assert(BranchWeights.size() == 3);
     // NOTE(review): the callee for the argument list below is on a line
     // that is not visible in this listing (6585).
6586 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6587 /*IsExpected=*/false, /*ElideAllZero=*/true);
6588 }
6589 }
6590 Value *ValueCompare =
6591 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6592 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6593 SelectValue, "switch.select");
6594 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6595 // We may have had a DefaultResult. Base the position of the first and
6596 // second's branch weights accordingly. Also the probability that Condition
6597 // != FirstCase needs to take that into account.
     // NOTE(review): Condition has already been handed to CreateICmpEQ
     // above, so `Condition != nullptr` looks always-true here; the comment
     // above suggests DefaultResult may have been intended - confirm.
     // NOTE(review): with FirstCasePos == 1, SecondCasePos is 2, which needs
     // BranchWeights.size() >= 3 rather than the >= 2 asserted - confirm.
6598 assert(BranchWeights.size() >= 2);
6599 size_t FirstCasePos = (Condition != nullptr);
6600 size_t SecondCasePos = FirstCasePos + 1;
6601 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
     // NOTE(review): the callee for the argument list below is on a line
     // that is not visible in this listing (6602).
6603 {BranchWeights[FirstCasePos],
6604 DefaultCase + BranchWeights[SecondCasePos]},
6605 /*IsExpected=*/false, /*ElideAllZero=*/true);
6606 }
6607 return Ret;
6608 }
6609
6610 // Handle the degenerate case where two cases have the same result value.
     // Pattern 2: a single distinct result plus a default result.
6611 if (ResultVector.size() == 1 && DefaultResult) {
6612 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6613 unsigned CaseCount = CaseValues.size();
6614 // n bits group cases map to the same result:
6615 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6616 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6617 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6618 if (isPowerOf2_32(CaseCount)) {
6619 ConstantInt *MinCaseVal = CaseValues[0];
6620 // If there are bits that are set exclusively by CaseValues, we
6621 // can transform the switch into a select if the conjunction of
6622 // all the values uniquely identify CaseValues.
6623 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6624
6625 // Find the minimum value and compute the and of all the case values.
     // The minimum is taken with a signed comparison (slt).
6626 for (auto *Case : CaseValues) {
6627 if (Case->getValue().slt(MinCaseVal->getValue()))
6628 MinCaseVal = Case;
6629 AndMask &= Case->getValue();
6630 }
6631 KnownBits Known = computeKnownBits(Condition, DL);
6632
     // Strategy A: test that every bit common to all case values is set.
6633 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6634 // Compute the number of bits that are free to vary.
6635 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6636
6637 // Check if the number of values covered by the mask is equal
6638 // to the number of cases.
6639 if (FreeBits == Log2_32(CaseCount)) {
6640 Value *And = Builder.CreateAnd(Condition, AndMask);
6641 Value *Cmp = Builder.CreateICmpEQ(
6642 And, Constant::getIntegerValue(And->getType(), AndMask));
6643 Value *Ret =
6644 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6645 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6646 // We know there's a Default case. We base the resulting branch
6647 // weights off its probability.
     // True weight: sum of every non-default weight. False weight: the
     // default's weight (index 0).
6648 assert(BranchWeights.size() >= 2);
     // NOTE(review): the callee for the argument list below is on a line
     // that is not visible in this listing (6649).
6650 *SI,
6651 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6652 /*IsExpected=*/false, /*ElideAllZero=*/true);
6653 }
6654 return Ret;
6655 }
6656 }
6657
6658 // Mark the bits case number touched.
     // Strategy B: work with each case's distance from the minimum case value.
6659 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6660 for (auto *Case : CaseValues)
6661 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6662
6663 // Check if cases with the same result can cover all number
6664 // in touched bits.
6665 if (BitMask.popcount() == Log2_32(CaseCount)) {
     // Rebase the condition on the minimum (skipped when it is zero), then
     // require every bit outside BitMask to be clear.
6666 if (!MinCaseVal->isNullValue())
6667 Condition = Builder.CreateSub(Condition, MinCaseVal);
6668 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6669 Value *Cmp = Builder.CreateICmpEQ(
6670 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6671 Value *Ret =
6672 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6673 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6674 assert(BranchWeights.size() >= 2);
     // NOTE(review): the callee for the argument list below is on a line
     // that is not visible in this listing (6675).
6676 *SI,
6677 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6678 /*IsExpected=*/false, /*ElideAllZero=*/true);
6679 }
6680 return Ret;
6681 }
6682 }
6683
6684 // Handle the degenerate case where two cases have the same value.
     // Strategy C: two explicit equality compares combined with an `or`.
6685 if (CaseValues.size() == 2) {
6686 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6687 "switch.selectcmp.case1");
6688 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6689 "switch.selectcmp.case2");
6690 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6691 Value *Ret =
6692 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6693 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6694 assert(BranchWeights.size() >= 2);
     // NOTE(review): the callee for the argument list below is on a line
     // that is not visible in this listing (6695).
6696 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6697 /*IsExpected=*/false, /*ElideAllZero=*/true);
6698 }
6699 return Ret;
6700 }
6701 }
6702
     // No supported pattern matched.
6703 return nullptr;
6704}
6705
6706 // Helper function to clean up a switch instruction that has been converted
6707 // into a select, fixing up PHI nodes and basic blocks.
6709 Value *SelectValue,
6710 IRBuilder<> &Builder,
6711 DomTreeUpdater *DTU) {
6712 std::vector<DominatorTree::UpdateType> Updates;
6713
6714 BasicBlock *SelectBB = SI->getParent();
6715 BasicBlock *DestBB = PHI->getParent();
6716
6717 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6718 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6719 Builder.CreateBr(DestBB);
6720
6721 // Remove the switch.
6722
6723 PHI->removeIncomingValueIf(
6724 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6725 PHI->addIncoming(SelectValue, SelectBB);
6726
6727 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6728 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6729 BasicBlock *Succ = SI->getSuccessor(i);
6730
6731 if (Succ == DestBB)
6732 continue;
6733 Succ->removePredecessor(SelectBB);
6734 if (DTU && RemovedSuccessors.insert(Succ).second)
6735 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6736 }
6737 SI->eraseFromParent();
6738 if (DTU)
6739 DTU->applyUpdates(Updates);
6740}
6741
6742/// If a switch is only used to initialize one or more phi nodes in a common
6743/// successor block with only two different constant values, try to replace the
6744/// switch with a select. Returns true if the fold was made.
6746 DomTreeUpdater *DTU, const DataLayout &DL,
6747 const TargetTransformInfo &TTI) {
6748 Value *const Cond = SI->getCondition();
6749 PHINode *PHI = nullptr;
6750 BasicBlock *CommonDest = nullptr;
6751 Constant *DefaultResult;
6752 SwitchCaseResultVectorTy UniqueResults;
6753 // Collect all the cases that will deliver the same value from the switch.
6754 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6755 DL, TTI, /*MaxUniqueResults*/ 2))
6756 return false;
6757
6758 assert(PHI != nullptr && "PHI for value select not found");
6759 Builder.SetInsertPoint(SI);
6760 SmallVector<uint32_t, 4> BranchWeights;
6762 [[maybe_unused]] auto HasWeights =
6764 assert(!HasWeights == (BranchWeights.empty()));
6765 }
6766 assert(BranchWeights.empty() ||
6767 (BranchWeights.size() >=
6768 UniqueResults.size() + (DefaultResult != nullptr)));
6769
6770 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6771 Builder, DL, BranchWeights);
6772 if (!SelectValue)
6773 return false;
6774
6775 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6776 return true;
6777}
6778
6779namespace {
6780
6781/// This class finds alternatives for switches to ultimately
6782/// replace the switch.
6783class SwitchReplacement {
6784public:
6785 /// Create a helper for optimizations to use as a switch replacement.
6786 /// Find a better representation for the content of Values,
6787 /// using DefaultValue to fill any holes in the table.
6788 SwitchReplacement(
6789 Module &M, uint64_t TableSize, ConstantInt *Offset,
6790 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6791 Constant *DefaultValue, const DataLayout &DL,
6792 const TargetTransformInfo &TTI, const StringRef &FuncName);
6793
6794 /// Build instructions with Builder to retrieve values using Index
6795 /// and replace the switch.
6796 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6797 Function *Func);
6798
6799 /// Return true if a table with TableSize elements of
6800 /// type ElementType would fit in a target-legal register.
6801 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6802 Type *ElementType);
6803
6804 /// Return the default value of the switch.
6805 Constant *getDefaultValue();
6806
6807 /// Return true if the replacement is a lookup table.
6808 bool isLookupTable();
6809
6810 /// Return true if the replacement is a bit map.
6811 bool isBitMap();
6812
6813private:
6814 // Depending on the switch, there are different alternatives.
6815 enum {
6816 // For switches where each case contains the same value, we just have to
6817 // store that single value and return it for each lookup.
6818 SingleValueKind,
6819
6820 // For switches where there is a linear relationship between table index
6821 // and values. We calculate the result with a simple multiplication
6822 // and addition instead of a table lookup.
6823 LinearMapKind,
6824
6825 // For small tables with integer elements, we can pack them into a bitmap
6826 // that fits into a target-legal register. Values are retrieved by
6827 // shift and mask operations.
6828 BitMapKind,
6829
6830 // The table is stored as an array of values. Values are retrieved by load
6831 // instructions from the table.
6832 LookupTableKind
6833 } Kind;
6834
6835 // The default value of the switch.
6836 Constant *DefaultValue;
6837
6838 // The type of the output values.
6839 Type *ValueType;
6840
6841 // For SingleValueKind, this is the single value.
6842 Constant *SingleValue = nullptr;
6843
6844 // For BitMapKind, this is the bitmap.
6845 ConstantInt *BitMap = nullptr;
6846 IntegerType *BitMapElementTy = nullptr;
6847
6848 // For LinearMapKind, these are the constants used to derive the value.
6849 ConstantInt *LinearOffset = nullptr;
6850 ConstantInt *LinearMultiplier = nullptr;
6851 bool LinearMapValWrapped = false;
6852
6853 // For LookupTableKind, this is the table.
6854 Constant *Initializer = nullptr;
6855};
6856
6857} // end anonymous namespace
6858
6859SwitchReplacement::SwitchReplacement(
6860 Module &M, uint64_t TableSize, ConstantInt *Offset,
6861 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6862 Constant *DefaultValue, const DataLayout &DL,
6863 const TargetTransformInfo &TTI, const StringRef &FuncName)
6864 : DefaultValue(DefaultValue) {
6865 assert(Values.size() && "Can't build lookup table without values!");
6866 assert(TableSize >= Values.size() && "Can't fit values in table!");
6867
6868 // If all values in the table are equal, this is that value.
6869 SingleValue = Values.begin()->second;
6870
6871 ValueType = Values.begin()->second->getType();
6872
6873 // Build up the table contents.
6874 SmallVector<Constant *, 64> TableContents(TableSize);
6875 for (const auto &[CaseVal, CaseRes] : Values) {
6876 assert(CaseRes->getType() == ValueType);
6877
6878 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6879 TableContents[Idx] = CaseRes;
6880
6881 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6882 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6883 }
6884
6885 // Fill in any holes in the table with the default result.
6886 if (Values.size() < TableSize) {
6887 assert(DefaultValue &&
6888 "Need a default value to fill the lookup table holes.");
6889 assert(DefaultValue->getType() == ValueType);
6890 for (uint64_t I = 0; I < TableSize; ++I) {
6891 if (!TableContents[I])
6892 TableContents[I] = DefaultValue;
6893 }
6894
6895 // If the default value is poison, all the holes are poison.
6896 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6897
6898 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6899 SingleValue = nullptr;
6900 }
6901
6902 // If each element in the table contains the same value, we only need to store
6903 // that single value.
6904 if (SingleValue) {
6905 Kind = SingleValueKind;
6906 return;
6907 }
6908
6909 // Check if we can derive the value with a linear transformation from the
6910 // table index.
6912 bool LinearMappingPossible = true;
6913 APInt PrevVal;
6914 APInt DistToPrev;
6915 // When linear map is monotonic and signed overflow doesn't happen on
6916 // maximum index, we can attach nsw on Add and Mul.
6917 bool NonMonotonic = false;
6918 assert(TableSize >= 2 && "Should be a SingleValue table.");
6919 // Check if there is the same distance between two consecutive values.
6920 for (uint64_t I = 0; I < TableSize; ++I) {
6921 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6922
6923 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6924 // This is an poison, so it's (probably) a lookup table hole.
6925 // To prevent any regressions from before we switched to using poison as
6926 // the default value, holes will fall back to using the first value.
6927 // This can be removed once we add proper handling for poisons in lookup
6928 // tables.
6929 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6930 }
6931
6932 if (!ConstVal) {
6933 // This is an undef. We could deal with it, but undefs in lookup tables
6934 // are very seldom. It's probably not worth the additional complexity.
6935 LinearMappingPossible = false;
6936 break;
6937 }
6938 const APInt &Val = ConstVal->getValue();
6939 if (I != 0) {
6940 APInt Dist = Val - PrevVal;
6941 if (I == 1) {
6942 DistToPrev = Dist;
6943 } else if (Dist != DistToPrev) {
6944 LinearMappingPossible = false;
6945 break;
6946 }
6947 NonMonotonic |=
6948 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6949 }
6950 PrevVal = Val;
6951 }
6952 if (LinearMappingPossible) {
6953 LinearOffset = cast<ConstantInt>(TableContents[0]);
6954 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6955 APInt M = LinearMultiplier->getValue();
6956 bool MayWrap = true;
6957 if (isIntN(M.getBitWidth(), TableSize - 1))
6958 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6959 LinearMapValWrapped = NonMonotonic || MayWrap;
6960 Kind = LinearMapKind;
6961 return;
6962 }
6963 }
6964
6965 // If the type is integer and the table fits in a register, build a bitmap.
6966 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6968 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6969 for (uint64_t I = TableSize; I > 0; --I) {
6970 TableInt <<= IT->getBitWidth();
6971 // Insert values into the bitmap. Undef values are set to zero.
6972 if (!isa<UndefValue>(TableContents[I - 1])) {
6973 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6974 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6975 }
6976 }
6977 BitMap = ConstantInt::get(M.getContext(), TableInt);
6978 BitMapElementTy = IT;
6979 Kind = BitMapKind;
6980 return;
6981 }
6982
6983 if (auto *IT = dyn_cast<IntegerType>(ValueType)) {
6984 ConstantRange Range(IT->getBitWidth(), false);
6985 for (Constant *Value : TableContents)
6986 if (!isa<UndefValue>(Value))
6987 Range = Range.unionWith(cast<ConstantInt>(Value)->getValue());
6988 // TODO: handle sign extension as well?
6989 unsigned NeededBitWidth =
6990 std::max(TTI.getMinimumLookupTableEntryBitWidth(),
6991 unsigned(PowerOf2Ceil(Range.getActiveBits())));
6992 if (NeededBitWidth < IT->getBitWidth()) {
6993 IntegerType *DstTy = IntegerType::get(IT->getContext(), NeededBitWidth);
6994 for (Constant *&Value : TableContents)
6995 Value = ConstantFoldCastInstruction(Instruction::Trunc, Value, DstTy);
6996 }
6997 }
6998
6999 // Store the table in an array.
7000 auto *TableTy = ArrayType::get(TableContents[0]->getType(), TableSize);
7001 Initializer = ConstantArray::get(TableTy, TableContents);
7002
7003 Kind = LookupTableKind;
7004}
7005
7006Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
7007 const DataLayout &DL, Function *Func) {
7008 switch (Kind) {
7009 case SingleValueKind:
7010 return SingleValue;
7011 case LinearMapKind: {
7012 ++NumLinearMaps;
7013 // Derive the result value from the input value.
7014 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
7015 false, "switch.idx.cast");
7016 if (!LinearMultiplier->isOne())
7017 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
7018 /*HasNUW = */ false,
7019 /*HasNSW = */ !LinearMapValWrapped);
7020
7021 if (!LinearOffset->isZero())
7022 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
7023 /*HasNUW = */ false,
7024 /*HasNSW = */ !LinearMapValWrapped);
7025 return Result;
7026 }
7027 case BitMapKind: {
7028 ++NumBitMaps;
7029 // Type of the bitmap (e.g. i59).
7030 IntegerType *MapTy = BitMap->getIntegerType();
7031
7032 // Cast Index to the same type as the bitmap.
7033 // Note: The Index is <= the number of elements in the table, so
7034 // truncating it to the width of the bitmask is safe.
7035 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
7036
7037 // Multiply the shift amount by the element width. NUW/NSW can always be
7038 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
7039 // BitMap's bit width.
7040 ShiftAmt = Builder.CreateMul(
7041 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
7042 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7043
7044 // Shift down.
7045 Value *DownShifted =
7046 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7047 // Mask off.
7048 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7049 }
7050 case LookupTableKind: {
7051 ++NumLookupTables;
7052 auto *Table =
7053 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7054 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7055 Initializer, "switch.table." + Func->getName());
7056 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7057 // Set the alignment to that of an array items. We will be only loading one
7058 // value out of it.
7059 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7060 Type *IndexTy = DL.getIndexType(Table->getType());
7061 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7062
7063 if (Index->getType() != IndexTy) {
7064 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7065 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7066 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7067 Zext->setNonNeg(
7068 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7069 }
7070
7071 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7072 Value *GEP =
7073 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7074 Value *Load =
7075 Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7076 if (Load->getType() == ValueType)
7077 return Load;
7078 return Builder.CreateZExt(Load, ValueType, "switch.ext");
7079 }
7080 }
7081 llvm_unreachable("Unknown helper kind!");
7082}
7083
7084bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7085 uint64_t TableSize,
7086 Type *ElementType) {
7087 auto *IT = dyn_cast<IntegerType>(ElementType);
7088 if (!IT)
7089 return false;
7090 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7091 // are <= 15, we could try to narrow the type.
7092
7093 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7094 if (TableSize >= UINT_MAX / IT->getBitWidth())
7095 return false;
7096 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7097}
7098
7100 const DataLayout &DL) {
7101 // Allow any legal type.
7102 if (TTI.isTypeLegal(Ty))
7103 return true;
7104
7105 auto *IT = dyn_cast<IntegerType>(Ty);
7106 if (!IT)
7107 return false;
7108
7109 // Also allow power of 2 integer types that have at least 8 bits and fit in
7110 // a register. These types are common in frontend languages and targets
7111 // usually support loads of these types.
7112 // TODO: We could relax this to any integer that fits in a register and rely
7113 // on ABI alignment and padding in the table to allow the load to be widened.
7114 // Or we could widen the constants and truncate the load.
7115 unsigned BitWidth = IT->getBitWidth();
7116 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7117 DL.fitsInLegalInteger(IT->getBitWidth());
7118}
7119
7120Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7121
7122bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7123
7124bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7125
/// Return true if \p NumCases cases spread over a range of \p CaseRange
/// values meet the minimum density: 40% when \p OptSize is set, 10%
/// otherwise. These are the default jump table densities; this function was
/// based on TargetLoweringBase::isSuitableForJumpTable().
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize) {
  // Reject very large ranges up front so the scaled products below cannot
  // overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  const uint64_t RequiredPercent = OptSize ? 40 : 10;
  const uint64_t ScaledCases = NumCases * 100;
  const uint64_t ScaledThreshold = CaseRange * RequiredPercent;
  return ScaledCases >= ScaledThreshold;
}
7138
7139static bool isSwitchDense(ArrayRef<int64_t> Values, bool OptSize) {
7140 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7141 uint64_t Range = Diff + 1;
7142 if (Range < Diff)
7143 return false; // Overflow.
7144
7145 return isSwitchDense(Values.size(), Range, OptSize);
7146}
7147
7148/// Determine whether a lookup table should be built for this switch, based on
7149/// the number of cases, size of the table, and the types of the results.
7150// TODO: We could support larger than legal types by limiting based on the
7151// number of loads required and/or table size. If the constants are small we
7152// could use smaller table entries and extend after the load.
7154 const TargetTransformInfo &TTI,
7155 const DataLayout &DL,
7156 const SmallVector<Type *> &ResultTypes) {
7157 if (SI->getNumCases() > TableSize)
7158 return false; // TableSize overflowed.
7159
7160 bool AllTablesFitInRegister = true;
7161 bool HasIllegalType = false;
7162 for (const auto &Ty : ResultTypes) {
7163 // Saturate this flag to true.
7164 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7165
7166 // Saturate this flag to false.
7167 AllTablesFitInRegister =
7168 AllTablesFitInRegister &&
7169 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7170
7171 // If both flags saturate, we're done. NOTE: This *only* works with
7172 // saturating flags, and all flags have to saturate first due to the
7173 // non-deterministic behavior of iterating over a dense map.
7174 if (HasIllegalType && !AllTablesFitInRegister)
7175 break;
7176 }
7177
7178 // If each table would fit in a register, we should build it anyway.
7179 if (AllTablesFitInRegister)
7180 return true;
7181
7182 // Don't build a table that doesn't fit in-register if it has illegal types.
7183 if (HasIllegalType)
7184 return false;
7185
7186 return isSwitchDense(SI->getNumCases(), TableSize,
7187 SI->getFunction()->hasOptSize());
7188}
7189
7191 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7192 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7193 const DataLayout &DL, const TargetTransformInfo &TTI) {
7194 if (MinCaseVal.isNullValue())
7195 return true;
7196 if (MinCaseVal.isNegative() ||
7197 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7198 !HasDefaultResults)
7199 return false;
7200 return all_of(ResultTypes, [&](const auto &ResultType) {
7201 return SwitchReplacement::wouldFitInRegister(
7202 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7203 });
7204}
7205
7206/// Try to reuse the switch table index compare. Following pattern:
7207/// \code
7208/// if (idx < tablesize)
7209/// r = table[idx]; // table does not contain default_value
7210/// else
7211/// r = default_value;
7212/// if (r != default_value)
7213/// ...
7214/// \endcode
7215/// Is optimized to:
7216/// \code
7217/// cond = idx < tablesize;
7218/// if (cond)
7219/// r = table[idx];
7220/// else
7221/// r = default_value;
7222/// if (cond)
7223/// ...
7224/// \endcode
7225/// Jump threading will then eliminate the second if(cond).
7227 User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
7228 Constant *DefaultValue,
7229 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7231 if (!CmpInst)
7232 return;
7233
7234 // We require that the compare is in the same block as the phi so that jump
7235 // threading can do its work afterwards.
7236 if (CmpInst->getParent() != PhiBlock)
7237 return;
7238
7240 if (!CmpOp1)
7241 return;
7242
7243 Value *RangeCmp = RangeCheckBranch->getCondition();
7244 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7245 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7246
7247 // Check if the compare with the default value is constant true or false.
7248 const DataLayout &DL = PhiBlock->getDataLayout();
7250 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7251 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7252 return;
7253
7254 // Check if the compare with the case values is distinct from the default
7255 // compare result.
7256 for (auto ValuePair : Values) {
7258 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7259 if (!CaseConst || CaseConst == DefaultConst ||
7260 (CaseConst != TrueConst && CaseConst != FalseConst))
7261 return;
7262 }
7263
7264 // Check if the branch instruction dominates the phi node. It's a simple
7265 // dominance check, but sufficient for our needs.
7266 // Although this check is invariant in the calling loops, it's better to do it
7267 // at this late stage. Practically we do it at most once for a switch.
7268 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7269 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7270 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7271 return;
7272 }
7273
7274 if (DefaultConst == FalseConst) {
7275 // The compare yields the same result. We can replace it.
7276 CmpInst->replaceAllUsesWith(RangeCmp);
7277 ++NumTableCmpReuses;
7278 } else {
7279 // The compare yields the same result, just inverted. We can replace it.
7280 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7281 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7282 RangeCheckBranch->getIterator());
7283 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7284 ++NumTableCmpReuses;
7285 }
7286}
7287
7288/// If the switch is only used to initialize one or more phi nodes in a common
7289/// successor block with different constant values, replace the switch with
7290/// lookup tables.
7292 DomTreeUpdater *DTU, const DataLayout &DL,
7293 const TargetTransformInfo &TTI,
7294 bool ConvertSwitchToLookupTable) {
7295 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7296
7297 BasicBlock *BB = SI->getParent();
7298 Function *Fn = BB->getParent();
7299
7300 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7301 // split off a dense part and build a lookup table for that.
7302
7303 // FIXME: This creates arrays of GEPs to constant strings, which means each
7304 // GEP needs a runtime relocation in PIC code. We should just build one big
7305 // string and lookup indices into that.
7306
7307 // Ignore switches with less than three cases. Lookup tables will not make
7308 // them faster, so we don't analyze them.
7309 if (SI->getNumCases() < 3)
7310 return false;
7311
7312 // Figure out the corresponding result for each case value and phi node in the
7313 // common destination, as well as the min and max case values.
7314 assert(!SI->cases().empty());
7315 SwitchInst::CaseIt CI = SI->case_begin();
7316 ConstantInt *MinCaseVal = CI->getCaseValue();
7317 ConstantInt *MaxCaseVal = CI->getCaseValue();
7318
7319 BasicBlock *CommonDest = nullptr;
7320
7321 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7323
7325 SmallVector<Type *> ResultTypes;
7327
7328 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7329 ConstantInt *CaseVal = CI->getCaseValue();
7330 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7331 MinCaseVal = CaseVal;
7332 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7333 MaxCaseVal = CaseVal;
7334
7335 // Resulting value at phi nodes for this case value.
7337 ResultsTy Results;
7338 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7339 Results, DL, TTI))
7340 return false;
7341
7342 // Append the result and result types from this case to the list for each
7343 // phi.
7344 for (const auto &I : Results) {
7345 PHINode *PHI = I.first;
7346 Constant *Value = I.second;
7347 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7348 if (Inserted)
7349 PHIs.push_back(PHI);
7350 It->second.push_back(std::make_pair(CaseVal, Value));
7351 ResultTypes.push_back(PHI->getType());
7352 }
7353 }
7354
7355 // If the table has holes, we need a constant result for the default case
7356 // or a bitmask that fits in a register.
7357 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7358 bool HasDefaultResults =
7359 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7360 DefaultResultsList, DL, TTI);
7361 for (const auto &I : DefaultResultsList) {
7362 PHINode *PHI = I.first;
7363 Constant *Result = I.second;
7364 DefaultResults[PHI] = Result;
7365 }
7366
7367 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7368 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7369 uint64_t TableSize;
7370 ConstantInt *TableIndexOffset;
7371 if (UseSwitchConditionAsTableIndex) {
7372 TableSize = MaxCaseVal->getLimitedValue() + 1;
7373 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7374 } else {
7375 TableSize =
7376 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7377
7378 TableIndexOffset = MinCaseVal;
7379 }
7380
7381 // If the default destination is unreachable, or if the lookup table covers
7382 // all values of the conditional variable, branch directly to the lookup table
7383 // BB. Otherwise, check that the condition is within the case range.
7384 uint64_t NumResults = ResultLists[PHIs[0]].size();
7385 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7386
7387 bool TableHasHoles = (NumResults < TableSize);
7388
7389 // If the table has holes but the default destination doesn't produce any
7390 // constant results, the lookup table entries corresponding to the holes will
7391 // contain poison.
7392 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7393
7394 // If the default destination doesn't produce a constant result but is still
7395 // reachable, and the lookup table has holes, we need to use a mask to
7396 // determine if the current index should load from the lookup table or jump
7397 // to the default case.
7398 // The mask is unnecessary if the table has holes but the default destination
7399 // is unreachable, as in that case the holes must also be unreachable.
7400 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7401 if (NeedMask) {
7402 // As an extra penalty for the validity test we require more cases.
7403 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7404 return false;
7405 if (!DL.fitsInLegalInteger(TableSize))
7406 return false;
7407 }
7408
7409 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7410 return false;
7411
7412 // Compute the table index value.
7413 Value *TableIndex;
7414 if (UseSwitchConditionAsTableIndex) {
7415 TableIndex = SI->getCondition();
7416 if (HasDefaultResults) {
7417 // Grow the table to cover all possible index values to avoid the range
7418 // check. It will use the default result to fill in the table hole later,
7419 // so make sure it exist.
7420 ConstantRange CR = computeConstantRange(TableIndex, /*ForSigned=*/false,
7421 SimplifyQuery(DL));
7422 // Grow the table shouldn't have any size impact by checking
7423 // wouldFitInRegister.
7424 // TODO: Consider growing the table also when it doesn't fit in a register
7425 // if no optsize is specified.
7426 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7427 if (!CR.isUpperWrapped() &&
7428 all_of(ResultTypes, [&](const auto &ResultType) {
7429 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7430 ResultType);
7431 })) {
7432 // There may be some case index larger than the UpperBound (unreachable
7433 // case), so make sure the table size does not get smaller.
7434 TableSize = std::max(UpperBound, TableSize);
7435 // The default branch is unreachable after we enlarge the lookup table.
7436 // Adjust DefaultIsReachable to reuse code path.
7437 DefaultIsReachable = false;
7438 }
7439 }
7440 }
7441
7442 // Keep track of the switch replacement for each phi
7444 for (PHINode *PHI : PHIs) {
7445 const auto &ResultList = ResultLists[PHI];
7446
7447 Type *ResultType = ResultList.begin()->second->getType();
7448 // Use any value to fill the lookup table holes.
7449 Constant *DefaultVal =
7450 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7451 StringRef FuncName = Fn->getName();
7452 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7453 ResultList, DefaultVal, DL, TTI, FuncName);
7454 PhiToReplacementMap.insert({PHI, Replacement});
7455 }
7456
7457 bool AnyLookupTables = any_of(
7458 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7459 bool AnyBitMaps = any_of(PhiToReplacementMap,
7460 [](auto &KV) { return KV.second.isBitMap(); });
7461
7462 // A few conditions prevent the generation of lookup tables:
7463 // 1. The target does not support lookup tables.
7464 // 2. The "no-jump-tables" function attribute is set.
7465 // However, these objections do not apply to other switch replacements, like
7466 // the bitmap, so we only stop here if any of these conditions are met and we
7467 // want to create a LUT. Otherwise, continue with the switch replacement.
7468 if (AnyLookupTables &&
7469 (!TTI.shouldBuildLookupTables() ||
7470 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7471 return false;
7472
7473 // In the early optimization pipeline, disable formation of lookup tables,
7474 // bit maps and mask checks, as they may inhibit further optimization.
7475 if (!ConvertSwitchToLookupTable &&
7476 (AnyLookupTables || AnyBitMaps || NeedMask))
7477 return false;
7478
7479 Builder.SetInsertPoint(SI);
7480 // TableIndex is the switch condition - TableIndexOffset if we don't
7481 // use the condition directly
7482 if (!UseSwitchConditionAsTableIndex) {
7483 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7484 // we can try to attach nsw.
7485 bool MayWrap = true;
7486 if (!DefaultIsReachable) {
7487 APInt Res =
7488 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7489 (void)Res;
7490 }
7491 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7492 "switch.tableidx", /*HasNUW =*/false,
7493 /*HasNSW =*/!MayWrap);
7494 }
7495
7496 std::vector<DominatorTree::UpdateType> Updates;
7497
7498 // Compute the maximum table size representable by the integer type we are
7499 // switching upon.
7500 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7501 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7502 assert(MaxTableSize >= TableSize &&
7503 "It is impossible for a switch to have more entries than the max "
7504 "representable value of its input integer type's size.");
7505
7506 // Create the BB that does the lookups.
7507 Module &Mod = *CommonDest->getParent()->getParent();
7508 BasicBlock *LookupBB = BasicBlock::Create(
7509 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7510
7511 CondBrInst *RangeCheckBranch = nullptr;
7512 CondBrInst *CondBranch = nullptr;
7513
7514 Builder.SetInsertPoint(SI);
7515 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7516 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7517 Builder.CreateBr(LookupBB);
7518 if (DTU)
7519 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7520 // Note: We call removeProdecessor later since we need to be able to get the
7521 // PHI value for the default case in case we're using a bit mask.
7522 } else {
7523 Value *Cmp = Builder.CreateICmpULT(
7524 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7525 RangeCheckBranch =
7526 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7527 CondBranch = RangeCheckBranch;
7528 if (DTU)
7529 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7530 }
7531
7532 // Populate the BB that does the lookups.
7533 Builder.SetInsertPoint(LookupBB);
7534
7535 if (NeedMask) {
7536 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7537 // re-purposed to do the hole check, and we create a new LookupBB.
7538 BasicBlock *MaskBB = LookupBB;
7539 MaskBB->setName("switch.hole_check");
7540 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7541 CommonDest->getParent(), CommonDest);
7542
7543 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7544 // unnecessary illegal types.
7545 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7546 APInt MaskInt(TableSizePowOf2, 0);
7547 APInt One(TableSizePowOf2, 1);
7548 // Build bitmask; fill in a 1 bit for every case.
7549 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7550 for (const auto &Result : ResultList) {
7551 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7552 .getLimitedValue();
7553 MaskInt |= One << Idx;
7554 }
7555 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7556
7557 // Get the TableIndex'th bit of the bitmask.
7558 // If this bit is 0 (meaning hole) jump to the default destination,
7559 // else continue with table lookup.
7560 IntegerType *MapTy = TableMask->getIntegerType();
7561 Value *MaskIndex =
7562 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7563 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7564 Value *LoBit = Builder.CreateTrunc(
7565 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7566 CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7567 if (DTU) {
7568 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7569 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7570 }
7571 Builder.SetInsertPoint(LookupBB);
7572 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7573 }
7574
7575 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7576 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7577 // do not delete PHINodes here.
7578 SI->getDefaultDest()->removePredecessor(BB,
7579 /*KeepOneInputPHIs=*/true);
7580 if (DTU)
7581 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7582 }
7583
7584 for (PHINode *PHI : PHIs) {
7585 const ResultListTy &ResultList = ResultLists[PHI];
7586 auto Replacement = PhiToReplacementMap.at(PHI);
7587 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7588 // Do a small peephole optimization: re-use the switch table compare if
7589 // possible.
7590 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7591 BasicBlock *PhiBlock = PHI->getParent();
7592 // Search for compare instructions which use the phi.
7593 for (auto *User : PHI->users()) {
7594 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7595 Replacement.getDefaultValue(), ResultList);
7596 }
7597 }
7598
7599 PHI->addIncoming(Result, LookupBB);
7600 }
7601
7602 Builder.CreateBr(CommonDest);
7603 if (DTU)
7604 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7605
7606 SmallVector<uint32_t> BranchWeights;
7607 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7608 extractBranchWeights(*SI, BranchWeights);
7609 uint64_t ToLookupWeight = 0;
7610 uint64_t ToDefaultWeight = 0;
7611
7612 // Remove the switch.
7613 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7614 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7615 BasicBlock *Succ = SI->getSuccessor(I);
7616
7617 if (Succ == SI->getDefaultDest()) {
7618 if (HasBranchWeights)
7619 ToDefaultWeight += BranchWeights[I];
7620 continue;
7621 }
7622 Succ->removePredecessor(BB);
7623 if (DTU && RemovedSuccessors.insert(Succ).second)
7624 Updates.push_back({DominatorTree::Delete, BB, Succ});
7625 if (HasBranchWeights)
7626 ToLookupWeight += BranchWeights[I];
7627 }
7628 SI->eraseFromParent();
7629 if (HasBranchWeights)
7630 setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
7631 /*IsExpected=*/false);
7632 if (DTU)
7633 DTU->applyUpdates(Updates);
7634
7635 if (NeedMask)
7636 ++NumLookupTablesHoles;
7637 return true;
7638}
7639
7640 /// Try to transform a switch that has "holes" in it to a contiguous sequence
7641 /// of cases.
7642 ///
7643 /// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7644 /// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7645 ///
7646 /// This converts a sparse switch into a dense switch which allows better
7647 /// lowering and could also allow transforming into a lookup table.
///
/// The rewrite subtracts the smallest (signed) case value from the condition
/// and rotates the difference by the number of trailing zero bits common to all
/// rebased case values; every case constant is adjusted the same way.
///
/// Returns true if the switch was rewritten and false if it was left untouched
/// (all `return false` paths below run before any IR is modified).
// NOTE(review): listing line 7648 (the start of the signature) is absent from
// this rendering; only the trailing parameters are visible.
7649 const DataLayout &DL,
7650 const TargetTransformInfo &TTI) {
7651 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
// The case values are handled as 64-bit quantities below (int64_t/uint64_t
// casts), so wider conditions are rejected, as are widths the data layout
// reports as not fitting in a legal integer.
7652 if (CondTy->getIntegerBitWidth() > 64 ||
7653 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7654 return false;
7655 // Only bother with this optimization if there are more than 3 switch cases;
7656 // SDAG will only bother creating jump tables for 4 or more cases.
7657 if (SI->getNumCases() < 4)
7658 return false;
7659
7660 // This transform is agnostic to the signedness of the input or case values. We
7661 // can treat the case values as signed or unsigned. We can optimize more common
7662 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7663 // as signed.
// NOTE(review): the declaration of `Values` (listing line 7664) is absent from
// this rendering.
7665 for (const auto &C : SI->cases())
7666 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7667 llvm::sort(Values);
7668
7669 // If the switch is already dense, there's nothing useful to do here.
7670 if (isSwitchDense(Values, SI->getFunction()->hasOptSize()))
7671 return false;
7672
7673 // First, transform the values such that they start at zero and ascend.
// Values is sorted, so Values[0] is the smallest signed case value. The
// subtraction uses an unsigned operand.
7674 int64_t Base = Values[0];
7675 for (auto &V : Values)
7676 V -= (uint64_t)(Base);
7677
7678 // Now we have signed numbers that have been shifted so that, given enough
7679 // precision, there are no negative values. Since the rest of the transform
7680 // is bitwise only, we switch now to an unsigned representation.
7681
7682 // This transform can be done speculatively because it is so cheap - it
7683 // results in a single rotate operation being inserted.
7684
7685 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7686 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7687 // less than 64.
// Shift becomes the minimum trailing-zero count over all rebased values, i.e.
// the largest power of two that divides every one of them.
7688 unsigned Shift = 64;
7689 for (auto &V : Values)
7690 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7691 assert(Shift < 64);
7692 if (Shift > 0)
7693 for (auto &V : Values)
7694 V = (int64_t)((uint64_t)V >> Shift);
7695
7696 if (!isSwitchDense(Values, SI->getFunction()->hasOptSize()))
7697 // Transform didn't create a dense switch.
7698 return false;
7699
7700 // The obvious transform is to shift the switch condition right and emit a
7701 // check that the condition actually cleanly divided by GCD, i.e.
7702 // (C & ((1 << Shift) - 1)) == 0
7703 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7704 //
7705 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7706 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7707 // are nonzero then the switch condition will be very large and will hit the
7708 // default case.
7709
// From here on the IR is modified. The rotate is emitted as a funnel shift
// left with both inputs equal to Sub and amount (BitWidth - Shift), which is
// the ROTR(Sub, Shift) described above.
7710 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7711 Builder.SetInsertPoint(SI);
7712 Value *Sub =
7713 Builder.CreateSub(SI->getCondition(), ConstantInt::getSigned(Ty, Base));
7714 Value *Rot = Builder.CreateIntrinsic(
7715 Ty, Intrinsic::fshl,
7716 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7717 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7718
// Rewrite each case constant with the same rebase-and-shift. A plain logical
// shift is enough here because every rebased case value has at least Shift
// trailing zero bits (Shift is the minimum computed above).
7719 for (auto Case : SI->cases()) {
7720 auto *Orig = Case.getCaseValue();
7721 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7722 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7723 }
7724 return true;
7725}
7726
7727 /// Tries to transform the switch when the condition is umin with a constant.
7728 /// In that case, the default branch can be replaced by the constant's branch.
7729 /// This method also removes dead cases when the simplification cannot replace
7730 /// the default branch.
7731 ///
7732 /// For example:
7733 /// switch(umin(a, 3)) {
7734 /// case 0:
7735 /// case 1:
7736 /// case 2:
7737 /// case 3:
7738 /// case 4:
7739 /// // ...
7740 /// default:
7741 /// unreachable
7742 /// }
7743 ///
7744 /// Transforms into:
7745 ///
7746 /// switch(a) {
7747 /// case 0:
7748 /// case 1:
7749 /// case 2:
7750 /// default:
7751 /// // This is case 3
7752 /// }
///
/// Returns true if any case was removed or the switch was rewritten; returns
/// false if the condition is not a umin with a constant or nothing changed.
// NOTE(review): listing lines 7753 (signature), 7755 (declaration of
// `Constant`) and 7760-7761 (declarations of `SIW` and `Updates`) are absent
// from this rendering.
7754 Value *A;
7756
7757 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7758 return false;
7759
7762 BasicBlock *BB = SIW->getParent();
7763
7764 // Dead cases are removed even when the simplification fails.
7765 // A case is dead when its value is higher than the Constant.
7766 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
// Keep cases whose value is not unsigned-greater than the constant.
7767 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7768 ++I;
7769 continue;
7770 }
// Dead case: detach BB from the case's successor, queue the edge deletion
// for the dominator tree, and remove the case. The iterator returned by
// removeCase and a fresh end iterator are used to continue the walk.
7771 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7772 DeadCaseBB->removePredecessor(BB);
7773 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
7774 I = SIW.removeCase(I);
7775 E = SIW->case_end();
7776 }
7777
7778 auto Case = SI->findCaseValue(Constant);
7779 // If the case value is not found, `findCaseValue` returns the default case.
7780 // In this scenario, since there is no explicit `case 3:`, the simplification
7781 // fails. The simplification also fails when the switch's default destination
7782 // is reachable.
7783 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
// Still flush the dead-case edge deletions collected above; report a change
// only if at least one case was removed.
7784 if (DTU)
7785 DTU->applyUpdates(Updates);
7786 return !Updates.empty();
7787 }
7788
// Make the constant's case the new default, drop the now-redundant explicit
// case, and switch directly on A instead of umin(A, Constant).
7789 BasicBlock *Unreachable = SI->getDefaultDest();
7790 SIW.replaceDefaultDest(Case);
7791 SIW.removeCase(Case);
7792 SIW->setCondition(A);
7793
// The edge from BB to the old (unreachable) default block is gone.
7794 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7795
7796 if (DTU)
7797 DTU->applyUpdates(Updates);
7798
7799 return true;
7800}
7801
7802 /// Tries to transform switch of powers of two to reduce switch range.
7803 /// For example, switch like:
7804 /// switch (C) { case 1: case 2: case 64: case 128: }
7805 /// will be transformed to:
7806 /// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7807 ///
7808 /// This transformation allows better lowering and may transform the switch
7809 /// instruction into a sequence of bit manipulation and a smaller
7810 /// log2(C)-indexed value table (instead of traditionally emitting a load of the
7811 /// address of the jump target, and indirectly jump to it).
///
/// When the default destination is reachable, a ctpop-based guard branch is
/// inserted in front of the switch so that inputs that are not a power of two
/// go straight to the default block.
///
/// Returns true if the switch was rewritten, false otherwise (all
/// `return false` paths run before any IR is modified).
// NOTE(review): listing lines 7812 (start of the signature), 7837 (declaration
// of `Values`), 7869 (initializer of `HasWeights`) and 7877 (initializer of
// `OrigDenominator`) are absent from this rendering.
7813 DomTreeUpdater *DTU,
7814 const DataLayout &DL,
7815 const TargetTransformInfo &TTI) {
7816 Value *Condition = SI->getCondition();
7817 LLVMContext &Context = SI->getContext();
7818 auto *CondTy = cast<IntegerType>(Condition->getType());
7819
7820 if (CondTy->getIntegerBitWidth() > 64 ||
7821 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7822 return false;
7823
7824 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7825 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7826 {Condition, ConstantInt::getTrue(Context)});
7827 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7828 TTI::TCC_Basic * 2)
7829 return false;
7830
7831 // Only bother with this optimization if there are more than 3 switch cases.
7832 // SDAG will start emitting jump tables for 4 or more cases.
7833 if (SI->getNumCases() < 4)
7834 return false;
7835
7836 // Check that switch cases are powers of two.
7838 for (const auto &Case : SI->cases()) {
7839 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7840 if (llvm::has_single_bit(CaseValue))
7841 Values.push_back(CaseValue);
7842 else
7843 return false;
7844 }
7845
7846 // isSwitchDense requires case values to be sorted.
// The range passed is the span of trailing-zero counts (log2 of the largest
// case minus log2 of the smallest, plus one), i.e. the range the switch would
// cover after the transform.
7847 llvm::sort(Values);
7848 if (!isSwitchDense(Values.size(),
7849 llvm::countr_zero(Values.back()) -
7850 llvm::countr_zero(Values.front()) + 1,
7851 SI->getFunction()->hasOptSize()))
7852 // Transform is unable to generate dense switch.
7853 return false;
7854
7855 Builder.SetInsertPoint(SI);
7856
7857 if (!SI->defaultDestUnreachable()) {
7858 // Let non-power-of-two inputs jump to the default case, when the latter is
7859 // reachable.
7860 auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7861 auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7862
// Split the block at the switch: OrigBB keeps the ctpop/icmp and gets a new
// conditional branch, SplitBB receives the switch itself.
7863 auto *OrigBB = SI->getParent();
7864 auto *DefaultCaseBB = SI->getDefaultDest();
7865 BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7866 auto It = OrigBB->getTerminator()->getIterator();
7867 SmallVector<uint32_t> Weights;
7868 auto HasWeights =
7870 auto *BI = CondBrInst::Create(IsPow2, SplitBB, DefaultCaseBB, It);
7871 if (HasWeights && any_of(Weights, not_equal_to(0))) {
7872 // IsPow2 covers a subset of the cases in which we'd go to the default
7873 // label. The other is those powers of 2 that don't appear in the case
7874 // statement. We don't know the distribution of the values coming in, so
7875 // the safest is to split 50-50 the original probability to `default`.
7876 uint64_t OrigDenominator =
7878 SmallVector<uint64_t> NewWeights(2);
7879 NewWeights[1] = Weights[0] / 2;
7880 NewWeights[0] = OrigDenominator - NewWeights[1];
7881 setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7882 // The probability of executing the default block stays constant. It was
7883 // p_d = Weights[0] / OrigDenominator
7884 // we rewrite as W/D
7885 // We want to find the probability of the default branch of the switch
7886 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7887 // i.e. the original probability is the probability we go to the default
7888 // branch from the BI branch, or we take the default branch on the SI.
7889 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7890 // This matches using W/2 for the default branch probability numerator and
7891 // D-W/2 as the denominator.
7892 Weights[0] = NewWeights[1];
7893 uint64_t CasesDenominator = OrigDenominator - Weights[0];
// Rescale every non-default weight (drop_begin skips Weights[0]).
7894 for (auto &W : drop_begin(Weights))
7895 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7896
7897 setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7898 }
7899 // BI is handling the default case for SI, and so should share its DebugLoc.
7900 BI->setDebugLoc(SI->getDebugLoc());
// Remove OrigBB's previous terminator (the instruction `It` refers to); BI
// was created at that position and takes its place.
7901 It->eraseFromParent();
7902
// DefaultCaseBB gained OrigBB as a predecessor (it already had SplitBB via
// the switch); update it and the dominator tree accordingly.
7903 addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7904 if (DTU)
7905 DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7906 }
7907
7908 // Replace each case with its trailing zeros number.
7909 for (auto &Case : SI->cases()) {
7910 auto *OrigValue = Case.getCaseValue();
7911 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7912 OrigValue->getValue().countr_zero()));
7913 }
7914
7915 // Replace condition with its trailing zeros number.
// The second cttz argument is the constant true (per the LLVM LangRef this is
// the is-zero-poison flag). A zero condition reaches this cttz only when the
// default destination is unreachable, because the ctpop guard above otherwise
// diverts it to the default block.
7916 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7917 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7918
7919 SI->setCondition(ConditionTrailingZeros);
7920
7921 return true;
7922}
7923
7924 /// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7925 /// the same destination.
///
/// The switch is replaced by `icmp Pred LHS, RHS` plus a conditional branch to
/// Succ / OtherSucc, and both the switch and the compare intrinsic are erased.
/// Returns true if the fold was performed, false otherwise.
// NOTE(review): listing lines 7926 (start of the signature), 7932 (declaration
// of `Weights`) and 8002 (declaration of `Pred`) are absent from this
// rendering.
7927 DomTreeUpdater *DTU) {
// The intrinsic is erased at the end, so it must have no other user.
7928 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7929 if (!Cmp || !Cmp->hasOneUse())
7930 return false;
7931
7933 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7934 if (!HasWeights)
7935 Weights.resize(4); // Avoid checking HasWeights everywhere.
7936
7937 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7938 int64_t Res;
7939 BasicBlock *Succ, *OtherSucc;
7940 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7941 BasicBlock *Unreachable = nullptr;
7942
7943 if (SI->getNumCases() == 2) {
7944 // Find which of 1, 0 or -1 is missing (handled by default dest).
7945 SmallSet<int64_t, 3> Missing;
7946 Missing.insert(1);
7947 Missing.insert(0);
7948 Missing.insert(-1);
7949
// The default destination handles the missing value; both explicit cases
// must share one successor, which becomes OtherSucc.
7950 Succ = SI->getDefaultDest();
7951 SuccWeight = Weights[0];
7952 OtherSucc = nullptr;
7953 for (auto &Case : SI->cases()) {
7954 std::optional<int64_t> Val =
7955 Case.getCaseValue()->getValue().trySExtValue();
7956 if (!Val)
7957 return false;
// Bail out on a case value outside {-1, 0, 1}.
7958 if (!Missing.erase(*Val))
7959 return false;
7960 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7961 return false;
7962 OtherSucc = Case.getCaseSuccessor();
7963 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7964 }
7965
7966 assert(Missing.size() == 1 && "Should have one case left");
7967 Res = *Missing.begin();
7968 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7969 // Normalize so that Succ is taken once and OtherSucc twice.
7970 Unreachable = SI->getDefaultDest();
7971 Succ = OtherSucc = nullptr;
7972 for (auto &Case : SI->cases()) {
7973 BasicBlock *NewSucc = Case.getCaseSuccessor();
7974 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7975 if (!OtherSucc || OtherSucc == NewSucc) {
7976 OtherSucc = NewSucc;
7977 OtherSuccWeight += Weight;
7978 } else if (!Succ) {
7979 Succ = NewSucc;
7980 SuccWeight = Weight;
7981 } else if (Succ == NewSucc) {
// The block first recorded as Succ turned out to be the one taken twice,
// so swap the roles.
// NOTE(review): `Weight` of this case is not added to either accumulator
// after the swap, so OtherSuccWeight appears to miss it - confirm whether
// that is intended.
7982 std::swap(Succ, OtherSucc);
7983 std::swap(SuccWeight, OtherSuccWeight);
7984 } else
7985 return false;
7986 }
// Find the case value that selects Succ; that value becomes Res.
// NOTE(review): if all three cases share one successor, Succ stays nullptr
// and Res is never assigned here - presumably such switches are simplified
// before reaching this point; confirm.
7987 for (auto &Case : SI->cases()) {
7988 std::optional<int64_t> Val =
7989 Case.getCaseValue()->getValue().trySExtValue();
7990 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7991 return false;
7992 if (Case.getCaseSuccessor() == Succ) {
7993 Res = *Val;
7994 break;
7995 }
7996 }
7997 } else {
7998 return false;
7999 }
8000
8001 // Determine predicate for the missing case.
// Res == 1 maps to "greater than", 0 to "equal", -1 to "less than". The
// unsigned predicate is chosen first and converted below for a signed compare.
8003 switch (Res) {
8004 case 1:
8005 Pred = ICmpInst::ICMP_UGT;
8006 break;
8007 case 0:
8008 Pred = ICmpInst::ICMP_EQ;
8009 break;
8010 case -1:
8011 Pred = ICmpInst::ICMP_ULT;
8012 break;
8013 }
8014 if (Cmp->isSigned())
8015 Pred = ICmpInst::getSignedPredicate(Pred);
8016
8017 MDNode *NewWeights = nullptr;
8018 if (HasWeights)
8019 NewWeights = MDBuilder(SI->getContext())
8020 .createBranchWeights(SuccWeight, OtherSuccWeight);
8021
8022 BasicBlock *BB = SI->getParent();
8023 Builder.SetInsertPoint(SI->getIterator());
8024 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
8025 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
8026 SI->getMetadata(LLVMContext::MD_unpredictable));
// OtherSucc was reached through two switch edges and is now reached through
// one, so one predecessor entry is dropped. The unreachable default (3-case
// form only) loses its edge from BB entirely.
8027 OtherSucc->removePredecessor(BB);
8028 if (Unreachable)
8029 Unreachable->removePredecessor(BB);
8030 SI->eraseFromParent();
8031 Cmp->eraseFromParent();
8032 if (DTU && Unreachable)
8033 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
8034 return true;
8035}
8036
8037 /// Checking whether two BBs are equal depends on the contents of the
8038 /// BasicBlock and the incoming values of their successor PHINodes.
8039 /// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
8040 /// calling this function on each BasicBlock every time isEqual is called,
8041 /// especially since the same BasicBlock may be passed as an argument multiple
8042 /// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
8043 /// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
8044 /// of the incoming values.
// NOTE(review): listing lines 8045-8046 and 8049-8051 (the struct header and
// its member/type declarations) are absent from this rendering. Code later in
// this file reads members named `BB` and `PhiPredIVs` and a nested type
// `Phi2IVsMap` from this wrapper.
8047
8048 // One Phi usually has < 8 incoming values.
8052
8053 // We only merge the identical non-entry BBs with
8054 // - terminator unconditional br to Succ (pending relaxation),
8055 // - does not have address taken / weird control.
// Returns true only if BB is a non-entry block that consists of a single
// unconditional branch, is neither address-taken nor an EH pad, and has at
// least one predecessor.
8056 static bool canBeMerged(const BasicBlock *BB) {
8057 assert(BB && "Expected non-null BB");
8058 // Entry block cannot be eliminated or have predecessors.
8059 if (BB->isEntryBlock())
8060 return false;
8061
8062 // Single successor and must be Succ.
8063 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8064 // on other kinds of terminators. We decide to only support unconditional
8065 // branches for now for compile time reasons.
8066 auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
8067 if (!BI)
8068 return false;
8069
8070 // Avoid blocks that are "address-taken" (blockaddress) or have unusual
8071 // uses.
8072 if (BB->hasAddressTaken() || BB->isEHPad())
8073 return false;
8074
8075 // TODO: relax this condition to merge equal blocks with >1 instructions?
8076 // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
8077 if (&BB->front() != &BB->back())
8078 return false;
8079
8080 // The BB must have at least one predecessor.
8081 if (pred_empty(BB))
8082 return false;
8083
8084 return true;
8085 }
8086};
8087
// Hashing and equality for `const EqualBBWrapper *` keys.
// NOTE(review): listing lines 8088 (the enclosing struct header, presumably a
// DenseMapInfo specialization - confirm) and 8091 (the declaration of `BI`)
// are absent from this rendering.
//
// Hashes a wrapped block by its single successor together with the values the
// successor's PHI nodes receive from that block.
8089 static unsigned getHashValue(const EqualBBWrapper *EBW) {
8090 BasicBlock *BB = EBW->BB;
8092 assert(BB->size() == 1 && "Expected just a single branch in the BB");
8093
8094 // Since we assume the BB is just a single UncondBrInst with a single
8095 // successor, we hash as the BB and the incoming Values of its successor
8096 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8097 // including the incoming PHI values leads to better performance.
8098 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8099 // time and passing it in EqualBBWrapper, but this slowed down the average
8100 // compile time without having any impact on the worst case compile time.
8101 BasicBlock *Succ = BI->getSuccessor();
// Look up, per PHI in Succ, the value incoming from BB in the precomputed
// PHI -> (pred -> value) map.
8102 auto PhiValsForBB = map_range(Succ->phis(), [&](PHINode &Phi) {
8103 return (*EBW->PhiPredIVs)[&Phi][BB];
8104 });
8105 return hash_combine(Succ, hash_combine_range(PhiValsForBB));
8106 }
// Two wrapped blocks compare equal when they branch to the same successor and
// every PHI in that successor receives the same value from both blocks.
8107 static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
8108 BasicBlock *A = LHS->BB;
8109 BasicBlock *B = RHS->BB;
8110
8111 // FIXME: we checked that the size of A and B are both 1 in
8112 // mergeIdenticalUncondBBs to make the Case list smaller to
8113 // improve performance. If we decide to support BasicBlocks with more
8114 // than just a single instruction, we need to check that A.size() ==
8115 // B.size() here, and we need to check more than just the BranchInsts
8116 // for equality.
8117
8118 UncondBrInst *ABI = cast<UncondBrInst>(A->getTerminator());
8119 UncondBrInst *BBI = cast<UncondBrInst>(B->getTerminator());
8120 if (ABI->getSuccessor() != BBI->getSuccessor())
8121 return false;
8122
8123 // Need to check that PHIs in successor have matching values.
8124 BasicBlock *Succ = ABI->getSuccessor();
8125 auto IfPhiIVMatch = [&](PHINode &Phi) {
8126 // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
8127 // query.
8128 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8129 return PredIVs[A] == PredIVs[B];
8130 };
8131 return all_of(Succ->phis(), IfPhiIVMatch);
8132 }
8133};
8134
8135 // Merge identical BBs into one of them.
//
// For every group of candidates that compare equal (same successor, same
// incoming PHI values), the first one encountered is kept and all edges into
// the others are redirected to it. The redundant blocks are left in place
// without predecessors. Returns true if any edge was redirected.
// NOTE(review): listing lines 8136 (start of the signature), 8147 and 8149
// (declarations of `BBs2Merge` and `Phis`), 8179 and 8182 (declarations of
// `Keep` and `Updates`), 8189-8190 (declaration of `DeadPreds`) and 8195
// (start of the `LivePreds` declaration) are absent from this rendering.
8137 DomTreeUpdater *DTU) {
8138 if (Candidates.size() < 2)
8139 return false;
8140
8141 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8142 // PHINodes which need to be processed into PhiPredIVs. We decide to process
8143 // an entire PHI at once after the loop, opposed to calling
8144 // getIncomingValueForBlock inside this loop, since each call to
8145 // getIncomingValueForBlock is O(|Preds|).
8146 EqualBBWrapper::Phi2IVsMap PhiPredIVs;
8148 BBs2Merge.reserve(Candidates.size());
8150
8151 for (BasicBlock *BB : Candidates) {
8152 BasicBlock *Succ = BB->getSingleSuccessor();
8153 assert(Succ && "Expected unconditional BB");
8154 BBs2Merge.emplace_back(EqualBBWrapper{BB, &PhiPredIVs});
8155 Phis.insert_range(make_pointer_range(Succ->phis()));
8156 }
8157
8158 // Precompute a data structure to improve performance of isEqual for
8159 // EqualBBWrapper.
8160 PhiPredIVs.reserve(Phis.size());
8161 for (PHINode *Phi : Phis) {
8162 auto &IVs =
8163 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8164 // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
8165 // O(|Pred|).
8166 for (auto &IV : Phi->incoming_values())
8167 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8168 }
8169
8170 // Group duplicates using DenseSet with custom equality/hashing.
8171 // Build a set such that if the EqualBBWrapper exists in the set and another
8172 // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
8173 // the set should be replaced with the one in the set. If the EqualBBWrapper
8174 // is not in the set, then it should be added to the set so other
8175 // EqualBBWrapper can check against it in the same manner. We use
8176 // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
8177 // information to isEqual, getHashValue, and when doing the replacement
8178 // with better performance.
8180 Keep.reserve(BBs2Merge.size());
8181
8183 Updates.reserve(BBs2Merge.size() * 2);
8184
8185 bool MadeChange = false;
8186
8187 // Helper: redirect all edges X -> Dead to X -> Live.
8188 auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
8191 if (DTU) {
8192 // All predecessors of Dead (except the common predecessors) will be
8193 // moved to Live.
8194 Updates.reserve(Updates.size() + DeadPreds.size() * 2);
8196 predecessors(Live));
8197 for (BasicBlock *PredOfDead : DeadPreds) {
8198 // Do not insert an edge for common predecessors of Dead and Live: the
8199 // edge PredOfDead -> Live already exists. The edge to Dead is always deleted.
8199 if (!LivePreds.contains(PredOfDead))
8200 Updates.push_back({DominatorTree::Insert, PredOfDead, Live});
8201 Updates.push_back({DominatorTree::Delete, PredOfDead, Dead});
8202 }
8203 }
8204 LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
8205 Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
8206 Live->printAsOperand(dbgs()); dbgs() << " for ";
8207 Live->getSingleSuccessor()->printAsOperand(dbgs());
8208 dbgs() << "\n");
8209 // Replace successors in all predecessors of Dead.
8210 for (BasicBlock *PredOfDead : DeadPreds) {
8211 Instruction *T = PredOfDead->getTerminator();
8212 T->replaceSuccessorWith(Dead, Live);
8213 }
8214 };
8215
8216 // Try to eliminate duplicate predecessors.
8217 for (const auto &EBW : BBs2Merge) {
8218 // EBW is a candidate for simplification. If we find a duplicate BB,
8219 // replace it.
8220 const auto &[It, Inserted] = Keep.insert(&EBW);
8221 if (Inserted)
8222 continue;
8223
8224 // Found duplicate: merge EBW.BB into the canonical block (*It)->BB.
8225 BasicBlock *KeepBB = (*It)->BB;
8226 BasicBlock *DeadBB = EBW.BB;
8227
8228 // Avoid merging a BB with itself.
8229 if (KeepBB == DeadBB)
8230 continue;
8231
8232 // Redirect all edges into DeadBB to KeepBB.
8233 RedirectIncomingEdges(DeadBB, KeepBB);
8234
8235 // Now DeadBB should become unreachable; leave DCE to later,
8236 // but we can try to simplify it if it only branches to Succ.
8237 // (We won't erase here to keep the routine simple and DT-safe.)
8238 assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
8239 MadeChange = true;
8240 }
8241
// Apply all collected dominator-tree edge updates in one batch.
8242 if (DTU && !Updates.empty())
8243 DTU->applyUpdates(Updates);
8244
8245 return MadeChange;
8246}
8247
// Merges identical switch arms by handing a filtered set of blocks to
// mergeIdenticalBBs. Returns whatever mergeIdenticalBBs returns.
// NOTE(review): listing lines 8252-8253 (the initializer that selects which
// blocks go into `FilteredArms`) are absent from this rendering.
8248bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8249 DomTreeUpdater *DTU) {
8250 // Collect candidate switch-arms top-down.
8251 SmallSetVector<BasicBlock *, 16> FilteredArms(
8254 return mergeIdenticalBBs(FilteredArms.getArrayRef(), DTU);
8255}
8256
// Merges identical predecessors of BB by handing a filtered set of blocks to
// mergeIdenticalBBs. Returns false without changes if BB is null, has fewer
// than two predecessors, or is a loop header that must stay canonical.
// NOTE(review): listing lines 8270-8271 (the initializer that selects which
// blocks go into `FilteredPreds`) are absent from this rendering.
8257bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
8258 DomTreeUpdater *DTU) {
8259 // Need at least 2 predecessors to do anything.
8260 if (!BB || !BB->hasNPredecessorsOrMore(2))
8261 return false;
8262
8263 // Compilation time consideration: retain the canonical loop, otherwise, we
8264 // require more time in the later loop canonicalization.
8265 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BB))
8266 return false;
8267
8268 // Collect candidate predecessors bottom-up.
8269 SmallSetVector<BasicBlock *, 8> FilteredPreds(
8272 return mergeIdenticalBBs(FilteredPreds.getArrayRef(), DTU);
8273}
8274
// Top-level driver for switch simplification. Tries a fixed sequence of
// transforms on SI; as soon as one of them reports a change, the function
// returns through requestResimplify(). Returns false if no transform applied.
// The order of the attempts below is significant: each transform only sees the
// switch if every earlier one declined.
8275bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8276 BasicBlock *BB = SI->getParent();
8277
8278 if (isValueEqualityComparison(SI)) {
8279 // If we only have one predecessor, and if it is a branch on this value,
8280 // see if that predecessor totally determines the outcome of this switch.
8281 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8282 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8283 return requestResimplify();
8284
// A switch whose condition is a select may be foldable directly.
8285 Value *Cond = SI->getCondition();
8286 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8287 if (simplifySwitchOnSelect(SI, Select))
8288 return requestResimplify();
8289
8290 // If the block only contains the switch, see if we can fold the block
8291 // away into any preds.
8292 if (SI == &*BB->begin())
8293 if (foldValueComparisonIntoPredecessors(SI, Builder))
8294 return requestResimplify();
8295 }
8296
8297 // Try to transform the switch into an icmp and a branch.
8298 // The conversion from switch to comparison may lose information on
8299 // impossible switch values, so disable it early in the pipeline.
8300 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8301 return requestResimplify();
8302
8303 // Remove unreachable cases.
8304 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8305 return requestResimplify();
8306
// Switch over a ucmp/scmp intrinsic result -> icmp + conditional branch.
8307 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8308 return requestResimplify();
8309
8310 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8311 return requestResimplify();
8312
8313 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8314 return requestResimplify();
8315
8316 // The conversion of switches to arithmetic or lookup table is disabled in
8317 // the early optimization pipeline, as it may lose information or make the
8318 // resulting code harder to analyze.
8319 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8320 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8321 Options.ConvertSwitchToLookupTable))
8322 return requestResimplify();
8323
// Range-reducing rewrites: power-of-two cases via cttz, then the generic
// subtract-and-rotate reduction.
8324 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8325 return requestResimplify();
8326
8327 if (reduceSwitchRange(SI, Builder, DL, TTI))
8328 return requestResimplify();
8329
8330 if (HoistCommon &&
8331 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8332 return requestResimplify();
8333
8334 // We can merge identical switch arms early to enhance more aggressive
8335 // optimization on switch.
8336 if (simplifyDuplicateSwitchArms(SI, DTU))
8337 return requestResimplify();
8338
// Switch on umin(a, C): drop dead cases and possibly fold the default.
8339 if (simplifySwitchWhenUMin(SI, DTU))
8340 return requestResimplify();
8341
8342 return false;
8343}
8344
// Simplifies an indirectbr: removes destinations that are duplicated or whose
// block address is never taken, then handles the zero-destination and
// single-destination forms, re-attaches merged branch weights, and finally
// tries to fold an indirectbr on a select. Returns true if anything changed.
// NOTE(review): listing lines 8384 (after the unreachable is created) and
// 8390-8391 (the single-destination rewrite) are absent from this rendering.
8345bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8346 BasicBlock *BB = IBI->getParent();
8347 bool Changed = false;
8348 SmallVector<uint32_t> BranchWeights;
8349 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8350 extractBranchWeights(*IBI, BranchWeights);
8351
// Sum the weights per destination block up front, so that duplicates removed
// below still contribute to the weight of the surviving entry.
8352 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8353 if (HasBranchWeights)
8354 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8355 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8356
8357 // Eliminate redundant destinations.
8358 SmallPtrSet<Value *, 8> Succs;
8359 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8360 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8361 BasicBlock *Dest = IBI->getDestination(I);
// Remove Dest if its address is never taken, or if it was already seen.
// Only non-address-taken blocks are recorded in RemovedSuccs: a removed
// duplicate still has another entry on this indirectbr, so the CFG edge
// BB -> Dest remains.
8362 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8363 if (!Dest->hasAddressTaken())
8364 RemovedSuccs.insert(Dest);
8365 Dest->removePredecessor(BB);
8366 IBI->removeDestination(I);
// Step the index and the bound back to account for the removed entry.
8367 --I;
8368 --E;
8369 Changed = true;
8370 }
8371 }
8372
8373 if (DTU) {
8374 std::vector<DominatorTree::UpdateType> Updates;
8375 Updates.reserve(RemovedSuccs.size());
8376 for (auto *RemovedSucc : RemovedSuccs)
8377 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8378 DTU->applyUpdates(Updates);
8379 }
8380
8381 if (IBI->getNumDestinations() == 0) {
8382 // If the indirectbr has no successors, change it to unreachable.
8383 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8385 return true;
8386 }
8387
8388 if (IBI->getNumDestinations() == 1) {
8389 // If the indirectbr has one successor, change it to a direct branch.
8392 return true;
8393 }
// Rebuild the weights for the surviving destinations from the per-block sums.
8394 if (HasBranchWeights) {
8395 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8396 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8397 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8398 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8399 }
8400 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8401 if (simplifyIndirectBrOnSelect(IBI, SI))
8402 return requestResimplify();
8403 }
8404 return Changed;
8405}
8406
8407 /// Given a block with only a single landing pad and an unconditional branch,
8408 /// try to find another basic block which this one can be merged with. This
8409 /// handles cases where we have multiple invokes with unique landing pads, but
8410 /// a shared handler.
8411 ///
8412 /// We specifically choose to not worry about merging non-empty blocks
8413 /// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8414 /// practice, the optimizer produces empty landing pad blocks quite frequently
8415 /// when dealing with exception dense code. (see: instcombine, gvn, if-else
8416 /// sinking in this file)
8417 ///
8418 /// This is primarily a code size optimization. We need to avoid performing
8419 /// any transform which might inhibit optimization (such as our ability to
8420 /// specialize a particular handler via tail commoning). We do this by not
8421 /// merging any blocks which require us to introduce a phi. Since the same
8422 /// values are flowing through both blocks, we don't lose any ability to
8423 /// specialize. If anything, we make such specialization more likely.
8424 ///
8425 /// TODO - This transformation could remove entries from a phi in the target
8426 /// block when the inputs in the phi are the same for the two blocks being
8427 /// merged. In some cases, this could result in removal of the PHI entirely.
///
/// Returns true if BB's predecessors were redirected to an identical block
/// (BB then ends in `unreachable`), false if no identical block was found.
// NOTE(review): listing lines 8428 (start of the signature), 8441 and 8445
// (declarations of `LPad2` and `BI2`), 8453 and 8465 (declarations of
// `UniquePreds` and `UniqueSuccs`) are absent from this rendering.
8429 BasicBlock *BB, DomTreeUpdater *DTU) {
8430 auto Succ = BB->getUniqueSuccessor();
8431 assert(Succ);
8432 // If there's a phi in the successor block, we'd likely have to introduce
8433 // a phi into the merged landing pad block.
8434 if (isa<PHINode>(*Succ->begin()))
8435 return false;
8436
// Look among the other predecessors of Succ for a block made of an identical
// landing pad followed by an identical branch.
8437 for (BasicBlock *OtherPred : predecessors(Succ)) {
8438 if (BB == OtherPred)
8439 continue;
8440 BasicBlock::iterator I = OtherPred->begin();
8442 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8443 continue;
8444 ++I;
8446 if (!BI2 || !BI2->isIdenticalTo(BI))
8447 continue;
8448
8449 std::vector<DominatorTree::UpdateType> Updates;
8450
8451 // We've found an identical block. Update our predecessors to take that
8452 // path instead and make ourselves dead.
8454 for (BasicBlock *Pred : UniquePreds) {
// Each predecessor is expected to end in an invoke that unwinds to BB;
// repoint its unwind edge at the identical block.
8455 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8456 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8457 "unexpected successor");
8458 II->setUnwindDest(OtherPred);
8459 if (DTU) {
8460 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8461 Updates.push_back({DominatorTree::Delete, Pred, BB});
8462 }
8463 }
8464
// BB no longer branches anywhere: detach it from each of its successors.
8466 for (BasicBlock *Succ : UniqueSuccs) {
8467 Succ->removePredecessor(BB);
8468 if (DTU)
8469 Updates.push_back({DominatorTree::Delete, BB, Succ});
8470 }
8471
// Replace BB's branch with `unreachable`.
8472 IRBuilder<> Builder(BI);
8473 Builder.CreateUnreachable();
8474 BI->eraseFromParent();
8475 if (DTU)
8476 DTU->applyUpdates(Updates);
8477 return true;
8478 }
8479 return false;
8480}
8481
// Simplifies a block that ends in an unconditional branch. Three patterns are
// tried in order: an otherwise empty block, a block holding only an equality
// icmp against a constant (optionally followed by a select), and a block
// holding only a landing pad. Returns true if a transform was applied.
// NOTE(review): listing line 8498 (the declaration of the iterator `I` used
// below) is absent from this rendering.
8482bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
8483 IRBuilder<> &Builder) {
8484 BasicBlock *BB = BI->getParent();
8485 BasicBlock *Succ = BI->getSuccessor(0);
8486
8487 // If the Terminator is the only non-phi instruction, simplify the block.
8488 // If LoopHeader is provided, check if the block or its successor is a loop
8489 // header. (This is for early invocations before loop simplify and
8490 // vectorization to keep canonical loop forms for nested loops. These blocks
8491 // can be eliminated when the pass is invoked later in the back-end.)
8492 // Note that if BB has only one predecessor then we do not introduce new
8493 // backedge, so we can eliminate BB.
8494 bool NeedCanonicalLoop =
8495 Options.NeedCanonicalLoop &&
8496 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8497 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// The entry block is never folded away here.
8499 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8500 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8501 return true;
8502
8503 // If the only instruction in the block is a seteq/setne comparison against a
8504 // constant, try to simplify the block.
8505 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8506 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
// Advance past the icmp: the next instruction must be either the terminator
// or a select that is itself directly followed by the terminator.
8507 ++I;
8508 if (I->isTerminator() &&
8509 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8510 return true;
8511 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8512 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8513 Builder))
8514 return true;
8515 }
8516 }
8517
8518 // See if we can merge an empty landing pad block with another which is
8519 // equivalent.
8520 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8521 ++I;
8522 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8523 return true;
8524 }
8525
8526 return false;
8527}
8528
// Returns the single block that is the sole predecessor of every predecessor
// of BB. Returns nullptr if some predecessor of BB does not have exactly one
// predecessor, if the predecessors disagree on it, or if BB has no
// predecessors at all.
// NOTE(review): listing line 8529 (the function signature) is absent from
// this rendering; the parameter `BB` is inferred from its use below.
8530 BasicBlock *PredPred = nullptr;
8531 for (auto *P : predecessors(BB)) {
8532 BasicBlock *PPred = P->getSinglePredecessor();
// Fail as soon as P has no unique predecessor or it differs from the one
// seen so far.
8533 if (!PPred || (PredPred && PredPred != PPred))
8534 return nullptr;
8535 PredPred = PPred;
8536 }
8537 return PredPred;
8538}
8539
8540/// Fold the following pattern:
8541/// bb0:
8542/// br i1 %cond1, label %bb1, label %bb2
8543/// bb1:
8544/// br i1 %cond2, label %bb3, label %bb4
8545/// bb2:
8546/// br i1 %cond2, label %bb4, label %bb3
8547/// bb3:
8548/// ...
8549/// bb4:
8550/// ...
8551/// into
8552/// bb0:
8553/// %cond = xor i1 %cond1, %cond2
8554/// br i1 %cond, label %bb4, label %bb3
8555/// bb3:
8556/// ...
8557/// bb4:
8558/// ...
8559/// NOTE: %cond2 always dominates the terminator of bb0.
/// Returns true when the fold was applied and false when the pattern did not
/// match (in which case nothing is modified).
8561 BasicBlock *BB = BI->getParent();
8562 BasicBlock *BB1 = BI->getSuccessor(0);
8563 BasicBlock *BB2 = BI->getSuccessor(1);
// A successor qualifies when it is not BB itself, holds nothing but its
// terminator, that terminator is a CondBrInst (handed back through SuccBI),
// and neither of its targets is Succ or BB or begins with a PHI node.
8564 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
8565 if (Succ == BB)
8566 return false;
8567 if (&Succ->front() != Succ->getTerminator())
8568 return false;
8569 SuccBI = dyn_cast<CondBrInst>(Succ->getTerminator());
8570 if (!SuccBI)
8571 return false;
8572 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8573 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8574 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8575 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8576 };
8577 CondBrInst *BB1BI, *BB2BI;
8578 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8579 return false;
8580
// Both inner branches must test the same condition with swapped targets.
8581 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8582 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8583 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8584 return false;
8585
// BB3 is reached when the outer and inner conditions agree, BB4 when they
// differ; hence the xor becomes the new condition with BB4 as the true target.
8586 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8587 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8588 IRBuilder<> Builder(BI);
8589 BI->setCondition(
8590 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8591 BB1->removePredecessor(BB);
8592 BI->setSuccessor(0, BB4);
8593 BB2->removePredecessor(BB);
8594 BI->setSuccessor(1, BB3);
// Mirror the two removed and two added CFG edges in the dominator tree updater.
8595 if (DTU) {
8597 Updates.push_back({DominatorTree::Delete, BB, BB1});
8598 Updates.push_back({DominatorTree::Insert, BB, BB4});
8599 Updates.push_back({DominatorTree::Delete, BB, BB2});
8600 Updates.push_back({DominatorTree::Insert, BB, BB3});
8601
8602 DTU->applyUpdates(Updates);
8603 }
// Branch weights: any branch without extractable weights is treated as 1:1.
// New weights are only written if at least one of the three branches had some.
8604 bool HasWeight = false;
8605 uint64_t BBTWeight, BBFWeight;
8606 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8607 HasWeight = true;
8608 else
8609 BBTWeight = BBFWeight = 1;
8610 uint64_t BB1TWeight, BB1FWeight;
8611 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8612 HasWeight = true;
8613 else
8614 BB1TWeight = BB1FWeight = 1;
8615 uint64_t BB2TWeight, BB2FWeight;
8616 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8617 HasWeight = true;
8618 else
8619 BB2TWeight = BB2FWeight = 1;
8620 if (HasWeight) {
// Weights[0] is the new true edge (to BB4): outer-true * BB1-false plus
// outer-false * BB2-true. Weights[1] is the new false edge (to BB3).
8621 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8622 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8623 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8624 /*ElideAllZero=*/true);
8625 }
8626 return true;
8627}
8628
/// Try a fixed sequence of simplifications on the conditional branch BI and
/// stop at the first one that reports success. Most successful attempts return
/// through requestResimplify(); the icmp-chain fold returns a plain true.
/// Returns false when conditional-branch simplification is disabled, when the
/// function carries OptForFuzzing, or when no attempt applied.
8629bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8630 assert(
8632 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8633 "Tautological conditional branch should have been eliminated already.");
8634
8635 BasicBlock *BB = BI->getParent();
// Bail out before touching anything if the option is off or the function is
// marked OptForFuzzing.
8636 if (!Options.SimplifyCondBranch ||
8637 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8638 return false;
8639
8640 // Conditional branch
8641 if (isValueEqualityComparison(BI)) {
8642 // If we only have one predecessor, and if it is a branch on this value,
8643 // see if that predecessor totally determines the outcome of this
8644 // switch.
8645 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8646 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8647 return requestResimplify();
8648
8649 // This block must be empty, except for the setcond inst, if it exists.
8650 // Ignore pseudo intrinsics.
// The loop skips pseudo probes and the condition instruction; the first other
// instruction either is BI (attempt the fold) or ends the scan.
8651 for (auto &I : *BB) {
8652 if (isa<PseudoProbeInst>(I) ||
8653 &I == cast<Instruction>(BI->getCondition()))
8654 continue;
8655 if (&I == BI)
8656 if (foldValueComparisonIntoPredecessors(BI, Builder))
8657 return requestResimplify();
8658 break;
8659 }
8660 }
8661
8662 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8663 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8664 return true;
8665
8666 // If this basic block has dominating predecessor blocks and the dominating
8667 // blocks' conditions imply BI's condition, we know the direction of BI.
8668 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8669 if (Imp) {
8670 // Turn this into a branch on constant.
8671 auto *OldCond = BI->getCondition();
8672 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8673 : ConstantInt::getFalse(BB->getContext());
8674 BI->setCondition(TorF);
8676 return requestResimplify();
8677 }
8678
8679 // If this basic block is ONLY a compare and a branch, and if a predecessor
8680 // branches to us and one of our successors, fold the comparison into the
8681 // predecessor and use logical operations to pick the right destination.
8682 if (Options.SpeculateBlocks &&
8683 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.AC,
8684 Options.BonusInstThreshold))
8685 return requestResimplify();
8686
8687 // We have a conditional branch to two blocks that are only reachable
8688 // from BI. We know that the condbr dominates the two blocks, so see if
8689 // there is any identical code in the "then" and "else" blocks. If so, we
8690 // can hoist it up to the branching block.
8691 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8692 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8693 if (HoistCommon &&
8694 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8695 return requestResimplify();
8696
8697 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8698 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8699 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
// Gathers every non-terminator instruction of BB's successors into the vector
// above. Gives up if a successor's terminator has more than one successor or
// if an instruction fails the load/store and size checks below; succeeds only
// if at least one instruction was collected.
8700 auto CanSpeculateConditionalLoadsStores = [&]() {
8701 for (auto *Succ : successors(BB)) {
8702 for (Instruction &I : *Succ) {
8703 if (I.isTerminator()) {
8704 if (I.getNumSuccessors() > 1)
8705 return false;
8706 continue;
8707 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8708 SpeculatedConditionalLoadsStores.size() ==
8710 return false;
8711 }
8712 SpeculatedConditionalLoadsStores.push_back(&I);
8713 }
8714 }
8715 return !SpeculatedConditionalLoadsStores.empty();
8716 };
8717
8718 if (CanSpeculateConditionalLoadsStores()) {
8719 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8720 std::nullopt, nullptr);
8721 return requestResimplify();
8722 }
8723 }
8724 } else {
8725 // If Successor #1 has multiple preds, we may be able to conditionally
8726 // execute Successor #0 if it branches to Successor #1.
8727 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8728 if (Succ0TI->getNumSuccessors() == 1 &&
8729 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8730 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8731 return requestResimplify();
8732 }
8733 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8734 // If Successor #0 has multiple preds, we may be able to conditionally
8735 // execute Successor #1 if it branches to Successor #0.
8736 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8737 if (Succ1TI->getNumSuccessors() == 1 &&
8738 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8739 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8740 return requestResimplify();
8741 }
8742
8743 // If this is a branch on something for which we know the constant value in
8744 // predecessors (e.g. a phi node in the current block), thread control
8745 // through this block.
8746 if (foldCondBranchOnValueKnownInPredecessor(BI))
8747 return requestResimplify();
8748
8749 // Scan predecessor blocks for conditional branches.
8750 for (BasicBlock *Pred : predecessors(BB))
8751 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Pred->getTerminator()))
8752 if (PBI != BI)
8753 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8754 return requestResimplify();
8755
8756 // Look for diamond patterns.
// Only attempted when MergeCondStores is set and every predecessor of BB has
// the same single predecessor ending in a different conditional branch.
8757 if (MergeCondStores)
8758 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8759 if (CondBrInst *PBI = dyn_cast<CondBrInst>(PrevBB->getTerminator()))
8760 if (PBI != BI)
8761 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8762 return requestResimplify();
8763
8764 // Look for nested conditional branches.
8765 if (mergeNestedCondBranch(BI, DTU))
8766 return requestResimplify();
8767
8768 return false;
8769}
8770
8771/// Check if passing a value to an instruction will cause undefined behavior.
/// Only a constant V that is null or undef can produce a true result; a
/// non-constant V, an unused I, or any other constant yields false.
/// Only the first use of I accepted by the filter below is examined.
/// PtrValueMayBeModified is set once a looked-through GEP may have moved the
/// pointer away from null; when set, the nonnull return-value and nonnull
/// argument checks answer false.
8772static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8773 assert(V->getType() == I->getType() && "Mismatched types");
8775 if (!C)
8776 return false;
8777
8778 if (I->use_empty())
8779 return false;
8780
8781 if (C->isNullValue() || isa<UndefValue>(C)) {
8782 // Find the first same-block use with a UB-triggering opcode, skipping
8783 // cross-block or before-I uses.
8784 auto FindUse = llvm::find_if(I->uses(), [I](auto &U) {
8785 auto *Use = cast<Instruction>(U.getUser());
8786 // Only same-block uses after I can witness UB at I's program point.
8787 // Self-uses and before-I uses can occur when I is a PHI node.
8788 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8789 return false;
8790 // Change this list when we want to add new instructions.
8791 switch (Use->getOpcode()) {
8792 default:
8793 return false;
8794 case Instruction::GetElementPtr:
8795 case Instruction::Ret:
8796 case Instruction::BitCast:
8797 case Instruction::Load:
8798 case Instruction::Store:
8799 case Instruction::Call:
8800 case Instruction::CallBr:
8801 case Instruction::Invoke:
8802 case Instruction::UDiv:
8803 case Instruction::URem:
8804 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8805 // implemented to avoid code complexity as it is unclear how useful such
8806 // logic is.
8807 case Instruction::SDiv:
8808 case Instruction::SRem:
8809 return true;
8810 }
8811 });
8812 if (FindUse == I->use_end())
8813 return false;
8814 auto &Use = *FindUse;
8815 auto *User = cast<Instruction>(Use.getUser());
8816
8817 // Now make sure that there are no instructions in between that can alter
8818 // control flow (eg. calls)
// The scanned range starts just after I and stops just before User.
8819 auto InstrRange =
8820 make_range(std::next(I->getIterator()), User->getIterator());
8821 if (any_of(InstrRange, [](Instruction &I) {
8823 }))
8824 return false;
8825
8826 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8828 if (GEP->getPointerOperand() == I) {
8829 // The type of GEP may differ from the type of base pointer.
8830 // Bail out on vector GEPs, as they are not handled by other checks.
8831 if (GEP->getType()->isVectorTy())
8832 return false;
8833 // The current base address is null, there are four cases to consider:
8834 // getelementptr (TY, null, 0) -> null
8835 // getelementptr (TY, null, not zero) -> may be modified
8836 // getelementptr inbounds (TY, null, 0) -> null
8837 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8838 // undefined?
8839 if (!GEP->hasAllZeroIndices() &&
8840 (!GEP->isInBounds() ||
8841 NullPointerIsDefined(GEP->getFunction(),
8842 GEP->getPointerAddressSpace())))
8843 PtrValueMayBeModified = true;
// Recurse with the GEP standing in for I so that its own users are inspected.
8844 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8845 }
8846
8847 // Look through return.
8848 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8849 bool HasNoUndefAttr =
8850 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8851 // Return undefined to a noundef return value is undefined.
8852 if (isa<UndefValue>(C) && HasNoUndefAttr)
8853 return true;
8854 // Return null to a nonnull+noundef return value is undefined.
8855 if (C->isNullValue() && HasNoUndefAttr &&
8856 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8857 return !PtrValueMayBeModified;
8858 }
8859 }
8860
8861 // Load from null is undefined.
// Volatile loads are not decided here; they fall through to the later checks.
8862 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8863 if (!LI->isVolatile())
8864 return !NullPointerIsDefined(LI->getFunction(),
8865 LI->getPointerAddressSpace());
8866
8867 // Store to null is undefined.
// Only counts when I is the store's pointer operand, not the stored value.
8869 if (!SI->isVolatile())
8870 return (!NullPointerIsDefined(SI->getFunction(),
8871 SI->getPointerAddressSpace())) &&
8872 SI->getPointerOperand() == I;
8873
8874 // llvm.assume(false/undef) always triggers immediate UB.
8875 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8876 // Ignore assume operand bundles.
8877 if (I == Assume->getArgOperand(0))
8878 return true;
8879 }
8880
8881 if (auto *CB = dyn_cast<CallBase>(User)) {
8882 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8883 return false;
8884 // A call to null is undefined.
8885 if (CB->getCalledOperand() == I)
8886 return true;
8887
8888 if (CB->isArgOperand(&Use)) {
8889 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8890 // Passing null to a nonnull+noundef argument is undefined.
8892 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8893 return !PtrValueMayBeModified;
8894 // Passing undef to a noundef argument is undefined.
8895 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8896 return true;
8897 }
8898 }
8899 // Div/Rem by zero is immediate UB
// The match requires I to be the second (divisor) operand of the binary op.
8900 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8901 return true;
8902 }
8903 return false;
8904}
8905
8906/// If BB has an incoming value that will always trigger undefined behavior
8907/// (eg. null pointer dereference), remove the branch leading here.
/// Scans every PHI in BB and every incoming value of each PHI. The first
/// incoming value flagged by passingValueIsAlwaysUndefined() whose incoming
/// block ends in an unconditional branch, conditional branch, or switch has
/// that edge rewritten, and the function returns true immediately. Incoming
/// blocks with any other terminator are left alone and the scan continues.
/// Returns false if nothing was rewritten.
8909 DomTreeUpdater *DTU,
8910 AssumptionCache *AC) {
8911 for (PHINode &PHI : BB->phis())
8912 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8913 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8914 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8915 Instruction *T = Predecessor->getTerminator();
8916 IRBuilder<> Builder(T);
8917 if (isa<UncondBrInst>(T)) {
8918 BB->removePredecessor(Predecessor);
8919 // Turn unconditional branches into unreachables.
8920 Builder.CreateUnreachable();
8921 T->eraseFromParent();
8922 if (DTU)
8923 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8924 return true;
8925 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(T)) {
8926 BB->removePredecessor(Predecessor);
8927 // Preserve guarding condition in assume, because it might not be
8928 // inferrable from any dominating condition.
8929 Value *Cond = BI->getCondition();
8930 CallInst *Assumption;
// Assume whichever polarity of Cond steers away from BB.
8931 if (BI->getSuccessor(0) == BB)
8932 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8933 else
8934 Assumption = Builder.CreateAssumption(Cond);
8935 if (AC)
8936 AC->registerAssumption(cast<AssumeInst>(Assumption));
// Replace the conditional branch with a direct branch to the other successor.
8937 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8938 : BI->getSuccessor(0));
8939 BI->eraseFromParent();
8940 if (DTU)
8941 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8942 return true;
8943 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8944 // Redirect all branches leading to UB into
8945 // a newly created unreachable block.
8946 BasicBlock *Unreachable = BasicBlock::Create(
8947 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8948 Builder.SetInsertPoint(Unreachable);
8949 // The new block contains only one instruction: Unreachable
8950 Builder.CreateUnreachable();
// removePredecessor is called once per redirected case edge and once more for
// the default edge if it also targets BB.
8951 for (const auto &Case : SI->cases())
8952 if (Case.getCaseSuccessor() == BB) {
8953 BB->removePredecessor(Predecessor);
8954 Case.setSuccessor(Unreachable);
8955 }
8956 if (SI->getDefaultDest() == BB) {
8957 BB->removePredecessor(Predecessor);
8958 SI->setDefaultDest(Unreachable);
8959 }
8960
8961 if (DTU)
8962 DTU->applyUpdates(
8963 { { DominatorTree::Insert, Predecessor, Unreachable },
8964 { DominatorTree::Delete, Predecessor, BB } });
8965 return true;
8966 }
8967 }
8968
8969 return false;
8970}
8971
/// Perform one round of simplification on BB: block-level clean-ups first,
/// then a dispatch on the opcode of BB's terminator. Several steps return as
/// soon as they fire; otherwise the result is the accumulated Changed flag.
8972bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8973 bool Changed = false;
8974
8975 assert(BB && BB->getParent() && "Block not embedded in function!");
8976 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8977
8978 // Remove basic blocks that have no predecessors (except the entry block)...
8979 // or that just have themselves as a predecessor. These are unreachable.
8980 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8981 BB->getSinglePredecessor() == BB) {
8982 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8983 DeleteDeadBlock(BB, DTU);
8984 return true;
8985 }
8986
8987 // Check to see if we can constant propagate this terminator instruction
8988 // away...
8989 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8990 /*TLI=*/nullptr, DTU);
8991
8992 // Check for and eliminate duplicate PHI nodes in this block.
8994
8995 // Check for and remove branches that will always cause undefined behavior.
8997 return requestResimplify();
8998
8999 // Merge basic blocks into their predecessor if there is only one distinct
9000 // pred, and if there is only one distinct successor of the predecessor, and
9001 // if there are no PHI nodes.
9002 if (MergeBlockIntoPredecessor(BB, DTU))
9003 return true;
9004
// Sinking runs only when both the SinkCommon flag and Options.SinkCommonInsts
// are set.
9005 if (SinkCommon && Options.SinkCommonInsts) {
9006 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
9007 mergeCompatibleInvokes(BB, DTU)) {
9008 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
9009 // so we may now have duplicate PHI's.
9010 // Let's rerun EliminateDuplicatePHINodes() first,
9011 // before foldTwoEntryPHINode() potentially converts them into select's,
9012 // after which we'd need a whole EarlyCSE pass run to cleanup them.
9013 return true;
9014 }
9015 // Merge identical predecessors of this block.
9016 if (simplifyDuplicatePredecessors(BB, DTU))
9017 return true;
9018 }
9019
9020 if (Options.SpeculateBlocks &&
9021 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
9022 // If there is a trivial two-entry PHI node in this basic block, and we can
9023 // eliminate it, do so now.
9024 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
9025 if (PN->getNumIncomingValues() == 2)
9026 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
9027 Options.SpeculateUnpredictables))
9028 return true;
9029 }
9030
// Position the builder at the terminator and dispatch on its opcode; opcodes
// without a case below leave Changed untouched.
9031 IRBuilder<> Builder(BB);
9033 Builder.SetInsertPoint(Terminator);
9034 switch (Terminator->getOpcode()) {
9035 case Instruction::UncondBr:
9036 Changed |= simplifyUncondBranch(cast<UncondBrInst>(Terminator), Builder);
9037 break;
9038 case Instruction::CondBr:
9039 Changed |= simplifyCondBranch(cast<CondBrInst>(Terminator), Builder);
9040 break;
9041 case Instruction::Resume:
9042 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
9043 break;
9044 case Instruction::CleanupRet:
9045 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
9046 break;
9047 case Instruction::Switch:
9048 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
9049 break;
9050 case Instruction::Unreachable:
9051 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
9052 break;
9053 case Instruction::IndirectBr:
9054 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
9055 break;
9056 }
9057
9058 return Changed;
9059}
9060
/// Simplify BB to a fixed point: each pass clears Resimplify, calls
/// simplifyOnce(), and loops again if Resimplify was set during that pass.
/// Returns true if any pass reported a change.
9061bool SimplifyCFGOpt::run(BasicBlock *BB) {
9062 bool Changed = false;
9063
9064 // Repeatedly simplify BB as long as resimplification is requested.
9065 do {
9066 Resimplify = false;
9067
9068 // Perform one round of simplification. Resimplify flag will be set if
9069 // another iteration is requested.
9070 Changed |= simplifyOnce(BB);
9071 } while (Resimplify);
9072
9073 return Changed;
9074}
9075
9078 ArrayRef<WeakVH> LoopHeaders) {
// Build a temporary SimplifyCFGOpt using BB's DataLayout and the supplied
// analyses and options, run it on BB, and forward its result.
9079 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
9080 Options)
9081 .run(BB);
9082}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool isProfitableToSpeculate(const CondBrInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static void hoistConditionalLoadsStores(CondBrInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool mergeIdenticalBBs(ArrayRef< BasicBlock * > Candidates, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInsts that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1995
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:659
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:1119
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:945
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1310
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A constant pointer value that points to null.
Definition Constants.h:710
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
Definition Constants.cpp:89
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:242
static DebugLoc getTemporary()
Definition DebugLoc.h:150
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:169
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:156
static DebugLoc getDropped()
Definition DebugLoc.h:153
ValueT & at(const_arg_type_t< KeyT > Val)
Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:270
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:178
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:759
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2432
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2180
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1268
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2735
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1584
LLVM_ABI CallInst * CreateAssumption(Value *Cond)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:2060
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1262
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1906
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1291
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2416
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1958
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2162
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1971
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1474
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2274
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2148
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2357
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2526
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1644
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1508
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2900
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
size_type size() const
Definition MapVector.h:58
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this store instruction.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this store instruction.
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:306
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
void setSuccessor(BasicBlock *NewSucc)
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i=0) const
'undef' values are things that do not have specified contents.
Definition Constants.h:1625
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
LLVM_ABI void set(Value *Val)
Definition Value.h:874
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:799
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:54
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Represents an op.with.overflow intrinsic.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_bind< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
constexpr double e
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2179
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool foldBranchToCommonDest(CondBrInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, AssumptionCache *AC=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
bool succ_empty(const Instruction *I)
Definition CFG.h:141
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
@ Dead
Unused definition.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1701
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1790
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2199
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI CondBrInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of an if-region.
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
LLVM_ABI void InvertBranch(CondBrInst *PBI, IRBuilderBase &Builder)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1398
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2863
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:551
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
LLVM_ABI bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3389
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3896
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1716
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
LLVM_ABI Constant * ConstantFoldCastInstruction(unsigned opcode, Constant *V, Type *DestTy)
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1595
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Equivalent to isDereferenceableAndAlignedPointer with an alignment of 1.
Definition Loads.cpp:264
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
@ Keep
No function return thunk.
Definition CodeGen.h:162
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two BBs are equal depends on the contents of the BasicBlock and the incoming values ...
SmallDenseMap< BasicBlock *, Value *, 8 > BB2ValueMap
Phi2IVsMap * PhiPredIVs
DenseMap< PHINode *, BB2ValueMap > Phi2IVsMap
static bool canBeMerged(const BasicBlock *BB)
BasicBlock * BB
static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS)
static unsigned getHashValue(const EqualBBWrapper *EBW)
An information struct used to provide DenseMap with the various necessary components for a given valu...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:310
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342