LLVM 19.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
54#include "llvm/IR/Metadata.h"
55#include "llvm/IR/Module.h"
56#include "llvm/IR/NoFolder.h"
57#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
68#include "llvm/Support/Debug.h"
76#include <algorithm>
77#include <cassert>
78#include <climits>
79#include <cstddef>
80#include <cstdint>
81#include <iterator>
82#include <map>
83#include <optional>
84#include <set>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90using namespace PatternMatch;
91
92#define DEBUG_TYPE "simplifycfg"
93
95 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
96
97 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
98 "into preserving DomTree,"));
99
100// Chosen as 2 so as to be cheap, but still to have enough power to fold
101// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
102// To catch this, we need to fold a compare and a select, hence '2' being the
103// minimum reasonable default.
105 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
106 cl::desc(
107 "Control the amount of phi node folding to perform (default = 2)"));
108
110 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
111 cl::desc("Control the maximal total instruction cost that we are willing "
112 "to speculatively execute to fold a 2-entry PHI node into a "
113 "select (default = 4)"));
114
// Enables hoisting of instructions common to all successors into the parent
// block (on by default).
static cl::opt<bool>
    HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
                cl::desc("Hoist common instructions up to the parent block"));
118
120 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
121 cl::init(20),
122 cl::desc("Allow reordering across at most this many "
123 "instructions when hoisting"));
124
// Enables sinking of instructions common to all predecessors down into the
// shared successor block (on by default).
static cl::opt<bool>
    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
               cl::desc("Sink common instructions down to the end block"));
128
130 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
131 cl::desc("Hoist conditional stores if an unconditional store precedes"));
132
134 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
135 cl::desc("Hoist conditional stores even if an unconditional store does not "
136 "precede - hoist multiple conditional stores into a single "
137 "predicated store"));
138
140 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
141 cl::desc("When merging conditional stores, do so even if the resultant "
142 "basic blocks are unlikely to be if-converted as a result"));
143
145 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
146 cl::desc("Allow exactly one expensive instruction to be speculatively "
147 "executed"));
148
150 "max-speculation-depth", cl::Hidden, cl::init(10),
151 cl::desc("Limit maximum recursion depth when calculating costs of "
152 "speculatively executed instructions"));
153
// Upper bound (default 10) on the size of a block that jump-threading here is
// still willing to duplicate through.
static cl::opt<int>
    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
                      cl::init(10),
                      cl::desc("Max size of a block which is still considered "
                               "small enough to thread through"));
159
160// Two is chosen to allow one negation and a logical combine.
162 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
163 cl::init(2),
164 cl::desc("Maximum cost of combining conditions when "
165 "folding branches"));
166
168 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
169 cl::init(2),
170 cl::desc("Multiplier to apply to threshold when determining whether or not "
171 "to fold branch to common destination when vector operations are "
172 "present"));
173
175 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
176 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
177
179 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
180 cl::desc("Limit cases to analyze when converting a switch to select"));
181
182STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
183STATISTIC(NumLinearMaps,
184 "Number of switch instructions turned into linear mapping");
185STATISTIC(NumLookupTables,
186 "Number of switch instructions turned into lookup tables");
188 NumLookupTablesHoles,
189 "Number of switch instructions turned into lookup tables (holes checked)");
190STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
191STATISTIC(NumFoldValueComparisonIntoPredecessors,
192 "Number of value comparisons folded into predecessor basic blocks");
193STATISTIC(NumFoldBranchToCommonDest,
194 "Number of branches folded into predecessor basic block");
196 NumHoistCommonCode,
197 "Number of common instruction 'blocks' hoisted up to the begin block");
198STATISTIC(NumHoistCommonInstrs,
199 "Number of common instructions hoisted up to the begin block");
200STATISTIC(NumSinkCommonCode,
201 "Number of common instruction 'blocks' sunk down to the end block");
202STATISTIC(NumSinkCommonInstrs,
203 "Number of common instructions sunk down to the end block");
204STATISTIC(NumSpeculations, "Number of speculative executed instructions");
205STATISTIC(NumInvokes,
206 "Number of invokes with empty resume blocks simplified into calls");
207STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
208STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
209
210namespace {
211
212// The first field contains the value that the switch produces when a certain
213// case group is selected, and the second field is a vector containing the
214// cases composing the case group.
215using SwitchCaseResultVectorTy =
217
218// The first field contains the phi node that generates a result of the switch
219// and the second field contains the value generated for a certain case in the
220// switch for that PHI.
221using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
222
/// ValueEqualityComparisonCase - Represents a case of a switch.
struct ValueEqualityComparisonCase {
  // NOTE(review): a `ConstantInt *Value;` member declaration appears to have
  // been lost in extraction here — the constructor and operator< below both
  // reference `Value`; confirm against the upstream file.
  BasicBlock *Dest; // Successor block taken for this case.

  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
      : Value(Value), Dest(Dest) {}

  bool operator<(ValueEqualityComparisonCase RHS) const {
    // Comparing pointers is ok as we only rely on the order for uniquing.
    return Value < RHS.Value;
  }

  // Compare against a block so llvm::erase(Cases, BB) can drop every case
  // whose destination is BB (see EliminateBlockCases).
  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
238
/// Driver object for the SimplifyCFG peephole transforms. One instance is
/// constructed per invocation and run() is called per basic block.
class SimplifyCFGOpt {
  // NOTE(review): a `const TargetTransformInfo &TTI;` member appears to be
  // missing from this extract — the constructor initializes TTI(TTI).
  DomTreeUpdater *DTU;        // May be null; used to keep the DomTree valid.
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;
  // NOTE(review): a `const SimplifyCFGOptions &Options;` member appears to be
  // missing from this extract — the constructor initializes Options(Opts).
  // Set when a transform wants the block to be simplified again (see
  // requestResimplify below).
  bool Resimplify;

  // -- Value-equality comparison folding (switch/br-on-icmp threading) --
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *GetValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool FoldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // -- Per-terminator-kind simplification entry points --
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);

  // -- Hoisting / sinking / speculation helpers --
  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs);
  bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
306
307} // end anonymous namespace
308
/// Return true if all the PHI nodes in the basic block \p BB
/// receive compatible (identical) incoming values when coming from
/// all of the predecessor blocks that are specified in \p IncomingBlocks.
///
/// Note that if the values aren't exactly identical, but \p EquivalenceSet
/// is provided, and *both* of the values are present in the set,
/// then they are considered equal.
// NOTE(review): the first line of the signature (presumably
// `static bool IncomingValuesAreCompatible(`) is missing from this extract.
    BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
    SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
  // Only the two-predecessor form is supported; callers pass block pairs.
  assert(IncomingBlocks.size() == 2 &&
         "Only for a pair of incoming blocks at the time!");

  // FIXME: it is okay if one of the incoming values is an `undef` value,
  // iff the other incoming value is guaranteed to be a non-poison value.
  // FIXME: it is okay if one of the incoming values is a `poison` value.
  return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
    Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
    Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
    // Identical incoming values are trivially compatible.
    if (IV0 == IV1)
      return true;
    // Otherwise both values must be members of the caller's equivalence set.
    if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
        EquivalenceSet->contains(IV1))
      return true;
    return false;
  });
}
336
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
// NOTE(review): the line naming the function and declaring its two terminator
// parameters (SI1, SI2) is missing from this extract.
    SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
  if (SI1 == SI2)
    return false; // Can't merge with self!

  // It is not safe to merge these two switch instructions if they have a common
  // successor, and if that successor has a PHI node, and if *that* PHI node has
  // conflicting incoming values from the two switch blocks.
  BasicBlock *SI1BB = SI1->getParent();
  BasicBlock *SI2BB = SI2->getParent();

  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
  bool Fail = false;
  for (BasicBlock *Succ : successors(SI2BB)) {
    if (!SI1Succs.count(Succ))
      continue;
    if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
      continue;
    Fail = true;
    // If the caller asked for the full set of conflicting successors, keep
    // scanning; otherwise the first conflict is enough to answer.
    if (FailBlocks)
      FailBlocks->insert(Succ);
    else
      break;
  }

  return !Fail;
}
367
368/// Update PHI nodes in Succ to indicate that there will now be entries in it
369/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
370/// will be the same as those coming in from ExistPred, an existing predecessor
371/// of Succ.
372static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
373 BasicBlock *ExistPred,
374 MemorySSAUpdater *MSSAU = nullptr) {
375 for (PHINode &PN : Succ->phis())
376 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
377 if (MSSAU)
378 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
379 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
380}
381
/// Compute an abstract "cost" of speculating the given instruction,
/// which is assumed to be safe to speculate. TCC_Free means cheap,
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
/// expensive.
// NOTE(review): the first line of the signature (declaring the value
// parameter `I`) is missing from this extract.
                                   const TargetTransformInfo &TTI) {
  assert((!isa<Instruction>(I) ||
          isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
         "Instruction is not safe to speculatively execute!");
  // NOTE(review): the return statement (presumably a TTI cost query for I)
  // is missing from this extract.
}
393
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
/// which works well enough for us.
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
/// see if V (which must be an instruction) and its recursive operands
/// that do not dominate BB have a combined cost lower than Budget and
/// are non-trapping. If both are true, the instruction is inserted into the
/// set and true is returned.
///
/// The cost for most non-trapping instructions is defined as 1 except for
/// Select whose cost is 2.
///
/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
/// Budget, false is returned and Cost is undefined.
// NOTE(review): the first line of the signature (declaring V and BB) and the
// lines declaring the Cost and TTI parameters are missing from this extract.
    SmallPtrSetImpl<Instruction *> &AggressiveInsts,
    InstructionCost Budget,
    unsigned Depth = 0) {
  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
  // so limit the recursion depth.
  // TODO: While this recursion limit does prevent pathological behavior, it
  // would be better to track visited instructions to avoid cycles.
  // NOTE(review): the depth-limit condition guarding this early return is
  // missing from this extract.
    return false;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    // Non-instructions dominate all instructions and can be executed
    // unconditionally.
    return true;
  }
  BasicBlock *PBB = I->getParent();

  // We don't want to allow weird loops that might have the "if condition" in
  // the bottom of this block.
  if (PBB == BB)
    return false;

  // If this instruction is defined in a block that contains an unconditional
  // branch to BB, then it must be in the 'conditional' part of the "if
  // statement". If not, it definitely dominates the region.
  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
  if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
    return true;

  // If we have seen this instruction before, don't count it again.
  if (AggressiveInsts.count(I))
    return true;

  // Okay, it looks like the instruction IS in the "condition". Check to
  // see if it's a cheap instruction to unconditionally compute, and if it
  // only uses stuff defined outside of the condition. If so, hoist it out.
  // NOTE(review): the speculation-safety check guarding this early return is
  // missing from this extract.
    return false;

  // NOTE(review): the statement accumulating I's speculation cost into Cost
  // is missing from this extract.

  // Allow exactly one instruction to be speculated regardless of its cost
  // (as long as it is safe to do so).
  // This is intended to flatten the CFG even if the instruction is a division
  // or other expensive operation. The speculation of an expensive instruction
  // is expected to be undone in CodeGenPrepare if the speculation has not
  // enabled further IR optimizations.
  if (Cost > Budget &&
      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
       !Cost.isValid()))
    return false;

  // Okay, we can only really hoist these out if their operands do
  // not take us over the cost threshold.
  for (Use &Op : I->operands())
    if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
                             Depth + 1))
      return false;
  // Okay, it's safe to do this! Remember this instruction.
  AggressiveInsts.insert(I);
  return true;
}
477
/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
// NOTE(review): the signature line (presumably
// `static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {`)
// is missing from this extract.
  // Normal constant int.
  ConstantInt *CI = dyn_cast<ConstantInt>(V);
  // Bail out (returning CI, possibly null) unless V is a pointer-typed
  // constant over an integral pointer type that we can translate below.
  if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
      DL.isNonIntegralPointerType(V->getType()))
    return CI;

  // This is some kind of pointer constant. Turn it into a pointer-sized
  // ConstantInt if possible.
  IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));

  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
  if (isa<ConstantPointerNull>(V))
    return ConstantInt::get(PtrTy, 0);

  // IntToPtr const int.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::IntToPtr)
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
        // The constant is very likely to have the right type already.
        if (CI->getType() == PtrTy)
          return CI;
        else
          return cast<ConstantInt>(
              ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
      }
  return nullptr;
}
508
509namespace {
510
511/// Given a chain of or (||) or and (&&) comparison of a value against a
512/// constant, this will try to recover the information required for a switch
513/// structure.
514/// It will depth-first traverse the chain of comparison, seeking for patterns
515/// like %a == 12 or %a < 4 and combine them to produce a set of integer
516/// representing the different cases for the switch.
517/// Note that if the chain is composed of '||' it will build the set of elements
518/// that matches the comparisons (i.e. any of this value validate the chain)
519/// while for a chain of '&&' it will build the set elements that make the test
520/// fail.
/// Given a chain of or (||) or and (&&) comparison of a value against a
/// constant, this will try to recover the information required for a switch
/// structure.
/// It will depth-first traverse the chain of comparison, seeking for patterns
/// like %a == 12 or %a < 4 and combine them to produce a set of integer
/// representing the different cases for the switch.
/// Note that if the chain is composed of '||' it will build the set of elements
/// that matches the comparisons (i.e. any of this value validate the chain)
/// while for a chain of '&&' it will build the set elements that make the test
/// fail.
struct ConstantComparesGatherer {
  const DataLayout &DL;

  /// Value found for the switch comparison
  Value *CompValue = nullptr;

  /// Extra clause to be checked before the switch
  Value *Extra = nullptr;

  /// Set of integers to match in switch
  // NOTE(review): the declaration of the container `Vals` (used throughout
  // matchInstruction below) is missing from this extract.

  /// Number of comparisons matched in the and/or chain
  unsigned UsedICmps = 0;

  /// Construct and compute the result for the comparison instruction Cond
  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
    gather(Cond);
  }

  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
  ConstantComparesGatherer &
  operator=(const ConstantComparesGatherer &) = delete;

private:
  /// Try to set the current value used for the comparison, it succeeds only if
  /// it wasn't set before or if the new value is the same as the old one
  bool setValueOnce(Value *NewVal) {
    if (CompValue && CompValue != NewVal)
      return false;
    CompValue = NewVal;
    return (CompValue != nullptr);
  }

  /// Try to match Instruction "I" as a comparison against a constant and
  /// populates the array Vals with the set of values that match (or do not
  /// match depending on isEQ).
  /// Return false on failure. On success, the Value the comparison matched
  /// against is placed in CompValue.
  /// If CompValue is already set, the function is expected to fail if a match
  /// is found but the value compared to is different.
  bool matchInstruction(Instruction *I, bool isEQ) {
    // If this is an icmp against a constant, handle this as one of the cases.
    ICmpInst *ICI;
    ConstantInt *C;
    if (!((ICI = dyn_cast<ICmpInst>(I)) &&
          (C = GetConstantInt(I->getOperand(1), DL)))) {
      return false;
    }

    Value *RHSVal;
    const APInt *RHSC;

    // Pattern match a special case
    // (x & ~2^z) == y --> x == y || x == y|2^z
    // This undoes a transformation done by instcombine to fuse 2 compares.
    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
      // It's a little bit hard to see why the following transformations are
      // correct. Here is a CVC3 program to verify them for 64-bit values:

      /*
      ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
      x : BITVECTOR(64);
      y : BITVECTOR(64);
      z : BITVECTOR(64);
      mask : BITVECTOR(64) = BVSHL(ONE, z);
      QUERY( (y & ~mask = y) =>
             ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
      );
      QUERY( (y | mask = y) =>
             ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
      );
      */

      // Please note that each pattern must be a dual implication (<--> or
      // iff). One directional implication can create spurious matches. If the
      // implication is only one-way, an unsatisfiable condition on the left
      // side can imply a satisfiable condition on the right side. Dual
      // implication ensures that satisfiable conditions are transformed to
      // other satisfiable conditions and unsatisfiable conditions are
      // transformed to other unsatisfiable conditions.

      // Here is a concrete example of a unsatisfiable condition on the left
      // implying a satisfiable condition on the right:
      //
      // mask = (1 << z)
      // (x & ~mask) == y --> (x == y || x == (y | mask))
      //
      // Substituting y = 3, z = 0 yields:
      // (x & -2) == 3 --> (x == 3 || x == 2)

      // Pattern match a special case:
      /*
      QUERY( (y & ~mask = y) =>
             ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
      );
      */
      if (match(ICI->getOperand(0),
                m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
        APInt Mask = ~*RHSC;
        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(C);
          Vals.push_back(
              ConstantInt::get(C->getContext(),
                               C->getValue() | Mask));
          UsedICmps++;
          return true;
        }
      }

      // Pattern match a special case:
      /*
      QUERY( (y | mask = y) =>
             ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
      );
      */
      if (match(ICI->getOperand(0),
                m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
        APInt Mask = *RHSC;
        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(C);
          Vals.push_back(ConstantInt::get(C->getContext(),
                                          C->getValue() & ~Mask));
          UsedICmps++;
          return true;
        }
      }

      // If we already have a value for the switch, it has to match!
      if (!setValueOnce(ICI->getOperand(0)))
        return false;

      UsedICmps++;
      Vals.push_back(C);
      // The non-null operand pointer converts to bool 'true' here.
      return ICI->getOperand(0);
    }

    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
    ConstantRange Span =
        // NOTE(review): the initializer of Span (an exact icmp-region
        // computation from the predicate and C) is missing from this extract.

    // Shift the range if the compare is fed by an add. This is the range
    // compare idiom as emitted by instcombine.
    Value *CandidateVal = I->getOperand(0);
    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
      Span = Span.subtract(*RHSC);
      CandidateVal = RHSVal;
    }

    // If this is an and/!= check, then we are looking to build the set of
    // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
    // x != 0 && x != 1.
    if (!isEQ)
      Span = Span.inverse();

    // If there are a ton of values, we don't want to make a ginormous switch.
    if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
      return false;
    }

    // If we already have a value for the switch, it has to match!
    if (!setValueOnce(CandidateVal))
      return false;

    // Add all values from the range to the set
    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));

    UsedICmps++;
    return true;
  }

  /// Given a potentially 'or'd or 'and'd together collection of icmp
  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
  /// the value being compared, and stick the list constants into the Vals
  /// vector.
  /// One "Extra" case is allowed to differ from the other.
  void gather(Value *V) {
    // A top-level '||' means we collect matching values; otherwise ('&&') we
    // collect the values that make the chain fail.
    bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));

    // Keep a stack (SmallVector for efficiency) for depth-first traversal
    // NOTE(review): the declarations of the `Visited` set and the `DFT`
    // worklist used below are missing from this extract.

    // Initialize
    Visited.insert(V);
    DFT.push_back(V);

    while (!DFT.empty()) {
      V = DFT.pop_back_val();

      if (Instruction *I = dyn_cast<Instruction>(V)) {
        // If it is a || (or && depending on isEQ), process the operands.
        Value *Op0, *Op1;
        if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
                 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
          if (Visited.insert(Op1).second)
            DFT.push_back(Op1);
          if (Visited.insert(Op0).second)
            DFT.push_back(Op0);

          continue;
        }

        // Try to match the current instruction
        if (matchInstruction(I, isEQ))
          // Match succeed, continue the loop
          continue;
      }

      // One element of the sequence of || (or &&) could not be match as a
      // comparison against the same value as the others.
      // We allow only one "Extra" case to be checked before the switch
      if (!Extra) {
        Extra = V;
        continue;
      }
      // Failed to parse a proper sequence, abort now
      CompValue = nullptr;
      break;
    }
  }
};
752
753} // end anonymous namespace
754
// Erase a terminator and, if its (possibly instruction) condition becomes
// dead as a result, clean that up too.
// NOTE(review): the first line of the signature (declaring the terminator
// parameter TI) is missing from this extract.
                                     MemorySSAUpdater *MSSAU = nullptr) {
  // Remember the condition feeding the terminator so it can be DCE'd after
  // the terminator itself is gone.
  Instruction *Cond = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cond = dyn_cast<Instruction>(SI->getCondition());
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    if (BI->isConditional())
      Cond = dyn_cast<Instruction>(BI->getCondition());
  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
    Cond = dyn_cast<Instruction>(IBI->getAddress());
  }

  TI->eraseFromParent();
  if (Cond)
    // NOTE(review): the call that recursively deletes Cond if trivially dead
    // is missing from this extract.
}
771
772/// Return true if the specified terminator checks
773/// to see if a value is equal to constant integer value.
774Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
775 Value *CV = nullptr;
776 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
777 // Do not permit merging of large switch instructions into their
778 // predecessors unless there is only one predecessor.
779 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
780 CV = SI->getCondition();
781 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
782 if (BI->isConditional() && BI->getCondition()->hasOneUse())
783 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
784 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
785 CV = ICI->getOperand(0);
786 }
787
788 // Unwrap any lossless ptrtoint cast.
789 if (CV) {
790 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
791 Value *Ptr = PTII->getPointerOperand();
792 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
793 CV = Ptr;
794 }
795 }
796 return CV;
797}
798
799/// Given a value comparison instruction,
800/// decode all of the 'cases' that it represents and return the 'default' block.
801BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
802 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
803 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
804 Cases.reserve(SI->getNumCases());
805 for (auto Case : SI->cases())
806 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
807 Case.getCaseSuccessor()));
808 return SI->getDefaultDest();
809 }
810
811 BranchInst *BI = cast<BranchInst>(TI);
812 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
813 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
814 Cases.push_back(ValueEqualityComparisonCase(
815 GetConstantInt(ICI->getOperand(1), DL), Succ));
816 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
817}
818
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
// NOTE(review): the line naming the function and declaring its BasicBlock
// parameter (BB) is missing from this extract.
    std::vector<ValueEqualityComparisonCase> &Cases) {
  // Matches via ValueEqualityComparisonCase::operator==(BasicBlock *),
  // i.e. erases every case whose destination is BB.
  llvm::erase(Cases, BB);
}
826
827/// Return true if there are any keys in C1 that exist in C2 as well.
828static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
829 std::vector<ValueEqualityComparisonCase> &C2) {
830 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
831
832 // Make V1 be smaller than V2.
833 if (V1->size() > V2->size())
834 std::swap(V1, V2);
835
836 if (V1->empty())
837 return false;
838 if (V1->size() == 1) {
839 // Just scan V2.
840 ConstantInt *TheVal = (*V1)[0].Value;
841 for (const ValueEqualityComparisonCase &VECC : *V2)
842 if (TheVal == VECC.Value)
843 return true;
844 }
845
846 // Otherwise, just sort both lists and compare element by element.
847 array_pod_sort(V1->begin(), V1->end());
848 array_pod_sort(V2->begin(), V2->end());
849 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
850 while (i1 != e1 && i2 != e2) {
851 if ((*V1)[i1].Value == (*V2)[i2].Value)
852 return true;
853 if ((*V1)[i1].Value < (*V2)[i2].Value)
854 ++i1;
855 else
856 ++i2;
857 }
858 return false;
859}
860
// Set branch weights on SwitchInst. This sets the metadata if there is at
// least one non-zero weight.
// NOTE(review): the signature line (taking the SwitchInst and its weight
// list) is missing from this extract.
  // Check that there is at least one non-zero weight. Otherwise, pass
  // nullptr to setMetadata which will erase the existing metadata.
  MDNode *N = nullptr;
  if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
    N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
  SI->setMetadata(LLVMContext::MD_prof, N);
}
871
872// Similar to the above, but for branch and select instructions that take
873// exactly 2 weights.
874static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
875 uint32_t FalseWeight) {
876 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
877 // Check that there is at least one non-zero weight. Otherwise, pass
878 // nullptr to setMetadata which will erase the existing metadata.
879 MDNode *N = nullptr;
880 if (TrueWeight || FalseWeight)
881 N = MDBuilder(I->getParent()->getContext())
882 .createBranchWeights(TrueWeight, FalseWeight);
883 I->setMetadata(LLVMContext::MD_prof, N);
884}
885
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
///
/// Returns true if TI was simplified (and the CFG/DomTree updated).
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  EliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
  EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!ValuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      // NOTE(review): a line erasing the old terminator (and DCE'ing its
      // condition) appears to have been dropped by extraction here -- verify
      // against upstream.

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    // Wrap the switch so that case removal below keeps !prof weights in sync.
    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    // NOTE(review): the declaration of the DeadCases set appears to have been
    // dropped by extraction here -- verify against upstream.
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      DeadCases.insert(PredCases[i].Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Track the per-successor case count so we can tell which successors lose
    // their last incoming edge (those need DomTree delete updates).
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Iterate backwards so that removeCase (which swaps with the last case)
    // does not invalidate the iteration.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
    if (PredCases[i].Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = PredCases[i].Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
    if (ThisCases[i].Value == TIV) {
      TheRealDest = ThisCases[i].Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge is cleared once the
  // first edge to TheRealDest has been seen, so duplicate edges to it are
  // also pruned from PHIs.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  // NOTE(review): a line erasing the old terminator (and DCE'ing its
  // condition) appears to have been dropped by extraction here -- verify
  // against upstream.
  if (DTU) {
    // NOTE(review): the declaration of the Updates vector appears to have
    // been dropped by extraction here -- verify against upstream.
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1039
1040namespace {
1041
1042/// This class implements a stable ordering of constant
1043/// integers that does not depend on their address. This is important for
1044/// applications that sort ConstantInt's to ensure uniqueness.
1045struct ConstantIntOrdering {
1046 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1047 return LHS->getValue().ult(RHS->getValue());
1048 }
1049};
1050
1051} // end anonymous namespace
1052
// qsort-style comparator over ConstantInt* elements: returns 0 on pointer
// equality, 1 when *P1 is unsigned-less-than *P2, and -1 otherwise.
// NOTE(review): the opening signature line (declaring the first parameter P1)
// appears to have been dropped by extraction here -- verify against upstream.
                                    ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  if (LHS == RHS)
    return 0;
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
1061
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
/// NOTE(review): the first line of the signature (declaring the terminator
/// parameter TI) appears to have been dropped by extraction here -- verify
/// against upstream.
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD);
  // Weight operands start at index 1 (operand 0 is presumably the !prof kind
  // string -- TODO confirm against the branch_weights metadata format).
  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
    Weights.push_back(CI->getValue().getZExtValue());
  }

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1084
/// Keep halving the weights until all can fit in uint32_t.
/// NOTE(review): the function signature line appears to have been dropped by
/// extraction here (it takes the mutable weight range) -- verify against
/// upstream.
  uint64_t Max = *llvm::max_element(Weights);
  if (Max > UINT_MAX) {
    // Shift right just enough that the largest weight has a 32-bit width;
    // relative proportions of the weights are (approximately) preserved.
    unsigned Offset = 32 - llvm::countl_zero(Max);
    for (uint64_t &I : Weights)
      I >>= Offset;
  }
}
1094
// Clone the non-terminator ("bonus") instructions of BB into PredBlock, just
// before PredBlock's terminator, recording the mapping in VMap and rewiring
// liveout uses (which must be in block-closed SSA form) to the clones.
// NOTE(review): the first line of the signature (the static function name)
// appears to have been dropped by extraction here -- verify against upstream.
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!isa<DbgInfoIntrinsic>(BonusInst) &&
        PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc());
    }

    RemapInstruction(NewBonusInst, VMap,
    // NOTE(review): the remap-flags argument line appears to have been
    // dropped by extraction here -- verify against upstream.

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgVariableRecordRange(NewBonusInst->getModule(), Range, VMap,
    // NOTE(review): the trailing remap-flags argument lines appear to have
    // been dropped by extraction here -- verify against upstream.

    if (isa<DbgInfoIntrinsic>(BonusInst))
      continue;

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }
}
1163
// Fold the value-equality comparison terminator TI (on condition value CV)
// into its predecessor's comparison terminator PTI, replacing PTI with a
// merged switch. Branch-weight metadata and the DomTree (via DTU) are kept
// up to date. Always returns true (a fold is performed).
bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  // NOTE(review): the declaration of the DomTree Updates vector appears to
  // have been dropped by extraction here -- verify against upstream.

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  // NOTE(review): the declaration of the NewSuccessors map (successor ->
  // number of new incoming edges) appears to have been dropped by extraction
  // here -- verify against upstream.

  // Update the branch weight metadata along the way
  // NOTE(review): the declaration of the Weights vector appears to have been
  // dropped by extraction here -- verify against upstream.
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    GetBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    GetBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (PTIHandled.count(BBCases[i].Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[BBCases[i].Value]);
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(NewSI, MDWeights);
  }

  // NOTE(review): a line erasing the old predecessor terminator appears to
  // have been dropped by extraction here -- verify against upstream.

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1380
/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
///
/// Returns true if at least one predecessor was folded into TI.
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?");

  bool Changed = false;

  // NOTE(review): the declaration of the Preds worklist (presumably the
  // predecessors of BB) appears to have been dropped by extraction here --
  // verify against upstream.
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
    if (PCV != CV)
      continue;

    // NOTE(review): the declaration of FailBlocks appears to have been
    // dropped by extraction here -- verify against upstream.
    if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
      // Split critical edges that make the merge unsafe; bail out if any
      // split fails.
      for (auto *Succ : FailBlocks) {
        if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
          return false;
      }
    }

    PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}
1420
// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
// NOTE(review): the first line of the signature (declaring BB1/BB2) appears
// to have been dropped by extraction here -- verify against upstream.
                                Instruction *I1, Instruction *I2) {
  // Scan successor PHIs: if any PHI merges different values from the two
  // blocks and one of those values is the invoke itself, hoisting would
  // require a select on the invoke's value -- refuse.
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1438
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
// NOTE(review): extraction appears to have dropped the SkipFlags enum
// definition and the skippedInstrFlags function signature here -- verify
// against upstream.
  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  // NOTE(review): the condition line guarding this flag (presumably checking
  // whether I is guaranteed to transfer execution to its successor) appears
  // to have been dropped by extraction -- verify against upstream.
    Flags |= SkipImplicitControlFlow;
  return Flags;
}
1460
// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block. \p Flags is the union of skippedInstrFlags
// for all the instructions being reordered across.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  // NOTE(review): the condition line for this early return (presumably
  // checking SkipImplicitControlFlow and speculatability of I) appears to
  // have been dropped by extraction -- verify against upstream.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1496
1497static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1498
/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
/// NOTE(review): the first line of the signature (declaring I1/I2) appears to
/// have been dropped by extraction here -- verify against upstream.
                                          const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // NOTE(review): the condition line for this early return (presumably the
  // TTI profitability check for hoisting I1/I2) appears to have been dropped
  // by extraction -- verify against upstream.
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1529
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
/// NOTE(review): the function signature line (the static function name)
/// appears to have been dropped by extraction here -- verify against
/// upstream.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  // NOTE(review): the declaration of Itrs appears to have been dropped by
  // extraction here -- verify against upstream.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
    // NOTE(review): the lambda header line appears to have been dropped by
    // extraction here -- verify against upstream.
                    return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1586
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
/// In that case, only the original BI will be replaced and selects for PHIs are
/// added.
///
/// Returns true if any instruction (or terminator) was hoisted into BB.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
                                                   bool EqTermsOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has it's address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by it's address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  auto *TI = BB->getTerminator();

  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  // Check if only hoisting terminators is allowed. This does not add new
  // instructions to the hoist location.
  if (EqTermsOnly) {
    // Skip any debug intrinsics, as they are free to hoist.
    for (auto &SuccIter : make_first_range(SuccIterPairs)) {
      auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
      if (!INonDbg->isTerminator())
        return false;
    }
    // Now we know that we only need to hoist debug intrinsics and the
    // terminator. Let the loop below handle those 2 cases.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Skip debug info if it is not identical.
    bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
      Instruction *I2 = &*Iter;
      return I1->isIdenticalToWhenDefined(I2);
    });
    if (!AllDbgInstsAreIdentical) {
      while (isa<DbgInfoIntrinsic>(I1))
        I1 = &*++BB1ItrPair.first;
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter;
        while (isa<DbgInfoIntrinsic>(I2))
          I2 = &*++SuccIter;
      }
    }

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
        AllInstsAreIdentical = false;
    }

    // NOTE(review): the declaration of OtherInsts appears to have been
    // dropped by extraction here -- verify against upstream.
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
            // NOTE(review): the trailing conjunct line (presumably the
            // shouldHoistCommonInstructions profitability check) appears to
            // have been dropped by extraction here -- verify upstream.
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      if (isa<DbgInfoIntrinsic>(I1)) {
        // The debug location is an integral part of a debug info intrinsic
        // and can't be separated from it or replaced. Instead of attempting
        // to merge locations, simply hoist both copies of the intrinsic.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(TI);
        for (auto &SuccIter : OtherSuccIterRange) {
          auto *I2 = &*SuccIter++;
          assert(isa<DbgInfoIntrinsic>(I2));
          I2->moveBefore(TI);
        }
      } else {
        // For a normal instruction, we just move one to right before the
        // branch, then replace all uses of the other with the first. Finally,
        // we remove the now redundant second instruction.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(TI);
        for (auto &SuccIter : OtherSuccIterRange) {
          Instruction *I2 = &*SuccIter++;
          assert(I2 != I1);
          if (!I2->use_empty())
            I2->replaceAllUsesWith(I1);
          I1->andIRFlags(I2);
          combineMetadataForCSE(I1, I2, true);
          // I1 and I2 are being combined into a single instruction. Its debug
          // location is the merged locations of the original instructions.
          I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
          I2->eraseFromParent();
        }
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
1777
1778bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1779 Instruction *TI, Instruction *I1,
1780 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1781
1782 auto *BI = dyn_cast<BranchInst>(TI);
1783
1784 bool Changed = false;
1785 BasicBlock *TIParent = TI->getParent();
1786 BasicBlock *BB1 = I1->getParent();
1787
1788 // Use only for an if statement.
1789 auto *I2 = *OtherSuccTIs.begin();
1790 auto *BB2 = I2->getParent();
1791 if (BI) {
1792 assert(OtherSuccTIs.size() == 1);
1793 assert(BI->getSuccessor(0) == I1->getParent());
1794 assert(BI->getSuccessor(1) == I2->getParent());
1795 }
1796
1797 // In the case of an if statement, we try to hoist an invoke.
1798 // FIXME: Can we define a safety predicate for CallBr?
1799 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
1800 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
1801 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
1802 return false;
1803
1804 // TODO: callbr hoisting currently disabled pending further study.
1805 if (isa<CallBrInst>(I1))
1806 return false;
1807
1808 for (BasicBlock *Succ : successors(BB1)) {
1809 for (PHINode &PN : Succ->phis()) {
1810 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1811 for (Instruction *OtherSuccTI : OtherSuccTIs) {
1812 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
1813 if (BB1V == BB2V)
1814 continue;
1815
1816 // In the case of an if statement, check for
1817 // passingValueIsAlwaysUndefined here because we would rather eliminate
1818 // undefined control flow then converting it to a select.
1819 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
1821 return false;
1822 }
1823 }
1824 }
1825
1826 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
1827 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
1828 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
1829 // Clone the terminator and hoist it into the pred, without any debug info.
1830 Instruction *NT = I1->clone();
1831 NT->insertInto(TIParent, TI->getIterator());
1832 if (!NT->getType()->isVoidTy()) {
1833 I1->replaceAllUsesWith(NT);
1834 for (Instruction *OtherSuccTI : OtherSuccTIs)
1835 OtherSuccTI->replaceAllUsesWith(NT);
1836 NT->takeName(I1);
1837 }
1838 Changed = true;
1839 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
1840
1841 // Ensure terminator gets a debug location, even an unknown one, in case
1842 // it involves inlinable calls.
1844 Locs.push_back(I1->getDebugLoc());
1845 for (auto *OtherSuccTI : OtherSuccTIs)
1846 Locs.push_back(OtherSuccTI->getDebugLoc());
1847 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
1848
1849 // PHIs created below will adopt NT's merged DebugLoc.
1850 IRBuilder<NoFolder> Builder(NT);
1851
1852 // In the case of an if statement, hoisting one of the terminators from our
1853 // successor is a great thing. Unfortunately, the successors of the if/else
1854 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
1855 // must agree for all PHI nodes, so we insert select instruction to compute
1856 // the final result.
1857 if (BI) {
1858 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1859 for (BasicBlock *Succ : successors(BB1)) {
1860 for (PHINode &PN : Succ->phis()) {
1861 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1862 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1863 if (BB1V == BB2V)
1864 continue;
1865
1866 // These values do not agree. Insert a select instruction before NT
1867 // that determines the right value.
1868 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1869 if (!SI) {
1870 // Propagate fast-math-flags from phi node to its replacement select.
1871 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1872 if (isa<FPMathOperator>(PN))
1873 Builder.setFastMathFlags(PN.getFastMathFlags());
1874
1875 SI = cast<SelectInst>(Builder.CreateSelect(
1876 BI->getCondition(), BB1V, BB2V,
1877 BB1V->getName() + "." + BB2V->getName(), BI));
1878 }
1879
1880 // Make the PHI node use the select for all incoming values for BB1/BB2
1881 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1882 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1883 PN.setIncomingValue(i, SI);
1884 }
1885 }
1886 }
1887
1889
1890 // Update any PHI nodes in our new successors.
1891 for (BasicBlock *Succ : successors(BB1)) {
1892 AddPredecessorToBlock(Succ, TIParent, BB1);
1893 if (DTU)
1894 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
1895 }
1896
1897 if (DTU)
1898 for (BasicBlock *Succ : successors(TI))
1899 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1900
1902 if (DTU)
1903 DTU->applyUpdates(Updates);
1904 return Changed;
1905}
1906
1907// Check lifetime markers.
1908static bool isLifeTimeMarker(const Instruction *I) {
1909 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1910 switch (II->getIntrinsicID()) {
1911 default:
1912 break;
1913 case Intrinsic::lifetime_start:
1914 case Intrinsic::lifetime_end:
1915 return true;
1916 }
1917 }
1918 return false;
1919}
1920
1921// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1922// into variables.
1924 int OpIdx) {
1925 return !isa<IntrinsicInst>(I);
1926}
1927
1928// All instructions in Insts belong to different blocks that all unconditionally
1929// branch to a common successor. Analyze each instruction and return true if it
1930// would be possible to sink them into their successor, creating one common
1931// instruction instead. For every value that would be required to be provided by
1932// PHI node (because an operand varies in each input block), add to PHIOperands.
1935 DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1936 // Prune out obviously bad instructions to move. Each instruction must have
1937 // exactly zero or one use, and we check later that use is by a single, common
1938 // PHI instruction in the successor.
1939 bool HasUse = !Insts.front()->user_empty();
1940 for (auto *I : Insts) {
1941 // These instructions may change or break semantics if moved.
1942 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1943 I->getType()->isTokenTy())
1944 return false;
1945
1946 // Do not try to sink an instruction in an infinite loop - it can cause
1947 // this algorithm to infinite loop.
1948 if (I->getParent()->getSingleSuccessor() == I->getParent())
1949 return false;
1950
1951 // Conservatively return false if I is an inline-asm instruction. Sinking
1952 // and merging inline-asm instructions can potentially create arguments
1953 // that cannot satisfy the inline-asm constraints.
1954 // If the instruction has nomerge or convergent attribute, return false.
1955 if (const auto *C = dyn_cast<CallBase>(I))
1956 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
1957 return false;
1958
1959 // Each instruction must have zero or one use.
1960 if (HasUse && !I->hasOneUse())
1961 return false;
1962 if (!HasUse && !I->user_empty())
1963 return false;
1964 }
1965
1966 const Instruction *I0 = Insts.front();
1967 for (auto *I : Insts) {
1968 if (!I->isSameOperationAs(I0))
1969 return false;
1970
1971 // swifterror pointers can only be used by a load or store; sinking a load
1972 // or store would require introducing a select for the pointer operand,
1973 // which isn't allowed for swifterror pointers.
1974 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
1975 return false;
1976 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
1977 return false;
1978 }
1979
1980 // All instructions in Insts are known to be the same opcode. If they have a
1981 // use, check that the only user is a PHI or in the same block as the
1982 // instruction, because if a user is in the same block as an instruction we're
1983 // contemplating sinking, it must already be determined to be sinkable.
1984 if (HasUse) {
1985 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1986 auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1987 if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1988 auto *U = cast<Instruction>(*I->user_begin());
1989 return (PNUse &&
1990 PNUse->getParent() == Succ &&
1991 PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1992 U->getParent() == I->getParent();
1993 }))
1994 return false;
1995 }
1996
1997 // Because SROA can't handle speculating stores of selects, try not to sink
1998 // loads, stores or lifetime markers of allocas when we'd have to create a
1999 // PHI for the address operand. Also, because it is likely that loads or
2000 // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
2001 // them.
2002 // This can cause code churn which can have unintended consequences down
2003 // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
2004 // FIXME: This is a workaround for a deficiency in SROA - see
2005 // https://llvm.org/bugs/show_bug.cgi?id=30188
2006 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
2007 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2008 }))
2009 return false;
2010 if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
2011 return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
2012 }))
2013 return false;
2014 if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
2015 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2016 }))
2017 return false;
2018
2019 // For calls to be sinkable, they must all be indirect, or have same callee.
2020 // I.e. if we have two direct calls to different callees, we don't want to
2021 // turn that into an indirect call. Likewise, if we have an indirect call,
2022 // and a direct call, we don't actually want to have a single indirect call.
2023 if (isa<CallBase>(I0)) {
2024 auto IsIndirectCall = [](const Instruction *I) {
2025 return cast<CallBase>(I)->isIndirectCall();
2026 };
2027 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2028 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2029 if (HaveIndirectCalls) {
2030 if (!AllCallsAreIndirect)
2031 return false;
2032 } else {
2033 // All callees must be identical.
2034 Value *Callee = nullptr;
2035 for (const Instruction *I : Insts) {
2036 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2037 if (!Callee)
2038 Callee = CurrCallee;
2039 else if (Callee != CurrCallee)
2040 return false;
2041 }
2042 }
2043 }
2044
2045 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2046 Value *Op = I0->getOperand(OI);
2047 if (Op->getType()->isTokenTy())
2048 // Don't touch any operand of token type.
2049 return false;
2050
2051 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2052 assert(I->getNumOperands() == I0->getNumOperands());
2053 return I->getOperand(OI) == I0->getOperand(OI);
2054 };
2055 if (!all_of(Insts, SameAsI0)) {
2056 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
2058 // We can't create a PHI from this GEP.
2059 return false;
2060 for (auto *I : Insts)
2061 PHIOperands[I].push_back(I->getOperand(OI));
2062 }
2063 }
2064 return true;
2065}
2066
2067// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2068// instruction of every block in Blocks to their common successor, commoning
2069// into one instruction.
2071 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2072
2073 // canSinkInstructions returning true guarantees that every block has at
2074 // least one non-terminator instruction.
2076 for (auto *BB : Blocks) {
2077 Instruction *I = BB->getTerminator();
2078 do {
2079 I = I->getPrevNode();
2080 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2081 if (!isa<DbgInfoIntrinsic>(I))
2082 Insts.push_back(I);
2083 }
2084
2085 // The only checking we need to do now is that all users of all instructions
2086 // are the same PHI node. canSinkInstructions should have checked this but
2087 // it is slightly over-aggressive - it gets confused by commutative
2088 // instructions so double-check it here.
2089 Instruction *I0 = Insts.front();
2090 if (!I0->user_empty()) {
2091 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
2092 if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
2093 auto *U = cast<Instruction>(*I->user_begin());
2094 return U == PNUse;
2095 }))
2096 return false;
2097 }
2098
2099 // We don't need to do any more checking here; canSinkInstructions should
2100 // have done it all for us.
2101 SmallVector<Value*, 4> NewOperands;
2102 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2103 // This check is different to that in canSinkInstructions. There, we
2104 // cared about the global view once simplifycfg (and instcombine) have
2105 // completed - it takes into account PHIs that become trivially
2106 // simplifiable. However here we need a more local view; if an operand
2107 // differs we create a PHI and rely on instcombine to clean up the very
2108 // small mess we may make.
2109 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2110 return I->getOperand(O) != I0->getOperand(O);
2111 });
2112 if (!NeedPHI) {
2113 NewOperands.push_back(I0->getOperand(O));
2114 continue;
2115 }
2116
2117 // Create a new PHI in the successor block and populate it.
2118 auto *Op = I0->getOperand(O);
2119 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2120 auto *PN =
2121 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2122 PN->insertBefore(BBEnd->begin());
2123 for (auto *I : Insts)
2124 PN->addIncoming(I->getOperand(O), I->getParent());
2125 NewOperands.push_back(PN);
2126 }
2127
2128 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2129 // and move it to the start of the successor block.
2130 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2131 I0->getOperandUse(O).set(NewOperands[O]);
2132
2133 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2134
2135 // Update metadata and IR flags, and merge debug locations.
2136 for (auto *I : Insts)
2137 if (I != I0) {
2138 // The debug location for the "common" instruction is the merged locations
2139 // of all the commoned instructions. We start with the original location
2140 // of the "common" instruction and iteratively merge each location in the
2141 // loop below.
2142 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2143 // However, as N-way merge for CallInst is rare, so we use simplified API
2144 // instead of using complex API for N-way merge.
2145 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2146 combineMetadataForCSE(I0, I, true);
2147 I0->andIRFlags(I);
2148 }
2149
2150 if (!I0->user_empty()) {
2151 // canSinkLastInstruction checked that all instructions were used by
2152 // one and only one PHI node. Find that now, RAUW it to our common
2153 // instruction and nuke it.
2154 auto *PN = cast<PHINode>(*I0->user_begin());
2155 PN->replaceAllUsesWith(I0);
2156 PN->eraseFromParent();
2157 }
2158
2159 // Finally nuke all instructions apart from the common instruction.
2160 for (auto *I : Insts) {
2161 if (I == I0)
2162 continue;
2163 // The remaining uses are debug users, replace those with the common inst.
2164 // In most (all?) cases this just introduces a use-before-def.
2165 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2166 I->replaceAllUsesWith(I0);
2167 I->eraseFromParent();
2168 }
2169
2170 return true;
2171}
2172
2173namespace {
2174
2175 // LockstepReverseIterator - Iterates through instructions
2176 // in a set of blocks in reverse order from the first non-terminator.
2177 // For example (assume all blocks have size n):
2178 // LockstepReverseIterator I([B1, B2, B3]);
2179 // *I-- = [B1[n], B2[n], B3[n]];
2180 // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
2181 // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
2182 // ...
2183 class LockstepReverseIterator {
2186 bool Fail;
2187
2188 public:
2189 LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
2190 reset();
2191 }
2192
2193 void reset() {
2194 Fail = false;
2195 Insts.clear();
2196 for (auto *BB : Blocks) {
2197 Instruction *Inst = BB->getTerminator();
2198 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2199 Inst = Inst->getPrevNode();
2200 if (!Inst) {
2201 // Block wasn't big enough.
2202 Fail = true;
2203 return;
2204 }
2205 Insts.push_back(Inst);
2206 }
2207 }
2208
2209 bool isValid() const {
2210 return !Fail;
2211 }
2212
2213 void operator--() {
2214 if (Fail)
2215 return;
2216 for (auto *&Inst : Insts) {
2217 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2218 Inst = Inst->getPrevNode();
2219 // Already at beginning of block.
2220 if (!Inst) {
2221 Fail = true;
2222 return;
2223 }
2224 }
2225 }
2226
2227 void operator++() {
2228 if (Fail)
2229 return;
2230 for (auto *&Inst : Insts) {
2231 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2232 Inst = Inst->getNextNode();
2233 // Already at end of block.
2234 if (!Inst) {
2235 Fail = true;
2236 return;
2237 }
2238 }
2239 }
2240
2242 return Insts;
2243 }
2244 };
2245
2246} // end anonymous namespace
2247
2248/// Check whether BB's predecessors end with unconditional branches. If it is
2249/// true, sink any common code from the predecessors to BB.
2251 DomTreeUpdater *DTU) {
2252 // We support two situations:
2253 // (1) all incoming arcs are unconditional
2254 // (2) there are non-unconditional incoming arcs
2255 //
2256 // (2) is very common in switch defaults and
2257 // else-if patterns;
2258 //
2259 // if (a) f(1);
2260 // else if (b) f(2);
2261 //
2262 // produces:
2263 //
2264 // [if]
2265 // / \
2266 // [f(1)] [if]
2267 // | | \
2268 // | | |
2269 // | [f(2)]|
2270 // \ | /
2271 // [ end ]
2272 //
2273 // [end] has two unconditional predecessor arcs and one conditional. The
2274 // conditional refers to the implicit empty 'else' arc. This conditional
2275 // arc can also be caused by an empty default block in a switch.
2276 //
2277 // In this case, we attempt to sink code from all *unconditional* arcs.
2278 // If we can sink instructions from these arcs (determined during the scan
2279 // phase below) we insert a common successor for all unconditional arcs and
2280 // connect that to [end], to enable sinking:
2281 //
2282 // [if]
2283 // / \
2284 // [x(1)] [if]
2285 // | | \
2286 // | | \
2287 // | [x(2)] |
2288 // \ / |
2289 // [sink.split] |
2290 // \ /
2291 // [ end ]
2292 //
2293 SmallVector<BasicBlock*,4> UnconditionalPreds;
2294 bool HaveNonUnconditionalPredecessors = false;
2295 for (auto *PredBB : predecessors(BB)) {
2296 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2297 if (PredBr && PredBr->isUnconditional())
2298 UnconditionalPreds.push_back(PredBB);
2299 else
2300 HaveNonUnconditionalPredecessors = true;
2301 }
2302 if (UnconditionalPreds.size() < 2)
2303 return false;
2304
2305 // We take a two-step approach to tail sinking. First we scan from the end of
2306 // each block upwards in lockstep. If the n'th instruction from the end of each
2307 // block can be sunk, those instructions are added to ValuesToSink and we
2308 // carry on. If we can sink an instruction but need to PHI-merge some operands
2309 // (because they're not identical in each instruction) we add these to
2310 // PHIOperands.
2311 int ScanIdx = 0;
2312 SmallPtrSet<Value*,4> InstructionsToSink;
2314 LockstepReverseIterator LRI(UnconditionalPreds);
2315 while (LRI.isValid() &&
2316 canSinkInstructions(*LRI, PHIOperands)) {
2317 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2318 << "\n");
2319 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2320 ++ScanIdx;
2321 --LRI;
2322 }
2323
2324 // If no instructions can be sunk, early-return.
2325 if (ScanIdx == 0)
2326 return false;
2327
2328 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2329
2330 if (!followedByDeoptOrUnreachable) {
2331 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2332 // actually sink before encountering instruction that is unprofitable to
2333 // sink?
2334 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2335 unsigned NumPHIdValues = 0;
2336 for (auto *I : *LRI)
2337 for (auto *V : PHIOperands[I]) {
2338 if (!InstructionsToSink.contains(V))
2339 ++NumPHIdValues;
2340 // FIXME: this check is overly optimistic. We may end up not sinking
2341 // said instruction, due to the very same profitability check.
2342 // See @creating_too_many_phis in sink-common-code.ll.
2343 }
2344 LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2345 unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2346 if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2347 NumPHIInsts++;
2348
2349 return NumPHIInsts <= 1;
2350 };
2351
2352 // We've determined that we are going to sink last ScanIdx instructions,
2353 // and recorded them in InstructionsToSink. Now, some instructions may be
2354 // unprofitable to sink. But that determination depends on the instructions
2355 // that we are going to sink.
2356
2357 // First, forward scan: find the first instruction unprofitable to sink,
2358 // recording all the ones that are profitable to sink.
2359 // FIXME: would it be better, after we detect that not all are profitable.
2360 // to either record the profitable ones, or erase the unprofitable ones?
2361 // Maybe we need to choose (at runtime) the one that will touch least
2362 // instrs?
2363 LRI.reset();
2364 int Idx = 0;
2365 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2366 while (Idx < ScanIdx) {
2367 if (!ProfitableToSinkInstruction(LRI)) {
2368 // Too many PHIs would be created.
2369 LLVM_DEBUG(
2370 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2371 break;
2372 }
2373 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2374 --LRI;
2375 ++Idx;
2376 }
2377
2378 // If no instructions can be sunk, early-return.
2379 if (Idx == 0)
2380 return false;
2381
2382 // Did we determine that (only) some instructions are unprofitable to sink?
2383 if (Idx < ScanIdx) {
2384 // Okay, some instructions are unprofitable.
2385 ScanIdx = Idx;
2386 InstructionsToSink = InstructionsProfitableToSink;
2387
2388 // But, that may make other instructions unprofitable, too.
2389 // So, do a backward scan, do any earlier instructions become
2390 // unprofitable?
2391 assert(
2392 !ProfitableToSinkInstruction(LRI) &&
2393 "We already know that the last instruction is unprofitable to sink");
2394 ++LRI;
2395 --Idx;
2396 while (Idx >= 0) {
2397 // If we detect that an instruction becomes unprofitable to sink,
2398 // all earlier instructions won't be sunk either,
2399 // so preemptively keep InstructionsProfitableToSink in sync.
2400 // FIXME: is this the most performant approach?
2401 for (auto *I : *LRI)
2402 InstructionsProfitableToSink.erase(I);
2403 if (!ProfitableToSinkInstruction(LRI)) {
2404 // Everything starting with this instruction won't be sunk.
2405 ScanIdx = Idx;
2406 InstructionsToSink = InstructionsProfitableToSink;
2407 }
2408 ++LRI;
2409 --Idx;
2410 }
2411 }
2412
2413 // If no instructions can be sunk, early-return.
2414 if (ScanIdx == 0)
2415 return false;
2416 }
2417
2418 bool Changed = false;
2419
2420 if (HaveNonUnconditionalPredecessors) {
2421 if (!followedByDeoptOrUnreachable) {
2422 // It is always legal to sink common instructions from unconditional
2423 // predecessors. However, if not all predecessors are unconditional,
2424 // this transformation might be pessimizing. So as a rule of thumb,
2425 // don't do it unless we'd sink at least one non-speculatable instruction.
2426 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2427 LRI.reset();
2428 int Idx = 0;
2429 bool Profitable = false;
2430 while (Idx < ScanIdx) {
2431 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2432 Profitable = true;
2433 break;
2434 }
2435 --LRI;
2436 ++Idx;
2437 }
2438 if (!Profitable)
2439 return false;
2440 }
2441
2442 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2443 // We have a conditional edge and we're going to sink some instructions.
2444 // Insert a new block postdominating all blocks we're going to sink from.
2445 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2446 // Edges couldn't be split.
2447 return false;
2448 Changed = true;
2449 }
2450
2451 // Now that we've analyzed all potential sinking candidates, perform the
2452 // actual sink. We iteratively sink the last non-terminator of the source
2453 // blocks into their common successor unless doing so would require too
2454 // many PHI instructions to be generated (currently only one PHI is allowed
2455 // per sunk instruction).
2456 //
2457 // We can use InstructionsToSink to discount values needing PHI-merging that will
2458 // actually be sunk in a later iteration. This allows us to be more
2459 // aggressive in what we sink. This does allow a false positive where we
2460 // sink presuming a later value will also be sunk, but stop half way through
2461 // and never actually sink it which means we produce more PHIs than intended.
2462 // This is unlikely in practice though.
2463 int SinkIdx = 0;
2464 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2465 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2466 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2467 << "\n");
2468
2469 // Because we've sunk every instruction in turn, the current instruction to
2470 // sink is always at index 0.
2471 LRI.reset();
2472
2473 if (!sinkLastInstruction(UnconditionalPreds)) {
2474 LLVM_DEBUG(
2475 dbgs()
2476 << "SINK: stopping here, failed to actually sink instruction!\n");
2477 break;
2478 }
2479
2480 NumSinkCommonInstrs++;
2481 Changed = true;
2482 }
2483 if (SinkIdx != 0)
2484 ++NumSinkCommonCode;
2485 return Changed;
2486}
2487
2488namespace {
2489
2490struct CompatibleSets {
2491 using SetTy = SmallVector<InvokeInst *, 2>;
2492
2494
2495 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2496
2497 SetTy &getCompatibleSet(InvokeInst *II);
2498
2499 void insert(InvokeInst *II);
2500};
2501
2502CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2503 // Perform a linear scan over all the existing sets, see if the new `invoke`
2504 // is compatible with any particular set. Since we know that all the `invokes`
2505 // within a set are compatible, only check the first `invoke` in each set.
2506 // WARNING: at worst, this has quadratic complexity.
2507 for (CompatibleSets::SetTy &Set : Sets) {
2508 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2509 return Set;
2510 }
2511
2512 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2513 return Sets.emplace_back();
2514}
2515
2516void CompatibleSets::insert(InvokeInst *II) {
2517 getCompatibleSet(II).emplace_back(II);
2518}
2519
2520bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2521 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2522
2523 // Can we theoretically merge these `invoke`s?
2524 auto IsIllegalToMerge = [](InvokeInst *II) {
2525 return II->cannotMerge() || II->isInlineAsm();
2526 };
2527 if (any_of(Invokes, IsIllegalToMerge))
2528 return false;
2529
2530 // Either both `invoke`s must be direct,
2531 // or both `invoke`s must be indirect.
2532 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2533 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2534 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2535 if (HaveIndirectCalls) {
2536 if (!AllCallsAreIndirect)
2537 return false;
2538 } else {
2539 // All callees must be identical.
2540 Value *Callee = nullptr;
2541 for (InvokeInst *II : Invokes) {
2542 Value *CurrCallee = II->getCalledOperand();
2543 assert(CurrCallee && "There is always a called operand.");
2544 if (!Callee)
2545 Callee = CurrCallee;
2546 else if (Callee != CurrCallee)
2547 return false;
2548 }
2549 }
2550
2551 // Either both `invoke`s must not have a normal destination,
2552 // or both `invoke`s must have a normal destination,
2553 auto HasNormalDest = [](InvokeInst *II) {
2554 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2555 };
2556 if (any_of(Invokes, HasNormalDest)) {
2557 // Do not merge `invoke` that does not have a normal destination with one
2558 // that does have a normal destination, even though doing so would be legal.
2559 if (!all_of(Invokes, HasNormalDest))
2560 return false;
2561
2562 // All normal destinations must be identical.
2563 BasicBlock *NormalBB = nullptr;
2564 for (InvokeInst *II : Invokes) {
2565 BasicBlock *CurrNormalBB = II->getNormalDest();
2566 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2567 if (!NormalBB)
2568 NormalBB = CurrNormalBB;
2569 else if (NormalBB != CurrNormalBB)
2570 return false;
2571 }
2572
2573 // In the normal destination, the incoming values for these two `invoke`s
2574 // must be compatible.
2575 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2577 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2578 &EquivalenceSet))
2579 return false;
2580 }
2581
2582#ifndef NDEBUG
2583 // All unwind destinations must be identical.
2584 // We know that because we have started from said unwind destination.
2585 BasicBlock *UnwindBB = nullptr;
2586 for (InvokeInst *II : Invokes) {
2587 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2588 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2589 if (!UnwindBB)
2590 UnwindBB = CurrUnwindBB;
2591 else
2592 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2593 }
2594#endif
2595
2596 // In the unwind destination, the incoming values for these two `invoke`s
2597 // must be compatible.
2599 Invokes.front()->getUnwindDest(),
2600 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2601 return false;
2602
2603 // Ignoring arguments, these `invoke`s must be identical,
2604 // including operand bundles.
2605 const InvokeInst *II0 = Invokes.front();
2606 for (auto *II : Invokes.drop_front())
2607 if (!II->isSameOperationAs(II0))
2608 return false;
2609
2610 // Can we theoretically form the data operands for the merged `invoke`?
2611 auto IsIllegalToMergeArguments = [](auto Ops) {
2612 Use &U0 = std::get<0>(Ops);
2613 Use &U1 = std::get<1>(Ops);
2614 if (U0 == U1)
2615 return false;
2616 return U0->getType()->isTokenTy() ||
2617 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2618 U0.getOperandNo());
2619 };
2620 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2621 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2622 IsIllegalToMergeArguments))
2623 return false;
2624
2625 return true;
2626}
2627
2628} // namespace
2629
2630// Merge all invokes in the provided set, all of which are compatible
2631// as per the `CompatibleSets::shouldBelongToSameSet()`.
// Takes the set of mergeable `invoke`s plus an optional DomTreeUpdater; all
// dominator-tree edge changes are batched into `Updates` and applied once at
// the very end.
2633 DomTreeUpdater *DTU) {
2634 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2635
2637 if (DTU)
 // Worst case: one insert per invoke's predecessor block, plus inserts for
 // the merged invoke's successors and deletes for the old edges.
2638 Updates.reserve(2 + 3 * Invokes.size());
2639
 // If the shared normal destination begins with `unreachable`, none of these
 // `invoke`s ever returns normally; we can then give the merged invoke its
 // own trivial unreachable-terminated normal destination.
2640 bool HasNormalDest =
2641 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2642
2643 // Clone one of the invokes into a new basic block.
2644 // Since they are all compatible, it doesn't matter which invoke is cloned.
2645 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2646 InvokeInst *II0 = Invokes.front();
2647 BasicBlock *II0BB = II0->getParent();
2648 BasicBlock *InsertBeforeBlock =
2649 II0->getParent()->getIterator()->getNextNode();
2650 Function *Func = II0BB->getParent();
2651 LLVMContext &Ctx = II0->getContext();
2652
2653 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2654 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2655
2656 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2657 // NOTE: all invokes have the same attributes, so no handling needed.
2658 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2659
2660 if (!HasNormalDest) {
2661 // This set does not have a normal destination,
2662 // so just form a new block with unreachable terminator.
2663 BasicBlock *MergedNormalDest = BasicBlock::Create(
2664 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2665 new UnreachableInst(Ctx, MergedNormalDest);
2666 MergedInvoke->setNormalDest(MergedNormalDest);
2667 }
2668
2669 // The unwind destination, however, remains identical for all invokes here.
2670
2671 return MergedInvoke;
2672 }();
2673
2674 if (DTU) {
2675 // Predecessor blocks that contained these invokes will now branch to
2676 // the new block that contains the merged invoke, ...
2677 for (InvokeInst *II : Invokes)
2678 Updates.push_back(
2679 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2680
2681 // ... which has the new `unreachable` block as normal destination,
2682 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2683 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2684 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2685 SuccBBOfMergedInvoke});
2686
2687 // Since predecessor blocks now unconditionally branch to a new block,
2688 // they no longer branch to their original successors.
2689 for (InvokeInst *II : Invokes)
2690 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2691 Updates.push_back(
2692 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2693 }
2694
2695 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2696
2697 // Form the merged operands for the merged invoke.
 // Operands that differ between the invokes are merged via a PHI in the new
 // block; direct callees and non-data operands are left as cloned.
2698 for (Use &U : MergedInvoke->operands()) {
2699 // Only PHI together the indirect callees and data operands.
2700 if (MergedInvoke->isCallee(&U)) {
2701 if (!IsIndirectCall)
2702 continue;
2703 } else if (!MergedInvoke->isDataOperand(&U))
2704 continue;
2705
2706 // Don't create trivial PHI's with all-identical incoming values.
2707 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2708 return II->getOperand(U.getOperandNo()) != U.get();
2709 });
2710 if (!NeedPHI)
2711 continue;
2712
2713 // Form a PHI out of all the data ops under this index.
2715 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2716 for (InvokeInst *II : Invokes)
2717 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2718
2719 U.set(PN);
2720 }
2721
2722 // We've ensured that each PHI node has compatible (identical) incoming values
2723 // when coming from each of the `invoke`s in the current merge set,
2724 // so update the PHI nodes accordingly.
2725 for (BasicBlock *Succ : successors(MergedInvoke))
2726 AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2727 /*ExistPred=*/Invokes.front()->getParent());
2728
2729 // And finally, replace the original `invoke`s with an unconditional branch
2730 // to the block with the merged `invoke`. Also, give that merged `invoke`
2731 // the merged debugloc of all the original `invoke`s.
2732 DILocation *MergedDebugLoc = nullptr;
2733 for (InvokeInst *II : Invokes) {
2734 // Compute the debug location common to all the original `invoke`s.
2735 if (!MergedDebugLoc)
2736 MergedDebugLoc = II->getDebugLoc();
2737 else
2738 MergedDebugLoc =
2739 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2740
2741 // And replace the old `invoke` with an unconditional branch
2742 // to the block with the merged `invoke`.
2743 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2744 OrigSuccBB->removePredecessor(II->getParent());
2745 BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2746 II->replaceAllUsesWith(MergedInvoke);
2747 II->eraseFromParent();
2748 ++NumInvokesMerged;
2749 }
2750 MergedInvoke->setDebugLoc(MergedDebugLoc);
2751 ++NumInvokeSetsFormed;
2752
2753 if (DTU)
2754 DTU->applyUpdates(Updates);
2755}
2756
2757/// If this block is a `landingpad` exception handling block, categorize all
2758/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2759/// being "mergeable" together, and then merge invokes in each set together.
2760///
2761/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2762/// [...] [...]
2763/// | |
2764/// [invoke0] [invoke1]
2765/// / \ / \
2766/// [cont0] [landingpad] [cont1]
2767/// to:
2768/// [...] [...]
2769/// \ /
2770/// [invoke]
2771/// / \
2772/// [cont] [landingpad]
2773///
2774/// But of course we can only do that if the invokes share the `landingpad`,
2775/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2776/// and the invoked functions are "compatible".
2779 return false;
2780
2781 bool Changed = false;
2782
2783 // FIXME: generalize to all exception handling blocks?
2784 if (!BB->isLandingPad())
2785 return Changed;
2786
 // Partition the predecessor invokes into sets whose members may legally be
 // merged with one another.
2787 CompatibleSets Grouper;
2788
2789 // Record all the predecessors of this `landingpad`. As per verifier,
2790 // the only allowed predecessor is the unwind edge of an `invoke`.
2791 // We want to group "compatible" `invokes` into the same set to be merged.
2792 for (BasicBlock *PredBB : predecessors(BB))
2793 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2794
2795 // And now, merge `invoke`s that were grouped together.
2796 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2797 if (Invokes.size() < 2)
2798 continue;
2799 Changed = true;
2800 MergeCompatibleInvokesImpl(Invokes, DTU);
2801 }
2802
2803 return Changed;
2804}
2805
2806namespace {
2807/// Track ephemeral values, which should be ignored for cost-modelling
2808/// purposes. Requires walking instructions in reverse order.
2809class EphemeralValueTracker {
2811
 // An instruction is ephemeral if it is an assume, or if it has no side
 // effects, is not a terminator, and every user is already known to be
 // ephemeral — hence the requirement to walk instructions in reverse order.
2812 bool isEphemeral(const Instruction *I) {
2813 if (isa<AssumeInst>(I))
2814 return true;
2815 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2816 all_of(I->users(), [&](const User *U) {
2817 return EphValues.count(cast<Instruction>(U));
2818 });
2819 }
2820
2821public:
 // Record I if it is ephemeral; returns true when it was recorded.
2822 bool track(const Instruction *I) {
2823 if (isEphemeral(I)) {
2824 EphValues.insert(I);
2825 return true;
2826 }
2827 return false;
2828 }
2829
 // True if I was previously recorded as ephemeral by track().
2830 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2831};
2832} // namespace
2833
2834/// Determine if we can hoist or sink a sole store instruction out of a
2835/// conditional block.
2836///
2837/// We are looking for code like the following:
2838/// BrBB:
2839/// store i32 %add, i32* %arrayidx2
2840/// ... // No other stores or function calls (we could be calling a memory
2841/// ... // function).
2842/// %cmp = icmp ult %x, %y
2843/// br i1 %cmp, label %EndBB, label %ThenBB
2844/// ThenBB:
2845/// store i32 %add5, i32* %arrayidx2
2846/// br label EndBB
2847/// EndBB:
2848/// ...
2849/// We are going to transform this into:
2850/// BrBB:
2851/// store i32 %add, i32* %arrayidx2
2852/// ... //
2853/// %cmp = icmp ult %x, %y
2854/// %add.add5 = select i1 %cmp, i32 %add, %add5
2855/// store i32 %add.add5, i32* %arrayidx2
2856/// ...
2857///
2858/// \return The pointer to the value of the previous store if the store can be
2859/// hoisted into the predecessor block. 0 otherwise.
2861 BasicBlock *StoreBB, BasicBlock *EndBB) {
2862 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2863 if (!StoreToHoist)
2864 return nullptr;
2865
2866 // Volatile or atomic.
2867 if (!StoreToHoist->isSimple())
2868 return nullptr;
2869
2870 Value *StorePtr = StoreToHoist->getPointerOperand();
2871 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2872
2873 // Look for a store to the same pointer in BrBB.
 // Only scan a small fixed window of instructions above the branch; give up
 // beyond that rather than pay for a long backwards walk.
2874 unsigned MaxNumInstToLookAt = 9;
2875 // Skip pseudo probe intrinsic calls which are not really killing any memory
2876 // accesses.
2877 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2878 if (!MaxNumInstToLookAt)
2879 break;
2880 --MaxNumInstToLookAt;
2881
2882 // Could be calling an instruction that affects memory like free().
2883 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2884 return nullptr;
2885
2886 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2887 // Found the previous store to same location and type. Make sure it is
2888 // simple, to avoid introducing a spurious non-atomic write after an
2889 // atomic write.
2890 if (SI->getPointerOperand() == StorePtr &&
2891 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
2892 SI->getAlign() >= StoreToHoist->getAlign())
2893 // Found the previous store, return its value operand.
2894 return SI->getValueOperand();
2895 return nullptr; // Unknown store.
2896 }
2897
2898 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2899 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2900 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
2901 // Local objects (created by an `alloca` instruction) are always
2902 // writable, so once we are past a read from a location it is valid to
2903 // also write to that same location.
2904 // If the address of the local object never escapes the function, that
2905 // means it's never concurrently read or written, hence moving the store
2906 // from under the condition will not introduce a data race.
2907 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2908 if (AI && !PointerMayBeCaptured(AI, false, true))
2909 // Found a previous load, return it.
2910 return LI;
2911 }
2912 // The load didn't work out, but we may still find a store.
2913 }
2914 }
2915
2916 return nullptr;
2917}
2918
2919/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2920/// converted to selects.
///
/// Accumulates the select costs into `Cost` and counts unfolded ConstantExprs
/// against `SpeculatedInstructions`. NOTE(review): both appear to be in/out
/// reference parameters — confirm against the full signature, which is not
/// visible in this extract.
2922 BasicBlock *EndBB,
2923 unsigned &SpeculatedInstructions,
2925 const TargetTransformInfo &TTI) {
2927 BB->getParent()->hasMinSize()
2930
2931 bool HaveRewritablePHIs = false;
 // Walk the PHIs of the merge block; every non-trivial PHI (different values
 // incoming from BB and ThenBB) will require one select.
2932 for (PHINode &PN : EndBB->phis()) {
2933 Value *OrigV = PN.getIncomingValueForBlock(BB);
2934 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2935
2936 // FIXME: Try to remove some of the duplication with
2937 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2938 if (ThenV == OrigV)
2939 continue;
2940
2941 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2943
2944 // Don't convert to selects if we could remove undefined behavior instead.
2945 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2947 return false;
2948
2949 HaveRewritablePHIs = true;
2950 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2951 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2952 if (!OrigCE && !ThenCE)
2953 continue; // Known cheap (FIXME: Maybe not true for aggregates).
2954
2955 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2956 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2957 InstructionCost MaxCost =
2959 if (OrigCost + ThenCost > MaxCost)
2960 return false;
2961
2962 // Account for the cost of an unfolded ConstantExpr which could end up
2963 // getting expanded into Instructions.
2964 // FIXME: This doesn't account for how many operations are combined in the
2965 // constant expression.
2966 ++SpeculatedInstructions;
2967 if (SpeculatedInstructions > 1)
2968 return false;
2969 }
2970
2971 return HaveRewritablePHIs;
2972}
2973
2974/// Speculate a conditional basic block flattening the CFG.
2975///
2976/// Note that this is a very risky transform currently. Speculating
2977/// instructions like this is most often not desirable. Instead, there is an MI
2978/// pass which can do it with full awareness of the resource constraints.
2979/// However, some cases are "obvious" and we should do directly. An example of
2980/// this is speculating a single, reasonably cheap instruction.
2981///
2982/// There is only one distinct advantage to flattening the CFG at the IR level:
2983/// it makes very common but simplistic optimizations such as are common in
2984/// instcombine and the DAG combiner more powerful by removing CFG edges and
2985/// modeling their effects with easier to reason about SSA value graphs.
2986///
2987///
2988/// An illustration of this transform is turning this IR:
2989/// \code
2990/// BB:
2991/// %cmp = icmp ult %x, %y
2992/// br i1 %cmp, label %EndBB, label %ThenBB
2993/// ThenBB:
2994/// %sub = sub %x, %y
2995/// br label BB2
2996/// EndBB:
2997/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
2998/// ...
2999/// \endcode
3000///
3001/// Into this IR:
3002/// \code
3003/// BB:
3004/// %cmp = icmp ult %x, %y
3005/// %sub = sub %x, %y
3006/// %cond = select i1 %cmp, 0, %sub
3007/// ...
3008/// \endcode
3009///
3010/// \returns true if the conditional block is removed.
3011bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
3012 BasicBlock *ThenBB) {
3013 if (!Options.SpeculateBlocks)
3014 return false;
3015
3016 // Be conservative for now. FP select instruction can often be expensive.
3017 Value *BrCond = BI->getCondition();
3018 if (isa<FCmpInst>(BrCond))
3019 return false;
3020
3021 BasicBlock *BB = BI->getParent();
3022 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3023 InstructionCost Budget =
3025
3026 // If ThenBB is actually on the false edge of the conditional branch, remember
3027 // to swap the select operands later.
3028 bool Invert = false;
3029 if (ThenBB != BI->getSuccessor(0)) {
3030 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3031 Invert = true;
3032 }
3033 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3034
3035 // If the branch is non-unpredictable, and is predicted to *not* branch to
3036 // the `then` block, then avoid speculating it.
3037 if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
3038 uint64_t TWeight, FWeight;
3039 if (extractBranchWeights(*BI, TWeight, FWeight) &&
3040 (TWeight + FWeight) != 0) {
3041 uint64_t EndWeight = Invert ? TWeight : FWeight;
3042 BranchProbability BIEndProb =
3043 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3045 if (BIEndProb >= Likely)
3046 return false;
3047 }
3048 }
3049
3050 // Keep a count of how many times instructions are used within ThenBB when
3051 // they are candidates for sinking into ThenBB. Specifically:
3052 // - They are defined in BB, and
3053 // - They have no side effects, and
3054 // - All of their uses are in ThenBB.
3055 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3056
3057 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3058
3059 unsigned SpeculatedInstructions = 0;
3060 Value *SpeculatedStoreValue = nullptr;
3061 StoreInst *SpeculatedStore = nullptr;
3062 EphemeralValueTracker EphTracker;
 // Walk ThenBB bottom-up, excluding its terminator (drop_end), so ephemeral
 // values can be recognized before the instructions that feed them.
3063 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3064 // Skip debug info.
3065 if (isa<DbgInfoIntrinsic>(I)) {
3066 SpeculatedDbgIntrinsics.push_back(&I);
3067 continue;
3068 }
3069
3070 // Skip pseudo probes. The consequence is we lose track of the branch
3071 // probability for ThenBB, which is fine since the optimization here takes
3072 // place regardless of the branch probability.
3073 if (isa<PseudoProbeInst>(I)) {
3074 // The probe should be deleted so that it will not be over-counted when
3075 // the samples collected on the non-conditional path are counted towards
3076 // the conditional path. We leave it for the counts inference algorithm to
3077 // figure out a proper count for an unknown probe.
3078 SpeculatedDbgIntrinsics.push_back(&I);
3079 continue;
3080 }
3081
3082 // Ignore ephemeral values, they will be dropped by the transform.
3083 if (EphTracker.track(&I))
3084 continue;
3085
3086 // Only speculatively execute a single instruction (not counting the
3087 // terminator) for now.
3088 ++SpeculatedInstructions;
3089 if (SpeculatedInstructions > 1)
3090 return false;
3091
3092 // Don't hoist the instruction if it's unsafe or expensive.
3094 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3095 &I, BB, ThenBB, EndBB))))
3096 return false;
3097 if (!SpeculatedStoreValue &&
3100 return false;
3101
3102 // Store the store speculation candidate.
3103 if (SpeculatedStoreValue)
3104 SpeculatedStore = cast<StoreInst>(&I);
3105
3106 // Do not hoist the instruction if any of its operands are defined but not
3107 // used in BB. The transformation will prevent the operand from
3108 // being sunk into the use block.
3109 for (Use &Op : I.operands()) {
3110 Instruction *OpI = dyn_cast<Instruction>(Op);
3111 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3112 continue; // Not a candidate for sinking.
3113
3114 ++SinkCandidateUseCounts[OpI];
3115 }
3116 }
3117
3118 // Consider any sink candidates which are only used in ThenBB as costs for
3119 // speculation. Note, while we iterate over a DenseMap here, we are summing
3120 // and so iteration order isn't significant.
3121 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3122 if (Inst->hasNUses(Count)) {
3123 ++SpeculatedInstructions;
3124 if (SpeculatedInstructions > 1)
3125 return false;
3126 }
3127
3128 // Check that we can insert the selects and that it's not too expensive to do
3129 // so.
3130 bool Convert = SpeculatedStore != nullptr;
3132 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3133 SpeculatedInstructions,
3134 Cost, TTI);
3135 if (!Convert || Cost > Budget)
3136 return false;
3137
3138 // If we get here, we can hoist the instruction and if-convert.
3139 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3140
3141 // Insert a select of the value of the speculated store.
3142 if (SpeculatedStoreValue) {
3143 IRBuilder<NoFolder> Builder(BI);
3144 Value *OrigV = SpeculatedStore->getValueOperand();
3145 Value *TrueV = SpeculatedStore->getValueOperand();
3146 Value *FalseV = SpeculatedStoreValue;
3147 if (Invert)
3148 std::swap(TrueV, FalseV);
3149 Value *S = Builder.CreateSelect(
3150 BrCond, TrueV, FalseV, "spec.store.select", BI);
3151 SpeculatedStore->setOperand(0, S);
3152 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3153 SpeculatedStore->getDebugLoc());
3154 // The value stored is still conditional, but the store itself is now
3155 // unconditionally executed, so we must be sure that any linked dbg.assign
3156 // intrinsics are tracking the new stored value (the result of the
3157 // select). If we don't, and the store were to be removed by another pass
3158 // (e.g. DSE), then we'd eventually end up emitting a location describing
3159 // the conditional value, unconditionally.
3160 //
3161 // === Before this transformation ===
3162 // pred:
3163 // store %one, %x.dest, !DIAssignID !1
3164 // dbg.assign %one, "x", ..., !1, ...
3165 // br %cond if.then
3166 //
3167 // if.then:
3168 // store %two, %x.dest, !DIAssignID !2
3169 // dbg.assign %two, "x", ..., !2, ...
3170 //
3171 // === After this transformation ===
3172 // pred:
3173 // store %one, %x.dest, !DIAssignID !1
3174 // dbg.assign %one, "x", ..., !1
3175 // ...
3176 // %merge = select %cond, %two, %one
3177 // store %merge, %x.dest, !DIAssignID !2
3178 // dbg.assign %merge, "x", ..., !2
3179 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3180 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3181 DbgAssign->replaceVariableLocationOp(OrigV, S);
3182 };
3183 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3184 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3185 }
3186
3187 // Metadata can be dependent on the condition we are hoisting above.
3188 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3189 // to avoid making it appear as if the condition is a constant, which would
3190 // be misleading while debugging.
3191 // Similarly strip attributes that maybe dependent on condition we are
3192 // hoisting above.
3193 for (auto &I : make_early_inc_range(*ThenBB)) {
3194 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3195 // Don't update the DILocation of dbg.assign intrinsics.
3196 if (!isa<DbgAssignIntrinsic>(&I))
3197 I.setDebugLoc(DebugLoc());
3198 }
3199 I.dropUBImplyingAttrsAndMetadata();
3200
3201 // Drop ephemeral values.
3202 if (EphTracker.contains(&I)) {
3203 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3204 I.eraseFromParent();
3205 }
3206 }
3207
3208 // Hoist the instructions.
3209 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3210 // to these instructions, in the same way that dbg.value intrinsics are
3211 // dropped at the end of this block.
3212 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3213 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3214 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3215 // equivalent).
3216 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3217 !DVR || !DVR->isDbgAssign())
3218 It.dropOneDbgRecord(&DR);
 // Move everything in ThenBB except its terminator up into BB, immediately
 // before the branch.
3219 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3220 std::prev(ThenBB->end()));
3221
3222 // Insert selects and rewrite the PHI operands.
3223 IRBuilder<NoFolder> Builder(BI);
3224 for (PHINode &PN : EndBB->phis()) {
3225 unsigned OrigI = PN.getBasicBlockIndex(BB);
3226 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3227 Value *OrigV = PN.getIncomingValue(OrigI);
3228 Value *ThenV = PN.getIncomingValue(ThenI);
3229
3230 // Skip PHIs which are trivial.
3231 if (OrigV == ThenV)
3232 continue;
3233
3234 // Create a select whose true value is the speculatively executed value and
3235 // false value is the pre-existing value. Swap them if the branch
3236 // destinations were inverted.
3237 Value *TrueV = ThenV, *FalseV = OrigV;
3238 if (Invert)
3239 std::swap(TrueV, FalseV);
3240 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3241 PN.setIncomingValue(OrigI, V);
3242 PN.setIncomingValue(ThenI, V);
3243 }
3244
3245 // Remove speculated dbg intrinsics.
3246 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3247 // dbg value for the different flows and inserting it after the select.
3248 for (Instruction *I : SpeculatedDbgIntrinsics) {
3249 // We still want to know that an assignment took place so don't remove
3250 // dbg.assign intrinsics.
3251 if (!isa<DbgAssignIntrinsic>(I))
3252 I->eraseFromParent();
3253 }
3254
3255 ++NumSpeculations;
3256 return true;
3257}
3258
3259/// Return true if we can thread a branch across this block.
/// The block qualifies when it contains no noduplicate/convergent calls, has
/// at most MaxSmallBlockSize non-PHI, non-ephemeral instructions, and defines
/// no values that are used outside the block itself.
3261 int Size = 0;
3262 EphemeralValueTracker EphTracker;
3263
3264 // Walk the loop in reverse so that we can identify ephemeral values properly
3265 // (values only feeding assumes).
3266 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3267 // Can't fold blocks that contain noduplicate or convergent calls.
3268 if (CallInst *CI = dyn_cast<CallInst>(&I))
3269 if (CI->cannotDuplicate() || CI->isConvergent())
3270 return false;
3271
3272 // Ignore ephemeral values which are deleted during codegen.
3273 // We will delete Phis while threading, so Phis should not be accounted in
3274 // block's size.
3275 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3276 if (Size++ > MaxSmallBlockSize)
3277 return false; // Don't clone large BB's.
3278 }
3279
3280 // We can only support instructions that do not define values that are
3281 // live outside of the current basic block.
3282 for (User *U : I.users()) {
3283 Instruction *UI = cast<Instruction>(U);
3284 if (UI->getParent() != BB || isa<PHINode>(UI))
3285 return false;
3286 }
3287
3288 // Looks ok, continue checking.
3289 }
3290
3291 return true;
3292}
3293
3295 BasicBlock *To) {
 // Determine whether the condition value V has a known constant boolean
 // value on the CFG edge From->To (because From's conditional branch on V
 // takes a distinct successor for each value); return that constant, or
 // null when the value is not known on this edge.
3296 // Don't look past the block defining the value, we might get the value from
3297 // a previous loop iteration.
3298 auto *I = dyn_cast<Instruction>(V);
3299 if (I && I->getParent() == To)
3300 return nullptr;
3301
3302 // We know the value if the From block branches on it.
3303 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3304 if (BI && BI->isConditional() && BI->getCondition() == V &&
3305 BI->getSuccessor(0) != BI->getSuccessor(1))
3306 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3308
3309 return nullptr;
3310}
3311
3312/// If we have a conditional branch on something for which we know the constant
3313/// value in predecessors (e.g. a phi node in the current block), thread edges
3314/// from the predecessor to their ultimate destination.
///
/// Returns false when nothing changed, true when the branch was folded away,
/// and std::nullopt to signal "changed — re-run this fold on the block".
3315static std::optional<bool>
3317 const DataLayout &DL,
3318 AssumptionCache *AC) {
3320 BasicBlock *BB = BI->getParent();
3321 Value *Cond = BI->getCondition();
3322 PHINode *PN = dyn_cast<PHINode>(Cond);
3323 if (PN && PN->getParent() == BB) {
3324 // Degenerate case of a single entry PHI.
3325 if (PN->getNumIncomingValues() == 1) {
3327 return true;
3328 }
3329
 // Gather, per known constant condition value, the set of predecessor
 // blocks from which control arrives with that value.
3330 for (Use &U : PN->incoming_values())
3331 if (auto *CB = dyn_cast<ConstantInt>(U))
3332 KnownValues[CB].insert(PN->getIncomingBlock(U));
3333 } else {
3334 for (BasicBlock *Pred : predecessors(BB)) {
3335 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3336 KnownValues[CB].insert(Pred);
3337 }
3338 }
3339
3340 if (KnownValues.empty())
3341 return false;
3342
3343 // Now we know that this block has multiple preds and two succs.
3344 // Check that the block is small enough and values defined in the block are
3345 // not used outside of it.
3347 return false;
3348
3349 for (const auto &Pair : KnownValues) {
3350 // Okay, we now know that all edges from PredBB should be revectored to
3351 // branch to RealDest.
3352 ConstantInt *CB = Pair.first;
3353 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3354 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3355
3356 if (RealDest == BB)
3357 continue; // Skip self loops.
3358
3359 // Skip if the predecessor's terminator is an indirect branch.
3360 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3361 return isa<IndirectBrInst>(PredBB->getTerminator());
3362 }))
3363 continue;
3364
3365 LLVM_DEBUG({
3366 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3367 << " has value " << *Pair.first << " in predecessors:\n";
3368 for (const BasicBlock *PredBB : Pair.second)
3369 dbgs() << " " << PredBB->getName() << "\n";
3370 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3371 });
3372
3373 // Split the predecessors we are threading into a new edge block. We'll
3374 // clone the instructions into this block, and then redirect it to RealDest.
3375 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3376
3377 // TODO: These just exist to reduce test diff, we can drop them if we like.
3378 EdgeBB->setName(RealDest->getName() + ".critedge");
3379 EdgeBB->moveBefore(RealDest);
3380
3381 // Update PHI nodes.
3382 AddPredecessorToBlock(RealDest, EdgeBB, BB);
3383
3384 // BB may have instructions that are being threaded over. Clone these
3385 // instructions into EdgeBB. We know that there will be no uses of the
3386 // cloned instructions outside of EdgeBB.
3387 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3388 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
 // Within EdgeBB the branch condition is known to be the constant CB.
3389 TranslateMap[Cond] = CB;
3390
3391 // RemoveDIs: track instructions that we optimise away while folding, so
3392 // that we can copy DbgVariableRecords from them later.
3393 BasicBlock::iterator SrcDbgCursor = BB->begin();
3394 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3395 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3396 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3397 continue;
3398 }
3399 // Clone the instruction.
3400 Instruction *N = BBI->clone();
3401 // Insert the new instruction into its new home.
3402 N->insertInto(EdgeBB, InsertPt);
3403
3404 if (BBI->hasName())
3405 N->setName(BBI->getName() + ".c");
3406
3407 // Update operands due to translation.
3408 for (Use &Op : N->operands()) {
3409 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3410 if (PI != TranslateMap.end())
3411 Op = PI->second;
3412 }
3413
3414 // Check for trivial simplification.
3415 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3416 if (!BBI->use_empty())
3417 TranslateMap[&*BBI] = V;
3418 if (!N->mayHaveSideEffects()) {
3419 N->eraseFromParent(); // Instruction folded away, don't need actual
3420 // inst
3421 N = nullptr;
3422 }
3423 } else {
3424 if (!BBI->use_empty())
3425 TranslateMap[&*BBI] = N;
3426 }
3427 if (N) {
3428 // Copy all debug-info attached to instructions from the last we
3429 // successfully clone, up to this instruction (they might have been
3430 // folded away).
3431 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3432 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3433 SrcDbgCursor = std::next(BBI);
3434 // Clone debug-info on this instruction too.
3435 N->cloneDebugInfoFrom(&*BBI);
3436
3437 // Register the new instruction with the assumption cache if necessary.
3438 if (auto *Assume = dyn_cast<AssumeInst>(N))
3439 if (AC)
3440 AC->registerAssumption(Assume);
3441 }
3442 }
3443
3444 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3445 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3446 InsertPt->cloneDebugInfoFrom(BI);
3447
 // Finally redirect EdgeBB at RealDest instead of BB.
3448 BB->removePredecessor(EdgeBB);
3449 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3450 EdgeBI->setSuccessor(0, RealDest);
3451 EdgeBI->setDebugLoc(BI->getDebugLoc());
3452
3453 if (DTU) {
3455 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3456 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3457 DTU->applyUpdates(Updates);
3458 }
3459
3460 // For simplicity, we created a separate basic block for the edge. Merge
3461 // it back into the predecessor if possible. This not only avoids
3462 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3463 // bypass the check for trivial cycles above.
3464 MergeBlockIntoPredecessor(EdgeBB, DTU);
3465
3466 // Signal repeat, simplifying any other constants.
3467 return std::nullopt;
3468 }
3469
3470 return false;
3471}
3472
3474 DomTreeUpdater *DTU,
3475 const DataLayout &DL,
3476 AssumptionCache *AC) {
 // Drive the Impl above to a fixed point: std::nullopt means "changed,
 // run again"; a definite true/false terminates the loop.
3477 std::optional<bool> Result;
3478 bool EverChanged = false;
3479 do {
3480 // Note that None means "we changed things, but recurse further."
3481 Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3482 EverChanged |= Result == std::nullopt || *Result;
3483 } while (Result == std::nullopt);
3484 return EverChanged;
3485}
3486
3487 /// Given a BB that starts with the specified two-entry PHI node,
3488 /// see if we can eliminate it.
// Converts a simple if-diamond/triangle feeding a two-entry PHI into select
// instructions, hoisting the cheap conditional code into the dominating block.
3490 DomTreeUpdater *DTU, const DataLayout &DL) {
3491 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3492 // statement", which has a very simple dominance structure. Basically, we
3493 // are trying to find the condition that is being branched on, which
3494 // subsequently causes this merge to happen. We really want control
3495 // dependence information for this check, but simplifycfg can't keep it up
3496 // to date, and this catches most of the cases we care about anyway.
3497 BasicBlock *BB = PN->getParent();
3498
3499 BasicBlock *IfTrue, *IfFalse;
3500 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3501 if (!DomBI)
3502 return false;
3503 Value *IfCond = DomBI->getCondition();
3504 // Don't bother if the branch will be constant folded trivially.
3505 if (isa<ConstantInt>(IfCond))
3506 return false;
3507
3508 BasicBlock *DomBlock = DomBI->getParent();
// NOTE(review): the declaration of IfBlocks and the head of the copy_if call
// (original lines 3509-3510) are missing from this extraction; IfBlocks
// collects the PHI's predecessors that are pure "then"/"else" blocks.
3511 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3512 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3513 });
3514 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3515 "Will have either one or two blocks to speculate.");
3516
3517 // If the branch is non-unpredictable, see if we either predictably jump to
3518 // the merge bb (if we have only a single 'then' block), or if we predictably
3519 // jump to one specific 'then' block (if we have two of them).
3520 // It isn't beneficial to speculatively execute the code
3521 // from the block that we know is predictably not entered.
3522 if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
3523 uint64_t TWeight, FWeight;
3524 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3525 (TWeight + FWeight) != 0) {
3526 BranchProbability BITrueProb =
3527 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3529 BranchProbability BIFalseProb = BITrueProb.getCompl();
3530 if (IfBlocks.size() == 1) {
3531 BranchProbability BIBBProb =
3532 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3533 if (BIBBProb >= Likely)
3534 return false;
3535 } else {
3536 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3537 return false;
3538 }
3539 }
3540 }
3541
3542 // Don't try to fold an unreachable block. For example, the phi node itself
3543 // can't be the candidate if-condition for a select that we want to form.
3544 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3545 if (IfCondPhiInst->getParent() == BB)
3546 return false;
3547
3548 // Okay, we found that we can merge this two-entry phi node into a select.
3549 // Doing so would require us to fold *all* two entry phi nodes in this block.
3550 // At some point this becomes non-profitable (particularly if the target
3551 // doesn't support cmov's). Only do this transformation if there are two or
3552 // fewer PHI nodes in this block.
3553 unsigned NumPhis = 0;
3554 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3555 if (NumPhis > 2)
3556 return false;
3557
3558 // Loop over the PHI's seeing if we can promote them all to select
3559 // instructions. While we are at it, keep track of the instructions
3560 // that need to be moved to the dominating block.
3561 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3563 InstructionCost Budget =
3565
3566 bool Changed = false;
3567 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3568 PHINode *PN = cast<PHINode>(II++);
// Trivially-simplifiable PHIs are folded eagerly instead of being counted
// against the speculation budget.
3569 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3570 PN->replaceAllUsesWith(V);
3571 PN->eraseFromParent();
3572 Changed = true;
3573 continue;
3574 }
3575
// Both incoming values must be computable above the merge point within the
// remaining speculation budget; otherwise keep the control flow.
3576 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
3577 Cost, Budget, TTI) ||
3578 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
3579 Cost, Budget, TTI))
3580 return Changed;
3581 }
3582
3583 // If we folded the first phi, PN dangles at this point. Refresh it. If
3584 // we ran out of PHIs then we simplified them all.
3585 PN = dyn_cast<PHINode>(BB->begin());
3586 if (!PN)
3587 return true;
3588
3589 // Return true if at least one of these is a 'not', and another is either
3590 // a 'not' too, or a constant.
3591 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3592 if (!match(V0, m_Not(m_Value())))
3593 std::swap(V0, V1);
3594 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3595 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3596 };
3597
3598 // Don't fold i1 branches on PHIs which contain binary operators or
3599 // (possibly inverted) select form of or/ands, unless one of
3600 // the incoming values is an 'not' and another one is freely invertible.
3601 // These can often be turned into switches and other things.
3602 auto IsBinOpOrAnd = [](Value *V) {
3603 return match(
3604 V, m_CombineOr(
3605 m_BinOp(),
// NOTE(review): the select-form or/and matchers (original lines 3606-3607)
// are missing from this extraction.
3608 };
3609 if (PN->getType()->isIntegerTy(1) &&
3610 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3611 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3612 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3613 PN->getIncomingValue(1)))
3614 return Changed;
3615
3616 // If all PHI nodes are promotable, check to make sure that all instructions
3617 // in the predecessor blocks can be promoted as well. If not, we won't be able
3618 // to get rid of the control flow, so it's not worth promoting to select
3619 // instructions.
3620 for (BasicBlock *IfBlock : IfBlocks)
3621 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3622 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3623 // This is not an aggressive instruction that we can promote.
3624 // Because of this, we won't be able to get rid of the control flow, so
3625 // the xform is not worth it.
3626 return Changed;
3627 }
3628
// If either of the blocks has its address taken, we can't do this fold.
3630 if (any_of(IfBlocks,
3631 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3632 return Changed;
3633
3634 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
3635 << " T: " << IfTrue->getName()
3636 << " F: " << IfFalse->getName() << "\n");
3637
3638 // If we can still promote the PHI nodes after this gauntlet of tests,
3639 // do all of the PHI's now.
3640
3641 // Move all 'aggressive' instructions, which are defined in the
3642 // conditional parts of the if's up to the dominating block.
3643 for (BasicBlock *IfBlock : IfBlocks)
3644 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3645
3646 IRBuilder<NoFolder> Builder(DomBI);
3647 // Propagate fast-math-flags from phi nodes to replacement selects.
3648 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3649 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3650 if (isa<FPMathOperator>(PN))
3651 Builder.setFastMathFlags(PN->getFastMathFlags());
3652
3653 // Change the PHI node into a select instruction.
3654 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3655 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3656
3657 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3658 PN->replaceAllUsesWith(Sel);
3659 Sel->takeName(PN);
3660 PN->eraseFromParent();
3661 }
3662
3663 // At this point, all IfBlocks are empty, so our if statement
3664 // has been flattened. Change DomBlock to jump directly to our new block to
3665 // avoid other simplifycfg's kicking in on the diamond.
3666 Builder.CreateBr(BB);
3667
// DomBlock now branches straight to BB; record the CFG edge changes for the
// (lazy) DomTree updater before deleting the conditional branch.
3669 if (DTU) {
3670 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3671 for (auto *Successor : successors(DomBlock))
3672 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3673 }
3674
3675 DomBI->eraseFromParent();
3676 if (DTU)
3677 DTU->applyUpdates(Updates);
3678
3679 return true;
3680 }
3681
// Create either a plain bitwise and/or, or its select-based "logical" form.
// The bitwise form is only used when RHS being poison implies LHS is poison,
// so combining them cannot introduce new poison; otherwise the short-circuit
// select form is required.
3683 Instruction::BinaryOps Opc, Value *LHS,
3684 Value *RHS, const Twine &Name = "") {
3685 // Try to relax logical op to binary op.
3686 if (impliesPoison(RHS, LHS))
3687 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3688 if (Opc == Instruction::And)
3689 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3690 if (Opc == Instruction::Or)
3691 return Builder.CreateLogicalOr(LHS, RHS, Name);
// Only And/Or are valid here; anything else is a caller bug.
3692 llvm_unreachable("Invalid logical opcode");
3693 }
3694
3695 /// Return true if either PBI or BI has branch weight available, and store
3696 /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3697 /// not have branch weight, use 1:1 as its weight.
3699 uint64_t &PredTrueWeight,
3700 uint64_t &PredFalseWeight,
3701 uint64_t &SuccTrueWeight,
3702 uint64_t &SuccFalseWeight) {
3703 bool PredHasWeights =
3704 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3705 bool SuccHasWeights =
3706 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3707 if (PredHasWeights || SuccHasWeights) {
// Fill in a neutral 1:1 weight for whichever branch lacked metadata so the
// caller can always do arithmetic on all four values.
3708 if (!PredHasWeights)
3709 PredTrueWeight = PredFalseWeight = 1;
3710 if (!SuccHasWeights)
3711 SuccTrueWeight = SuccFalseWeight = 1;
3712 return true;
3713 } else {
// Neither branch has profile data; outputs are unspecified.
3714 return false;
3715 }
3716 }
3717
3718 /// Determine if the two branches share a common destination and deduce a glue
3719 /// that joins the branches' conditions to arrive at the common destination if
3720 /// that would be profitable.
// Returns {common successor, And/Or opcode, whether PBI's condition must be
// inverted}, or nullopt when folding is impossible or unprofitable.
3721 static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3723 const TargetTransformInfo *TTI) {
3724 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3725 "Both blocks must end with a conditional branches.");
3727 "PredBB must be a predecessor of BB.");
3728
3729 // We have the potential to fold the conditions together, but if the
3730 // predecessor branch is predictable, we may not want to merge them.
3731 uint64_t PTWeight, PFWeight;
// Both probabilities default-construct to "unknown"; they are only filled in
// when profile metadata is present and usable.
3732 BranchProbability PBITrueProb, Likely;
3733 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3734 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3735 (PTWeight + PFWeight) != 0) {
3736 PBITrueProb =
3737 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
// NOTE(review): the initialization of Likely (original line 3738, derived
// from TTI's predictable-branch threshold) is missing from this extraction.
3739 }
3740
// Four cases, keyed on which successors coincide. The bool in the result
// says whether PBI's condition needs inversion before gluing.
3741 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3742 // Speculate the 2nd condition unless the 1st is probably true.
3743 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3744 return {{BI->getSuccessor(0), Instruction::Or, false}};
3745 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3746 // Speculate the 2nd condition unless the 1st is probably false.
3747 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3748 return {{BI->getSuccessor(1), Instruction::And, false}};
3749 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3750 // Speculate the 2nd condition unless the 1st is probably true.
3751 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3752 return {{BI->getSuccessor(1), Instruction::And, true}};
3753 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3754 // Speculate the 2nd condition unless the 1st is probably false.
3755 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3756 return {{BI->getSuccessor(0), Instruction::Or, true}};
3757 }
3758 return std::nullopt;
3759 }
3760
// Fold BI (conditional branch in BB) into its predecessor's conditional
// branch PBI, gluing the two conditions with and/or as decided by
// shouldFoldCondBranchesToCommonDestination. Updates branch weights, the
// DomTree, MemorySSA and debug info along the way. Always returns true.
3762 DomTreeUpdater *DTU,
3763 MemorySSAUpdater *MSSAU,
3764 const TargetTransformInfo *TTI) {
3765 BasicBlock *BB = BI->getParent();
3766 BasicBlock *PredBlock = PBI->getParent();
3767
3768 // Determine if the two branches share a common destination.
3769 BasicBlock *CommonSucc;
3771 bool InvertPredCond;
3772 std::tie(CommonSucc, Opc, InvertPredCond) =
3774
3775 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3776
3777 IRBuilder<> Builder(PBI);
3778 // The builder is used to create instructions to eliminate the branch in BB.
3779 // If BB's terminator has !annotation metadata, add it to the new
3780 // instructions.
3782 {LLVMContext::MD_annotation});
3783
3784 // If we need to invert the condition in the pred block to match, do so now.
3785 if (InvertPredCond) {
3786 InvertBranch(PBI, Builder);
3787 }
3788
// After a possible inversion, PBI's successor 0 leads into BB; the other BI
// successor is the one only reachable through BB.
3789 BasicBlock *UniqueSucc =
3790 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3791
3792 // Before cloning instructions, notify the successor basic block that it
3793 // is about to have a new predecessor. This will update PHI nodes,
3794 // which will allow us to update live-out uses of bonus instructions.
3795 AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3796
3797 // Try to update branch weights.
3798 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3799 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3800 SuccTrueWeight, SuccFalseWeight)) {
3801 SmallVector<uint64_t, 8> NewWeights;
3802
3803 if (PBI->getSuccessor(0) == BB) {
3804 // PBI: br i1 %x, BB, FalseDest
3805 // BI: br i1 %y, UniqueSucc, FalseDest
3806 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3807 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3808 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3809 // TrueWeight for PBI * FalseWeight for BI.
3810 // We assume that total weights of a BranchInst can fit into 32 bits.
3811 // Therefore, we will not have overflow using 64-bit arithmetic.
3812 NewWeights.push_back(PredFalseWeight *
3813 (SuccFalseWeight + SuccTrueWeight) +
3814 PredTrueWeight * SuccFalseWeight);
3815 } else {
3816 // PBI: br i1 %x, TrueDest, BB
3817 // BI: br i1 %y, TrueDest, UniqueSucc
3818 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3819 // FalseWeight for PBI * TrueWeight for BI.
3820 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3821 PredFalseWeight * SuccTrueWeight);
3822 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3823 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3824 }
3825
3826 // Halve the weights if any of them cannot fit in an uint32_t
3827 FitWeights(NewWeights);
3828
3829 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3830 setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3831
3832 // TODO: If BB is reachable from all paths through PredBlock, then we
3833 // could replace PBI's branch probabilities with BI's.
3834 } else
// No usable profile on either branch: drop PBI's stale !prof metadata.
3835 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3836
3837 // Now, update the CFG.
3838 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3839
3840 if (DTU)
3841 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3842 {DominatorTree::Delete, PredBlock, BB}});
3843
3844 // If BI was a loop latch, it may have had associated loop metadata.
3845 // We need to copy it to the new latch, that is, PBI.
3846 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3847 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3848
3849 ValueToValueMapTy VMap; // maps original values to cloned values
3851
3852 Module *M = BB->getModule();
3853
// Clone/remap debug records from BB's terminator onto the predecessor's
// terminator when the new debug-info format is in use.
3854 if (PredBlock->IsNewDbgInfoFormat) {
3855 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
3856 for (DbgVariableRecord &DVR :
3858 RemapDbgVariableRecord(M, &DVR, VMap,
3860 }
3861 }
3862
3863 // Now that the Cond was cloned into the predecessor basic block,
3864 // or/and the two conditions together.
3865 Value *BICond = VMap[BI->getCondition()];
3866 PBI->setCondition(
3867 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3868
3869 ++NumFoldBranchToCommonDest;
3870 return true;
3871 }
3872
3873/// Return if an instruction's type or any of its operands' types are a vector
3874/// type.
3875static bool isVectorOp(Instruction &I) {
3876 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3877 return U->getType()->isVectorTy();
3878 });
3879}
3880
3881 /// If this basic block is simple enough, and if a predecessor branches to us
3882 /// and one of our successors, fold the block into the predecessor and use
3883 /// logical operations to pick the right destination.
// Gatekeeper for performBranchToCommonDestFolding: checks legality, collects
// candidate predecessors, and enforces the bonus-instruction budget before
// doing the actual fold.
3885 MemorySSAUpdater *MSSAU,
3886 const TargetTransformInfo *TTI,
3887 unsigned BonusInstThreshold) {
3888 // If this block ends with an unconditional branch,
3889 // let SpeculativelyExecuteBB() deal with it.
3890 if (!BI->isConditional())
3891 return false;
3892
3893 BasicBlock *BB = BI->getParent();
3897
3898 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3899
// The condition must be a single-use cmp/binop/select defined in this block,
// so that it can be cloned into the predecessor and the original removed.
3900 if (!Cond ||
3901 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3902 !isa<SelectInst>(Cond)) ||
3903 Cond->getParent() != BB || !Cond->hasOneUse())
3904 return false;
3905
3906 // Finally, don't infinitely unroll conditional loops.
3907 if (is_contained(successors(BB), BB))
3908 return false;
3909
3910 // With which predecessors will we want to deal with?
3912 for (BasicBlock *PredBlock : predecessors(BB)) {
3913 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3914
3915 // Check that we have two conditional branches. If there is a PHI node in
3916 // the common successor, verify that the same value flows in from both
3917 // blocks.
3918 if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3919 continue;
3920
3921 // Determine if the two branches share a common destination.
3922 BasicBlock *CommonSucc;
3924 bool InvertPredCond;
3925 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3926 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3927 else
3928 continue;
3929
3930 // Check the cost of inserting the necessary logic before performing the
3931 // transformation.
3932 if (TTI) {
3933 Type *Ty = BI->getCondition()->getType();
// Inverting the predecessor's condition may require an extra xor unless the
// inversion can be folded into a single-use compare.
3935 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3936 !isa<CmpInst>(PBI->getCondition())))
3937 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3938
3940 continue;
3941 }
3942
3943 // Ok, we do want to deal with this predecessor. Record it.
3944 Preds.emplace_back(PredBlock);
3945 }
3946
3947 // If there aren't any predecessors into which we can fold,
3948 // don't bother checking the cost.
3949 if (Preds.empty())
3950 return false;
3951
3952 // Only allow this transformation if computing the condition doesn't involve
3953 // too many instructions and these involved instructions can be executed
3954 // unconditionally. We denote all involved instructions except the condition
3955 // as "bonus instructions", and only allow this transformation when the
3956 // number of the bonus instructions we'll need to create when cloning into
3957 // each predecessor does not exceed a certain threshold.
3958 unsigned NumBonusInsts = 0;
3959 bool SawVectorOp = false;
3960 const unsigned PredCount = Preds.size();
3961 for (Instruction &I : *BB) {
3962 // Don't check the branch condition comparison itself.
3963 if (&I == Cond)
3964 continue;
3965 // Ignore dbg intrinsics, and the terminator.
3966 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3967 continue;
3968 // I must be safe to execute unconditionally.
3970 return false;
3971 SawVectorOp |= isVectorOp(I);
3972
3973 // Account for the cost of duplicating this instruction into each
3974 // predecessor. Ignore free instructions.
3975 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
3977 NumBonusInsts += PredCount;
3978
3979 // Early exits once we reach the limit.
3980 if (NumBonusInsts >
3981 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3982 return false;
3983 }
3984
// Every use of a bonus instruction must already be in block-closed SSA form
// (used only later in BB, or via a PHI in a successor), or we would have to
// rewrite uses after cloning.
3985 auto IsBCSSAUse = [BB, &I](Use &U) {
3986 auto *UI = cast<Instruction>(U.getUser());
3987 if (auto *PN = dyn_cast<PHINode>(UI))
3988 return PN->getIncomingBlock(U) == BB;
3989 return UI->getParent() == BB && I.comesBefore(UI);
3990 };
3991
3992 // Does this instruction require rewriting of uses?
3993 if (!all_of(I.uses(), IsBCSSAUse))
3994 return false;
3995 }
3996 if (NumBonusInsts >
3997 BonusInstThreshold *
3998 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3999 return false;
4000
4001 // Ok, we have the budget. Perform the transformation.
// Only one predecessor is folded per invocation; SimplifyCFG re-runs this
// function to pick up the remaining candidates.
4002 for (BasicBlock *PredBlock : Preds) {
4003 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4004 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4005 }
4006 return false;
4007 }
4008
4009 // If there is only one store in BB1 and BB2, return it, otherwise return
4010 // nullptr.
// Either block pointer may be null (modelling a triangle's missing arm); a
// null block simply contributes no stores.
4012 StoreInst *S = nullptr;
4013 for (auto *BB : {BB1, BB2}) {
4014 if (!BB)
4015 continue;
4016 for (auto &I : *BB)
4017 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4018 if (S)
4019 // Multiple stores seen.
4020 return nullptr;
4021 else
4022 S = SI;
4023 }
4024 }
4025 return S;
4026 }
4027
// Make V usable in BB's single successor by finding or creating a PHI there.
// Returns either V itself (when no PHI is needed), an existing matching PHI,
// or a newly created one.
4029 Value *AlternativeV = nullptr) {
4030 // PHI is going to be a PHI node that allows the value V that is defined in
4031 // BB to be referenced in BB's only successor.
4032 //
4033 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4034 // doesn't matter to us what the other operand is (it'll never get used). We
4035 // could just create a new PHI with an undef incoming value, but that could
4036 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4037 // other PHI. So here we directly look for some PHI in BB's successor with V
4038 // as an incoming operand. If we find one, we use it, else we create a new
4039 // one.
4040 //
4041 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4042 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4043 // where OtherBB is the single other predecessor of BB's only successor.
4044 PHINode *PHI = nullptr;
4045 BasicBlock *Succ = BB->getSingleSuccessor();
4046
4047 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4048 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4049 PHI = cast<PHINode>(I);
4050 if (!AlternativeV)
4051 break;
4052
// With an AlternativeV, the other predecessor's incoming value must match
// too; otherwise this PHI is not the one we want.
4053 assert(Succ->hasNPredecessors(2));
4054 auto PredI = pred_begin(Succ);
4055 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4056 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4057 break;
4058 PHI = nullptr;
4059 }
4060 if (PHI)
4061 return PHI;
4062
4063 // If V is not an instruction defined in BB, just return it.
4064 if (!AlternativeV &&
4065 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4066 return V;
4067
4068 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4069 PHI->insertBefore(Succ->begin());
4070 PHI->addIncoming(V, BB);
// Other predecessors get AlternativeV when given, otherwise a poison
// placeholder whose value is never observed.
4071 for (BasicBlock *PredBB : predecessors(Succ))
4072 if (PredBB != BB)
4073 PHI->addIncoming(
4074 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4075 return PHI;
4076 }
4077
// Sink the unique conditional stores to Address out of the P and Q diamonds
// into PostBB, predicated on the disjunction of the two branch conditions.
// Returns true if the stores were merged.
4079 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4080 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4081 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4082 // For every pointer, there must be exactly two stores, one coming from
4083 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4084 // store (to any address) in PTB,PFB or QTB,QFB.
4085 // FIXME: We could relax this restriction with a bit more work and performance
4086 // testing.
4087 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4088 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4089 if (!PStore || !QStore)
4090 return false;
4091
4092 // Now check the stores are compatible.
4093 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4094 PStore->getValueOperand()->getType() !=
4095 QStore->getValueOperand()->getType())
4096 return false;
4097
4098 // Check that sinking the store won't cause program behavior changes. Sinking
4099 // the store out of the Q blocks won't change any behavior as we're sinking
4100 // from a block to its unconditional successor. But we're moving a store from
4101 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4102 // So we need to check that there are no aliasing loads or stores in
4103 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4104 // operations between PStore and the end of its parent block.
4105 //
4106 // The ideal way to do this is to query AliasAnalysis, but we don't
4107 // preserve AA currently so that is dangerous. Be super safe and just
4108 // check there are no other memory operations at all.
4109 for (auto &I : *QFB->getSinglePredecessor())
4110 if (I.mayReadOrWriteMemory())
4111 return false;
4112 for (auto &I : *QFB)
4113 if (&I != QStore && I.mayReadOrWriteMemory())
4114 return false;
4115 if (QTB)
4116 for (auto &I : *QTB)
4117 if (&I != QStore && I.mayReadOrWriteMemory())
4118 return false;
4119 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4120 I != E; ++I)
4121 if (&*I != PStore && I->mayReadOrWriteMemory())
4122 return false;
4123
4124 // If we're not in aggressive mode, we only optimize if we have some
4125 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4126 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4127 if (!BB)
4128 return true;
4129 // Heuristic: if the block can be if-converted/phi-folded and the
4130 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4131 // thread this store.
4133 InstructionCost Budget =
4135 for (auto &I : BB->instructionsWithoutDebug(false)) {
4136 // Consider terminator instruction to be free.
4137 if (I.isTerminator())
4138 continue;
4139 // If this is one the stores that we want to speculate out of this BB,
4140 // then don't count it's cost, consider it to be free.
4141 if (auto *S = dyn_cast<StoreInst>(&I))
// NOTE(review): upstream uses llvm::is_contained here; "if (llvm::find(...))"
// as shown would not convert to bool — likely an artifact of this extraction.
4142 if (llvm::find(FreeStores, S))
4143 continue;
// Else, we have a white-list of instructions that we are okay speculating.
4145 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4146 return false; // Not in white-list - not worthwhile folding.
4147 // And finally, if this is a non-free instruction that we are okay
4148 // speculating, ensure that we consider the speculation budget.
4149 Cost +=
4151 if (Cost > Budget)
4152 return false; // Eagerly refuse to fold as soon as we're out of budget.
4153 }
4154 assert(Cost <= Budget &&
4155 "When we run out of budget we will eagerly return from within the "
4156 "per-instruction loop.");
4157 return true;
4158 };
4159
4160 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4162 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4163 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4164 return false;
4165
4166 // If PostBB has more than two predecessors, we need to split it so we can
4167 // sink the store.
4168 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4169 // We know that QFB's only successor is PostBB. And QFB has a single
4170 // predecessor. If QTB exists, then its only successor is also PostBB.
4171 // If QTB does not exist, then QFB's only predecessor has a conditional
4172 // branch to QFB and PostBB.
4173 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4174 BasicBlock *NewBB =
4175 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4176 if (!NewBB)
4177 return false;
4178 PostBB = NewBB;
4179 }
4180
4181 // OK, we're going to sink the stores to PostBB. The store has to be
4182 // conditional though, so first create the predicate.
4183 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4184 ->getCondition();
4185 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4186 ->getCondition();
4187
// Make both stored values available in PostBB via PHIs (PPHI/QPHI are bound
// on extraction-elided declaration heads, original lines 4188/4190).
4189 PStore->getParent());
4191 QStore->getParent(), PPHI);
4192
4193 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4194 IRBuilder<> QB(PostBB, PostBBFirst);
4195 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4196
// Normalize each condition so that "true" means "the store executed".
4197 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4198 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4199
4200 if (InvertPCond)
4201 PPred = QB.CreateNot(PPred);
4202 if (InvertQCond)
4203 QPred = QB.CreateNot(QPred);
4204 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4205
4206 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4207 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4208 /*Unreachable=*/false,
4209 /*BranchWeights=*/nullptr, DTU);
4210
4211 QB.SetInsertPoint(T);
4212 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4213 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4214 // Choose the minimum alignment. If we could prove both stores execute, we
4215 // could use biggest one. In this case, though, we only know that one of the
4216 // stores executes. And we don't know it's safe to take the alignment from a
4217 // store that doesn't execute.
4218 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4219
4220 QStore->eraseFromParent();
4221 PStore->eraseFromParent();
4222
4223 return true;
4224 }
4225
// Look for two stacked diamonds/triangles (headed by PBI then QBI) that each
// conditionally store to the same address, and try to merge those stores into
// one predicated store per common address in PostBB.
4227 DomTreeUpdater *DTU, const DataLayout &DL,
4228 const TargetTransformInfo &TTI) {
4229 // The intention here is to find diamonds or triangles (see below) where each
4230 // conditional block contains a store to the same address. Both of these
4231 // stores are conditional, so they can't be unconditionally sunk. But it may
4232 // be profitable to speculatively sink the stores into one merged store at the
4233 // end, and predicate the merged store on the union of the two conditions of
4234 // PBI and QBI.
4235 //
4236 // This can reduce the number of stores executed if both of the conditions are
4237 // true, and can allow the blocks to become small enough to be if-converted.
4238 // This optimization will also chain, so that ladders of test-and-set
4239 // sequences can be if-converted away.
4240 //
4241 // We only deal with simple diamonds or triangles:
4242 //
4243 // PBI or PBI or a combination of the two
4244 // / \ | \
4245 // PTB PFB | PFB
4246 // \ / | /
4247 // QBI QBI
4248 // / \ | \
4249 // QTB QFB | QFB
4250 // \ / | /
4251 // PostBB PostBB
4252 //
4253 // We model triangles as a type of diamond with a nullptr "true" block.
4254 // Triangles are canonicalized so that the fallthrough edge is represented by
4255 // a true condition, as in the diagram above.
4256 BasicBlock *PTB = PBI->getSuccessor(0);
4257 BasicBlock *PFB = PBI->getSuccessor(1);
4258 BasicBlock *QTB = QBI->getSuccessor(0);
4259 BasicBlock *QFB = QBI->getSuccessor(1);
4260 BasicBlock *PostBB = QFB->getSingleSuccessor();
4261
4262 // Make sure we have a good guess for PostBB. If QTB's only successor is
4263 // QFB, then QFB is a better PostBB.
4264 if (QTB->getSingleSuccessor() == QFB)
4265 PostBB = QFB;
4266
4267 // If we couldn't find a good PostBB, stop.
4268 if (!PostBB)
4269 return false;
4270
4271 bool InvertPCond = false, InvertQCond = false;
4272 // Canonicalize fallthroughs to the true branches.
4273 if (PFB == QBI->getParent()) {
4274 std::swap(PFB, PTB);
4275 InvertPCond = true;
4276 }
4277 if (QFB == PostBB) {
4278 std::swap(QFB, QTB);
4279 InvertQCond = true;
4280 }
4281
4282 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4283 // and QFB may not. Model fallthroughs as a nullptr block.
4284 if (PTB == QBI->getParent())
4285 PTB = nullptr;
4286 if (QTB == PostBB)
4287 QTB = nullptr;
4288
4289 // Legality bailouts. We must have at least the non-fallthrough blocks and
4290 // the post-dominating block, and the non-fallthroughs must only have one
4291 // predecessor.
4292 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4293 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4294 };
4295 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4296 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4297 return false;
4298 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4299 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4300 return false;
4301 if (!QBI->getParent()->hasNUses(2))
4302 return false;
4303
4304 // OK, this is a sequence of two diamonds or triangles.
4305 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4306 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4307 for (auto *BB : {PTB, PFB}) {
4308 if (!BB)
4309 continue;
4310 for (auto &I : *BB)
4311 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4312 PStoreAddresses.insert(SI->getPointerOperand());
4313 }
4314 for (auto *BB : {QTB, QFB}) {
4315 if (!BB)
4316 continue;
4317 for (auto &I : *BB)
4318 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4319 QStoreAddresses.insert(SI->getPointerOperand());
4320 }
4321
4322 set_intersect(PStoreAddresses, QStoreAddresses);
4323 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4324 // clear what it contains.
4325 auto &CommonAddresses = PStoreAddresses;
4326
// Attempt the merge once per address stored on both sides; any single
// success makes this function report a change.
4327 bool Changed = false;
4328 for (auto *Address : CommonAddresses)
4329 Changed |=
4330 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4331 InvertPCond, InvertQCond, DTU, DL, TTI);
4332 return Changed;
4333 }
4334
4335/// If the previous block ended with a widenable branch, determine if reusing
4336/// the target block is profitable and legal. This will have the effect of
4337/// "widening" PBI, but doesn't require us to reason about hosting safety.
// NOTE(review): the opening signature line (original line 4338) is missing from
// this extraction; judging by the body it takes the widenable predecessor
// branch PBI, the dependent branch BI, and the DTU parameter visible below --
// confirm the full parameter list against the checked-in source.
 4339 DomTreeUpdater *DTU) {
 4340 // TODO: This can be generalized in two important ways:
 4341 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
 4342 // values from the PBI edge.
 4343 // 2) We can sink side effecting instructions into BI's fallthrough
 4344 // successor provided they doesn't contribute to computation of
 4345 // BI's condition.
// The widenable branch's true edge must fall through into BI's block; the
// false edge (IfFalseBB) is the candidate replacement deopt target.
 4346 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
 4347 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// NOTE(review): original line 4349 (the remainder of this condition) is
// missing from this extraction -- presumably a further legality check on
// BI's block or IfFalseBB; verify against the original file.
 4348 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
 4350 return false;
// Phi nodes in the reused target are not handled yet (see TODO 1 above).
 4351 if (!IfFalseBB->phis().empty())
 4352 return false; // TODO
 4353 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
 4354 // may undo the transform done here.
 4355 // TODO: There might be a more fine-grained solution to this.
 4356 if (!llvm::succ_empty(IfFalseBB))
 4357 return false;
 4358 // Use lambda to lazily compute expensive condition after cheap ones.
// BI's condition will effectively be evaluated on paths that previously
// deoptimized first, so its block must be free of memory writes and other
// side effects for the retargeting to be sound.
 4359 auto NoSideEffects = [](BasicBlock &BB) {
 4360 return llvm::none_of(BB, [](const Instruction &I) {
 4361 return I.mayWriteToMemory() || I.mayHaveSideEffects();
 4362 });
 4363 };
// Case 1: BI's false successor is a deopt block. Retarget that edge at
// IfFalseBB (a legal place to deoptimize, since it is the widenable branch's
// own failure path), effectively widening PBI.
 4364 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
 4365 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
 4366 NoSideEffects(*BI->getParent())) {
 4367 auto *OldSuccessor = BI->getSuccessor(1);
 4368 OldSuccessor->removePredecessor(BI->getParent());
 4369 BI->setSuccessor(1, IfFalseBB);
 4370 if (DTU)
 4371 DTU->applyUpdates(
 4372 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
 4373 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
 4374 return true;
 4375 }
// Case 2: symmetric handling when the deopt block is BI's true successor.
 4376 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
 4377 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
 4378 NoSideEffects(*BI->getParent())) {
 4379 auto *OldSuccessor = BI->getSuccessor(0);
 4380 OldSuccessor->removePredecessor(BI->getParent());
 4381 BI->setSuccessor(0, IfFalseBB);
 4382 if (DTU)
 4383 DTU->applyUpdates(
 4384 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
 4385 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
 4386 return true;
 4387 }
 4388 return false;
 4389}
4390
4391/// If we have a conditional branch as a predecessor of another block,
4392/// this function tries to simplify it. We know
4393/// that PBI and BI are both conditional branches, and BI is in one of the
4394/// successor blocks of PBI - PBI branches to BI.
// NOTE(review): the opening signature line (original line 4395) is missing
// from this extraction; the visible trailing parameters are DTU, DL and TTI,
// and the body uses PBI and BI -- confirm against the checked-in source.
 4396 DomTreeUpdater *DTU,
 4397 const DataLayout &DL,
 4398 const TargetTransformInfo &TTI) {
 4399 assert(PBI->isConditional() && BI->isConditional());
 4400 BasicBlock *BB = BI->getParent();
 4401
 4402 // If this block ends with a branch instruction, and if there is a
 4403 // predecessor that ends on a branch of the same condition, make
 4404 // this conditional branch redundant.
 4405 if (PBI->getCondition() == BI->getCondition() &&
 4406 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
 4407 // Okay, the outcome of this conditional branch is statically
 4408 // knowable. If this block had a single pred, handle specially, otherwise
 4409 // FoldCondBranchOnValueKnownInPredecessor() will handle it.
 4410 if (BB->getSinglePredecessor()) {
 4411 // Turn this into a branch on constant.
// Reaching BB via PBI's true edge pins the shared condition's value here.
 4412 bool CondIsTrue = PBI->getSuccessor(0) == BB;
 4413 BI->setCondition(
 4414 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
 4415 return true; // Nuke the branch on constant.
 4416 }
 4417 }
 4418
 4419 // If the previous block ended with a widenable branch, determine if reusing
 4420 // the target block is profitable and legal. This will have the effect of
 4421 // "widening" PBI, but doesn't require us to reason about hosting safety.
 4422 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
 4423 return true;
 4424
 4425 // If both branches are conditional and both contain stores to the same
 4426 // address, remove the stores from the conditionals and create a conditional
 4427 // merged store at the end.
 4428 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
 4429 return true;
 4430
 4431 // If this is a conditional branch in an empty block, and if any
 4432 // predecessors are a conditional branch to one of our destinations,
 4433 // fold the conditions into logical ops and one cond br.
 4434
 4435 // Ignore dbg intrinsics.
 4436 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
 4437 return false;
 4438
// Determine which successor index of PBI (PBIOp) and of BI (BIOp) reach the
// shared destination; bail if the two branches share no successor.
 4439 int PBIOp, BIOp;
 4440 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
 4441 PBIOp = 0;
 4442 BIOp = 0;
 4443 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
 4444 PBIOp = 0;
 4445 BIOp = 1;
 4446 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
 4447 PBIOp = 1;
 4448 BIOp = 0;
 4449 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
 4450 PBIOp = 1;
 4451 BIOp = 1;
 4452 } else {
 4453 return false;
 4454 }
 4455
 4456 // Check to make sure that the other destination of this branch
 4457 // isn't BB itself. If so, this is an infinite loop that will
 4458 // keep getting unwound.
 4459 if (PBI->getSuccessor(PBIOp) == BB)
 4460 return false;
 4461
 4462 // If predecessor's branch probability to BB is too low don't merge branches.
 4463 SmallVector<uint32_t, 2> PredWeights;
 4464 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
 4465 extractBranchWeights(*PBI, PredWeights) &&
 4466 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
 4467
// NOTE(review): original lines 4468 and 4472 are missing from this extraction
// -- presumably the construction of CommonDestProb (a BranchProbability from
// the arguments below) and of the 'Likely' threshold from TTI; verify against
// the original file.
 4469 PredWeights[PBIOp],
 4470 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
 4471
 4473 if (CommonDestProb >= Likely)
 4474 return false;
 4475 }
 4476
 4477 // Do not perform this transformation if it would require
 4478 // insertion of a large number of select instructions. For targets
 4479 // without predication/cmovs, this is a big pessimization.
 4480
 4481 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
 4482 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
 4483 unsigned NumPhis = 0;
 4484 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
 4485 ++II, ++NumPhis) {
 4486 if (NumPhis > 2) // Disable this xform.
 4487 return false;
 4488 }
 4489
 4490 // Finally, if everything is ok, fold the branches to logical ops.
 4491 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
 4492
 4493 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
 4494 << "AND: " << *BI->getParent());
 4495
// NOTE(review): original line 4496 is missing from this extraction --
// presumably the declaration of the 'Updates' vector used for the DTU
// bookkeeping below; verify against the original file.
 4497
 4498 // If OtherDest *is* BB, then BB is a basic block with a single conditional
 4499 // branch in it, where one edge (OtherDest) goes back to itself but the other
 4500 // exits. We don't *know* that the program avoids the infinite loop
 4501 // (even though that seems likely). If we do this xform naively, we'll end up
 4502 // recursively unpeeling the loop. Since we know that (after the xform is
 4503 // done) that the block *is* infinite if reached, we just make it an obviously
 4504 // infinite loop with no cond branch.
 4505 if (OtherDest == BB) {
 4506 // Insert it at the end of the function, because it's either code,
 4507 // or it won't matter if it's hot. :)
 4508 BasicBlock *InfLoopBlock =
 4509 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
 4510 BranchInst::Create(InfLoopBlock, InfLoopBlock);
 4511 if (DTU)
 4512 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
 4513 OtherDest = InfLoopBlock;
 4514 }
 4515
 4516 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
 4517
 4518 // BI may have other predecessors. Because of this, we leave
 4519 // it alone, but modify PBI.
 4520
 4521 // Make sure we get to CommonDest on True&True directions.
// Normalize both conditions so that 'true' means "go to CommonDest"; invert
// whichever condition reached it through successor index 1.
 4522 Value *PBICond = PBI->getCondition();
 4523 IRBuilder<NoFolder> Builder(PBI);
 4524 if (PBIOp)
 4525 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
 4526
 4527 Value *BICond = BI->getCondition();
 4528 if (BIOp)
 4529 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
 4530
 4531 // Merge the conditions.
 4532 Value *Cond =
 4533 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
 4534
 4535 // Modify PBI to branch on the new condition to the new dests.
 4536 PBI->setCondition(Cond);
 4537 PBI->setSuccessor(0, CommonDest);
 4538 PBI->setSuccessor(1, OtherDest);
 4539
 4540 if (DTU) {
 4541 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
 4542 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
 4543
 4544 DTU->applyUpdates(Updates);
 4545 }
 4546
 4547 // Update branch weight for PBI.
 4548 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
 4549 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
 4550 bool HasWeights =
 4551 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
 4552 SuccTrueWeight, SuccFalseWeight);
 4553 if (HasWeights) {
 4554 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
 4555 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
 4556 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
 4557 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
 4558 // The weight to CommonDest should be PredCommon * SuccTotal +
 4559 // PredOther * SuccCommon.
 4560 // The weight to OtherDest should be PredOther * SuccOther.
 4561 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
 4562 PredOther * SuccCommon,
 4563 PredOther * SuccOther};
 4564 // Halve the weights if any of them cannot fit in an uint32_t
 4565 FitWeights(NewWeights);
 4566
 4567 setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
 4568 }
 4569
 4570 // OtherDest may have phi nodes. If so, add an entry from PBI's
 4571 // block that are identical to the entries for BI's block.
 4572 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
 4573
 4574 // We know that the CommonDest already had an edge from PBI to
 4575 // it. If it has PHIs though, the PHIs may have different
 4576 // entries for BB and PBI's BB. If so, insert a select to make
 4577 // them agree.
 4578 for (PHINode &PN : CommonDest->phis()) {
 4579 Value *BIV = PN.getIncomingValueForBlock(BB);
 4580 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
 4581 Value *PBIV = PN.getIncomingValue(PBBIdx);
 4582 if (BIV != PBIV) {
 4583 // Insert a select in PBI to pick the right value.
 4584 SelectInst *NV = cast<SelectInst>(
 4585 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
 4586 PN.setIncomingValue(PBBIdx, NV);
 4587 // Although the select has the same condition as PBI, the original branch
 4588 // weights for PBI do not apply to the new select because the select's
 4589 // 'logical' edges are incoming edges of the phi that is eliminated, not
 4590 // the outgoing edges of PBI.
 4591 if (HasWeights) {
 4592 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
 4593 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
 4594 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
 4595 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
 4596 // The weight to PredCommonDest should be PredCommon * SuccTotal.
 4597 // The weight to PredOtherDest should be PredOther * SuccCommon.
 4598 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
 4599 PredOther * SuccCommon};
 4600
 4601 FitWeights(NewWeights);
 4602
 4603 setBranchWeights(NV, NewWeights[0], NewWeights[1]);
 4604 }
 4605 }
 4606 }
 4607
 4608 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
 4609 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
 4610
 4611 // This basic block is probably dead. We know it has at least
 4612 // one fewer predecessor.
 4613 return true;
 4614}
4615
4616// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4617// true or to FalseBB if Cond is false.
4618// Takes care of updating the successors and removing the old terminator.
4619// Also makes sure not to introduce new successors by assuming that edges to
4620// non-successor TrueBBs and FalseBBs aren't reachable.
4621bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
 4622 Value *Cond, BasicBlock *TrueBB,
 4623 BasicBlock *FalseBB,
 4624 uint32_t TrueWeight,
 4625 uint32_t FalseWeight) {
 4626 auto *BB = OldTerm->getParent();
 4627 // Remove any superfluous successor edges from the CFG.
 4628 // First, figure out which successors to preserve.
 4629 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
 4630 // successor.
 4631 BasicBlock *KeepEdge1 = TrueBB;
 4632 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
 4633
 4634 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
 4635
 4636 // Then remove the rest.
 4637 for (BasicBlock *Succ : successors(OldTerm)) {
 4638 // Make sure only to keep exactly one copy of each edge.
// A KeepEdge pointer is nulled once seen, so duplicate edges to the same
// successor beyond the first are also pruned.
 4639 if (Succ == KeepEdge1)
 4640 KeepEdge1 = nullptr;
 4641 else if (Succ == KeepEdge2)
 4642 KeepEdge2 = nullptr;
 4643 else {
 4644 Succ->removePredecessor(BB,
 4645 /*KeepOneInputPHIs=*/true);
 4646
 4647 if (Succ != TrueBB && Succ != FalseBB)
 4648 RemovedSuccessors.insert(Succ);
 4649 }
 4650 }
 4651
 4652 IRBuilder<> Builder(OldTerm);
 4653 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
 4654
 4655 // Insert an appropriate new terminator.
// Both KeepEdges nulled means every requested target was already a successor.
 4656 if (!KeepEdge1 && !KeepEdge2) {
 4657 if (TrueBB == FalseBB) {
 4658 // We were only looking for one successor, and it was present.
 4659 // Create an unconditional branch to it.
 4660 Builder.CreateBr(TrueBB);
 4661 } else {
 4662 // We found both of the successors we were looking for.
 4663 // Create a conditional branch sharing the condition of the select.
 4664 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
 4665 if (TrueWeight != FalseWeight)
 4666 setBranchWeights(NewBI, TrueWeight, FalseWeight);
 4667 }
 4668 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
 4669 // Neither of the selected blocks were successors, so this
 4670 // terminator must be unreachable.
 4671 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
 4672 } else {
 4673 // One of the selected values was a successor, but the other wasn't.
 4674 // Insert an unconditional branch to the one that was found;
 4675 // the edge to the one that wasn't must be unreachable.
 4676 if (!KeepEdge1) {
 4677 // Only TrueBB was found.
 4678 Builder.CreateBr(TrueBB);
 4679 } else {
 4680 // Only FalseBB was found.
 4681 Builder.CreateBr(FalseBB);
 4682 }
 4683 }
 4684
// NOTE(review): original lines 4685 and 4688 are missing from this extraction
// -- presumably the erasure of OldTerm (now replaced by the terminator built
// above) and the declaration of the 'Updates' vector used below; verify
// against the original file.
 4686
 4687 if (DTU) {
 4689 Updates.reserve(RemovedSuccessors.size());
 4690 for (auto *RemovedSuccessor : RemovedSuccessors)
 4691 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
 4692 DTU->applyUpdates(Updates);
 4693 }
 4694
 4695 return true;
 4696}
4697
4698// Replaces
4699// (switch (select cond, X, Y)) on constant X, Y
4700// with a branch - conditional if X and Y lead to distinct BBs,
4701// unconditional otherwise.
4702bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
 4703 SelectInst *Select) {
 4704 // Check for constant integer values in the select.
 4705 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
 4706 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
 4707 if (!TrueVal || !FalseVal)
 4708 return false;
 4709
 4710 // Find the relevant condition and destinations.
// findCaseValue falls back to the default destination when the constant is
// not an explicit case, so both lookups always yield a valid successor.
 4711 Value *Condition = Select->getCondition();
 4712 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
 4713 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
 4714
 4715 // Get weight for TrueBB and FalseBB.
 4716 uint32_t TrueWeight = 0, FalseWeight = 0;
// NOTE(review): original line 4717 is missing from this extraction --
// presumably the declaration of the 'Weights' vector filled by
// GetBranchWeights below; verify against the original file.
 4718 bool HasWeights = hasBranchWeightMD(*SI);
 4719 if (HasWeights) {
 4720 GetBranchWeights(SI, Weights);
// A switch's profile metadata has one entry per successor (default + cases);
// anything else is malformed and leaves the weights at the 0/0 default.
 4721 if (Weights.size() == 1 + SI->getNumCases()) {
 4722 TrueWeight =
 4723 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
 4724 FalseWeight =
 4725 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
 4726 }
 4727 }
 4728
 4729 // Perform the actual simplification.
 4730 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
 4731 FalseWeight);
 4732}
4733
4734// Replaces
4735// (indirectbr (select cond, blockaddress(@fn, BlockA),
4736// blockaddress(@fn, BlockB)))
4737// with
4738// (br cond, BlockA, BlockB).
4739bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4740 SelectInst *SI) {
4741 // Check that both operands of the select are block addresses.
4742 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4743 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4744 if (!TBA || !FBA)
4745 return false;
4746
4747 // Extract the actual blocks.
4748 BasicBlock *TrueBB = TBA->getBasicBlock();
4749 BasicBlock *FalseBB = FBA->getBasicBlock();
4750
4751 // Perform the actual simplification.
4752 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4753 0);
4754}
4755
4756/// This is called when we find an icmp instruction
4757/// (a seteq/setne with a constant) as the only instruction in a
4758/// block that ends with an uncond branch. We are looking for a very specific
4759/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4760/// this case, we merge the first two "or's of icmp" into a switch, but then the
4761/// default value goes to an uncond block with a seteq in it, we get something
4762/// like:
4763///
4764/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4765/// DEFAULT:
4766/// %tmp = icmp eq i8 %A, 92
4767/// br label %end
4768/// end:
4769/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4770///
4771/// We prefer to split the edge to 'end' so that there is a true/false entry to
4772/// the PHI, merging the third icmp into the switch.
4773bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
 4774 ICmpInst *ICI, IRBuilder<> &Builder) {
 4775 BasicBlock *BB = ICI->getParent();
 4776
 4777 // If the block has any PHIs in it or the icmp has multiple uses, it is too
 4778 // complex.
 4779 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
 4780 return false;
 4781
 4782 Value *V = ICI->getOperand(0);
 4783 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
 4784
 4785 // The pattern we're looking for is where our only predecessor is a switch on
 4786 // 'V' and this block is the default case for the switch. In this case we can
 4787 // fold the compared value into the switch to simplify things.
 4788 BasicBlock *Pred = BB->getSinglePredecessor();
 4789 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
 4790 return false;
 4791
 4792 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
 4793 if (SI->getCondition() != V)
 4794 return false;
 4795
 4796 // If BB is reachable on a non-default case, then we simply know the value of
 4797 // V in this block. Substitute it and constant fold the icmp instruction
 4798 // away.
 4799 if (SI->getDefaultDest() != BB) {
 4800 ConstantInt *VVal = SI->findCaseDest(BB);
 4801 assert(VVal && "Should have a unique destination value");
 4802 ICI->setOperand(0, VVal);
 4803
 4804 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
 4805 ICI->replaceAllUsesWith(V);
 4806 ICI->eraseFromParent();
 4807 }
 4808 // BB is now empty, so it is likely to simplify away.
 4809 return requestResimplify();
 4810 }
 4811
 4812 // Ok, the block is reachable from the default dest. If the constant we're
 4813 // comparing exists in one of the other edges, then we can constant fold ICI
 4814 // and zap it.
 4815 if (SI->findCaseValue(Cst) != SI->case_default()) {
 4816 Value *V;
// NOTE(review): original lines 4818 and 4820 (the two assignments to V) are
// missing from this extraction -- on the default path V cannot equal Cst, so
// presumably V is set to false for ICMP_EQ and true otherwise; verify against
// the original file.
 4817 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
 4819 else
 4821
 4822 ICI->replaceAllUsesWith(V);
 4823 ICI->eraseFromParent();
 4824 // BB is now empty, so it is likely to simplify away.
 4825 return requestResimplify();
 4826 }
 4827
 4828 // The use of the icmp has to be in the 'end' block, by the only PHI node in
 4829 // the block.
 4830 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
 4831 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
 4832 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
 4833 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
 4834 return false;
 4835
 4836 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
 4837 // true in the PHI.
 4838 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
 4839 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
 4840
 4841 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
 4842 std::swap(DefaultCst, NewCst);
 4843
 4844 // Replace ICI (which is used by the PHI for the default value) with true or
 4845 // false depending on if it is EQ or NE.
 4846 ICI->replaceAllUsesWith(DefaultCst);
 4847 ICI->eraseFromParent();
 4848
// NOTE(review): original line 4849 is missing from this extraction --
// presumably the declaration of the 'Updates' vector used below; verify
// against the original file.
 4850
 4851 // Okay, the switch goes to this block on a default value. Add an edge from
 4852 // the switch to the merge point on the compared value.
 4853 BasicBlock *NewBB =
 4854 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
 4855 {
// NOTE(review): original lines 4856 and 4858 are missing from this extraction
// -- presumably the construction of the profile-preserving wrapper 'SIW'
// around *SI and the declaration of 'NewW' (an optional case weight); verify
// against the original file. The surviving code splits the default edge's
// weight roughly in half between the old default and the new case.
 4857 auto W0 = SIW.getSuccessorWeight(0);
 4859 if (W0) {
 4860 NewW = ((uint64_t(*W0) + 1) >> 1);
 4861 SIW.setSuccessorWeight(0, *NewW);
 4862 }
 4863 SIW.addCase(Cst, NewBB, NewW);
 4864 if (DTU)
 4865 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
 4866 }
 4867
 4868 // NewBB branches to the phi block, add the uncond branch and the phi entry.
 4869 Builder.SetInsertPoint(NewBB);
 4870 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
 4871 Builder.CreateBr(SuccBlock);
 4872 PHIUse->addIncoming(NewCst, NewBB);
 4873 if (DTU) {
 4874 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
 4875 DTU->applyUpdates(Updates);
 4876 }
 4877 return true;
 4878}
4879
4880/// The specified branch is a conditional branch.
4881/// Check to see if it is branching on an or/and chain of icmp instructions, and
4882/// fold it into a switch instruction if so.
4883bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
 4884 IRBuilder<> &Builder,
 4885 const DataLayout &DL) {
 4886 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
 4887 if (!Cond)
 4888 return false;
 4889
 4890 // Change br (X == 0 | X == 1), T, F into a switch instruction.
 4891 // If this is a bunch of seteq's or'd together, or if it's a bunch of
 4892 // 'setne's and'ed together, collect them.
 4893
 4894 // Try to gather values from a chain of and/or to be turned into a switch
 4895 ConstantComparesGatherer ConstantCompare(Cond, DL);
 4896 // Unpack the result
 4897 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
 4898 Value *CompVal = ConstantCompare.CompValue;
 4899 unsigned UsedICmps = ConstantCompare.UsedICmps;
// ExtraCase holds at most one leftover condition that could not be expressed
// as "CompVal == constant"; it gets its own explicit branch below.
 4900 Value *ExtraCase = ConstantCompare.Extra;
 4901
 4902 // If we didn't have a multiply compared value, fail.
 4903 if (!CompVal)
 4904 return false;
 4905
 4906 // Avoid turning single icmps into a switch.
 4907 if (UsedICmps <= 1)
 4908 return false;
 4909
 4910 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
 4911
 4912 // There might be duplicate constants in the list, which the switch
 4913 // instruction can't handle, remove them now.
 4914 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
 4915 Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
 4916
 4917 // If Extra was used, we require at least two switch values to do the
 4918 // transformation. A switch with one value is just a conditional branch.
 4919 if (ExtraCase && Values.size() < 2)
 4920 return false;
 4921
 4922 // TODO: Preserve branch weight metadata, similarly to how
 4923 // FoldValueComparisonIntoPredecessors preserves it.
 4924
 4925 // Figure out which block is which destination.
 4926 BasicBlock *DefaultBB = BI->getSuccessor(1);
 4927 BasicBlock *EdgeBB = BI->getSuccessor(0);
 4928 if (!TrueWhenEqual)
 4929 std::swap(DefaultBB, EdgeBB);
 4930
 4931 BasicBlock *BB = BI->getParent();
 4932
 4933 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
 4934 << " cases into SWITCH. BB is:\n"
 4935 << *BB);
// NOTE(review): original lines 4936-4937 are missing from this extraction --
// presumably a blank line and the declaration of the 'Updates' vector used
// below; verify against the original file.
 4938
 4939 // If there are any extra values that couldn't be folded into the switch
 4940 // then we evaluate them with an explicit branch first. Split the block
 4941 // right before the condbr to handle it.
 4942 if (ExtraCase) {
 4943 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
 4944 /*MSSAU=*/nullptr, "switch.early.test");
 4945
 4946 // Remove the uncond branch added to the old block.
 4947 Instruction *OldTI = BB->getTerminator();
 4948 Builder.SetInsertPoint(OldTI);
 4949
 4950 // There can be an unintended UB if extra values are Poison. Before the
 4951 // transformation, extra values may not be evaluated according to the
 4952 // condition, and it will not raise UB. But after transformation, we are
 4953 // evaluating extra values before checking the condition, and it will raise
 4954 // UB. It can be solved by adding freeze instruction to extra values.
 4955 AssumptionCache *AC = Options.AC;
 4956
 4957 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
 4958 ExtraCase = Builder.CreateFreeze(ExtraCase);
 4959
 4960 if (TrueWhenEqual)
 4961 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
 4962 else
 4963 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
 4964
 4965 OldTI->eraseFromParent();
 4966
 4967 if (DTU)
 4968 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
 4969
 4970 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
 4971 // for the edge we just added.
 4972 AddPredecessorToBlock(EdgeBB, BB, NewBB);
 4973
 4974 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
 4975 << "\nEXTRABB = " << *BB);
// The switch itself is built in the split-off block from here on.
 4976 BB = NewBB;
 4977 }
 4978
 4979 Builder.SetInsertPoint(BI);
 4980 // Convert pointer to int before we switch.
 4981 if (CompVal->getType()->isPointerTy()) {
 4982 CompVal = Builder.CreatePtrToInt(
 4983 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
 4984 }
 4985
 4986 // Create the new switch instruction now.
 4987 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
 4988
 4989 // Add all of the 'cases' to the switch instruction.
 4990 for (unsigned i = 0, e = Values.size(); i != e; ++i)
 4991 New->addCase(Values[i], EdgeBB);
 4992
 4993 // We added edges from PI to the EdgeBB. As such, if there were any
 4994 // PHI nodes in EdgeBB, they need entries to be added corresponding to
 4995 // the number of edges added.
// One BB->EdgeBB entry already exists (from the original branch or the
// ExtraCase branch), hence the "- 1" below.
 4996 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
 4997 PHINode *PN = cast<PHINode>(BBI);
 4998 Value *InVal = PN->getIncomingValueForBlock(BB);
 4999 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
 5000 PN->addIncoming(InVal, BB);
 5001 }
 5002
 5003 // Erase the old branch instruction.
// NOTE(review): original line 5004 is missing from this extraction --
// presumably the call that erases BI (and DCEs its condition); verify against
// the original file.
 5005 if (DTU)
 5006 DTU->applyUpdates(Updates);
 5007
 5008 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
 5009 return true;
 5010}
5011
5012bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5013 if (isa<PHINode>(RI->getValue()))
5014 return simplifyCommonResume(RI);
5015 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5016 RI->getValue() == RI->getParent()->getFirstNonPHI())
5017 // The resume must unwind the exception that caused control to branch here.
5018 return simplifySingleResume(RI);
5019
5020 return false;
5021}
5022
5023// Check if cleanup block is empty
// Returns true iff every instruction in the given range is a benign intrinsic
// (debug info or lifetime_end), i.e. the range contains no "real" work.
// NOTE(review): the function's signature line (original line 5024) is missing
// from this extraction; the body iterates a range 'R' of instructions --
// confirm the exact parameter type against the checked-in source.
 5025 for (Instruction &I : R) {
// Anything that is not an intrinsic call at all makes the block non-empty.
 5026 auto *II = dyn_cast<IntrinsicInst>(&I);
 5027 if (!II)
 5028 return false;
 5029
 5030 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
 5031 switch (IntrinsicID) {
 5032 case Intrinsic::dbg_declare:
 5033 case Intrinsic::dbg_value:
 5034 case Intrinsic::dbg_label:
 5035 case Intrinsic::lifetime_end:
 5036 break;
 5037 default:
 5038 return false;
 5039 }
 5040 }
 5041 return true;
 5042}
5043
5044// Simplify resume that is shared by several landing pads (phi of landing pad).
// For each incoming block that is a "trivial" landing pad (branches only to
// this resume and contains nothing but benign intrinsics), turn the invokes
// unwinding to it into calls and cut its edge to the resume block.
5045bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
 5046 BasicBlock *BB = RI->getParent();
 5047
 5048 // Check that there are no other instructions except for debug and lifetime
 5049 // intrinsics between the phi's and resume instruction.
// NOTE(review): original lines 5050-5051 are missing from this extraction --
// presumably the guard performing the emptiness check described above
// (a call over the range from BB's first non-phi to its terminator) whose
// failure triggers this early return; verify against the original file.
 5052 return false;
 5053
 5054 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
 5055 auto *PhiLPInst = cast<PHINode>(RI->getValue());
 5056
 5057 // Check incoming blocks to see if any of them are trivial.
 5058 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
 5059 Idx++) {
 5060 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
 5061 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
 5062
 5063 // If the block has other successors, we can not delete it because
 5064 // it has other dependents.
 5065 if (IncomingBB->getUniqueSuccessor() != BB)
 5066 continue;
 5067
 5068 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
 5069 // Not the landing pad that caused the control to branch here.
 5070 if (IncomingValue != LandingPad)
 5071 continue;
// NOTE(review): the call guarding the range below (original lines 5072-5073,
// presumably the same block-emptiness check on the instructions between the
// landing pad and the terminator) was stripped by this extraction; verify
// against the original file.
 5073
 5074 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
 5075 TrivialUnwindBlocks.insert(IncomingBB);
 5076 }
 5077
 5078 // If no trivial unwind blocks, don't do any simplifications.
 5079 if (TrivialUnwindBlocks.empty())
 5080 return false;
 5081
 5082 // Turn all invokes that unwind here into calls.
 5083 for (auto *TrivialBB : TrivialUnwindBlocks) {
 5084 // Blocks that will be simplified should be removed from the phi node.
 5085 // Note there could be multiple edges to the resume block, and we need
 5086 // to remove them all.
 5087 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
 5088 BB->removePredecessor(TrivialBB, true);
 5089
// NOTE(review): original line 5091 (the iterated range) is missing from this
// extraction -- presumably an early-inc range over TrivialBB's predecessors,
// since removeUnwindEdge mutates the predecessor list; verify against the
// original file.
 5090 for (BasicBlock *Pred :
 5092 removeUnwindEdge(Pred, DTU);
 5093 ++NumInvokes;
 5094 }
 5095
 5096 // In each SimplifyCFG run, only the current processed block can be erased.
 5097 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
 5098 // of erasing TrivialBB, we only remove the branch to the common resume
 5099 // block so that we can later erase the resume block since it has no
 5100 // predecessors.
 5101 TrivialBB->getTerminator()->eraseFromParent();
 5102 new UnreachableInst(RI->getContext(), TrivialBB);
 5103 if (DTU)
 5104 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
 5105 }
 5106
 5107 // Delete the resume block if all its predecessors have been removed.
 5108 if (pred_empty(BB))
 5109 DeleteDeadBlock(BB, DTU);
 5110
// Non-empty here by the earlier guard, so this always reports a change.
 5111 return !TrivialUnwindBlocks.empty();
 5112}
5113
5114// Simplify resume that is only used by a single (non-phi) landing pad.
// If the block does nothing but rethrow its own landingpad value, every
// invoke unwinding here can become a plain call and the block can be deleted.
5115bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
 5116 BasicBlock *BB = RI->getParent();
 5117 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
 5118 assert(RI->getValue() == LPInst &&
 5119 "Resume must unwind the exception that caused control to here");
 5120
 5121 // Check that there are no other instructions except for debug intrinsics.
// NOTE(review): original line 5122 is missing from this extraction --
// presumably the negated block-emptiness call that wraps the range below;
// verify against the original file.
 5123 make_range<Instruction *>(LPInst->getNextNode(), RI)))
 5124 return false;
 5125
 5126 // Turn all invokes that unwind here into calls and delete the basic block.
// NOTE(review): original line 5127 (the loop header) is missing from this
// extraction -- presumably an early-inc iteration over BB's predecessors,
// since removeUnwindEdge mutates the predecessor list; verify against the
// original file.
 5128 removeUnwindEdge(Pred, DTU);
 5129 ++NumInvokes;
 5130 }
 5131
 5132 // The landingpad is now unreachable. Zap it.
 5133 DeleteDeadBlock(BB, DTU);
 5134 return true;
 5135}
5136
  // NOTE(review): the rendered source dropped the signature line that preceded
  // this comment (a static bool taking the CleanupReturnInst and a
  // DomTreeUpdater); restore from upstream.
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for debug intrinsics.
  // NOTE(review): the rendered source dropped the line opening this condition
  // (the emptiness check whose argument list continues below); restore from
  // upstream.
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // If the value flows in through a PHI in BB, translate it per incoming
      // predecessor; otherwise the same value is used on every new edge.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  // NOTE(review): the rendered source dropped the line opening the loop over
  // BB's predecessors (binding PredBB); restore from upstream.
    if (UnwindDest == nullptr) {
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5253
// Try to merge two cleanuppads together.
// NOTE(review): the rendered source dropped the signature line that followed
// this comment (a static bool taking the CleanupReturnInst); restore from
// upstream.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5286
5287bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5288 // It is possible to transiantly have an undef cleanuppad operand because we
5289 // have deleted some, but not all, dead blocks.
5290 // Eventually, this block will be deleted.
5291 if (isa<UndefValue>(RI->getOperand(0)))
5292 return false;
5293
5294 if (mergeCleanupPad(RI))
5295 return true;
5296
5297 if (removeEmptyCleanup(RI, DTU))
5298 return true;
5299
5300 return false;
5301}
5302
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  // NOTE(review): the rendered source dropped the statement that performed
  // this move; restore from upstream.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    // NOTE(review): a dropped line here declared the iterator BBI positioned
    // at the unreachable instruction; restore from upstream.
    --BBI;

    // NOTE(review): a dropped line here tested whether the instruction is
    // guaranteed to transfer execution to its successor; restore from
    // upstream.
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // NOTE(review): a dropped line here snapshotted BB's predecessor list into
  // Preds before it is mutated by the loop body; restore from upstream.
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    auto *Predecessor = Preds[i];
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // Record what we learned from the branch: taking the surviving edge
        // implies the condition value that avoided the unreachable block.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        // NOTE(review): a dropped line here erased the old conditional
        // terminator; restore from upstream.
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      // NOTE(review): a dropped line here constructed the profile-updating
      // wrapper SU around SI; restore from upstream.
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The landing pad is unreachable, so this invoke cannot really throw:
        // demote it to a call and mark it nounwind.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5487
  // NOTE(review): the rendered source dropped this helper's signature line
  // (it takes the switch's case-value list by reference) and, below, the line
  // that put the cases in order before the adjacency scan; restore both from
  // upstream.
  assert(Cases.size() >= 1);

  // NOTE(review): a dropped line here sorted Cases; the scan below requires
  // each element to be exactly one less than its predecessor, i.e. presumably
  // a descending run such as {7, 6, 5, 4} -- confirm against upstream.
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5498
// NOTE(review): the rendered source dropped the first line of this helper's
// signature (it takes the SwitchInst whose default is dead); restore from
// upstream.
                                           DomTreeUpdater *DTU) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  OrigDefaultBlock->removePredecessor(BB);
  // Replace the dead default with a fresh block holding only an unreachable.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    // NOTE(review): a dropped line here declared the Updates vector used
    // below; restore from upstream.
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the edge to the old default if no remaining case still
    // targets that block.
    if (!is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5518
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  // A default that immediately falls into unreachable is treated as absent.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  // NOTE(review): dropped lines here declared the two case-value vectors
  // CasesA and CasesB filled by the loop below; restore from upstream.

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (CasesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Rebase the condition so the contiguous range starts at zero, then a
  // single unsigned compare against the range size selects the destination.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    // NOTE(review): a dropped line here declared the Weights vector filled by
    // GetBranchWeights below; restore from upstream.
    GetBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together so each fits in a uint32_t while
      // preserving their ratio.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    // Keep exactly one incoming edge from this block; drop the rest.
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    // NOTE(review): the rendered source dropped the statement guarded by this
    // condition (the call that rewrites the dead default block); restore from
    // upstream.

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5648
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
// NOTE(review): the rendered source dropped the first line of this static
// helper's signature (the SwitchInst and a DomTreeUpdater); restore from
// upstream.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);

  // Gather dead cases.
  // NOTE(review): a dropped line here declared the DeadCases vector filled
  // below; restore from upstream.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Track per-successor case counts so we can tell later which successor
      // edges disappear entirely.
      if (!NumPerSuccessorCases.count(Successor))
        UniqueSuccessors.push_back(Successor);
      ++NumPerSuccessorCases[Successor];
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it sets a known-zero bit, misses a known-one bit, or
    // needs more significant bits than the condition can have.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */ &&
      SI->getNumCases() == (1ULL << NumUnknownBits)) {
    // NOTE(review): a dropped line here replaced the now-dead default with an
    // unreachable block; restore from upstream.
    return true;
  }

  if (DeadCases.empty())
    return false;

  // NOTE(review): a dropped line here constructed the profile-updating
  // wrapper SIW around SI; restore from upstream.
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
5724
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): the rendered source dropped the first line of this static
// helper's signature (the case value being matched); restore from upstream.
                                              BasicBlock *BB, int *PhiIndex) {
  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Return the first phi in the successor whose incoming value from BB is
  // exactly the case constant.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
5757
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): the rendered source dropped this helper's signature line (a
// static bool taking the SwitchInst); restore from upstream.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    //   switchbb:
    //     switch i32 %x, label %default [
    //       i32 17, label %succ
    //   ...
    //   succ:
    //     %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //     %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Only worthwhile when at least two incoming slots can be rewritten to
    // the same switch condition.
    if (Indexes.size() < 2)
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
5815
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
// NOTE(review): the rendered source dropped this helper's signature line (the
// Constant and the TargetTransformInfo); restore from upstream.
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  // Only simple constant kinds are eligible for a lookup-table initializer.
  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
      return false;
  }

  // NOTE(review): a dropped line here asked the target (via TTI) whether a
  // lookup table should be built for this constant; restore from upstream.
    return false;

  return true;
}
5842
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
// NOTE(review): the rendered source dropped the two lines completing this
// signature (the Value being looked up and the Value->Constant pool map);
// restore from upstream.
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // lookup() returns null when V has no entry in the pool.
  return ConstantPool.lookup(V);
}
5852
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
// NOTE(review): the rendered source dropped the two lines completing this
// signature (the Instruction, the DataLayout, and the constant pool); restore
// from upstream.
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    // An all-ones condition picks the true arm, a zero condition the false
    // arm; any other (e.g. vector) condition is not folded here.
    if (A->isAllOnesValue())
      return LookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return LookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // NOTE(review): a dropped line here declared the COps operand vector filled
  // by the loop below; restore from upstream.
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
5881
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destionations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
// NOTE(review): the rendered source dropped the first parameter line of this
// signature (the switch, the case value, and the case destination block);
// restore from upstream.
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  // NOTE(review): a dropped line here declared the ConstantPool map used
  // below; restore from upstream.
  // Within this case, the switch condition is known to equal CaseVal.
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!ValidLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}
5955
5956// Helper function used to add CaseVal to the list of cases that generate
5957// Result. Returns the updated number of cases that generate this result.
5958static size_t mapCaseToResult(ConstantInt *CaseVal,
5959 SwitchCaseResultVectorTy &UniqueResults,
5960 Constant *Result) {
5961 for (auto &I : UniqueResults) {
5962 if (I.first == Result) {
5963 I.second.push_back(CaseVal);
5964 return I.second.size();
5965 }
5966 }
5967 UniqueResults.push_back(
5968 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5969 return 1;
5970}
5971
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
// NOTE(review): the rendered source dropped the first line of this helper's
// signature (the SwitchInst and the in/out PHI pointer); restore from
// upstream.
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  // NOTE(review): a dropped line here declared the DefaultResults vector
  // filled by getCaseResults below; restore from upstream.
  BasicBlock *DefaultDest = SI->getDefaultDest();
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
  if ((!DefaultResult &&
       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
    return false;

  return true;
}
6029
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// Returns the select instruction value on success, nullptr if the switch
// cannot be folded. The inserted IR is emitted through Builder at its current
// insertion point.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    // Without a default result the "else" arm of the outer select is simply
    // the second case's result; with one, it becomes an inner select.
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where all cases map to one result value and a
  // default result exists.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find mininal value.
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Mark the bits case number touched.
      // BitMask accumulates every bit position in which some case value
      // differs from the minimum.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits. CaseCount values over Log2(CaseCount) free bits
      // means every combination of those bits is a listed case, so masking
      // the remaining bits is an exact membership test.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where exactly two case values map to the
    // same result: compare against each and OR the comparisons.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6106
6107// Helper function to cleanup a switch instruction that has been converted into
6108// a select, fixing up PHI nodes and basic blocks.
6110 Value *SelectValue,
6111 IRBuilder<> &Builder,
6112 DomTreeUpdater *DTU) {
6113 std::vector<DominatorTree::UpdateType> Updates;
6114
6115 BasicBlock *SelectBB = SI->getParent();
6116 BasicBlock *DestBB = PHI->getParent();
6117
6118 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6119 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6120 Builder.CreateBr(DestBB);
6121
6122 // Remove the switch.
6123
6124 PHI->removeIncomingValueIf(
6125 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6126 PHI->addIncoming(SelectValue, SelectBB);
6127
6128 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6129 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6130 BasicBlock *Succ = SI->getSuccessor(i);
6131
6132 if (Succ == DestBB)
6133 continue;
6134 Succ->removePredecessor(SelectBB);
6135 if (DTU && RemovedSuccessors.insert(Succ).second)
6136 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6137 }
6138 SI->eraseFromParent();
6139 if (DTU)
6140 DTU->applyUpdates(Updates);
6141}
6142
6143/// If a switch is only used to initialize one or more phi nodes in a common
6144/// successor block with only two different constant values, try to replace the
6145/// switch with a select. Returns true if the fold was made.
6146static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6147 DomTreeUpdater *DTU, const DataLayout &DL,
6148 const TargetTransformInfo &TTI) {
6149 Value *const Cond = SI->getCondition();
6150 PHINode *PHI = nullptr;
6151 BasicBlock *CommonDest = nullptr;
6152 Constant *DefaultResult;
6153 SwitchCaseResultVectorTy UniqueResults;
6154 // Collect all the cases that will deliver the same value from the switch.
6155 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6156 DL, TTI, /*MaxUniqueResults*/ 2))
6157 return false;
6158
6159 assert(PHI != nullptr && "PHI for value select not found");
6160 Builder.SetInsertPoint(SI);
6161 Value *SelectValue =
6162 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6163 if (!SelectValue)
6164 return false;
6165
6166 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6167 return true;
6168}
6169
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The constructor analyzes the case values/results and picks the cheapest
/// representation (see the Kind enum); BuildLookup then emits the matching IR.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  /// Offset is the smallest case value; table index i corresponds to case
  /// value Offset + i.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  // Result = LinearOffset + Index * LinearMultiplier.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap, in which case nsw must not be attached
  // to the generated add/mul.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6231
6232SwitchLookupTable::SwitchLookupTable(
6233 Module &M, uint64_t TableSize, ConstantInt *Offset,
6234 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6235 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6236 assert(Values.size() && "Can't build lookup table without values!");
6237 assert(TableSize >= Values.size() && "Can't fit values in table!");
6238
6239 // If all values in the table are equal, this is that value.
6240 SingleValue = Values.begin()->second;
6241
6242 Type *ValueType = Values.begin()->second->getType();
6243
6244 // Build up the table contents.
6245 SmallVector<Constant *, 64> TableContents(TableSize);
6246 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6247 ConstantInt *CaseVal = Values[I].first;
6248 Constant *CaseRes = Values[I].second;
6249 assert(CaseRes->getType() == ValueType);
6250
6251 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6252 TableContents[Idx] = CaseRes;
6253
6254 if (CaseRes != SingleValue)
6255 SingleValue = nullptr;
6256 }
6257
6258 // Fill in any holes in the table with the default result.
6259 if (Values.size() < TableSize) {
6260 assert(DefaultValue &&
6261 "Need a default value to fill the lookup table holes.");
6262 assert(DefaultValue->getType() == ValueType);
6263 for (uint64_t I = 0; I < TableSize; ++I) {
6264 if (!TableContents[I])
6265 TableContents[I] = DefaultValue;
6266 }
6267
6268 if (DefaultValue != SingleValue)
6269 SingleValue = nullptr;
6270 }
6271
6272 // If each element in the table contains the same value, we only need to store
6273 // that single value.
6274 if (SingleValue) {
6275 Kind = SingleValueKind;
6276 return;
6277 }
6278
6279 // Check if we can derive the value with a linear transformation from the
6280 // table index.
6281 if (isa<IntegerType>(ValueType)) {
6282 bool LinearMappingPossible = true;
6283 APInt PrevVal;
6284 APInt DistToPrev;
6285 // When linear map is monotonic and signed overflow doesn't happen on
6286 // maximum index, we can attach nsw on Add and Mul.
6287 bool NonMonotonic = false;
6288 assert(TableSize >= 2 && "Should be a SingleValue table.");
6289 // Check if there is the same distance between two consecutive values.
6290 for (uint64_t I = 0; I < TableSize; ++I) {
6291 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6292 if (!ConstVal) {
6293 // This is an undef. We could deal with it, but undefs in lookup tables
6294 // are very seldom. It's probably not worth the additional complexity.
6295 LinearMappingPossible = false;
6296 break;
6297 }
6298 const APInt &Val = ConstVal->getValue();
6299 if (I != 0) {
6300 APInt Dist = Val - PrevVal;
6301 if (I == 1) {
6302 DistToPrev = Dist;
6303 } else if (Dist != DistToPrev) {
6304 LinearMappingPossible = false;
6305 break;
6306 }
6307 NonMonotonic |=
6308 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6309 }
6310 PrevVal = Val;
6311 }
6312 if (LinearMappingPossible) {
6313 LinearOffset = cast<ConstantInt>(TableContents[0]);
6314 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6315 bool MayWrap = false;
6316 APInt M = LinearMultiplier->getValue();
6317 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6318 LinearMapValWrapped = NonMonotonic || MayWrap;
6319 Kind = LinearMapKind;
6320 ++NumLinearMaps;
6321 return;
6322 }
6323 }
6324
6325 // If the type is integer and the table fits in a register, build a bitmap.
6326 if (WouldFitInRegister(DL, TableSize, ValueType)) {
6327 IntegerType *IT = cast<IntegerType>(ValueType);
6328 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6329 for (uint64_t I = TableSize; I > 0; --I) {
6330 TableInt <<= IT->getBitWidth();
6331 // Insert values into the bitmap. Undef values are set to zero.
6332 if (!isa<UndefValue>(TableContents[I - 1])) {
6333 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6334 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6335 }
6336 }
6337 BitMap = ConstantInt::get(M.getContext(), TableInt);
6338 BitMapElementTy = IT;
6339 Kind = BitMapKind;
6340 ++NumBitMaps;
6341 return;
6342 }
6343
6344 // Store the table in an array.
6345 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6346 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6347
6348 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6349 GlobalVariable::PrivateLinkage, Initializer,
6350 "switch.table." + FuncName);
6351 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6352 // Set the alignment to that of an array items. We will be only loading one
6353 // value out of it.
6354 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6355 Kind = ArrayKind;
6356}
6357
6358Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
6359 switch (Kind) {
6360 case SingleValueKind:
6361 return SingleValue;
6362 case LinearMapKind: {
6363 // Derive the result value from the input value.
6364 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6365 false, "switch.idx.cast");
6366 if (!LinearMultiplier->isOne())
6367 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6368 /*HasNUW = */ false,
6369 /*HasNSW = */ !LinearMapValWrapped);
6370
6371 if (!LinearOffset->isZero())
6372 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6373 /*HasNUW = */ false,
6374 /*HasNSW = */ !LinearMapValWrapped);
6375 return Result;
6376 }
6377 case BitMapKind: {
6378 // Type of the bitmap (e.g. i59).
6379 IntegerType *MapTy = BitMap->getIntegerType();
6380
6381 // Cast Index to the same type as the bitmap.
6382 // Note: The Index is <= the number of elements in the table, so
6383 // truncating it to the width of the bitmask is safe.
6384 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6385
6386 // Multiply the shift amount by the element width. NUW/NSW can always be
6387 // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
6388 // BitMap's bit width.
6389 ShiftAmt = Builder.CreateMul(
6390 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6391 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6392
6393 // Shift down.
6394 Value *DownShifted =
6395 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6396 // Mask off.
6397 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6398 }
6399 case ArrayKind: {
6400 // Make sure the table index will not overflow when treated as signed.
6401 IntegerType *IT = cast<IntegerType>(Index->getType());
6402 uint64_t TableSize =
6403 Array->getInitializer()->getType()->getArrayNumElements();
6404 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6405 Index = Builder.CreateZExt(
6406 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6407 "switch.tableidx.zext");
6408
6409 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6410 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6411 GEPIndices, "switch.gep");
6412 return Builder.CreateLoad(
6413 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6414 "switch.load");
6415 }
6416 }
6417 llvm_unreachable("Unknown lookup table kind!");
6418}
6419
6420bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6421 uint64_t TableSize,
6422 Type *ElementType) {
6423 auto *IT = dyn_cast<IntegerType>(ElementType);
6424 if (!IT)
6425 return false;
6426 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6427 // are <= 15, we could try to narrow the type.
6428
6429 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6430 if (TableSize >= UINT_MAX / IT->getBitWidth())
6431 return false;
6432 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6433}
6434
6436 const DataLayout &DL) {
6437 // Allow any legal type.
6438 if (TTI.isTypeLegal(Ty))
6439 return true;
6440
6441 auto *IT = dyn_cast<IntegerType>(Ty);
6442 if (!IT)
6443 return false;
6444
6445 // Also allow power of 2 integer types that have at least 8 bits and fit in
6446 // a register. These types are common in frontend languages and targets
6447 // usually support loads of these types.
6448 // TODO: We could relax this to any integer that fits in a register and rely
6449 // on ABI alignment and padding in the table to allow the load to be widened.
6450 // Or we could widen the constants and truncate the load.
6451 unsigned BitWidth = IT->getBitWidth();
6452 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6453 DL.fitsInLegalInteger(IT->getBitWidth());
6454}
6455
/// Return true when NumCases cases over a value range of CaseRange is dense
/// enough (>= 40%) to be worth lowering as a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Reject ranges large enough that the products below could overflow.
  const bool WouldOverflow = CaseRange >= UINT64_MAX / 100;
  if (WouldOverflow)
    return false;

  // Dense iff NumCases / CaseRange >= MinDensity percent, computed without
  // division: NumCases * 100 >= CaseRange * MinDensity.
  const uint64_t ScaledCases = NumCases * 100;
  const uint64_t RequiredCoverage = CaseRange * MinDensity;
  return ScaledCases >= RequiredCoverage;
}
6467
6469 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6470 uint64_t Range = Diff + 1;
6471 if (Range < Diff)
6472 return false; // Overflow.
6473
6474 return isSwitchDense(Values.size(), Range);
6475}
6476
6477/// Determine whether a lookup table should be built for this switch, based on
6478/// the number of cases, size of the table, and the types of the results.
6479// TODO: We could support larger than legal types by limiting based on the
6480// number of loads required and/or table size. If the constants are small we
6481// could use smaller table entries and extend after the load.
6482static bool
6484 const TargetTransformInfo &TTI, const DataLayout &DL,
6485 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6486 if (SI->getNumCases() > TableSize)
6487 return false; // TableSize overflowed.
6488
6489 bool AllTablesFitInRegister = true;
6490 bool HasIllegalType = false;
6491 for (const auto &I : ResultTypes) {
6492 Type *Ty = I.second;
6493
6494 // Saturate this flag to true.
6495 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6496
6497 // Saturate this flag to false.
6498 AllTablesFitInRegister =
6499 AllTablesFitInRegister &&
6500 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
6501
6502 // If both flags saturate, we're done. NOTE: This *only* works with
6503 // saturating flags, and all flags have to saturate first due to the
6504 // non-deterministic behavior of iterating over a dense map.
6505 if (HasIllegalType && !AllTablesFitInRegister)
6506 break;
6507 }
6508
6509 // If each table would fit in a register, we should build it anyway.
6510 if (AllTablesFitInRegister)
6511 return true;
6512
6513 // Don't build a table that doesn't fit in-register if it has illegal types.
6514 if (HasIllegalType)
6515 return false;
6516
6517 return isSwitchDense(SI->getNumCases(), TableSize);
6518}
6519
6521 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6522 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6523 const DataLayout &DL, const TargetTransformInfo &TTI) {
6524 if (MinCaseVal.isNullValue())
6525 return true;
6526 if (MinCaseVal.isNegative() ||
6527 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6528 !HasDefaultResults)
6529 return false;
6530 return all_of(ResultTypes, [&](const auto &KV) {
6531 return SwitchLookupTable::WouldFitInRegister(
6532 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6533 KV.second /* ResultType */);
6534 });
6535}
6536
6537/// Try to reuse the switch table index compare. Following pattern:
6538/// \code
6539/// if (idx < tablesize)
6540/// r = table[idx]; // table does not contain default_value
6541/// else
6542/// r = default_value;
6543/// if (r != default_value)
6544/// ...
6545/// \endcode
6546/// Is optimized to:
6547/// \code
6548/// cond = idx < tablesize;
6549/// if (cond)
6550/// r = table[idx];
6551/// else
6552/// r = default_value;
6553/// if (cond)
6554/// ...
6555/// \endcode
6556/// Jump threading will then eliminate the second if(cond).
6558 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6559 Constant *DefaultValue,
6560 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6561 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6562 if (!CmpInst)
6563 return;
6564
6565 // We require that the compare is in the same block as the phi so that jump
6566 // threading can do its work afterwards.
6567 if (CmpInst->getParent() != PhiBlock)
6568 return;
6569
6570 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6571 if (!CmpOp1)
6572 return;
6573
6574 Value *RangeCmp = RangeCheckBranch->getCondition();
6575 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6576 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6577
6578 // Check if the compare with the default value is constant true or false.
6580 DefaultValue, CmpOp1, true);
6581 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6582 return;
6583
6584 // Check if the compare with the case values is distinct from the default
6585 // compare result.
6586 for (auto ValuePair : Values) {
6588 ValuePair.second, CmpOp1, true);
6589 if (!CaseConst || CaseConst == DefaultConst ||
6590 (CaseConst != TrueConst && CaseConst != FalseConst))
6591 return;
6592 }
6593
6594 // Check if the branch instruction dominates the phi node. It's a simple
6595 // dominance check, but sufficient for our needs.
6596 // Although this check is invariant in the calling loops, it's better to do it
6597 // at this late stage. Practically we do it at most once for a switch.
6598 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6599 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6600 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6601 return;
6602 }
6603
6604 if (DefaultConst == FalseConst) {
6605 // The compare yields the same result. We can replace it.
6606 CmpInst->replaceAllUsesWith(RangeCmp);
6607 ++NumTableCmpReuses;
6608 } else {
6609 // The compare yields the same result, just inverted. We can replace it.
6610 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6611 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6612 RangeCheckBranch->getIterator());
6613 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6614 ++NumTableCmpReuses;
6615 }
6616}
6617
6618/// If the switch is only used to initialize one or more phi nodes in a common
6619/// successor block with different constant values, replace the switch with
6620/// lookup tables.
6622 DomTreeUpdater *DTU, const DataLayout &DL,
6623 const TargetTransformInfo &TTI) {
6624 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6625
6626 BasicBlock *BB = SI->getParent();
6627 Function *Fn = BB->getParent();
6628 // Only build lookup table when we have a target that supports it or the
6629 // attribute is not set.
6631 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6632 return false;
6633
6634 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6635 // split off a dense part and build a lookup table for that.
6636
6637 // FIXME: This creates arrays of GEPs to constant strings, which means each
6638 // GEP needs a runtime relocation in PIC code. We should just build one big
6639 // string and lookup indices into that.
6640
6641 // Ignore switches with less than three cases. Lookup tables will not make
6642 // them faster, so we don't analyze them.
6643 if (SI->getNumCases() < 3)
6644 return false;
6645
6646 // Figure out the corresponding result for each case value and phi node in the
6647 // common destination, as well as the min and max case values.
6648 assert(!SI->cases().empty());
6649 SwitchInst::CaseIt CI = SI->case_begin();
6650 ConstantInt *MinCaseVal = CI->getCaseValue();
6651 ConstantInt *MaxCaseVal = CI->getCaseValue();
6652
6653 BasicBlock *CommonDest = nullptr;
6654
6655 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6657
6661
6662 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6663 ConstantInt *CaseVal = CI->getCaseValue();
6664 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6665 MinCaseVal = CaseVal;
6666 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6667 MaxCaseVal = CaseVal;
6668
6669 // Resulting value at phi nodes for this case value.
6671 ResultsTy Results;
6672 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6673 Results, DL, TTI))
6674 return false;
6675
6676 // Append the result from this case to the list for each phi.
6677 for (const auto &I : Results) {
6678 PHINode *PHI = I.first;
6679 Constant *Value = I.second;
6680 if (!ResultLists.count(PHI))
6681 PHIs.push_back(PHI);
6682 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6683 }
6684 }
6685
6686 // Keep track of the result types.
6687 for (PHINode *PHI : PHIs) {
6688 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6689 }
6690
6691 uint64_t NumResults = ResultLists[PHIs[0]].size();
6692
6693 // If the table has holes, we need a constant result for the default case
6694 // or a bitmask that fits in a register.
6695 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6696 bool HasDefaultResults =
6697 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6698 DefaultResultsList, DL, TTI);
6699
6700 for (const auto &I : DefaultResultsList) {
6701 PHINode *PHI = I.first;
6702 Constant *Result = I.second;
6703 DefaultResults[PHI] = Result;
6704 }
6705
6706 bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
6707 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6708 uint64_t TableSize;
6709 if (UseSwitchConditionAsTableIndex)
6710 TableSize = MaxCaseVal->getLimitedValue() + 1;
6711 else
6712 TableSize =
6713 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6714
6715 bool TableHasHoles = (NumResults < TableSize);
6716 bool NeedMask = (TableHasHoles && !HasDefaultResults);
6717 if (NeedMask) {
6718 // As an extra penalty for the validity test we require more cases.
6719 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6720 return false;
6721 if (!DL.fitsInLegalInteger(TableSize))
6722 return false;
6723 }
6724
6725 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6726 return false;
6727
6728 std::vector<DominatorTree::UpdateType> Updates;
6729
6730 // Compute the maximum table size representable by the integer type we are
6731 // switching upon.
6732 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6733 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6734 assert(MaxTableSize >= TableSize &&
6735 "It is impossible for a switch to have more entries than the max "
6736 "representable value of its input integer type's size.");
6737
6738 // If the default destination is unreachable, or if the lookup table covers
6739 // all values of the conditional variable, branch directly to the lookup table
6740 // BB. Otherwise, check that the condition is within the case range.
6741 bool DefaultIsReachable =
6742 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
6743
6744 // Create the BB that does the lookups.
6745 Module &Mod = *CommonDest->getParent()->getParent();
6746 BasicBlock *LookupBB = BasicBlock::Create(
6747 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6748
6749 // Compute the table index value.
6750 Builder.SetInsertPoint(SI);
6751 Value *TableIndex;
6752 ConstantInt *TableIndexOffset;
6753 if (UseSwitchConditionAsTableIndex) {
6754 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6755 TableIndex = SI->getCondition();
6756 } else {
6757 TableIndexOffset = MinCaseVal;
6758 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6759 // we can try to attach nsw.
6760 bool MayWrap = true;
6761 if (!DefaultIsReachable) {
6762 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6763 (void)Res;
6764 }
6765
6766 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6767 "switch.tableidx", /*HasNUW =*/false,
6768 /*HasNSW =*/!MayWrap);
6769 }
6770
6771 BranchInst *RangeCheckBranch = nullptr;
6772
6773 // Grow the table to cover all possible index values to avoid the range check.
6774 // It will use the default result to fill in the table hole later, so make
6775 // sure it exist.
6776 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6777 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6778 // Grow the table shouldn't have any size impact by checking
6779 // WouldFitInRegister.
6780 // TODO: Consider growing the table also when it doesn't fit in a register
6781 // if no optsize is specified.
6782 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6783 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6784 return SwitchLookupTable::WouldFitInRegister(
6785 DL, UpperBound, KV.second /* ResultType */);
6786 })) {
6787 // There may be some case index larger than the UpperBound (unreachable
6788 // case), so make sure the table size does not get smaller.
6789 TableSize = std::max(UpperBound, TableSize);
6790 // The default branch is unreachable after we enlarge the lookup table.
6791 // Adjust DefaultIsReachable to reuse code path.
6792 DefaultIsReachable = false;
6793 }
6794 }
6795
6796 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6797 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6798 Builder.CreateBr(LookupBB);
6799 if (DTU)
6800 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6801 // Note: We call removeProdecessor later since we need to be able to get the
6802 // PHI value for the default case in case we're using a bit mask.
6803 } else {
6804 Value *Cmp = Builder.CreateICmpULT(
6805 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6806 RangeCheckBranch =
6807 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6808 if (DTU)
6809 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6810 }
6811
6812 // Populate the BB that does the lookups.
6813 Builder.SetInsertPoint(LookupBB);
6814
6815 if (NeedMask) {
6816 // Before doing the lookup, we do the hole check. The LookupBB is therefore
6817 // re-purposed to do the hole check, and we create a new LookupBB.
6818 BasicBlock *MaskBB = LookupBB;
6819 MaskBB->setName("switch.hole_check");
6820 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6821 CommonDest->getParent(), CommonDest);
6822
6823 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6824 // unnecessary illegal types.
6825 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6826 APInt MaskInt(TableSizePowOf2, 0);
6827 APInt One(TableSizePowOf2, 1);
6828 // Build bitmask; fill in a 1 bit for every case.
6829 const ResultListTy &ResultList = ResultLists[PHIs[0]];
6830 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6831 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6832 .getLimitedValue();
6833 MaskInt |= One << Idx;
6834 }
6835 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6836
6837 // Get the TableIndex'th bit of the bitmask.
6838 // If this bit is 0 (meaning hole) jump to the default destination,
6839 // else continue with table lookup.
6840 IntegerType *MapTy = TableMask->getIntegerType();
6841 Value *MaskIndex =
6842 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6843 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6844 Value *LoBit = Builder.CreateTrunc(
6845 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6846 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6847 if (DTU) {
6848 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6849 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6850 }
6851 Builder.SetInsertPoint(LookupBB);
6852 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6853 }
6854
6855 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6856 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6857 // do not delete PHINodes here.
6858 SI->getDefaultDest()->removePredecessor(BB,
6859 /*KeepOneInputPHIs=*/true);
6860 if (DTU)
6861 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6862 }
6863
6864 for (PHINode *PHI : PHIs) {
6865 const ResultListTy &ResultList = ResultLists[PHI];
6866
6867 // If using a bitmask, use any value to fill the lookup table holes.
6868 Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
6869 StringRef FuncName = Fn->getName();
6870 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6871 DL, FuncName);
6872
6873 Value *Result = Table.BuildLookup(TableIndex, Builder);
6874
6875 // Do a small peephole optimization: re-use the switch table compare if
6876 // possible.
6877 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6878 BasicBlock *PhiBlock = PHI->getParent();
6879 // Search for compare instructions which use the phi.
6880 for (auto *User : PHI->users()) {
6881 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6882 }
6883 }
6884
6885 PHI->addIncoming(Result, LookupBB);
6886 }
6887
6888 Builder.CreateBr(CommonDest);
6889 if (DTU)
6890 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6891
6892 // Remove the switch.
6893 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6894 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6895 BasicBlock *Succ = SI->getSuccessor(i);
6896
6897 if (Succ == SI->getDefaultDest())
6898 continue;
6899 Succ->removePredecessor(BB);
6900 if (DTU && RemovedSuccessors.insert(Succ).second)
6901 Updates.push_back({DominatorTree::Delete, BB, Succ});
6902 }
6903 SI->eraseFromParent();
6904
6905 if (DTU)
6906 DTU->applyUpdates(Updates);
6907
6908 ++NumLookupTables;
6909 if (NeedMask)
6910 ++NumLookupTablesHoles;
6911 return true;
6912}
6913
6914/// Try to transform a switch that has "holes" in it to a contiguous sequence
6915/// of cases.
6916///
6917/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6918/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6919///
6920/// This converts a sparse switch into a dense switch which allows better
6921/// lowering and could also allow transforming into a lookup table.
6922static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6923 const DataLayout &DL,
6924 const TargetTransformInfo &TTI) {
6925 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6926 if (CondTy->getIntegerBitWidth() > 64 ||
6927 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6928 return false;
6929 // Only bother with this optimization if there are more than 3 switch cases;
6930 // SDAG will only bother creating jump tables for 4 or more cases.
6931 if (SI->getNumCases() < 4)
6932 return false;
6933
6934 // This transform is agnostic to the signedness of the input or case values. We
6935 // can treat the case values as signed or unsigned. We can optimize more common
6936 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6937 // as signed.
6939 for (const auto &C : SI->cases())
6940 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6941 llvm::sort(Values);
6942
6943 // If the switch is already dense, there's nothing useful to do here.
6944 if (isSwitchDense(Values))
6945 return false;
6946
6947 // First, transform the values such that they start at zero and ascend.
6948 int64_t Base = Values[0];
6949 for (auto &V : Values)
6950 V -= (uint64_t)(Base);
6951
6952 // Now we have signed numbers that have been shifted so that, given enough
6953 // precision, there are no negative values. Since the rest of the transform
6954 // is bitwise only, we switch now to an unsigned representation.
6955
6956 // This transform can be done speculatively because it is so cheap - it
6957 // results in a single rotate operation being inserted.
6958
6959 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6960 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6961 // less than 64.
6962 unsigned Shift = 64;
6963 for (auto &V : Values)
6964 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6965 assert(Shift < 64);
6966 if (Shift > 0)
6967 for (auto &V : Values)
6968 V = (int64_t)((uint64_t)V >> Shift);
6969
6970 if (!isSwitchDense(Values))
6971 // Transform didn't create a dense switch.
6972 return false;
6973
6974 // The obvious transform is to shift the switch condition right and emit a
6975 // check that the condition actually cleanly divided by GCD, i.e.
6976 // C & (1 << Shift - 1) == 0
6977 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6978 //
6979 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6980 // shift and puts the shifted-off bits in the uppermost bits. If any of these
6981 // are nonzero then the switch condition will be very large and will hit the
6982 // default case.
6983
6984 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6985 Builder.SetInsertPoint(SI);
6986 Value *Sub =
6987 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6988 Value *Rot = Builder.CreateIntrinsic(
6989 Ty, Intrinsic::fshl,
6990 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
6991 SI->replaceUsesOfWith(SI->getCondition(), Rot);
6992
6993 for (auto Case : SI->cases()) {
6994 auto *Orig = Case.getCaseValue();
6995 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
6996 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
6997 }
6998 return true;
6999}
7000
7001/// Tries to transform switch of powers of two to reduce switch range.
7002/// For example, switch like:
7003/// switch (C) { case 1: case 2: case 64: case 128: }
7004/// will be transformed to:
7005/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7006///
7007/// This transformation allows better lowering and could allow transforming into
7008/// a lookup table.
7010 const DataLayout &DL,
7011 const TargetTransformInfo &TTI) {
7012 Value *Condition = SI->getCondition();
7013 LLVMContext &Context = SI->getContext();
7014 auto *CondTy = cast<IntegerType>(Condition->getType());
7015
7016 if (CondTy->getIntegerBitWidth() > 64 ||
7017 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7018 return false;
7019
7020 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7021 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7022 {Condition, ConstantInt::getTrue(Context)}),
7024
7025 if (CttzIntrinsicCost > TTI::TCC_Basic)
7026 // Inserting intrinsic is too expensive.
7027 return false;
7028
7029 // Only bother with this optimization if there are more than 3 switch cases.
7030 // SDAG will only bother creating jump tables for 4 or more cases.
7031 if (SI->getNumCases() < 4)
7032 return false;
7033
7034 // We perform this optimization only for switches with
7035 // unreachable default case.
7036 // This assumtion will save us from checking if `Condition` is a power of two.
7037 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7038 return false;
7039
7040 // Check that switch cases are powers of two.
7042 for (const auto &Case : SI->cases()) {
7043 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7044 if (llvm::has_single_bit(CaseValue))
7045 Values.push_back(CaseValue);
7046 else
7047 return false;
7048 }
7049
7050 // isSwichDense requires case values to be sorted.
7051 llvm::sort(Values);
7052 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7053 llvm::countr_zero(Values.front()) + 1))
7054 // Transform is unable to generate dense switch.
7055 return false;
7056
7057 Builder.SetInsertPoint(SI);
7058
7059 // Replace each case with its trailing zeros number.
7060 for (auto &Case : SI->cases()) {
7061 auto *OrigValue = Case.getCaseValue();
7062 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7063 OrigValue->getValue().countr_zero()));
7064 }
7065
7066 // Replace condition with its trailing zeros number.
7067 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7068 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7069
7070 SI->setCondition(ConditionTrailingZeros);
7071
7072 return true;
7073}
7074
/// Simplify a switch terminator by attempting, in order: folding into a
/// value-equality predecessor, conversion to icmp+branch, dead-case
/// elimination, select/PHI-forwarding conversions, lookup-table generation,
/// and range-reduction rewrites. Returns true (via requestResimplify) when
/// any transform fires so the block is revisited.
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (SimplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (FoldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (ReduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Finally, try hoisting code that is common to all switch successors.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
    return requestResimplify();

  return false;
}
7134
7135bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7136 BasicBlock *BB = IBI->getParent();
7137 bool Changed = false;
7138
7139 // Eliminate redundant destinations.
7142 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7143 BasicBlock *Dest = IBI->getDestination(i);
7144 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7145 if (!Dest->hasAddressTaken())
7146 RemovedSuccs.insert(Dest);
7147 Dest->removePredecessor(BB);
7148 IBI->removeDestination(i);
7149 --i;
7150 --e;
7151 Changed = true;
7152 }
7153 }
7154
7155 if (DTU) {
7156 std::vector<DominatorTree::UpdateType> Updates;
7157 Updates.reserve(RemovedSuccs.size());
7158 for (auto *RemovedSucc : RemovedSuccs)
7159 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7160 DTU->applyUpdates(Updates);
7161 }
7162
7163 if (IBI->getNumDestinations() == 0) {
7164 // If the indirectbr has no successors, change it to unreachable.
7165 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7167 return true;
7168 }
7169
7170 if (IBI->getNumDestinations() == 1) {
7171 // If the indirectbr has one successor, change it to a direct branch.
7174 return true;
7175 }
7176
7177 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7178 if (SimplifyIndirectBrOnSelect(IBI, SI))
7179 return requestResimplify();
7180 }
7181 return Changed;
7182}
7183
7184/// Given an block with only a single landing pad and a unconditional branch
7185/// try to find another basic block which this one can be merged with. This
7186/// handles cases where we have multiple invokes with unique landing pads, but
7187/// a shared handler.
7188///
7189/// We specifically choose to not worry about merging non-empty blocks
7190/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7191/// practice, the optimizer produces empty landing pad blocks quite frequently
7192/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7193/// sinking in this file)
7194///
7195/// This is primarily a code size optimization. We need to avoid performing
7196/// any transform which might inhibit optimization (such as our ability to
7197/// specialize a particular handler via tail commoning). We do this by not
7198/// merging any blocks which require us to introduce a phi. Since the same
7199/// values are flowing through both blocks, we don't lose any ability to
7200/// specialize. If anything, we make such specialization more likely.
7201///
7202/// TODO - This transformation could remove entries from a phi in the target
7203/// block when the inputs in the phi are the same for the two blocks being
7204/// merged. In some cases, this could result in removal of the PHI entirely.
7206 BasicBlock *BB, DomTreeUpdater *DTU) {
7207 auto Succ = BB->getUniqueSuccessor();
7208 assert(Succ);
7209 // If there's a phi in the successor block, we'd likely have to introduce
7210 // a phi into the merged landing pad block.
7211 if (isa<PHINode>(*Succ->begin()))
7212 return false;
7213
7214 for (BasicBlock *OtherPred : predecessors(Succ)) {
7215 if (BB == OtherPred)
7216 continue;
7217 BasicBlock::iterator I = OtherPred->begin();
7218 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7219 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7220 continue;
7221 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7222 ;
7223 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7224 if (!BI2 || !BI2->isIdenticalTo(BI))
7225 continue;
7226
7227 std::vector<DominatorTree::UpdateType> Updates;
7228
7229 // We've found an identical block. Update our predecessors to take that
7230 // path instead and make ourselves dead.
7232 for (BasicBlock *Pred : UniquePreds) {
7233 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7234 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7235 "unexpected successor");
7236 II->setUnwindDest(OtherPred);
7237 if (DTU) {
7238 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7239 Updates.push_back({DominatorTree::Delete, Pred, BB});
7240 }
7241 }
7242
7243 // The debug info in OtherPred doesn't cover the merged control flow that
7244 // used to go through BB. We need to delete it or update it.
7245 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7246 if (isa<DbgInfoIntrinsic>(Inst))
7247 Inst.eraseFromParent();
7248
7250 for (BasicBlock *Succ : UniqueSuccs) {
7251 Succ->removePredecessor(BB);
7252 if (DTU)
7253 Updates.push_back({DominatorTree::Delete, BB, Succ});
7254 }
7255
7256 IRBuilder<> Builder(BI);
7257 Builder.CreateUnreachable();
7258 BI->eraseFromParent();
7259 if (DTU)
7260 DTU->applyUpdates(Updates);
7261 return true;
7262 }
7263 return false;
7264}
7265
7266bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7267 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7268 : simplifyCondBranch(Branch, Builder);
7269}
7270
7271bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7272 IRBuilder<> &Builder) {
7273 BasicBlock *BB = BI->getParent();
7274 BasicBlock *Succ = BI->getSuccessor(0);
7275
7276 // If the Terminator is the only non-phi instruction, simplify the block.
7277 // If LoopHeader is provided, check if the block or its successor is a loop
7278 // header. (This is for early invocations before loop simplify and
7279 // vectorization to keep canonical loop forms for nested loops. These blocks
7280 // can be eliminated when the pass is invoked later in the back-end.)
7281 // Note that if BB has only one predecessor then we do not introduce new
7282 // backedge, so we can eliminate BB.
7283 bool NeedCanonicalLoop =
7284 Options.NeedCanonicalLoop &&
7285 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7286 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7288 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7289 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7290 return true;
7291
7292 // If the only instruction in the block is a seteq/setne comparison against a
7293 // constant, try to simplify the block.
7294 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7295 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7296 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7297 ;
7298 if (I->isTerminator() &&
7299 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7300 return true;
7301 }
7302
7303 // See if we can merge an empty landing pad block with another which is
7304 // equivalent.
7305 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7306 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7307 ;
7308 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7309 return true;
7310 }
7311
7312 // If this basic block is ONLY a compare and a branch, and if a predecessor
7313 // branches to us and our successor, fold the comparison into the
7314 // predecessor and use logical operations to update the incoming value
7315 // for PHI nodes in common successor.
7316 if (Options.SpeculateBlocks &&
7317 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7318 Options.BonusInstThreshold))
7319 return requestResimplify();
7320 return false;
7321}
7322
7324 BasicBlock *PredPred = nullptr;
7325 for (auto *P : predecessors(BB)) {
7326 BasicBlock *PPred = P->getSinglePredecessor();
7327 if (!PPred || (PredPred && PredPred != PPred))
7328 return nullptr;
7329 PredPred = PPred;
7330 }
7331 return PredPred;
7332}
7333
7334bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7335 assert(
7336 !isa<ConstantInt>(BI->getCondition()) &&
7337 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7338 "Tautological conditional branch should have been eliminated already.");
7339
7340 BasicBlock *BB = BI->getParent();
7341 if (!Options.SimplifyCondBranch ||
7342 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7343 return false;
7344
7345 // Conditional branch
7346 if (isValueEqualityComparison(BI)) {
7347 // If we only have one predecessor, and if it is a branch on this value,
7348 // see if that predecessor totally determines the outcome of this
7349 // switch.
7350 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7351 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7352 return requestResimplify();
7353
7354 // This block must be empty, except for the setcond inst, if it exists.
7355 // Ignore dbg and pseudo intrinsics.
7356 auto I = BB->instructionsWithoutDebug(true).begin();
7357 if (&*I == BI) {
7358 if (FoldValueComparisonIntoPredecessors(BI, Builder))
7359 return requestResimplify();
7360 } else if (&*I == cast<Instruction>(BI->getCondition())) {
7361 ++I;
7362 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7363 return requestResimplify();
7364 }
7365 }
7366
7367 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7368 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7369 return true;
7370
7371 // If this basic block has dominating predecessor blocks and the dominating
7372 // blocks' conditions imply BI's condition, we know the direction of BI.
7373 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7374 if (Imp) {
7375 // Turn this into a branch on constant.
7376 auto *OldCond = BI->getCondition();
7377 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7378 : ConstantInt::getFalse(BB->getContext());
7379 BI->setCondition(TorF);
7381 return requestResimplify();
7382 }
7383
7384 // If this basic block is ONLY a compare and a branch, and if a predecessor
7385 // branches to us and one of our successors, fold the comparison into the
7386 // predecessor and use logical operations to pick the right destination.
7387 if (Options.SpeculateBlocks &&
7388 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7389 Options.BonusInstThreshold))
7390 return requestResimplify();
7391
7392 // We have a conditional branch to two blocks that are only reachable
7393 // from BI. We know that the condbr dominates the two blocks, so see if
7394 // there is any identical code in the "then" and "else" blocks. If so, we
7395 // can hoist it up to the branching block.
7396 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7397 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7398 if (HoistCommon && hoistCommonCodeFromSuccessors(
7399 BI->getParent(), !Options.HoistCommonInsts))
7400 return requestResimplify();
7401 } else {
7402 // If Successor #1 has multiple preds, we may be able to conditionally
7403 // execute Successor #0 if it branches to Successor #1.
7404 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7405 if (Succ0TI->getNumSuccessors() == 1 &&
7406 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7407 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7408 return requestResimplify();
7409 }
7410 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7411 // If Successor #0 has multiple preds, we may be able to conditionally
7412 // execute Successor #1 if it branches to Successor #0.
7413 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7414 if (Succ1TI->getNumSuccessors() == 1 &&
7415 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7416 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7417 return requestResimplify();
7418 }
7419
7420 // If this is a branch on something for which we know the constant value in
7421 // predecessors (e.g. a phi node in the current block), thread control
7422 // through this block.
7424 return requestResimplify();
7425
7426 // Scan predecessor blocks for conditional branches.
7427 for (BasicBlock *Pred : predecessors(BB))
7428 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7429 if (PBI != BI && PBI->isConditional())
7430 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7431 return requestResimplify();
7432
7433 // Look for diamond patterns.
7434 if (MergeCondStores)
7436 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7437 if (PBI != BI && PBI->isConditional())
7438 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7439 return requestResimplify();
7440
7441 return false;
7442}
7443
7444/// Check if passing a value to an instruction will cause undefined behavior.
7445static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7446 Constant *C = dyn_cast<Constant>(V);
7447 if (!C)
7448 return false;
7449
7450 if (I->use_empty())
7451 return false;
7452
7453 if (C->isNullValue() || isa<UndefValue>(C)) {
7454 // Only look at the first use, avoid hurting compile time with long uselists
7455 auto *Use = cast<Instruction>(*I->user_begin());
7456 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7457 // before I in the block. The latter two can be the case if Use is a PHI
7458 // node.
7459 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7460 return false;
7461
7462 // Now make sure that there are no instructions in between that can alter
7463 // control flow (eg. calls)
7464 auto InstrRange =
7465 make_range(std::next(I->getIterator()), Use->getIterator());
7466 if (any_of(InstrRange, [](Instruction &I) {
7468 }))
7469 return false;
7470
7471 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7472 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7473 if (GEP->getPointerOperand() == I) {
7474 // The current base address is null, there are four cases to consider:
7475 // getelementptr (TY, null, 0) -> null
7476 // getelementptr (TY, null, not zero) -> may be modified
7477 // getelementptr inbounds (TY, null, 0) -> null
7478 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7479 // undefined?
7480 if (!GEP->hasAllZeroIndices() &&
7481 (!GEP->isInBounds() ||
7482 NullPointerIsDefined(GEP->getFunction(),
7483 GEP->getPointerAddressSpace())))
7484 PtrValueMayBeModified = true;
7485 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7486 }
7487
7488 // Look through return.
7489 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7490 bool HasNoUndefAttr =
7491 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7492 // Return undefined to a noundef return value is undefined.
7493 if (isa<UndefValue>(C) && HasNoUndefAttr)
7494 return true;
7495 // Return null to a nonnull+noundef return value is undefined.
7496 if (C->isNullValue() && HasNoUndefAttr &&
7497 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7498 return !PtrValueMayBeModified;
7499 }
7500 }
7501
7502 // Look through bitcasts.
7503 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7504 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7505
7506 // Load from null is undefined.
7507 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7508 if (!LI->isVolatile())
7509 return !NullPointerIsDefined(LI->getFunction(),
7510 LI->getPointerAddressSpace());
7511
7512 // Store to null is undefined.
7513 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7514 if (!SI->isVolatile())
7515 return (!NullPointerIsDefined(SI->getFunction(),
7516 SI->getPointerAddressSpace())) &&
7517 SI->getPointerOperand() == I;
7518
7519 if (auto *CB = dyn_cast<CallBase>(Use)) {
7520 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7521 return false;
7522 // A call to null is undefined.
7523 if (CB->getCalledOperand() == I)
7524 return true;
7525
7526 if (C->isNullValue()) {
7527 for (const llvm::Use &Arg : CB->args())
7528 if (Arg == I) {
7529 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7530 if (CB->isPassingUndefUB(ArgIdx) &&
7531 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7532 // Passing null to a nonnnull+noundef argument is undefined.
7533 return !PtrValueMayBeModified;
7534 }
7535 }
7536 } else if (isa<UndefValue>(C)) {
7537 // Passing undef to a noundef argument is undefined.
7538 for (const llvm::Use &Arg : CB->args())
7539 if (Arg == I) {
7540 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7541 if (CB->isPassingUndefUB(ArgIdx)) {
7542 // Passing undef to a noundef argument is undefined.
7543 return true;
7544 }
7545 }
7546 }
7547 }
7548 }
7549 return false;
7550}
7551
7552/// If BB has an incoming value that will always trigger undefined behavior
7553/// (eg. null pointer dereference), remove the branch leading here.
7555 DomTreeUpdater *DTU,
7556 AssumptionCache *AC) {
7557 for (PHINode &PHI : BB->phis())
7558 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7559 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7560 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7561 Instruction *T = Predecessor->getTerminator();
7562 IRBuilder<> Builder(T);
7563 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7564 BB->removePredecessor(Predecessor);
7565 // Turn unconditional branches into unreachables and remove the dead
7566 // destination from conditional branches.
7567 if (BI->isUnconditional())
7568 Builder.CreateUnreachable();
7569 else {
7570 // Preserve guarding condition in assume, because it might not be
7571 // inferrable from any dominating condition.
7572 Value *Cond = BI->getCondition();
7573 CallInst *Assumption;
7574 if (BI->getSuccessor(0) == BB)
7575 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7576 else
7577 Assumption = Builder.CreateAssumption(Cond);
7578 if (AC)
7579 AC->registerAssumption(cast<AssumeInst>(Assumption));
7580 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7581 : BI->getSuccessor(0));
7582 }
7583 BI->eraseFromParent();
7584 if (DTU)
7585 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7586 return true;
7587 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7588 // Redirect all branches leading to UB into
7589 // a newly created unreachable block.
7590 BasicBlock *Unreachable = BasicBlock::Create(
7591 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7592 Builder.SetInsertPoint(Unreachable);
7593 // The new block contains only one instruction: Unreachable
7594 Builder.CreateUnreachable();
7595 for (const auto &Case : SI->cases())
7596 if (Case.getCaseSuccessor() == BB) {
7597 BB->removePredecessor(Predecessor);
7598 Case.setSuccessor(Unreachable);
7599 }
7600 if (SI->getDefaultDest() == BB) {
7601 BB->removePredecessor(Predecessor);
7602 SI->setDefaultDest(Unreachable);
7603 }
7604
7605 if (DTU)
7606 DTU->applyUpdates(
7607 { { DominatorTree::Insert, Predecessor, Unreachable },
7608 { DominatorTree::Delete, Predecessor, BB } });
7609 return true;
7610 }
7611 }
7612
7613 return false;
7614}
7615
7616bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7617 bool Changed = false;
7618
7619 assert(BB && BB->getParent() && "Block not embedded in function!");
7620 assert(BB->getTerminator() && "Degenerate basic block encountered!");
7621
7622 // Remove basic blocks that have no predecessors (except the entry block)...
7623 // or that just have themself as a predecessor. These are unreachable.
7624 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7625 BB->getSinglePredecessor() == BB) {
7626 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7627 DeleteDeadBlock(BB, DTU);
7628 return true;
7629 }
7630
7631 // Check to see if we can constant propagate this terminator instruction
7632 // away...
7633 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7634 /*TLI=*/nullptr, DTU);
7635
7636 // Check for and eliminate duplicate PHI nodes in this block.
7637 Changed |= EliminateDuplicatePHINodes(BB);
7638
7639 // Check for and remove branches that will always cause undefined behavior.
7641 return requestResimplify();
7642
7643 // Merge basic blocks into their predecessor if there is only one distinct
7644 // pred, and if there is only one distinct successor of the predecessor, and
7645 // if there are no PHI nodes.
7646 if (MergeBlockIntoPredecessor(BB, DTU))
7647 return true;
7648
7649 if (SinkCommon && Options.SinkCommonInsts)
7650 if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7651 MergeCompatibleInvokes(BB, DTU)) {
7652 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7653 // so we may now how duplicate PHI's.
7654 // Let's rerun EliminateDuplicatePHINodes() first,
7655 // before FoldTwoEntryPHINode() potentially converts them into select's,
7656 // after which we'd need a whole EarlyCSE pass run to cleanup them.
7657 return true;
7658 }
7659
7660 IRBuilder<> Builder(BB);
7661
7662 if (Options.SpeculateBlocks &&
7663 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7664 // If there is a trivial two-entry PHI node in this basic block, and we can
7665 // eliminate it, do so now.
7666 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7667 if (PN->getNumIncomingValues() == 2)
7668 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7669 return true;
7670 }
7671
7673 Builder.SetInsertPoint(Terminator);
7674 switch (Terminator->getOpcode()) {
7675 case Instruction::Br:
7676 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7677 break;
7678 case Instruction::Resume:
7679 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7680 break;
7681 case Instruction::CleanupRet:
7682 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7683 break;
7684 case Instruction::Switch:
7685 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7686 break;
7687 case Instruction::Unreachable:
7688 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7689 break;
7690 case Instruction::IndirectBr:
7691 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7692 break;
7693 }
7694
7695 return Changed;
7696}
7697
7698bool SimplifyCFGOpt::run(BasicBlock *BB) {
7699 bool Changed = false;
7700
7701 // Repeated simplify BB as long as resimplification is requested.
7702 do {
7703 Resimplify = false;
7704
7705 // Perform one round of simplifcation. Resimplify flag will be set if
7706 // another iteration is requested.
7707 Changed |= simplifyOnce(BB);
7708 } while (Resimplify);
7709
7710 return Changed;
7711}
7712
7715 ArrayRef<WeakVH> LoopHeaders) {
7716 return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7717 Options)
7718 .run(BB);
7719}
#define Fail
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static Constant * LookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static void GetBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static void EliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static std::optional< bool > FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static PHINode * FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool IncomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool ForwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static void FitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static unsigned skippedInstrFlags(Instruction *I)
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static bool ValuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< Instruction *, SmallVector< Value *, 4 > > &PHIOperands)
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static void MergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool ShouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool CasesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch try to find another basic bl...
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:335
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:640
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
const Instruction & front() const
Definition: BasicBlock.h:453
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:474
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:490
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:324
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:712
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:672
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:478
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:613
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:509
This class represents a no-op cast from one type to another.
The address of a basic block.
Definition: Constants.h:889
BasicBlock * getBasicBlock() const
Definition: Constants.h:918
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1809
bool cannotMerge() const
Determine if the call cannot be tail merged.
Definition: InstrTypes.h:2280
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2402
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isNegative() const
Definition: Constants.h:200
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:255
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:184
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
bool hasPostDomTree() const
Returns true if it holds a PostDominatorTree.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
iterator begin()
Definition: Function.h:799
size_t size() const
Definition: Function.h:804
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2257
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1263
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1143
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:551
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1803
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1114
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:84
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:82
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:86
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1707
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:935
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
BasicBlock * getUnwindDest() const
void setNormalDest(BasicBlock *B)
void setUnwindDest(BasicBlock *B)
BasicBlock * getNormalDest() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:301
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Align getAlign() const
Definition: Instructions.h:369
bool isSimple() const
Definition: Instructions.h:406
Value * getValueOperand()
Definition: Instructions.h:414
bool isUnordered() const
Definition: Instructions.h:408
Value * getPointerOperand()
Definition: Instructions.h:417
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
bool user_empty() const
Definition: Value.h:385
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:466
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:809
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1898
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:539
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:129
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void RemapDbgVariableRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:285
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:693
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1119
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2043
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapDbgVariableRecord(Module *M, DbgVariableRecord *V, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:273
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1422
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3159
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:264
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3327
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3572
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4050
auto max_element(R &&Range)
Definition: STLExtras.h:1986
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on dominating conditions.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than the source type.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1486
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes that apply to both.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254