LLVM 19.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/NoFolder.h"
58#include "llvm/IR/Operator.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/User.h"
64#include "llvm/IR/Value.h"
65#include "llvm/IR/ValueHandle.h"
69#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <climits>
80#include <cstddef>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <set>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "simplifycfg"
94
96 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
97
98 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
99 "into preserving DomTree,"));
100
101// Chosen as 2 so as to be cheap, but still to have enough power to fold
102// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
103// To catch this, we need to fold a compare and a select, hence '2' being the
104// minimum reasonable default.
106 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
107 cl::desc(
108 "Control the amount of phi node folding to perform (default = 2)"));
109
111 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
112 cl::desc("Control the maximal total instruction cost that we are willing "
113 "to speculatively execute to fold a 2-entry PHI node into a "
114 "select (default = 4)"));
115
116static cl::opt<bool>
117 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
118 cl::desc("Hoist common instructions up to the parent block"));
119
121 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
122 cl::init(20),
123 cl::desc("Allow reordering across at most this many "
124 "instructions when hoisting"));
125
126static cl::opt<bool>
127 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
128 cl::desc("Sink common instructions down to the end block"));
129
131 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
132 cl::desc("Hoist conditional stores if an unconditional store precedes"));
133
135 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
136 cl::desc("Hoist conditional stores even if an unconditional store does not "
137 "precede - hoist multiple conditional stores into a single "
138 "predicated store"));
139
141 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
142 cl::desc("When merging conditional stores, do so even if the resultant "
143 "basic blocks are unlikely to be if-converted as a result"));
144
146 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
147 cl::desc("Allow exactly one expensive instruction to be speculatively "
148 "executed"));
149
151 "max-speculation-depth", cl::Hidden, cl::init(10),
152 cl::desc("Limit maximum recursion depth when calculating costs of "
153 "speculatively executed instructions"));
154
155static cl::opt<int>
156 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
157 cl::init(10),
158 cl::desc("Max size of a block which is still considered "
159 "small enough to thread through"));
160
161// Two is chosen to allow one negation and a logical combine.
163 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
164 cl::init(2),
165 cl::desc("Maximum cost of combining conditions when "
166 "folding branches"));
167
169 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
170 cl::init(2),
171 cl::desc("Multiplier to apply to threshold when determining whether or not "
172 "to fold branch to common destination when vector operations are "
173 "present"));
174
176 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
177 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
178
180 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
181 cl::desc("Limit cases to analyze when converting a switch to select"));
182
183STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
184STATISTIC(NumLinearMaps,
185 "Number of switch instructions turned into linear mapping");
186STATISTIC(NumLookupTables,
187 "Number of switch instructions turned into lookup tables");
189 NumLookupTablesHoles,
190 "Number of switch instructions turned into lookup tables (holes checked)");
191STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
192STATISTIC(NumFoldValueComparisonIntoPredecessors,
193 "Number of value comparisons folded into predecessor basic blocks");
194STATISTIC(NumFoldBranchToCommonDest,
195 "Number of branches folded into predecessor basic block");
197 NumHoistCommonCode,
198 "Number of common instruction 'blocks' hoisted up to the begin block");
199STATISTIC(NumHoistCommonInstrs,
200 "Number of common instructions hoisted up to the begin block");
201STATISTIC(NumSinkCommonCode,
202 "Number of common instruction 'blocks' sunk down to the end block");
203STATISTIC(NumSinkCommonInstrs,
204 "Number of common instructions sunk down to the end block");
205STATISTIC(NumSpeculations, "Number of speculative executed instructions");
206STATISTIC(NumInvokes,
207 "Number of invokes with empty resume blocks simplified into calls");
208STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
209STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
210
211namespace {
212
213// The first field contains the value that the switch produces when a certain
214// case group is selected, and the second field is a vector containing the
215// cases composing the case group.
216using SwitchCaseResultVectorTy =
218
219// The first field contains the phi node that generates a result of the switch
220// and the second field contains the value generated for a certain case in the
221// switch for that PHI.
222using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
223
224/// ValueEqualityComparisonCase - Represents a case of a switch.
225struct ValueEqualityComparisonCase {
227 BasicBlock *Dest;
228
229 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
230 : Value(Value), Dest(Dest) {}
231
232 bool operator<(ValueEqualityComparisonCase RHS) const {
233 // Comparing pointers is ok as we only rely on the order for uniquing.
234 return Value < RHS.Value;
235 }
236
237 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
238};
239
240class SimplifyCFGOpt {
242 DomTreeUpdater *DTU;
243 const DataLayout &DL;
244 ArrayRef<WeakVH> LoopHeaders;
246 bool Resimplify;
247
248 Value *isValueEqualityComparison(Instruction *TI);
249 BasicBlock *GetValueEqualityComparisonCases(
250 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
251 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
252 BasicBlock *Pred,
253 IRBuilder<> &Builder);
254 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
255 Instruction *PTI,
256 IRBuilder<> &Builder);
257 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
258 IRBuilder<> &Builder);
259
260 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
261 bool simplifySingleResume(ResumeInst *RI);
262 bool simplifyCommonResume(ResumeInst *RI);
263 bool simplifyCleanupReturn(CleanupReturnInst *RI);
264 bool simplifyUnreachable(UnreachableInst *UI);
265 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
266 bool simplifyIndirectBr(IndirectBrInst *IBI);
267 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
268 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
269 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
270
271 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
272 IRBuilder<> &Builder);
273
274 bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
275 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
276 Instruction *TI, Instruction *I1,
277 SmallVectorImpl<Instruction *> &OtherSuccTIs);
278 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
279 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
280 BasicBlock *TrueBB, BasicBlock *FalseBB,
281 uint32_t TrueWeight, uint32_t FalseWeight);
282 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
283 const DataLayout &DL);
284 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
285 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
286 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
287
288public:
289 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
290 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
291 const SimplifyCFGOptions &Opts)
292 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
293 assert((!DTU || !DTU->hasPostDomTree()) &&
294 "SimplifyCFG is not yet capable of maintaining validity of a "
295 "PostDomTree, so don't ask for it.");
296 }
297
298 bool simplifyOnce(BasicBlock *BB);
299 bool run(BasicBlock *BB);
300
301 // Helper to set Resimplify and return change indication.
302 bool requestResimplify() {
303 Resimplify = true;
304 return true;
305 }
306};
307
308} // end anonymous namespace
309
310/// Return true if all the PHI nodes in the basic block \p BB
311/// receive compatible (identical) incoming values when coming from
312/// all of the predecessor blocks that are specified in \p IncomingBlocks.
313///
314/// Note that if the values aren't exactly identical, but \p EquivalenceSet
315/// is provided, and *both* of the values are present in the set,
316/// then they are considered equal.
318 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
319 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
320 assert(IncomingBlocks.size() == 2 &&
321 "Only for a pair of incoming blocks at the time!");
322
323 // FIXME: it is okay if one of the incoming values is an `undef` value,
324 // iff the other incoming value is guaranteed to be a non-poison value.
325 // FIXME: it is okay if one of the incoming values is a `poison` value.
326 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
327 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
328 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
329 if (IV0 == IV1)
330 return true;
331 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
332 EquivalenceSet->contains(IV1))
333 return true;
334 return false;
335 });
336}
337
338/// Return true if it is safe to merge these two
339/// terminator instructions together.
340static bool
342 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
343 if (SI1 == SI2)
344 return false; // Can't merge with self!
345
346 // It is not safe to merge these two switch instructions if they have a common
347 // successor, and if that successor has a PHI node, and if *that* PHI node has
348 // conflicting incoming values from the two switch blocks.
349 BasicBlock *SI1BB = SI1->getParent();
350 BasicBlock *SI2BB = SI2->getParent();
351
352 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
353 bool Fail = false;
354 for (BasicBlock *Succ : successors(SI2BB)) {
355 if (!SI1Succs.count(Succ))
356 continue;
357 if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
358 continue;
359 Fail = true;
360 if (FailBlocks)
361 FailBlocks->insert(Succ);
362 else
363 break;
364 }
365
366 return !Fail;
367}
368
369/// Update PHI nodes in Succ to indicate that there will now be entries in it
370/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
371/// will be the same as those coming in from ExistPred, an existing predecessor
372/// of Succ.
373static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
374 BasicBlock *ExistPred,
375 MemorySSAUpdater *MSSAU = nullptr) {
376 for (PHINode &PN : Succ->phis())
377 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
378 if (MSSAU)
379 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
380 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
381}
382
383/// Compute an abstract "cost" of speculating the given instruction,
384/// which is assumed to be safe to speculate. TCC_Free means cheap,
385/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
386/// expensive.
388 const TargetTransformInfo &TTI) {
389 assert((!isa<Instruction>(I) ||
390 isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
391 "Instruction is not safe to speculatively execute!");
393}
394
395/// If we have a merge point of an "if condition" as accepted above,
396/// return true if the specified value dominates the block. We
397/// don't handle the true generality of domination here, just a special case
398/// which works well enough for us.
399///
400/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
401/// see if V (which must be an instruction) and its recursive operands
402/// that do not dominate BB have a combined cost lower than Budget and
403/// are non-trapping. If both are true, the instruction is inserted into the
404/// set and true is returned.
405///
406/// The cost for most non-trapping instructions is defined as 1 except for
407/// Select whose cost is 2.
408///
409/// After this function returns, Cost is increased by the cost of
410/// V plus its non-dominating operands. If that cost is greater than
411/// Budget, false is returned and Cost is undefined.
413 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
415 InstructionCost Budget,
417 unsigned Depth = 0) {
418 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
419 // so limit the recursion depth.
420 // TODO: While this recursion limit does prevent pathological behavior, it
421 // would be better to track visited instructions to avoid cycles.
423 return false;
424
425 Instruction *I = dyn_cast<Instruction>(V);
426 if (!I) {
427 // Non-instructions dominate all instructions and can be executed
428 // unconditionally.
429 return true;
430 }
431 BasicBlock *PBB = I->getParent();
432
433 // We don't want to allow weird loops that might have the "if condition" in
434 // the bottom of this block.
435 if (PBB == BB)
436 return false;
437
438 // If this instruction is defined in a block that contains an unconditional
439 // branch to BB, then it must be in the 'conditional' part of the "if
440 // statement". If not, it definitely dominates the region.
441 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
442 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
443 return true;
444
445 // If we have seen this instruction before, don't count it again.
446 if (AggressiveInsts.count(I))
447 return true;
448
449 // Okay, it looks like the instruction IS in the "condition". Check to
450 // see if it's a cheap instruction to unconditionally compute, and if it
451 // only uses stuff defined outside of the condition. If so, hoist it out.
453 return false;
454
456
457 // Allow exactly one instruction to be speculated regardless of its cost
458 // (as long as it is safe to do so).
459 // This is intended to flatten the CFG even if the instruction is a division
460 // or other expensive operation. The speculation of an expensive instruction
461 // is expected to be undone in CodeGenPrepare if the speculation has not
462 // enabled further IR optimizations.
463 if (Cost > Budget &&
464 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
465 !Cost.isValid()))
466 return false;
467
468 // Okay, we can only really hoist these out if their operands do
469 // not take us over the cost threshold.
470 for (Use &Op : I->operands())
471 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
472 Depth + 1))
473 return false;
474 // Okay, it's safe to do this! Remember this instruction.
475 AggressiveInsts.insert(I);
476 return true;
477}
478
479/// Extract ConstantInt from value, looking through IntToPtr
480/// and PointerNullValue. Return NULL if value is not a constant int.
482 // Normal constant int.
483 ConstantInt *CI = dyn_cast<ConstantInt>(V);
484 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
485 DL.isNonIntegralPointerType(V->getType()))
486 return CI;
487
488 // This is some kind of pointer constant. Turn it into a pointer-sized
489 // ConstantInt if possible.
490 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
491
492 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
493 if (isa<ConstantPointerNull>(V))
494 return ConstantInt::get(PtrTy, 0);
495
496 // IntToPtr const int.
497 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
498 if (CE->getOpcode() == Instruction::IntToPtr)
499 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
500 // The constant is very likely to have the right type already.
501 if (CI->getType() == PtrTy)
502 return CI;
503 else
504 return cast<ConstantInt>(
505 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
506 }
507 return nullptr;
508}
509
510namespace {
511
512/// Given a chain of or (||) or and (&&) comparison of a value against a
513/// constant, this will try to recover the information required for a switch
514/// structure.
515/// It will depth-first traverse the chain of comparison, seeking for patterns
516/// like %a == 12 or %a < 4 and combine them to produce a set of integer
517/// representing the different cases for the switch.
518/// Note that if the chain is composed of '||' it will build the set of elements
519/// that matches the comparisons (i.e. any of this value validate the chain)
520/// while for a chain of '&&' it will build the set elements that make the test
521/// fail.
522struct ConstantComparesGatherer {
523 const DataLayout &DL;
524
525 /// Value found for the switch comparison
526 Value *CompValue = nullptr;
527
528 /// Extra clause to be checked before the switch
529 Value *Extra = nullptr;
530
531 /// Set of integers to match in switch
533
534 /// Number of comparisons matched in the and/or chain
535 unsigned UsedICmps = 0;
536
537 /// Construct and compute the result for the comparison instruction Cond
538 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
539 gather(Cond);
540 }
541
542 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
543 ConstantComparesGatherer &
544 operator=(const ConstantComparesGatherer &) = delete;
545
546private:
547 /// Try to set the current value used for the comparison, it succeeds only if
548 /// it wasn't set before or if the new value is the same as the old one
549 bool setValueOnce(Value *NewVal) {
550 if (CompValue && CompValue != NewVal)
551 return false;
552 CompValue = NewVal;
553 return (CompValue != nullptr);
554 }
555
556 /// Try to match Instruction "I" as a comparison against a constant and
557 /// populates the array Vals with the set of values that match (or do not
558 /// match depending on isEQ).
559 /// Return false on failure. On success, the Value the comparison matched
560 /// against is placed in CompValue.
561 /// If CompValue is already set, the function is expected to fail if a match
562 /// is found but the value compared to is different.
563 bool matchInstruction(Instruction *I, bool isEQ) {
564 // If this is an icmp against a constant, handle this as one of the cases.
565 ICmpInst *ICI;
566 ConstantInt *C;
567 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
568 (C = GetConstantInt(I->getOperand(1), DL)))) {
569 return false;
570 }
571
572 Value *RHSVal;
573 const APInt *RHSC;
574
575 // Pattern match a special case
576 // (x & ~2^z) == y --> x == y || x == y|2^z
577 // This undoes a transformation done by instcombine to fuse 2 compares.
578 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
579 // It's a little bit hard to see why the following transformations are
580 // correct. Here is a CVC3 program to verify them for 64-bit values:
581
582 /*
583 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
584 x : BITVECTOR(64);
585 y : BITVECTOR(64);
586 z : BITVECTOR(64);
587 mask : BITVECTOR(64) = BVSHL(ONE, z);
588 QUERY( (y & ~mask = y) =>
589 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
590 );
591 QUERY( (y | mask = y) =>
592 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
593 );
594 */
595
596 // Please note that each pattern must be a dual implication (<--> or
597 // iff). One directional implication can create spurious matches. If the
598 // implication is only one-way, an unsatisfiable condition on the left
599 // side can imply a satisfiable condition on the right side. Dual
600 // implication ensures that satisfiable conditions are transformed to
601 // other satisfiable conditions and unsatisfiable conditions are
602 // transformed to other unsatisfiable conditions.
603
604 // Here is a concrete example of a unsatisfiable condition on the left
605 // implying a satisfiable condition on the right:
606 //
607 // mask = (1 << z)
608 // (x & ~mask) == y --> (x == y || x == (y | mask))
609 //
610 // Substituting y = 3, z = 0 yields:
611 // (x & -2) == 3 --> (x == 3 || x == 2)
612
613 // Pattern match a special case:
614 /*
615 QUERY( (y & ~mask = y) =>
616 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
617 );
618 */
619 if (match(ICI->getOperand(0),
620 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
621 APInt Mask = ~*RHSC;
622 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
623 // If we already have a value for the switch, it has to match!
624 if (!setValueOnce(RHSVal))
625 return false;
626
627 Vals.push_back(C);
628 Vals.push_back(
629 ConstantInt::get(C->getContext(),
630 C->getValue() | Mask));
631 UsedICmps++;
632 return true;
633 }
634 }
635
636 // Pattern match a special case:
637 /*
638 QUERY( (y | mask = y) =>
639 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
640 );
641 */
642 if (match(ICI->getOperand(0),
643 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
644 APInt Mask = *RHSC;
645 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(RHSVal))
648 return false;
649
650 Vals.push_back(C);
651 Vals.push_back(ConstantInt::get(C->getContext(),
652 C->getValue() & ~Mask));
653 UsedICmps++;
654 return true;
655 }
656 }
657
658 // If we already have a value for the switch, it has to match!
659 if (!setValueOnce(ICI->getOperand(0)))
660 return false;
661
662 UsedICmps++;
663 Vals.push_back(C);
664 return ICI->getOperand(0);
665 }
666
667 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
668 ConstantRange Span =
670
671 // Shift the range if the compare is fed by an add. This is the range
672 // compare idiom as emitted by instcombine.
673 Value *CandidateVal = I->getOperand(0);
674 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
675 Span = Span.subtract(*RHSC);
676 CandidateVal = RHSVal;
677 }
678
679 // If this is an and/!= check, then we are looking to build the set of
680 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
681 // x != 0 && x != 1.
682 if (!isEQ)
683 Span = Span.inverse();
684
685 // If there are a ton of values, we don't want to make a ginormous switch.
686 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
687 return false;
688 }
689
690 // If we already have a value for the switch, it has to match!
691 if (!setValueOnce(CandidateVal))
692 return false;
693
694 // Add all values from the range to the set
695 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
696 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
697
698 UsedICmps++;
699 return true;
700 }
701
702 /// Given a potentially 'or'd or 'and'd together collection of icmp
703 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
704 /// the value being compared, and stick the list constants into the Vals
705 /// vector.
706 /// One "Extra" case is allowed to differ from the other.
707 void gather(Value *V) {
708 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
709
710 // Keep a stack (SmallVector for efficiency) for depth-first traversal
713
714 // Initialize
715 Visited.insert(V);
716 DFT.push_back(V);
717
718 while (!DFT.empty()) {
719 V = DFT.pop_back_val();
720
721 if (Instruction *I = dyn_cast<Instruction>(V)) {
722 // If it is a || (or && depending on isEQ), process the operands.
723 Value *Op0, *Op1;
724 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
725 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
726 if (Visited.insert(Op1).second)
727 DFT.push_back(Op1);
728 if (Visited.insert(Op0).second)
729 DFT.push_back(Op0);
730
731 continue;
732 }
733
734 // Try to match the current instruction
735 if (matchInstruction(I, isEQ))
736 // Match succeed, continue the loop
737 continue;
738 }
739
740 // One element of the sequence of || (or &&) could not be match as a
741 // comparison against the same value as the others.
742 // We allow only one "Extra" case to be checked before the switch
743 if (!Extra) {
744 Extra = V;
745 continue;
746 }
747 // Failed to parse a proper sequence, abort now
748 CompValue = nullptr;
749 break;
750 }
751 }
752};
753
754} // end anonymous namespace
755
757 MemorySSAUpdater *MSSAU = nullptr) {
758 Instruction *Cond = nullptr;
759 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
760 Cond = dyn_cast<Instruction>(SI->getCondition());
761 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
762 if (BI->isConditional())
763 Cond = dyn_cast<Instruction>(BI->getCondition());
764 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
765 Cond = dyn_cast<Instruction>(IBI->getAddress());
766 }
767
768 TI->eraseFromParent();
769 if (Cond)
771}
772
773/// Return true if the specified terminator checks
774/// to see if a value is equal to constant integer value.
775Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
776 Value *CV = nullptr;
777 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
778 // Do not permit merging of large switch instructions into their
779 // predecessors unless there is only one predecessor.
780 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
781 CV = SI->getCondition();
782 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
783 if (BI->isConditional() && BI->getCondition()->hasOneUse())
784 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
785 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
786 CV = ICI->getOperand(0);
787 }
788
789 // Unwrap any lossless ptrtoint cast.
790 if (CV) {
791 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
792 Value *Ptr = PTII->getPointerOperand();
793 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
794 CV = Ptr;
795 }
796 }
797 return CV;
798}
799
800/// Given a value comparison instruction,
801/// decode all of the 'cases' that it represents and return the 'default' block.
802BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
803 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
804 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
805 Cases.reserve(SI->getNumCases());
806 for (auto Case : SI->cases())
807 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
808 Case.getCaseSuccessor()));
809 return SI->getDefaultDest();
810 }
811
812 BranchInst *BI = cast<BranchInst>(TI);
813 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
814 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
815 Cases.push_back(ValueEqualityComparisonCase(
816 GetConstantInt(ICI->getOperand(1), DL), Succ));
817 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
818}
819
820/// Given a vector of bb/value pairs, remove any entries
821/// in the list that match the specified block.
822static void
824 std::vector<ValueEqualityComparisonCase> &Cases) {
825 llvm::erase(Cases, BB);
826}
827
828/// Return true if there are any keys in C1 that exist in C2 as well.
829static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
830 std::vector<ValueEqualityComparisonCase> &C2) {
831 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
832
833 // Make V1 be smaller than V2.
834 if (V1->size() > V2->size())
835 std::swap(V1, V2);
836
837 if (V1->empty())
838 return false;
839 if (V1->size() == 1) {
840 // Just scan V2.
841 ConstantInt *TheVal = (*V1)[0].Value;
842 for (const ValueEqualityComparisonCase &VECC : *V2)
843 if (TheVal == VECC.Value)
844 return true;
845 }
846
847 // Otherwise, just sort both lists and compare element by element.
848 array_pod_sort(V1->begin(), V1->end());
849 array_pod_sort(V2->begin(), V2->end());
850 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
851 while (i1 != e1 && i2 != e2) {
852 if ((*V1)[i1].Value == (*V2)[i2].Value)
853 return true;
854 if ((*V1)[i1].Value < (*V2)[i2].Value)
855 ++i1;
856 else
857 ++i2;
858 }
859 return false;
860}
861
862// Set branch weights on SwitchInst. This sets the metadata if there is at
863// least one non-zero weight.
865 // Check that there is at least one non-zero weight. Otherwise, pass
866 // nullptr to setMetadata which will erase the existing metadata.
867 MDNode *N = nullptr;
868 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
869 N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
870 SI->setMetadata(LLVMContext::MD_prof, N);
871}
872
873// Similar to the above, but for branch and select instructions that take
874// exactly 2 weights.
875static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
876 uint32_t FalseWeight) {
877 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
878 // Check that there is at least one non-zero weight. Otherwise, pass
879 // nullptr to setMetadata which will erase the existing metadata.
880 MDNode *N = nullptr;
881 if (TrueWeight || FalseWeight)
882 N = MDBuilder(I->getParent()->getContext())
883 .createBranchWeights(TrueWeight, FalseWeight);
884 I->setMetadata(LLVMContext::MD_prof, N);
885}
886
887/// If TI is known to be a terminator instruction and its block is known to
888/// only have a single predecessor block, check to see if that predecessor is
889/// also a value comparison with the same value, and if that comparison
890/// determines the outcome of this comparison. If so, simplify TI. This does a
891/// very limited form of jump threading.
892bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
893 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
894 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
895 if (!PredVal)
896 return false; // Not a value comparison in predecessor.
897
898 Value *ThisVal = isValueEqualityComparison(TI);
899 assert(ThisVal && "This isn't a value comparison!!");
900 if (ThisVal != PredVal)
901 return false; // Different predicates.
902
903 // TODO: Preserve branch weight metadata, similarly to how
904 // FoldValueComparisonIntoPredecessors preserves it.
905
906 // Find out information about when control will move from Pred to TI's block.
907 std::vector<ValueEqualityComparisonCase> PredCases;
908 BasicBlock *PredDef =
909 GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
910 EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
911
912 // Find information about how control leaves this block.
913 std::vector<ValueEqualityComparisonCase> ThisCases;
914 BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
915 EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
916
917 // If TI's block is the default block from Pred's comparison, potentially
918 // simplify TI based on this knowledge.
919 if (PredDef == TI->getParent()) {
920 // If we are here, we know that the value is none of those cases listed in
921 // PredCases. If there are any cases in ThisCases that are in PredCases, we
922 // can simplify TI.
923 if (!ValuesOverlap(PredCases, ThisCases))
924 return false;
925
926 if (isa<BranchInst>(TI)) {
927 // Okay, one of the successors of this condbr is dead. Convert it to a
928 // uncond br.
929 assert(ThisCases.size() == 1 && "Branch can only have one case!");
930 // Insert the new branch.
931 Instruction *NI = Builder.CreateBr(ThisDef);
932 (void)NI;
933
934 // Remove PHI node entries for the dead edge.
935 ThisCases[0].Dest->removePredecessor(PredDef);
936
937 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
938 << "Through successor TI: " << *TI << "Leaving: " << *NI
939 << "\n");
940
942
943 if (DTU)
944 DTU->applyUpdates(
945 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
946
947 return true;
948 }
949
950 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
951 // Okay, TI has cases that are statically dead, prune them away.
953 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
954 DeadCases.insert(PredCases[i].Value);
955
956 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
957 << "Through successor TI: " << *TI);
958
959 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
960 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
961 --i;
962 auto *Successor = i->getCaseSuccessor();
963 if (DTU)
964 ++NumPerSuccessorCases[Successor];
965 if (DeadCases.count(i->getCaseValue())) {
966 Successor->removePredecessor(PredDef);
967 SI.removeCase(i);
968 if (DTU)
969 --NumPerSuccessorCases[Successor];
970 }
971 }
972
973 if (DTU) {
974 std::vector<DominatorTree::UpdateType> Updates;
975 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
976 if (I.second == 0)
977 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
978 DTU->applyUpdates(Updates);
979 }
980
981 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
982 return true;
983 }
984
985 // Otherwise, TI's block must correspond to some matched value. Find out
986 // which value (or set of values) this is.
987 ConstantInt *TIV = nullptr;
988 BasicBlock *TIBB = TI->getParent();
989 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
990 if (PredCases[i].Dest == TIBB) {
991 if (TIV)
992 return false; // Cannot handle multiple values coming to this block.
993 TIV = PredCases[i].Value;
994 }
995 assert(TIV && "No edge from pred to succ?");
996
997 // Okay, we found the one constant that our value can be if we get into TI's
998 // BB. Find out which successor will unconditionally be branched to.
999 BasicBlock *TheRealDest = nullptr;
1000 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
1001 if (ThisCases[i].Value == TIV) {
1002 TheRealDest = ThisCases[i].Dest;
1003 break;
1004 }
1005
1006 // If not handled by any explicit cases, it is handled by the default case.
1007 if (!TheRealDest)
1008 TheRealDest = ThisDef;
1009
1010 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1011
1012 // Remove PHI node entries for dead edges.
1013 BasicBlock *CheckEdge = TheRealDest;
1014 for (BasicBlock *Succ : successors(TIBB))
1015 if (Succ != CheckEdge) {
1016 if (Succ != TheRealDest)
1017 RemovedSuccs.insert(Succ);
1018 Succ->removePredecessor(TIBB);
1019 } else
1020 CheckEdge = nullptr;
1021
1022 // Insert the new branch.
1023 Instruction *NI = Builder.CreateBr(TheRealDest);
1024 (void)NI;
1025
1026 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1027 << "Through successor TI: " << *TI << "Leaving: " << *NI
1028 << "\n");
1029
1031 if (DTU) {
1033 Updates.reserve(RemovedSuccs.size());
1034 for (auto *RemovedSucc : RemovedSuccs)
1035 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1036 DTU->applyUpdates(Updates);
1037 }
1038 return true;
1039}
1040
1041namespace {
1042
1043/// This class implements a stable ordering of constant
1044/// integers that does not depend on their address. This is important for
1045/// applications that sort ConstantInt's to ensure uniqueness.
1046struct ConstantIntOrdering {
1047 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1048 return LHS->getValue().ult(RHS->getValue());
1049 }
1050};
1051
1052} // end anonymous namespace
1053
1055 ConstantInt *const *P2) {
1056 const ConstantInt *LHS = *P1;
1057 const ConstantInt *RHS = *P2;
1058 if (LHS == RHS)
1059 return 0;
1060 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1061}
1062
1063/// Get Weights of a given terminator, the default weight is at the front
1064/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1065/// metadata.
1067 SmallVectorImpl<uint64_t> &Weights) {
1068 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1069 assert(MD && "Invalid branch-weight metadata");
1070 extractFromBranchWeightMD64(MD, Weights);
1071
1072 // If TI is a conditional eq, the default case is the false case,
1073 // and the corresponding branch-weight data is at index 2. We swap the
1074 // default weight to be the first entry.
1075 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1076 assert(Weights.size() == 2);
1077 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1078 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1079 std::swap(Weights.front(), Weights.back());
1080 }
1081}
1082
1083/// Keep halving the weights until all can fit in uint32_t.
1085 uint64_t Max = *llvm::max_element(Weights);
1086 if (Max > UINT_MAX) {
1087 unsigned Offset = 32 - llvm::countl_zero(Max);
1088 for (uint64_t &I : Weights)
1089 I >>= Offset;
1090 }
1091}
1092
1094 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1095 Instruction *PTI = PredBlock->getTerminator();
1096
1097 // If we have bonus instructions, clone them into the predecessor block.
1098 // Note that there may be multiple predecessor blocks, so we cannot move
1099 // bonus instructions to a predecessor block.
1100 for (Instruction &BonusInst : *BB) {
1101 if (BonusInst.isTerminator())
1102 continue;
1103
1104 Instruction *NewBonusInst = BonusInst.clone();
1105
1106 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1107 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1108 // Unless the instruction has the same !dbg location as the original
1109 // branch, drop it. When we fold the bonus instructions we want to make
1110 // sure we reset their debug locations in order to avoid stepping on
1111 // dead code caused by folding dead branches.
1112 NewBonusInst->setDebugLoc(DebugLoc());
1113 }
1114
1115 RemapInstruction(NewBonusInst, VMap,
1117
1118 // If we speculated an instruction, we need to drop any metadata that may
1119 // result in undefined behavior, as the metadata might have been valid
1120 // only given the branch precondition.
1121 // Similarly strip attributes on call parameters that may cause UB in
1122 // location the call is moved to.
1123 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1124
1125 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1126 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1127 RemapDbgVariableRecordRange(NewBonusInst->getModule(), Range, VMap,
1130
1131 if (isa<DbgInfoIntrinsic>(BonusInst))
1132 continue;
1133
1134 NewBonusInst->takeName(&BonusInst);
1135 BonusInst.setName(NewBonusInst->getName() + ".old");
1136 VMap[&BonusInst] = NewBonusInst;
1137
1138 // Update (liveout) uses of bonus instructions,
1139 // now that the bonus instruction has been cloned into predecessor.
1140 // Note that we expect to be in a block-closed SSA form for this to work!
1141 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1142 auto *UI = cast<Instruction>(U.getUser());
1143 auto *PN = dyn_cast<PHINode>(UI);
1144 if (!PN) {
1145 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1146 "If the user is not a PHI node, then it should be in the same "
1147 "block as, and come after, the original bonus instruction.");
1148 continue; // Keep using the original bonus instruction.
1149 }
1150 // Is this the block-closed SSA form PHI node?
1151 if (PN->getIncomingBlock(U) == BB)
1152 continue; // Great, keep using the original bonus instruction.
1153 // The only other alternative is an "use" when coming from
1154 // the predecessor block - here we should refer to the cloned bonus instr.
1155 assert(PN->getIncomingBlock(U) == PredBlock &&
1156 "Not in block-closed SSA form?");
1157 U.set(NewBonusInst);
1158 }
1159 }
1160}
1161
1162bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
1163 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1164 BasicBlock *BB = TI->getParent();
1165 BasicBlock *Pred = PTI->getParent();
1166
1168
1169 // Figure out which 'cases' to copy from SI to PSI.
1170 std::vector<ValueEqualityComparisonCase> BBCases;
1171 BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1172
1173 std::vector<ValueEqualityComparisonCase> PredCases;
1174 BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1175
1176 // Based on whether the default edge from PTI goes to BB or not, fill in
1177 // PredCases and PredDefault with the new switch cases we would like to
1178 // build.
1180
1181 // Update the branch weight metadata along the way
1183 bool PredHasWeights = hasBranchWeightMD(*PTI);
1184 bool SuccHasWeights = hasBranchWeightMD(*TI);
1185
1186 if (PredHasWeights) {
1187 GetBranchWeights(PTI, Weights);
1188 // branch-weight metadata is inconsistent here.
1189 if (Weights.size() != 1 + PredCases.size())
1190 PredHasWeights = SuccHasWeights = false;
1191 } else if (SuccHasWeights)
1192 // If there are no predecessor weights but there are successor weights,
1193 // populate Weights with 1, which will later be scaled to the sum of
1194 // successor's weights
1195 Weights.assign(1 + PredCases.size(), 1);
1196
1197 SmallVector<uint64_t, 8> SuccWeights;
1198 if (SuccHasWeights) {
1199 GetBranchWeights(TI, SuccWeights);
1200 // branch-weight metadata is inconsistent here.
1201 if (SuccWeights.size() != 1 + BBCases.size())
1202 PredHasWeights = SuccHasWeights = false;
1203 } else if (PredHasWeights)
1204 SuccWeights.assign(1 + BBCases.size(), 1);
1205
1206 if (PredDefault == BB) {
1207 // If this is the default destination from PTI, only the edges in TI
1208 // that don't occur in PTI, or that branch to BB will be activated.
1209 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1210 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1211 if (PredCases[i].Dest != BB)
1212 PTIHandled.insert(PredCases[i].Value);
1213 else {
1214 // The default destination is BB, we don't need explicit targets.
1215 std::swap(PredCases[i], PredCases.back());
1216
1217 if (PredHasWeights || SuccHasWeights) {
1218 // Increase weight for the default case.
1219 Weights[0] += Weights[i + 1];
1220 std::swap(Weights[i + 1], Weights.back());
1221 Weights.pop_back();
1222 }
1223
1224 PredCases.pop_back();
1225 --i;
1226 --e;
1227 }
1228
1229 // Reconstruct the new switch statement we will be building.
1230 if (PredDefault != BBDefault) {
1231 PredDefault->removePredecessor(Pred);
1232 if (DTU && PredDefault != BB)
1233 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1234 PredDefault = BBDefault;
1235 ++NewSuccessors[BBDefault];
1236 }
1237
1238 unsigned CasesFromPred = Weights.size();
1239 uint64_t ValidTotalSuccWeight = 0;
1240 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1241 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1242 PredCases.push_back(BBCases[i]);
1243 ++NewSuccessors[BBCases[i].Dest];
1244 if (SuccHasWeights || PredHasWeights) {
1245 // The default weight is at index 0, so weight for the ith case
1246 // should be at index i+1. Scale the cases from successor by
1247 // PredDefaultWeight (Weights[0]).
1248 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1249 ValidTotalSuccWeight += SuccWeights[i + 1];
1250 }
1251 }
1252
1253 if (SuccHasWeights || PredHasWeights) {
1254 ValidTotalSuccWeight += SuccWeights[0];
1255 // Scale the cases from predecessor by ValidTotalSuccWeight.
1256 for (unsigned i = 1; i < CasesFromPred; ++i)
1257 Weights[i] *= ValidTotalSuccWeight;
1258 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1259 Weights[0] *= SuccWeights[0];
1260 }
1261 } else {
1262 // If this is not the default destination from PSI, only the edges
1263 // in SI that occur in PSI with a destination of BB will be
1264 // activated.
1265 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1266 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1267 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1268 if (PredCases[i].Dest == BB) {
1269 PTIHandled.insert(PredCases[i].Value);
1270
1271 if (PredHasWeights || SuccHasWeights) {
1272 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1273 std::swap(Weights[i + 1], Weights.back());
1274 Weights.pop_back();
1275 }
1276
1277 std::swap(PredCases[i], PredCases.back());
1278 PredCases.pop_back();
1279 --i;
1280 --e;
1281 }
1282
1283 // Okay, now we know which constants were sent to BB from the
1284 // predecessor. Figure out where they will all go now.
1285 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1286 if (PTIHandled.count(BBCases[i].Value)) {
1287 // If this is one we are capable of getting...
1288 if (PredHasWeights || SuccHasWeights)
1289 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1290 PredCases.push_back(BBCases[i]);
1291 ++NewSuccessors[BBCases[i].Dest];
1292 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1293 }
1294
1295 // If there are any constants vectored to BB that TI doesn't handle,
1296 // they must go to the default destination of TI.
1297 for (ConstantInt *I : PTIHandled) {
1298 if (PredHasWeights || SuccHasWeights)
1299 Weights.push_back(WeightsForHandled[I]);
1300 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1301 ++NewSuccessors[BBDefault];
1302 }
1303 }
1304
1305 // Okay, at this point, we know which new successor Pred will get. Make
1306 // sure we update the number of entries in the PHI nodes for these
1307 // successors.
1308 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1309 if (DTU) {
1310 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1311 Updates.reserve(Updates.size() + NewSuccessors.size());
1312 }
1313 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1314 NewSuccessors) {
1315 for (auto I : seq(NewSuccessor.second)) {
1316 (void)I;
1317 AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
1318 }
1319 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1320 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1321 }
1322
1323 Builder.SetInsertPoint(PTI);
1324 // Convert pointer to int before we switch.
1325 if (CV->getType()->isPointerTy()) {
1326 CV =
1327 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1328 }
1329
1330 // Now that the successors are updated, create the new Switch instruction.
1331 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1332 NewSI->setDebugLoc(PTI->getDebugLoc());
1333 for (ValueEqualityComparisonCase &V : PredCases)
1334 NewSI->addCase(V.Value, V.Dest);
1335
1336 if (PredHasWeights || SuccHasWeights) {
1337 // Halve the weights if any of them cannot fit in an uint32_t
1338 FitWeights(Weights);
1339
1340 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1341
1342 setBranchWeights(NewSI, MDWeights);
1343 }
1344
1346
1347 // Okay, last check. If BB is still a successor of PSI, then we must
1348 // have an infinite loop case. If so, add an infinitely looping block
1349 // to handle the case to preserve the behavior of the code.
1350 BasicBlock *InfLoopBlock = nullptr;
1351 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1352 if (NewSI->getSuccessor(i) == BB) {
1353 if (!InfLoopBlock) {
1354 // Insert it at the end of the function, because it's either code,
1355 // or it won't matter if it's hot. :)
1356 InfLoopBlock =
1357 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1358 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1359 if (DTU)
1360 Updates.push_back(
1361 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1362 }
1363 NewSI->setSuccessor(i, InfLoopBlock);
1364 }
1365
1366 if (DTU) {
1367 if (InfLoopBlock)
1368 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1369
1370 Updates.push_back({DominatorTree::Delete, Pred, BB});
1371
1372 DTU->applyUpdates(Updates);
1373 }
1374
1375 ++NumFoldValueComparisonIntoPredecessors;
1376 return true;
1377}
1378
1379/// The specified terminator is a value equality comparison instruction
1380/// (either a switch or a branch on "X == c").
1381/// See if any of the predecessors of the terminator block are value comparisons
1382/// on the same value. If so, and if safe to do so, fold them together.
1383bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1384 IRBuilder<> &Builder) {
1385 BasicBlock *BB = TI->getParent();
1386 Value *CV = isValueEqualityComparison(TI); // CondVal
1387 assert(CV && "Not a comparison?");
1388
1389 bool Changed = false;
1390
1392 while (!Preds.empty()) {
1393 BasicBlock *Pred = Preds.pop_back_val();
1394 Instruction *PTI = Pred->getTerminator();
1395
1396 // Don't try to fold into itself.
1397 if (Pred == BB)
1398 continue;
1399
1400 // See if the predecessor is a comparison with the same value.
1401 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1402 if (PCV != CV)
1403 continue;
1404
1406 if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1407 for (auto *Succ : FailBlocks) {
1408 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1409 return false;
1410 }
1411 }
1412
1413 PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1414 Changed = true;
1415 }
1416 return Changed;
1417}
1418
1419// If we would need to insert a select that uses the value of this invoke
1420// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1421// need to do this), we can't hoist the invoke, as there is nowhere to put the
1422// select in this case.
1424 Instruction *I1, Instruction *I2) {
1425 for (BasicBlock *Succ : successors(BB1)) {
1426 for (const PHINode &PN : Succ->phis()) {
1427 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1428 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1429 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1430 return false;
1431 }
1432 }
1433 }
1434 return true;
1435}
1436
1437// Get interesting characteristics of instructions that
1438// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1439// instructions can be reordered across.
1445
1447 unsigned Flags = 0;
1448 if (I->mayReadFromMemory())
1449 Flags |= SkipReadMem;
1450 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1451 // inalloca) across stacksave/stackrestore boundaries.
1452 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1453 Flags |= SkipSideEffect;
1455 Flags |= SkipImplicitControlFlow;
1456 return Flags;
1457}
1458
1459// Returns true if it is safe to reorder an instruction across preceding
1460// instructions in a basic block.
1461static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1462 // Don't reorder a store over a load.
1463 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1464 return false;
1465
1466 // If we have seen an instruction with side effects, it's unsafe to reorder an
1467 // instruction which reads memory or itself has side effects.
1468 if ((Flags & SkipSideEffect) &&
1469 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1470 return false;
1471
1472 // Reordering across an instruction which does not necessarily transfer
1473 // control to the next instruction is speculation.
1475 return false;
1476
1477 // Hoisting of llvm.deoptimize is only legal together with the next return
1478 // instruction, which this pass is not always able to do.
1479 if (auto *CB = dyn_cast<CallBase>(I))
1480 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1481 return false;
1482
1483 // It's also unsafe/illegal to hoist an instruction above its instruction
1484 // operands
1485 BasicBlock *BB = I->getParent();
1486 for (Value *Op : I->operands()) {
1487 if (auto *J = dyn_cast<Instruction>(Op))
1488 if (J->getParent() == BB)
1489 return false;
1490 }
1491
1492 return true;
1493}
1494
1495static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1496
1497/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1498/// instructions \p I1 and \p I2 can and should be hoisted.
1500 const TargetTransformInfo &TTI) {
1501 // If we're going to hoist a call, make sure that the two instructions
1502 // we're commoning/hoisting are both marked with musttail, or neither of
1503 // them is marked as such. Otherwise, we might end up in a situation where
1504 // we hoist from a block where the terminator is a `ret` to a block where
1505 // the terminator is a `br`, and `musttail` calls expect to be followed by
1506 // a return.
1507 auto *C1 = dyn_cast<CallInst>(I1);
1508 auto *C2 = dyn_cast<CallInst>(I2);
1509 if (C1 && C2)
1510 if (C1->isMustTailCall() != C2->isMustTailCall())
1511 return false;
1512
1514 return false;
1515
1516 // If any of the two call sites has nomerge or convergent attribute, stop
1517 // hoisting.
1518 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1519 if (CB1->cannotMerge() || CB1->isConvergent())
1520 return false;
1521 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1522 if (CB2->cannotMerge() || CB2->isConvergent())
1523 return false;
1524
1525 return true;
1526}
1527
1528/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1529/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1530/// hoistCommonCodeFromSuccessors. e.g. The input:
1531/// I1 DVRs: { x, z },
1532/// OtherInsts: { I2 DVRs: { x, y, z } }
1533/// would result in hoisting only DbgVariableRecord x.
1535 Instruction *TI, Instruction *I1,
1536 SmallVectorImpl<Instruction *> &OtherInsts) {
1537 if (!I1->hasDbgRecords())
1538 return;
1539 using CurrentAndEndIt =
1540 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1541 // Vector of {Current, End} iterators.
1543 Itrs.reserve(OtherInsts.size() + 1);
1544 // Helper lambdas for lock-step checks:
1545 // Return true if this Current == End.
1546 auto atEnd = [](const CurrentAndEndIt &Pair) {
1547 return Pair.first == Pair.second;
1548 };
1549 // Return true if all Current are identical.
1550 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1551 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1553 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1554 });
1555 };
1556
1557 // Collect the iterators.
1558 Itrs.push_back(
1559 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1560 for (Instruction *Other : OtherInsts) {
1561 if (!Other->hasDbgRecords())
1562 return;
1563 Itrs.push_back(
1564 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1565 }
1566
1567 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1568 // the lock-step DbgRecord are identical, hoist all of them to TI.
1569 // This replicates the dbg.* intrinsic behaviour in
1570 // hoistCommonCodeFromSuccessors.
1571 while (none_of(Itrs, atEnd)) {
1572 bool HoistDVRs = allIdentical(Itrs);
1573 for (CurrentAndEndIt &Pair : Itrs) {
1574 // Increment Current iterator now as we may be about to move the
1575 // DbgRecord.
1576 DbgRecord &DR = *Pair.first++;
1577 if (HoistDVRs) {
1578 DR.removeFromParent();
1579 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1580 }
1581 }
1582 }
1583}
1584
1585/// Hoist any common code in the successor blocks up into the block. This
1586/// function guarantees that BB dominates all successors. If EqTermsOnly is
1587/// given, only perform hoisting in case both blocks only contain a terminator.
1588/// In that case, only the original BI will be replaced and selects for PHIs are
1589/// added.
1590bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1591 bool EqTermsOnly) {
1592 // This does very trivial matching, with limited scanning, to find identical
1593 // instructions in the two blocks. In particular, we don't want to get into
1594 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1595 // such, we currently just scan for obviously identical instructions in an
1596 // identical order, possibly separated by the same number of non-identical
1597 // instructions.
1598 unsigned int SuccSize = succ_size(BB);
1599 if (SuccSize < 2)
1600 return false;
1601
1602 // If either of the blocks has it's address taken, then we can't do this fold,
1603 // because the code we'd hoist would no longer run when we jump into the block
1604 // by it's address.
1605 for (auto *Succ : successors(BB))
1606 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1607 return false;
1608
1609 auto *TI = BB->getTerminator();
1610
1611 // The second of pair is a SkipFlags bitmask.
1612 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1613 SmallVector<SuccIterPair, 8> SuccIterPairs;
1614 for (auto *Succ : successors(BB)) {
1615 BasicBlock::iterator SuccItr = Succ->begin();
1616 if (isa<PHINode>(*SuccItr))
1617 return false;
1618 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1619 }
1620
1621 // Check if only hoisting terminators is allowed. This does not add new
1622 // instructions to the hoist location.
1623 if (EqTermsOnly) {
1624 // Skip any debug intrinsics, as they are free to hoist.
1625 for (auto &SuccIter : make_first_range(SuccIterPairs)) {
1626 auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
1627 if (!INonDbg->isTerminator())
1628 return false;
1629 }
1630 // Now we know that we only need to hoist debug intrinsics and the
1631 // terminator. Let the loop below handle those 2 cases.
1632 }
1633
1634 // Count how many instructions were not hoisted so far. There's a limit on how
1635 // many instructions we skip, serving as a compilation time control as well as
1636 // preventing excessive increase of life ranges.
1637 unsigned NumSkipped = 0;
1638 // If we find an unreachable instruction at the beginning of a basic block, we
1639 // can still hoist instructions from the rest of the basic blocks.
1640 if (SuccIterPairs.size() > 2) {
1641 erase_if(SuccIterPairs,
1642 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1643 if (SuccIterPairs.size() < 2)
1644 return false;
1645 }
1646
1647 bool Changed = false;
1648
1649 for (;;) {
1650 auto *SuccIterPairBegin = SuccIterPairs.begin();
1651 auto &BB1ItrPair = *SuccIterPairBegin++;
1652 auto OtherSuccIterPairRange =
1653 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1654 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1655
1656 Instruction *I1 = &*BB1ItrPair.first;
1657
1658 // Skip debug info if it is not identical.
1659 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1660 Instruction *I2 = &*Iter;
1661 return I1->isIdenticalToWhenDefined(I2);
1662 });
1663 if (!AllDbgInstsAreIdentical) {
1664 while (isa<DbgInfoIntrinsic>(I1))
1665 I1 = &*++BB1ItrPair.first;
1666 for (auto &SuccIter : OtherSuccIterRange) {
1667 Instruction *I2 = &*SuccIter;
1668 while (isa<DbgInfoIntrinsic>(I2))
1669 I2 = &*++SuccIter;
1670 }
1671 }
1672
1673 bool AllInstsAreIdentical = true;
1674 bool HasTerminator = I1->isTerminator();
1675 for (auto &SuccIter : OtherSuccIterRange) {
1676 Instruction *I2 = &*SuccIter;
1677 HasTerminator |= I2->isTerminator();
1678 if (AllInstsAreIdentical && (!I1->isIdenticalToWhenDefined(I2) ||
1679 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1680 AllInstsAreIdentical = false;
1681 }
1682
1684 for (auto &SuccIter : OtherSuccIterRange)
1685 OtherInsts.push_back(&*SuccIter);
1686
1687 // If we are hoisting the terminator instruction, don't move one (making a
1688 // broken BB), instead clone it, and remove BI.
1689 if (HasTerminator) {
1690 // Even if BB, which contains only one unreachable instruction, is ignored
1691 // at the beginning of the loop, we can hoist the terminator instruction.
1692 // If any instructions remain in the block, we cannot hoist terminators.
1693 if (NumSkipped || !AllInstsAreIdentical) {
1694 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1695 return Changed;
1696 }
1697
1698 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1699 Changed;
1700 }
1701
1702 if (AllInstsAreIdentical) {
1703 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1704 AllInstsAreIdentical =
1705 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1706 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1707 Instruction *I2 = &*Pair.first;
1708 unsigned SkipFlagsBB2 = Pair.second;
1709 // Even if the instructions are identical, it may not
1710 // be safe to hoist them if we have skipped over
1711 // instructions with side effects or their operands
1712 // weren't hoisted.
1713 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1715 });
1716 }
1717
1718 if (AllInstsAreIdentical) {
1719 BB1ItrPair.first++;
1720 if (isa<DbgInfoIntrinsic>(I1)) {
1721 // The debug location is an integral part of a debug info intrinsic
1722 // and can't be separated from it or replaced. Instead of attempting
1723 // to merge locations, simply hoist both copies of the intrinsic.
1724 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1725 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1726 // and leave any that were not hoisted behind (by calling moveBefore
1727 // rather than moveBeforePreserving).
1728 I1->moveBefore(TI);
1729 for (auto &SuccIter : OtherSuccIterRange) {
1730 auto *I2 = &*SuccIter++;
1731 assert(isa<DbgInfoIntrinsic>(I2));
1732 I2->moveBefore(TI);
1733 }
1734 } else {
1735 // For a normal instruction, we just move one to right before the
1736 // branch, then replace all uses of the other with the first. Finally,
1737 // we remove the now redundant second instruction.
1738 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1739 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1740 // and leave any that were not hoisted behind (by calling moveBefore
1741 // rather than moveBeforePreserving).
1742 I1->moveBefore(TI);
1743 for (auto &SuccIter : OtherSuccIterRange) {
1744 Instruction *I2 = &*SuccIter++;
1745 assert(I2 != I1);
1746 if (!I2->use_empty())
1747 I2->replaceAllUsesWith(I1);
1748 I1->andIRFlags(I2);
1749 combineMetadataForCSE(I1, I2, true);
1750 // I1 and I2 are being combined into a single instruction. Its debug
1751 // location is the merged locations of the original instructions.
1752 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1753 I2->eraseFromParent();
1754 }
1755 }
1756 if (!Changed)
1757 NumHoistCommonCode += SuccIterPairs.size();
1758 Changed = true;
1759 NumHoistCommonInstrs += SuccIterPairs.size();
1760 } else {
1761 if (NumSkipped >= HoistCommonSkipLimit) {
1762 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1763 return Changed;
1764 }
1765 // We are about to skip over a pair of non-identical instructions. Record
1766 // if any have characteristics that would prevent reordering instructions
1767 // across them.
1768 for (auto &SuccIterPair : SuccIterPairs) {
1769 Instruction *I = &*SuccIterPair.first++;
1770 SuccIterPair.second |= skippedInstrFlags(I);
1771 }
1772 ++NumSkipped;
1773 }
1774 }
1775}
1776
1777bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1778 Instruction *TI, Instruction *I1,
1779 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1780
1781 auto *BI = dyn_cast<BranchInst>(TI);
1782
1783 bool Changed = false;
1784 BasicBlock *TIParent = TI->getParent();
1785 BasicBlock *BB1 = I1->getParent();
1786
1787 // Use only for an if statement.
1788 auto *I2 = *OtherSuccTIs.begin();
1789 auto *BB2 = I2->getParent();
1790 if (BI) {
1791 assert(OtherSuccTIs.size() == 1);
1792 assert(BI->getSuccessor(0) == I1->getParent());
1793 assert(BI->getSuccessor(1) == I2->getParent());
1794 }
1795
1796 // In the case of an if statement, we try to hoist an invoke.
1797 // FIXME: Can we define a safety predicate for CallBr?
1798 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
1799 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
1800 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
1801 return false;
1802
1803 // TODO: callbr hoisting currently disabled pending further study.
1804 if (isa<CallBrInst>(I1))
1805 return false;
1806
1807 for (BasicBlock *Succ : successors(BB1)) {
1808 for (PHINode &PN : Succ->phis()) {
1809 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1810 for (Instruction *OtherSuccTI : OtherSuccTIs) {
1811 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
1812 if (BB1V == BB2V)
1813 continue;
1814
1815 // In the case of an if statement, check for
1816 // passingValueIsAlwaysUndefined here because we would rather eliminate
1817 // undefined control flow then converting it to a select.
1818 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
1820 return false;
1821 }
1822 }
1823 }
1824
1825 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
1826 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
1827 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
1828 // Clone the terminator and hoist it into the pred, without any debug info.
1829 Instruction *NT = I1->clone();
1830 NT->insertInto(TIParent, TI->getIterator());
1831 if (!NT->getType()->isVoidTy()) {
1832 I1->replaceAllUsesWith(NT);
1833 for (Instruction *OtherSuccTI : OtherSuccTIs)
1834 OtherSuccTI->replaceAllUsesWith(NT);
1835 NT->takeName(I1);
1836 }
1837 Changed = true;
1838 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
1839
1840 // Ensure terminator gets a debug location, even an unknown one, in case
1841 // it involves inlinable calls.
1843 Locs.push_back(I1->getDebugLoc());
1844 for (auto *OtherSuccTI : OtherSuccTIs)
1845 Locs.push_back(OtherSuccTI->getDebugLoc());
1846 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
1847
1848 // PHIs created below will adopt NT's merged DebugLoc.
1849 IRBuilder<NoFolder> Builder(NT);
1850
1851 // In the case of an if statement, hoisting one of the terminators from our
1852 // successor is a great thing. Unfortunately, the successors of the if/else
1853 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
1854 // must agree for all PHI nodes, so we insert select instruction to compute
1855 // the final result.
1856 if (BI) {
1857 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1858 for (BasicBlock *Succ : successors(BB1)) {
1859 for (PHINode &PN : Succ->phis()) {
1860 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1861 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1862 if (BB1V == BB2V)
1863 continue;
1864
1865 // These values do not agree. Insert a select instruction before NT
1866 // that determines the right value.
1867 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1868 if (!SI) {
1869 // Propagate fast-math-flags from phi node to its replacement select.
1870 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1871 if (isa<FPMathOperator>(PN))
1872 Builder.setFastMathFlags(PN.getFastMathFlags());
1873
1874 SI = cast<SelectInst>(Builder.CreateSelect(
1875 BI->getCondition(), BB1V, BB2V,
1876 BB1V->getName() + "." + BB2V->getName(), BI));
1877 }
1878
1879 // Make the PHI node use the select for all incoming values for BB1/BB2
1880 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1881 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1882 PN.setIncomingValue(i, SI);
1883 }
1884 }
1885 }
1886
1888
1889 // Update any PHI nodes in our new successors.
1890 for (BasicBlock *Succ : successors(BB1)) {
1891 AddPredecessorToBlock(Succ, TIParent, BB1);
1892 if (DTU)
1893 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
1894 }
1895
1896 if (DTU)
1897 for (BasicBlock *Succ : successors(TI))
1898 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1899
1901 if (DTU)
1902 DTU->applyUpdates(Updates);
1903 return Changed;
1904}
1905
1906// Check lifetime markers.
1907static bool isLifeTimeMarker(const Instruction *I) {
1908 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1909 switch (II->getIntrinsicID()) {
1910 default:
1911 break;
1912 case Intrinsic::lifetime_start:
1913 case Intrinsic::lifetime_end:
1914 return true;
1915 }
1916 }
1917 return false;
1918}
1919
1920// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1921// into variables.
1923 int OpIdx) {
1924 return !isa<IntrinsicInst>(I);
1925}
1926
1927// All instructions in Insts belong to different blocks that all unconditionally
1928// branch to a common successor. Analyze each instruction and return true if it
1929// would be possible to sink them into their successor, creating one common
1930// instruction instead. For every value that would be required to be provided by
1931// PHI node (because an operand varies in each input block), add to PHIOperands.
1934 DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1935 // Prune out obviously bad instructions to move. Each instruction must have
1936 // exactly zero or one use, and we check later that use is by a single, common
1937 // PHI instruction in the successor.
1938 bool HasUse = !Insts.front()->user_empty();
1939 for (auto *I : Insts) {
1940 // These instructions may change or break semantics if moved.
1941 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1942 I->getType()->isTokenTy())
1943 return false;
1944
1945 // Do not try to sink an instruction in an infinite loop - it can cause
1946 // this algorithm to infinite loop.
1947 if (I->getParent()->getSingleSuccessor() == I->getParent())
1948 return false;
1949
1950 // Conservatively return false if I is an inline-asm instruction. Sinking
1951 // and merging inline-asm instructions can potentially create arguments
1952 // that cannot satisfy the inline-asm constraints.
1953 // If the instruction has nomerge or convergent attribute, return false.
1954 if (const auto *C = dyn_cast<CallBase>(I))
1955 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
1956 return false;
1957
1958 // Each instruction must have zero or one use.
1959 if (HasUse && !I->hasOneUse())
1960 return false;
1961 if (!HasUse && !I->user_empty())
1962 return false;
1963 }
1964
1965 const Instruction *I0 = Insts.front();
1966 const auto I0MMRA = MMRAMetadata(*I0);
1967 for (auto *I : Insts) {
1968 if (!I->isSameOperationAs(I0))
1969 return false;
1970
1971 // swifterror pointers can only be used by a load or store; sinking a load
1972 // or store would require introducing a select for the pointer operand,
1973 // which isn't allowed for swifterror pointers.
1974 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
1975 return false;
1976 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
1977 return false;
1978
1979 // Treat MMRAs conservatively. This pass can be quite aggressive and
1980 // could drop a lot of MMRAs otherwise.
1981 if (MMRAMetadata(*I) != I0MMRA)
1982 return false;
1983 }
1984
1985 // All instructions in Insts are known to be the same opcode. If they have a
1986 // use, check that the only user is a PHI or in the same block as the
1987 // instruction, because if a user is in the same block as an instruction we're
1988 // contemplating sinking, it must already be determined to be sinkable.
1989 if (HasUse) {
1990 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1991 auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1992 if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1993 auto *U = cast<Instruction>(*I->user_begin());
1994 return (PNUse &&
1995 PNUse->getParent() == Succ &&
1996 PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1997 U->getParent() == I->getParent();
1998 }))
1999 return false;
2000 }
2001
2002 // Because SROA can't handle speculating stores of selects, try not to sink
2003 // loads, stores or lifetime markers of allocas when we'd have to create a
2004 // PHI for the address operand. Also, because it is likely that loads or
2005 // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
2006 // them.
2007 // This can cause code churn which can have unintended consequences down
2008 // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
2009 // FIXME: This is a workaround for a deficiency in SROA - see
2010 // https://llvm.org/bugs/show_bug.cgi?id=30188
2011 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
2012 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2013 }))
2014 return false;
2015 if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
2016 return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
2017 }))
2018 return false;
2019 if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
2020 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2021 }))
2022 return false;
2023
2024 // For calls to be sinkable, they must all be indirect, or have same callee.
2025 // I.e. if we have two direct calls to different callees, we don't want to
2026 // turn that into an indirect call. Likewise, if we have an indirect call,
2027 // and a direct call, we don't actually want to have a single indirect call.
2028 if (isa<CallBase>(I0)) {
2029 auto IsIndirectCall = [](const Instruction *I) {
2030 return cast<CallBase>(I)->isIndirectCall();
2031 };
2032 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2033 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2034 if (HaveIndirectCalls) {
2035 if (!AllCallsAreIndirect)
2036 return false;
2037 } else {
2038 // All callees must be identical.
2039 Value *Callee = nullptr;
2040 for (const Instruction *I : Insts) {
2041 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2042 if (!Callee)
2043 Callee = CurrCallee;
2044 else if (Callee != CurrCallee)
2045 return false;
2046 }
2047 }
2048 }
2049
2050 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2051 Value *Op = I0->getOperand(OI);
2052 if (Op->getType()->isTokenTy())
2053 // Don't touch any operand of token type.
2054 return false;
2055
2056 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2057 assert(I->getNumOperands() == I0->getNumOperands());
2058 return I->getOperand(OI) == I0->getOperand(OI);
2059 };
2060 if (!all_of(Insts, SameAsI0)) {
2061 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
2063 // We can't create a PHI from this GEP.
2064 return false;
2065 for (auto *I : Insts)
2066 PHIOperands[I].push_back(I->getOperand(OI));
2067 }
2068 }
2069 return true;
2070}
2071
2072// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2073// instruction of every block in Blocks to their common successor, commoning
2074// into one instruction.
2076 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2077
2078 // canSinkInstructions returning true guarantees that every block has at
2079 // least one non-terminator instruction.
2081 for (auto *BB : Blocks) {
2082 Instruction *I = BB->getTerminator();
2083 do {
2084 I = I->getPrevNode();
2085 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2086 if (!isa<DbgInfoIntrinsic>(I))
2087 Insts.push_back(I);
2088 }
2089
2090 // The only checking we need to do now is that all users of all instructions
2091 // are the same PHI node. canSinkInstructions should have checked this but
2092 // it is slightly over-aggressive - it gets confused by commutative
2093 // instructions so double-check it here.
2094 Instruction *I0 = Insts.front();
2095 if (!I0->user_empty()) {
2096 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
2097 if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
2098 auto *U = cast<Instruction>(*I->user_begin());
2099 return U == PNUse;
2100 }))
2101 return false;
2102 }
2103
2104 // We don't need to do any more checking here; canSinkInstructions should
2105 // have done it all for us.
2106 SmallVector<Value*, 4> NewOperands;
2107 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2108 // This check is different to that in canSinkInstructions. There, we
2109 // cared about the global view once simplifycfg (and instcombine) have
2110 // completed - it takes into account PHIs that become trivially
2111 // simplifiable. However here we need a more local view; if an operand
2112 // differs we create a PHI and rely on instcombine to clean up the very
2113 // small mess we may make.
2114 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2115 return I->getOperand(O) != I0->getOperand(O);
2116 });
2117 if (!NeedPHI) {
2118 NewOperands.push_back(I0->getOperand(O));
2119 continue;
2120 }
2121
2122 // Create a new PHI in the successor block and populate it.
2123 auto *Op = I0->getOperand(O);
2124 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2125 auto *PN =
2126 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2127 PN->insertBefore(BBEnd->begin());
2128 for (auto *I : Insts)
2129 PN->addIncoming(I->getOperand(O), I->getParent());
2130 NewOperands.push_back(PN);
2131 }
2132
2133 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2134 // and move it to the start of the successor block.
2135 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2136 I0->getOperandUse(O).set(NewOperands[O]);
2137
2138 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2139
2140 // Update metadata and IR flags, and merge debug locations.
2141 for (auto *I : Insts)
2142 if (I != I0) {
2143 // The debug location for the "common" instruction is the merged locations
2144 // of all the commoned instructions. We start with the original location
2145 // of the "common" instruction and iteratively merge each location in the
2146 // loop below.
2147 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2148 // However, as N-way merge for CallInst is rare, so we use simplified API
2149 // instead of using complex API for N-way merge.
2150 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2151 combineMetadataForCSE(I0, I, true);
2152 I0->andIRFlags(I);
2153 }
2154
2155 if (!I0->user_empty()) {
2156 // canSinkLastInstruction checked that all instructions were used by
2157 // one and only one PHI node. Find that now, RAUW it to our common
2158 // instruction and nuke it.
2159 auto *PN = cast<PHINode>(*I0->user_begin());
2160 PN->replaceAllUsesWith(I0);
2161 PN->eraseFromParent();
2162 }
2163
2164 // Finally nuke all instructions apart from the common instruction.
2165 for (auto *I : Insts) {
2166 if (I == I0)
2167 continue;
2168 // The remaining uses are debug users, replace those with the common inst.
2169 // In most (all?) cases this just introduces a use-before-def.
2170 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2171 I->replaceAllUsesWith(I0);
2172 I->eraseFromParent();
2173 }
2174
2175 return true;
2176}
2177
2178namespace {
2179
2180 // LockstepReverseIterator - Iterates through instructions
2181 // in a set of blocks in reverse order from the first non-terminator.
2182 // For example (assume all blocks have size n):
2183 // LockstepReverseIterator I([B1, B2, B3]);
2184 // *I-- = [B1[n], B2[n], B3[n]];
2185 // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
2186 // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
2187 // ...
2188 class LockstepReverseIterator {
2191 bool Fail;
2192
2193 public:
2194 LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
2195 reset();
2196 }
2197
2198 void reset() {
2199 Fail = false;
2200 Insts.clear();
2201 for (auto *BB : Blocks) {
2202 Instruction *Inst = BB->getTerminator();
2203 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2204 Inst = Inst->getPrevNode();
2205 if (!Inst) {
2206 // Block wasn't big enough.
2207 Fail = true;
2208 return;
2209 }
2210 Insts.push_back(Inst);
2211 }
2212 }
2213
2214 bool isValid() const {
2215 return !Fail;
2216 }
2217
2218 void operator--() {
2219 if (Fail)
2220 return;
2221 for (auto *&Inst : Insts) {
2222 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2223 Inst = Inst->getPrevNode();
2224 // Already at beginning of block.
2225 if (!Inst) {
2226 Fail = true;
2227 return;
2228 }
2229 }
2230 }
2231
2232 void operator++() {
2233 if (Fail)
2234 return;
2235 for (auto *&Inst : Insts) {
2236 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2237 Inst = Inst->getNextNode();
2238 // Already at end of block.
2239 if (!Inst) {
2240 Fail = true;
2241 return;
2242 }
2243 }
2244 }
2245
2247 return Insts;
2248 }
2249 };
2250
2251} // end anonymous namespace
2252
2253/// Check whether BB's predecessors end with unconditional branches. If it is
2254/// true, sink any common code from the predecessors to BB.
2256 DomTreeUpdater *DTU) {
2257 // We support two situations:
2258 // (1) all incoming arcs are unconditional
2259 // (2) there are non-unconditional incoming arcs
2260 //
2261 // (2) is very common in switch defaults and
2262 // else-if patterns;
2263 //
2264 // if (a) f(1);
2265 // else if (b) f(2);
2266 //
2267 // produces:
2268 //
2269 // [if]
2270 // / \
2271 // [f(1)] [if]
2272 // | | \
2273 // | | |
2274 // | [f(2)]|
2275 // \ | /
2276 // [ end ]
2277 //
2278 // [end] has two unconditional predecessor arcs and one conditional. The
2279 // conditional refers to the implicit empty 'else' arc. This conditional
2280 // arc can also be caused by an empty default block in a switch.
2281 //
2282 // In this case, we attempt to sink code from all *unconditional* arcs.
2283 // If we can sink instructions from these arcs (determined during the scan
2284 // phase below) we insert a common successor for all unconditional arcs and
2285 // connect that to [end], to enable sinking:
2286 //
2287 // [if]
2288 // / \
2289 // [x(1)] [if]
2290 // | | \
2291 // | | \
2292 // | [x(2)] |
2293 // \ / |
2294 // [sink.split] |
2295 // \ /
2296 // [ end ]
2297 //
2298 SmallVector<BasicBlock*,4> UnconditionalPreds;
2299 bool HaveNonUnconditionalPredecessors = false;
2300 for (auto *PredBB : predecessors(BB)) {
2301 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2302 if (PredBr && PredBr->isUnconditional())
2303 UnconditionalPreds.push_back(PredBB);
2304 else
2305 HaveNonUnconditionalPredecessors = true;
2306 }
2307 if (UnconditionalPreds.size() < 2)
2308 return false;
2309
2310 // We take a two-step approach to tail sinking. First we scan from the end of
2311 // each block upwards in lockstep. If the n'th instruction from the end of each
2312 // block can be sunk, those instructions are added to ValuesToSink and we
2313 // carry on. If we can sink an instruction but need to PHI-merge some operands
2314 // (because they're not identical in each instruction) we add these to
2315 // PHIOperands.
2316 int ScanIdx = 0;
2317 SmallPtrSet<Value*,4> InstructionsToSink;
2319 LockstepReverseIterator LRI(UnconditionalPreds);
2320 while (LRI.isValid() &&
2321 canSinkInstructions(*LRI, PHIOperands)) {
2322 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2323 << "\n");
2324 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2325 ++ScanIdx;
2326 --LRI;
2327 }
2328
2329 // If no instructions can be sunk, early-return.
2330 if (ScanIdx == 0)
2331 return false;
2332
2333 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2334
2335 if (!followedByDeoptOrUnreachable) {
2336 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2337 // actually sink before encountering instruction that is unprofitable to
2338 // sink?
2339 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2340 unsigned NumPHIdValues = 0;
2341 for (auto *I : *LRI)
2342 for (auto *V : PHIOperands[I]) {
2343 if (!InstructionsToSink.contains(V))
2344 ++NumPHIdValues;
2345 // FIXME: this check is overly optimistic. We may end up not sinking
2346 // said instruction, due to the very same profitability check.
2347 // See @creating_too_many_phis in sink-common-code.ll.
2348 }
2349 LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2350 unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2351 if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2352 NumPHIInsts++;
2353
2354 return NumPHIInsts <= 1;
2355 };
2356
2357 // We've determined that we are going to sink last ScanIdx instructions,
2358 // and recorded them in InstructionsToSink. Now, some instructions may be
2359 // unprofitable to sink. But that determination depends on the instructions
2360 // that we are going to sink.
2361
2362 // First, forward scan: find the first instruction unprofitable to sink,
2363 // recording all the ones that are profitable to sink.
2364 // FIXME: would it be better, after we detect that not all are profitable.
2365 // to either record the profitable ones, or erase the unprofitable ones?
2366 // Maybe we need to choose (at runtime) the one that will touch least
2367 // instrs?
2368 LRI.reset();
2369 int Idx = 0;
2370 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2371 while (Idx < ScanIdx) {
2372 if (!ProfitableToSinkInstruction(LRI)) {
2373 // Too many PHIs would be created.
2374 LLVM_DEBUG(
2375 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2376 break;
2377 }
2378 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2379 --LRI;
2380 ++Idx;
2381 }
2382
2383 // If no instructions can be sunk, early-return.
2384 if (Idx == 0)
2385 return false;
2386
2387 // Did we determine that (only) some instructions are unprofitable to sink?
2388 if (Idx < ScanIdx) {
2389 // Okay, some instructions are unprofitable.
2390 ScanIdx = Idx;
2391 InstructionsToSink = InstructionsProfitableToSink;
2392
2393 // But, that may make other instructions unprofitable, too.
2394 // So, do a backward scan, do any earlier instructions become
2395 // unprofitable?
2396 assert(
2397 !ProfitableToSinkInstruction(LRI) &&
2398 "We already know that the last instruction is unprofitable to sink");
2399 ++LRI;
2400 --Idx;
2401 while (Idx >= 0) {
2402 // If we detect that an instruction becomes unprofitable to sink,
2403 // all earlier instructions won't be sunk either,
2404 // so preemptively keep InstructionsProfitableToSink in sync.
2405 // FIXME: is this the most performant approach?
2406 for (auto *I : *LRI)
2407 InstructionsProfitableToSink.erase(I);
2408 if (!ProfitableToSinkInstruction(LRI)) {
2409 // Everything starting with this instruction won't be sunk.
2410 ScanIdx = Idx;
2411 InstructionsToSink = InstructionsProfitableToSink;
2412 }
2413 ++LRI;
2414 --Idx;
2415 }
2416 }
2417
2418 // If no instructions can be sunk, early-return.
2419 if (ScanIdx == 0)
2420 return false;
2421 }
2422
2423 bool Changed = false;
2424
2425 if (HaveNonUnconditionalPredecessors) {
2426 if (!followedByDeoptOrUnreachable) {
2427 // It is always legal to sink common instructions from unconditional
2428 // predecessors. However, if not all predecessors are unconditional,
2429 // this transformation might be pessimizing. So as a rule of thumb,
2430 // don't do it unless we'd sink at least one non-speculatable instruction.
2431 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2432 LRI.reset();
2433 int Idx = 0;
2434 bool Profitable = false;
2435 while (Idx < ScanIdx) {
2436 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2437 Profitable = true;
2438 break;
2439 }
2440 --LRI;
2441 ++Idx;
2442 }
2443 if (!Profitable)
2444 return false;
2445 }
2446
2447 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2448 // We have a conditional edge and we're going to sink some instructions.
2449 // Insert a new block postdominating all blocks we're going to sink from.
2450 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2451 // Edges couldn't be split.
2452 return false;
2453 Changed = true;
2454 }
2455
2456 // Now that we've analyzed all potential sinking candidates, perform the
2457 // actual sink. We iteratively sink the last non-terminator of the source
2458 // blocks into their common successor unless doing so would require too
2459 // many PHI instructions to be generated (currently only one PHI is allowed
2460 // per sunk instruction).
2461 //
2462 // We can use InstructionsToSink to discount values needing PHI-merging that will
2463 // actually be sunk in a later iteration. This allows us to be more
2464 // aggressive in what we sink. This does allow a false positive where we
2465 // sink presuming a later value will also be sunk, but stop half way through
2466 // and never actually sink it which means we produce more PHIs than intended.
2467 // This is unlikely in practice though.
2468 int SinkIdx = 0;
2469 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2470 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2471 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2472 << "\n");
2473
2474 // Because we've sunk every instruction in turn, the current instruction to
2475 // sink is always at index 0.
2476 LRI.reset();
2477
2478 if (!sinkLastInstruction(UnconditionalPreds)) {
2479 LLVM_DEBUG(
2480 dbgs()
2481 << "SINK: stopping here, failed to actually sink instruction!\n");
2482 break;
2483 }
2484
2485 NumSinkCommonInstrs++;
2486 Changed = true;
2487 }
2488 if (SinkIdx != 0)
2489 ++NumSinkCommonCode;
2490 return Changed;
2491}
2492
2493namespace {
2494
2495struct CompatibleSets {
2496 using SetTy = SmallVector<InvokeInst *, 2>;
2497
2499
2500 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2501
2502 SetTy &getCompatibleSet(InvokeInst *II);
2503
2504 void insert(InvokeInst *II);
2505};
2506
2507CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2508 // Perform a linear scan over all the existing sets, see if the new `invoke`
2509 // is compatible with any particular set. Since we know that all the `invokes`
2510 // within a set are compatible, only check the first `invoke` in each set.
2511 // WARNING: at worst, this has quadratic complexity.
2512 for (CompatibleSets::SetTy &Set : Sets) {
2513 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2514 return Set;
2515 }
2516
2517 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2518 return Sets.emplace_back();
2519}
2520
2521void CompatibleSets::insert(InvokeInst *II) {
2522 getCompatibleSet(II).emplace_back(II);
2523}
2524
2525bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2526 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2527
2528 // Can we theoretically merge these `invoke`s?
2529 auto IsIllegalToMerge = [](InvokeInst *II) {
2530 return II->cannotMerge() || II->isInlineAsm();
2531 };
2532 if (any_of(Invokes, IsIllegalToMerge))
2533 return false;
2534
2535 // Either both `invoke`s must be direct,
2536 // or both `invoke`s must be indirect.
2537 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2538 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2539 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2540 if (HaveIndirectCalls) {
2541 if (!AllCallsAreIndirect)
2542 return false;
2543 } else {
2544 // All callees must be identical.
2545 Value *Callee = nullptr;
2546 for (InvokeInst *II : Invokes) {
2547 Value *CurrCallee = II->getCalledOperand();
2548 assert(CurrCallee && "There is always a called operand.");
2549 if (!Callee)
2550 Callee = CurrCallee;
2551 else if (Callee != CurrCallee)
2552 return false;
2553 }
2554 }
2555
2556 // Either both `invoke`s must not have a normal destination,
2557 // or both `invoke`s must have a normal destination,
2558 auto HasNormalDest = [](InvokeInst *II) {
2559 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2560 };
2561 if (any_of(Invokes, HasNormalDest)) {
2562 // Do not merge `invoke` that does not have a normal destination with one
2563 // that does have a normal destination, even though doing so would be legal.
2564 if (!all_of(Invokes, HasNormalDest))
2565 return false;
2566
2567 // All normal destinations must be identical.
2568 BasicBlock *NormalBB = nullptr;
2569 for (InvokeInst *II : Invokes) {
2570 BasicBlock *CurrNormalBB = II->getNormalDest();
2571 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2572 if (!NormalBB)
2573 NormalBB = CurrNormalBB;
2574 else if (NormalBB != CurrNormalBB)
2575 return false;
2576 }
2577
2578 // In the normal destination, the incoming values for these two `invoke`s
2579 // must be compatible.
2580 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2582 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2583 &EquivalenceSet))
2584 return false;
2585 }
2586
2587#ifndef NDEBUG
2588 // All unwind destinations must be identical.
2589 // We know that because we have started from said unwind destination.
2590 BasicBlock *UnwindBB = nullptr;
2591 for (InvokeInst *II : Invokes) {
2592 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2593 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2594 if (!UnwindBB)
2595 UnwindBB = CurrUnwindBB;
2596 else
2597 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2598 }
2599#endif
2600
2601 // In the unwind destination, the incoming values for these two `invoke`s
2602 // must be compatible.
2604 Invokes.front()->getUnwindDest(),
2605 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2606 return false;
2607
2608 // Ignoring arguments, these `invoke`s must be identical,
2609 // including operand bundles.
2610 const InvokeInst *II0 = Invokes.front();
2611 for (auto *II : Invokes.drop_front())
2612 if (!II->isSameOperationAs(II0))
2613 return false;
2614
2615 // Can we theoretically form the data operands for the merged `invoke`?
2616 auto IsIllegalToMergeArguments = [](auto Ops) {
2617 Use &U0 = std::get<0>(Ops);
2618 Use &U1 = std::get<1>(Ops);
2619 if (U0 == U1)
2620 return false;
2621 return U0->getType()->isTokenTy() ||
2622 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2623 U0.getOperandNo());
2624 };
2625 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2626 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2627 IsIllegalToMergeArguments))
2628 return false;
2629
2630 return true;
2631}
2632
2633} // namespace
2634
2635// Merge all invokes in the provided set, all of which are compatible
2636// as per the `CompatibleSets::shouldBelongToSameSet()`.
2638                                       DomTreeUpdater *DTU) {
2639  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2640
  // Worst case: one inserted edge per invoke, up to two deleted edges per
  // invoke, plus up to two successor edges out of the merged block.
2642  if (DTU)
2643    Updates.reserve(2 + 3 * Invokes.size());
2644
  // If the normal destination does nothing but immediately `unreachable`,
  // we will synthesize a fresh unreachable block for the merged invoke below.
2645  bool HasNormalDest =
2646      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2647
2648  // Clone one of the invokes into a new basic block.
2649  // Since they are all compatible, it doesn't matter which invoke is cloned.
2650  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2651    InvokeInst *II0 = Invokes.front();
2652    BasicBlock *II0BB = II0->getParent();
2653    BasicBlock *InsertBeforeBlock =
2654        II0->getParent()->getIterator()->getNextNode();
2655    Function *Func = II0BB->getParent();
2656    LLVMContext &Ctx = II0->getContext();
2657
2658    BasicBlock *MergedInvokeBB = BasicBlock::Create(
2659        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2660
2661    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2662    // NOTE: all invokes have the same attributes, so no handling needed.
2663    MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2664
2665    if (!HasNormalDest) {
2666      // This set does not have a normal destination,
2667      // so just form a new block with unreachable terminator.
2668      BasicBlock *MergedNormalDest = BasicBlock::Create(
2669          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2670      new UnreachableInst(Ctx, MergedNormalDest);
2671      MergedInvoke->setNormalDest(MergedNormalDest);
2672    }
2673
2674    // The unwind destination, however, remains identical for all invokes here.
2675
2676    return MergedInvoke;
2677  }();
2678
2679  if (DTU) {
2680    // Predecessor blocks that contained these invokes will now branch to
2681    // the new block that contains the merged invoke, ...
2682    for (InvokeInst *II : Invokes)
2683      Updates.push_back(
2684          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2685
2686    // ... which has the new `unreachable` block as normal destination,
2687    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2688    for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2689      Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2690                         SuccBBOfMergedInvoke});
2691
2692    // Since predecessor blocks now unconditionally branch to a new block,
2693    // they no longer branch to their original successors.
2694    for (InvokeInst *II : Invokes)
2695      for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2696        Updates.push_back(
2697            {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2698  }
2699
2700  bool IsIndirectCall = Invokes[0]->isIndirectCall();
2701
2702  // Form the merged operands for the merged invoke.
2703  for (Use &U : MergedInvoke->operands()) {
2704    // Only PHI together the indirect callees and data operands.
2705    if (MergedInvoke->isCallee(&U)) {
2706      if (!IsIndirectCall)
2707        continue;
2708    } else if (!MergedInvoke->isDataOperand(&U))
2709      continue;
2710
2711    // Don't create trivial PHI's with all-identical incoming values.
2712    bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2713      return II->getOperand(U.getOperandNo()) != U.get();
2714    });
2715    if (!NeedPHI)
2716      continue;
2717
2718    // Form a PHI out of all the data ops under this index.
2720        U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2721    for (InvokeInst *II : Invokes)
2722      PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2723
2724    U.set(PN);
2725  }
2726
2727  // We've ensured that each PHI node has compatible (identical) incoming values
2728  // when coming from each of the `invoke`s in the current merge set,
2729  // so update the PHI nodes accordingly.
2730  for (BasicBlock *Succ : successors(MergedInvoke))
2731    AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2732                          /*ExistPred=*/Invokes.front()->getParent());
2733
2734  // And finally, replace the original `invoke`s with an unconditional branch
2735  // to the block with the merged `invoke`. Also, give that merged `invoke`
2736  // the merged debugloc of all the original `invoke`s.
2737  DILocation *MergedDebugLoc = nullptr;
2738  for (InvokeInst *II : Invokes) {
2739    // Compute the debug location common to all the original `invoke`s.
2740    if (!MergedDebugLoc)
2741      MergedDebugLoc = II->getDebugLoc();
2742    else
2743      MergedDebugLoc =
2744          DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2745
2746    // And replace the old `invoke` with an unconditional branch
2747    // to the block with the merged `invoke`.
2748    for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2749      OrigSuccBB->removePredecessor(II->getParent());
2750    BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2751    II->replaceAllUsesWith(MergedInvoke);
2752    II->eraseFromParent();
2753    ++NumInvokesMerged;
2754  }
2755  MergedInvoke->setDebugLoc(MergedDebugLoc);
2756  ++NumInvokeSetsFormed;
2757
2758  if (DTU)
2759    DTU->applyUpdates(Updates);
2760}
2761
2762/// If this block is a `landingpad` exception handling block, categorize all
2763/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2764/// being "mergeable" together, and then merge invokes in each set together.
2765///
2766/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2767///           [...]        [...]
2768///             |            |
2769///        [invoke0]    [invoke1]
2770///           / \          / \
2771///     [cont0] [landingpad] [cont1]
2772/// to:
2773///      [...] [...]
2774///        \     /
2775///       [invoke]
2776///        /    \
2777///   [cont] [landingpad]
2778///
2779/// But of course we can only do that if the invokes share the `landingpad`,
2780/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2781/// and the invoked functions are "compatible".
2784    return false;
2785
2786  bool Changed = false;
2787
2788  // FIXME: generalize to all exception handling blocks?
2789  if (!BB->isLandingPad())
2790    return Changed;
2791
2792  CompatibleSets Grouper;
  // `Grouper.Sets` ends up holding disjoint sets of mutually-mergeable invokes.
2793
2794  // Record all the predecessors of this `landingpad`. As per verifier,
2795  // the only allowed predecessor is the unwind edge of an `invoke`.
2796  // We want to group "compatible" `invokes` into the same set to be merged.
2797  for (BasicBlock *PredBB : predecessors(BB))
2798    Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2799
2800  // And now, merge `invoke`s that were grouped together.
2801  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2802    if (Invokes.size() < 2)
2803      continue;
2804    Changed = true;
2805    MergeCompatibleInvokesImpl(Invokes, DTU);
2806  }
2807
2808  return Changed;
2809}
2810
2811namespace {
2812/// Track ephemeral values, which should be ignored for cost-modelling
2813/// purposes. Requires walking instructions in reverse order.
2814class EphemeralValueTracker {
2816
  // An instruction is ephemeral if it is an `assume`, or if it has no side
  // effects, is not a terminator, and every user is an already-tracked
  // ephemeral instruction (hence the required reverse walk).
2817  bool isEphemeral(const Instruction *I) {
2818    if (isa<AssumeInst>(I))
2819      return true;
2820    return !I->mayHaveSideEffects() && !I->isTerminator() &&
2821           all_of(I->users(), [&](const User *U) {
2822             return EphValues.count(cast<Instruction>(U));
2823           });
2824  }
2825
2826public:
  /// Record \p I as ephemeral if it qualifies; returns true iff it was tracked.
2827  bool track(const Instruction *I) {
2828    if (isEphemeral(I)) {
2829      EphValues.insert(I);
2830      return true;
2831    }
2832    return false;
2833  }
2834
  /// Whether \p I was previously recorded as ephemeral.
2835  bool contains(const Instruction *I) const { return EphValues.contains(I); }
2836};
2837} // namespace
2838
2839/// Determine if we can hoist/sink a sole store instruction out of a
2840/// conditional block.
2841///
2842/// We are looking for code like the following:
2843///   BrBB:
2844///     store i32 %add, i32* %arrayidx2
2845///     ... // No other stores or function calls (we could be calling a memory
2846///     ... // function).
2847///     %cmp = icmp ult %x, %y
2848///     br i1 %cmp, label %EndBB, label %ThenBB
2849///   ThenBB:
2850///     store i32 %add5, i32* %arrayidx2
2851///     br label EndBB
2852///   EndBB:
2853///     ...
2854/// We are going to transform this into:
2855///   BrBB:
2856///     store i32 %add, i32* %arrayidx2
2857///     ... //
2858///     %cmp = icmp ult %x, %y
2859///     %add.add5 = select i1 %cmp, i32 %add, %add5
2860///     store i32 %add.add5, i32* %arrayidx2
2861///     ...
2862///
2863/// \return The pointer to the value of the previous store if the store can be
2864/// hoisted into the predecessor block. nullptr otherwise.
2866                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
2867  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2868  if (!StoreToHoist)
2869    return nullptr;
2870
2871  // Volatile or atomic.
2872  if (!StoreToHoist->isSimple())
2873    return nullptr;
2874
2875  Value *StorePtr = StoreToHoist->getPointerOperand();
2876  Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2877
2878  // Look for a store to the same pointer in BrBB.
  // Small fixed scan budget keeps this check cheap.
2879  unsigned MaxNumInstToLookAt = 9;
2880  // Skip pseudo probe intrinsic calls which are not really killing any memory
2881  // accesses.
2882  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2883    if (!MaxNumInstToLookAt)
2884      break;
2885    --MaxNumInstToLookAt;
2886
2887    // Could be calling an instruction that affects memory like free().
2888    if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2889      return nullptr;
2890
2891    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2892      // Found the previous store to same location and type. Make sure it is
2893      // simple, to avoid introducing a spurious non-atomic write after an
2894      // atomic write.
2895      if (SI->getPointerOperand() == StorePtr &&
2896          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
2897          SI->getAlign() >= StoreToHoist->getAlign())
2898        // Found the previous store, return its value operand.
2899        return SI->getValueOperand();
2900      return nullptr; // Unknown store.
2901    }
2902
2903    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2904      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2905          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
2906        // Local objects (created by an `alloca` instruction) are always
2907        // writable, so once we are past a read from a location it is valid to
2908        // also write to that same location.
2909        // If the address of the local object never escapes the function, that
2910        // means it's never concurrently read or written, hence moving the store
2911        // from under the condition will not introduce a data race.
2912        auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2913        if (AI && !PointerMayBeCaptured(AI, false, true))
2914          // Found a previous load, return it.
2915          return LI;
2916      }
2917      // The load didn't work out, but we may still find a store.
2918    }
2919  }
2920
2921  return nullptr;
2922}
2923
2924/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2925/// converted to selects.
/// Returns true only if at least one PHI actually needs a select; the select
/// costs are accumulated into `Cost` and each unfolded ConstantExpr incoming
/// value is counted in `SpeculatedInstructions`.
2927                                           BasicBlock *EndBB,
2928                                           unsigned &SpeculatedInstructions,
2930                                           const TargetTransformInfo &TTI) {
2932      BB->getParent()->hasMinSize()
2935
2936  bool HaveRewritablePHIs = false;
2937  for (PHINode &PN : EndBB->phis()) {
2938    Value *OrigV = PN.getIncomingValueForBlock(BB);
2939    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2940
2941    // FIXME: Try to remove some of the duplication with
2942    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2943    if (ThenV == OrigV)
2944      continue;
2945
2946    Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2948
2949    // Don't convert to selects if we could remove undefined behavior instead.
2950    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2952      return false;
2953
2954    HaveRewritablePHIs = true;
2955    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2956    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2957    if (!OrigCE && !ThenCE)
2958      continue; // Known cheap (FIXME: Maybe not true for aggregates).
2959
2960    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2961    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2962    InstructionCost MaxCost =
2964    if (OrigCost + ThenCost > MaxCost)
2965      return false;
2966
2967    // Account for the cost of an unfolded ConstantExpr which could end up
2968    // getting expanded into Instructions.
2969    // FIXME: This doesn't account for how many operations are combined in the
2970    // constant expression.
2971    ++SpeculatedInstructions;
2972    if (SpeculatedInstructions > 1)
2973      return false;
2974  }
2975
2976  return HaveRewritablePHIs;
2977}
2978
2979/// Speculate a conditional basic block flattening the CFG.
2980///
2981/// Note that this is a very risky transform currently. Speculating
2982/// instructions like this is most often not desirable. Instead, there is an MI
2983/// pass which can do it with full awareness of the resource constraints.
2984/// However, some cases are "obvious" and we should do directly. An example of
2985/// this is speculating a single, reasonably cheap instruction.
2986///
2987/// There is only one distinct advantage to flattening the CFG at the IR level:
2988/// it makes very common but simplistic optimizations such as are common in
2989/// instcombine and the DAG combiner more powerful by removing CFG edges and
2990/// modeling their effects with easier to reason about SSA value graphs.
2991///
2992///
2993/// An illustration of this transform is turning this IR:
2994/// \code
2995///   BB:
2996///     %cmp = icmp ult %x, %y
2997///     br i1 %cmp, label %EndBB, label %ThenBB
2998///   ThenBB:
2999///     %sub = sub %x, %y
3000///     br label BB2
3001///   EndBB:
3002///     %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
3003///     ...
3004/// \endcode
3005///
3006/// Into this IR:
3007/// \code
3008///   BB:
3009///     %cmp = icmp ult %x, %y
3010///     %sub = sub %x, %y
3011///     %cond = select i1 %cmp, 0, %sub
3012///     ...
3013/// \endcode
3014///
3015/// \returns true if the conditional block is removed.
3016bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
3017                                            BasicBlock *ThenBB) {
3018  if (!Options.SpeculateBlocks)
3019    return false;
3020
3021  // Be conservative for now. FP select instruction can often be expensive.
3022  Value *BrCond = BI->getCondition();
3023  if (isa<FCmpInst>(BrCond))
3024    return false;
3025
3026  BasicBlock *BB = BI->getParent();
3027  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3028  InstructionCost Budget =
3030
3031  // If ThenBB is actually on the false edge of the conditional branch, remember
3032  // to swap the select operands later.
3033  bool Invert = false;
3034  if (ThenBB != BI->getSuccessor(0)) {
3035    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3036    Invert = true;
3037  }
3038  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3039
3040  // If the branch is non-unpredictable, and is predicted to *not* branch to
3041  // the `then` block, then avoid speculating it.
3042  if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
3043    uint64_t TWeight, FWeight;
3044    if (extractBranchWeights(*BI, TWeight, FWeight) &&
3045        (TWeight + FWeight) != 0) {
3046      uint64_t EndWeight = Invert ? TWeight : FWeight;
3047      BranchProbability BIEndProb =
3048          BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3050      if (BIEndProb >= Likely)
3051        return false;
3052    }
3053  }
3054
3055  // Keep a count of how many times instructions are used within ThenBB when
3056  // they are candidates for sinking into ThenBB. Specifically:
3057  // - They are defined in BB, and
3058  // - They have no side effects, and
3059  // - All of their uses are in ThenBB.
3060  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3061
3062  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3063
3064  unsigned SpeculatedInstructions = 0;
3065  Value *SpeculatedStoreValue = nullptr;
3066  StoreInst *SpeculatedStore = nullptr;
3067  EphemeralValueTracker EphTracker;
  // Walk ThenBB bottom-up (terminator excluded) so that ephemeral values are
  // recognized before the instructions that define them.
3068  for (Instruction &I : reverse(drop_end(*ThenBB))) {
3069    // Skip debug info.
3070    if (isa<DbgInfoIntrinsic>(I)) {
3071      SpeculatedDbgIntrinsics.push_back(&I);
3072      continue;
3073    }
3074
3075    // Skip pseudo probes. The consequence is we lose track of the branch
3076    // probability for ThenBB, which is fine since the optimization here takes
3077    // place regardless of the branch probability.
3078    if (isa<PseudoProbeInst>(I)) {
3079      // The probe should be deleted so that it will not be over-counted when
3080      // the samples collected on the non-conditional path are counted towards
3081      // the conditional path. We leave it for the counts inference algorithm to
3082      // figure out a proper count for an unknown probe.
3083      SpeculatedDbgIntrinsics.push_back(&I);
3084      continue;
3085    }
3086
3087    // Ignore ephemeral values, they will be dropped by the transform.
3088    if (EphTracker.track(&I))
3089      continue;
3090
3091    // Only speculatively execute a single instruction (not counting the
3092    // terminator) for now.
3093    ++SpeculatedInstructions;
3094    if (SpeculatedInstructions > 1)
3095      return false;
3096
3097    // Don't hoist the instruction if it's unsafe or expensive.
3099        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3100                                  &I, BB, ThenBB, EndBB))))
3101      return false;
3102    if (!SpeculatedStoreValue &&
3105      return false;
3106
3107    // Store the store speculation candidate.
3108    if (SpeculatedStoreValue)
3109      SpeculatedStore = cast<StoreInst>(&I);
3110
3111    // Do not hoist the instruction if any of its operands are defined but not
3112    // used in BB. The transformation will prevent the operand from
3113    // being sunk into the use block.
3114    for (Use &Op : I.operands()) {
3115      Instruction *OpI = dyn_cast<Instruction>(Op);
3116      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3117        continue; // Not a candidate for sinking.
3118
3119      ++SinkCandidateUseCounts[OpI];
3120    }
3121  }
3122
3123  // Consider any sink candidates which are only used in ThenBB as costs for
3124  // speculation. Note, while we iterate over a DenseMap here, we are summing
3125  // and so iteration order isn't significant.
3126  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3127    if (Inst->hasNUses(Count)) {
3128      ++SpeculatedInstructions;
3129      if (SpeculatedInstructions > 1)
3130        return false;
3131    }
3132
3133  // Check that we can insert the selects and that it's not too expensive to do
3134  // so.
3135  bool Convert = SpeculatedStore != nullptr;
3137  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3138                                            SpeculatedInstructions,
3139                                            Cost, TTI);
3140  if (!Convert || Cost > Budget)
3141    return false;
3142
3143  // If we get here, we can hoist the instruction and if-convert.
3144  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3145
3146  // Insert a select of the value of the speculated store.
3147  if (SpeculatedStoreValue) {
3148    IRBuilder<NoFolder> Builder(BI);
3149    Value *OrigV = SpeculatedStore->getValueOperand();
3150    Value *TrueV = SpeculatedStore->getValueOperand();
3151    Value *FalseV = SpeculatedStoreValue;
3152    if (Invert)
3153      std::swap(TrueV, FalseV);
3154    Value *S = Builder.CreateSelect(
3155        BrCond, TrueV, FalseV, "spec.store.select", BI);
3156    SpeculatedStore->setOperand(0, S);
3157    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3158                                         SpeculatedStore->getDebugLoc());
3159    // The value stored is still conditional, but the store itself is now
3160    // unconditionally executed, so we must be sure that any linked dbg.assign
3161    // intrinsics are tracking the new stored value (the result of the
3162    // select). If we don't, and the store were to be removed by another pass
3163    // (e.g. DSE), then we'd eventually end up emitting a location describing
3164    // the conditional value, unconditionally.
3165    //
3166    // === Before this transformation ===
3167    // pred:
3168    // store %one, %x.dest, !DIAssignID !1
3169    // dbg.assign %one, "x", ..., !1, ...
3170    // br %cond if.then
3171    //
3172    // if.then:
3173    // store %two, %x.dest, !DIAssignID !2
3174    // dbg.assign %two, "x", ..., !2, ...
3175    //
3176    // === After this transformation ===
3177    // pred:
3178    // store %one, %x.dest, !DIAssignID !1
3179    // dbg.assign %one, "x", ..., !1
3180    //  ...
3181    // %merge = select %cond, %two, %one
3182    // store %merge, %x.dest, !DIAssignID !2
3183    // dbg.assign %merge, "x", ..., !2
3184    auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3185      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3186        DbgAssign->replaceVariableLocationOp(OrigV, S);
3187    };
3188    for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3189    for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3190  }
3191
3192  // Metadata can be dependent on the condition we are hoisting above.
3193  // Strip all UB-implying metadata on the instruction. Drop the debug loc
3194  // to avoid making it appear as if the condition is a constant, which would
3195  // be misleading while debugging.
3196  // Similarly strip attributes that maybe dependent on condition we are
3197  // hoisting above.
3198  for (auto &I : make_early_inc_range(*ThenBB)) {
3199    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3200      // Don't update the DILocation of dbg.assign intrinsics.
3201      if (!isa<DbgAssignIntrinsic>(&I))
3202        I.setDebugLoc(DebugLoc());
3203    }
3204    I.dropUBImplyingAttrsAndMetadata();
3205
3206    // Drop ephemeral values.
3207    if (EphTracker.contains(&I)) {
3208      I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3209      I.eraseFromParent();
3210    }
3211  }
3212
3213  // Hoist the instructions.
3214  // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3215  // to these instructions, in the same way that dbg.value intrinsics are
3216  // dropped at the end of this block.
3217  for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3218    for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3219      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3220      // equivalent).
3221      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3222          !DVR || !DVR->isDbgAssign())
3223        It.dropOneDbgRecord(&DR);
  // Move everything except ThenBB's terminator into BB, directly before the
  // conditional branch.
3224  BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3225             std::prev(ThenBB->end()));
3226
3227  // Insert selects and rewrite the PHI operands.
3228  IRBuilder<NoFolder> Builder(BI);
3229  for (PHINode &PN : EndBB->phis()) {
3230    unsigned OrigI = PN.getBasicBlockIndex(BB);
3231    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3232    Value *OrigV = PN.getIncomingValue(OrigI);
3233    Value *ThenV = PN.getIncomingValue(ThenI);
3234
3235    // Skip PHIs which are trivial.
3236    if (OrigV == ThenV)
3237      continue;
3238
3239    // Create a select whose true value is the speculatively executed value and
3240    // false value is the pre-existing value. Swap them if the branch
3241    // destinations were inverted.
3242    Value *TrueV = ThenV, *FalseV = OrigV;
3243    if (Invert)
3244      std::swap(TrueV, FalseV);
3245    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3246    PN.setIncomingValue(OrigI, V);
3247    PN.setIncomingValue(ThenI, V);
3248  }
3249
3250  // Remove speculated dbg intrinsics.
3251  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3252  // dbg value for the different flows and inserting it after the select.
3253  for (Instruction *I : SpeculatedDbgIntrinsics) {
3254    // We still want to know that an assignment took place so don't remove
3255    // dbg.assign intrinsics.
3256    if (!isa<DbgAssignIntrinsic>(I))
3257      I->eraseFromParent();
3258  }
3259
3260  ++NumSpeculations;
3261  return true;
3262}
3263
3264/// Return true if we can thread a branch across this block.
/// (It must contain no noduplicate/convergent calls, stay under
/// MaxSmallBlockSize non-ephemeral non-PHI instructions, and define no values
/// used outside of it.)
3266  int Size = 0;
3267  EphemeralValueTracker EphTracker;
3268
3269  // Walk the loop in reverse so that we can identify ephemeral values properly
3270  // (values only feeding assumes).
3271  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3272    // Can't fold blocks that contain noduplicate or convergent calls.
3273    if (CallInst *CI = dyn_cast<CallInst>(&I))
3274      if (CI->cannotDuplicate() || CI->isConvergent())
3275        return false;
3276
3277    // Ignore ephemeral values which are deleted during codegen.
3278    // We will delete Phis while threading, so Phis should not be accounted in
3279    // block's size.
3280    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3281      if (Size++ > MaxSmallBlockSize)
3282        return false; // Don't clone large BB's.
3283    }
3284
3285    // We can only support instructions that do not define values that are
3286    // live outside of the current basic block.
3287    for (User *U : I.users()) {
3288      Instruction *UI = cast<Instruction>(U);
3289      if (UI->getParent() != BB || isa<PHINode>(UI))
3290        return false;
3291    }
3292
3293    // Looks ok, continue checking.
3294  }
3295
3296  return true;
3297}
3298
3300                                        BasicBlock *To) {
3301  // Don't look past the block defining the value, we might get the value from
3302  // a previous loop iteration.
3303  auto *I = dyn_cast<Instruction>(V);
3304  if (I && I->getParent() == To)
3305    return nullptr;
3306
3307  // We know the value if the From block branches on it.
  // The two successors must be distinct, otherwise taking the edge does not
  // pin the condition to a single value.
3308  auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3309  if (BI && BI->isConditional() && BI->getCondition() == V &&
3310      BI->getSuccessor(0) != BI->getSuccessor(1))
3311    return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3313
3314  return nullptr;
3315}
3316
3317/// If we have a conditional branch on something for which we know the constant
3318/// value in predecessors (e.g. a phi node in the current block), thread edges
3319/// from the predecessor to their ultimate destination.
3320static std::optional<bool>
3322                                            const DataLayout &DL,
3323                                            AssumptionCache *AC) {
3325  BasicBlock *BB = BI->getParent();
3326  Value *Cond = BI->getCondition();
3327  PHINode *PN = dyn_cast<PHINode>(Cond);
3328  if (PN && PN->getParent() == BB) {
3329    // Degenerate case of a single entry PHI.
3330    if (PN->getNumIncomingValues() == 1) {
3332      return true;
3333    }
3334
3335    for (Use &U : PN->incoming_values())
3336      if (auto *CB = dyn_cast<ConstantInt>(U))
3337        KnownValues[CB].insert(PN->getIncomingBlock(U));
3338  } else {
3339    for (BasicBlock *Pred : predecessors(BB)) {
3340      if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3341        KnownValues[CB].insert(Pred);
3342    }
3343  }
3344
3345  if (KnownValues.empty())
3346    return false;
3347
3348  // Now we know that this block has multiple preds and two succs.
3349  // Check that the block is small enough and values defined in the block are
3350  // not used outside of it.
3352    return false;
3353
3354  for (const auto &Pair : KnownValues) {
3355    // Okay, we now know that all edges from PredBB should be revectored to
3356    // branch to RealDest.
3357    ConstantInt *CB = Pair.first;
3358    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true condition branches to successor 0, a false one to successor 1.
3359    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3360
3361    if (RealDest == BB)
3362      continue; // Skip self loops.
3363
3364    // Skip if the predecessor's terminator is an indirect branch.
3365    if (any_of(PredBBs, [](BasicBlock *PredBB) {
3366          return isa<IndirectBrInst>(PredBB->getTerminator());
3367        }))
3368      continue;
3369
3370    LLVM_DEBUG({
3371      dbgs() << "Condition " << *Cond << " in " << BB->getName()
3372             << " has value " << *Pair.first << " in predecessors:\n";
3373      for (const BasicBlock *PredBB : Pair.second)
3374        dbgs() << "  " << PredBB->getName() << "\n";
3375      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3376    });
3377
3378    // Split the predecessors we are threading into a new edge block. We'll
3379    // clone the instructions into this block, and then redirect it to RealDest.
3380    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3381
3382    // TODO: These just exist to reduce test diff, we can drop them if we like.
3383    EdgeBB->setName(RealDest->getName() + ".critedge");
3384    EdgeBB->moveBefore(RealDest);
3385
3386    // Update PHI nodes.
3387    AddPredecessorToBlock(RealDest, EdgeBB, BB);
3388
3389    // BB may have instructions that are being threaded over. Clone these
3390    // instructions into EdgeBB. We know that there will be no uses of the
3391    // cloned instructions outside of EdgeBB.
3392    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3393    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
3394    TranslateMap[Cond] = CB;
3395
3396    // RemoveDIs: track instructions that we optimise away while folding, so
3397    // that we can copy DbgVariableRecords from them later.
3398    BasicBlock::iterator SrcDbgCursor = BB->begin();
3399    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3400      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3401        TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3402        continue;
3403      }
3404      // Clone the instruction.
3405      Instruction *N = BBI->clone();
3406      // Insert the new instruction into its new home.
3407      N->insertInto(EdgeBB, InsertPt);
3408
3409      if (BBI->hasName())
3410        N->setName(BBI->getName() + ".c");
3411
3412      // Update operands due to translation.
3413      for (Use &Op : N->operands()) {
3414        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3415        if (PI != TranslateMap.end())
3416          Op = PI->second;
3417      }
3418
3419      // Check for trivial simplification.
3420      if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3421        if (!BBI->use_empty())
3422          TranslateMap[&*BBI] = V;
3423        if (!N->mayHaveSideEffects()) {
3424          N->eraseFromParent(); // Instruction folded away, don't need actual
3425                                // inst
3426          N = nullptr;
3427        }
3428      } else {
3429        if (!BBI->use_empty())
3430          TranslateMap[&*BBI] = N;
3431      }
3432      if (N) {
3433        // Copy all debug-info attached to instructions from the last we
3434        // successfully clone, up to this instruction (they might have been
3435        // folded away).
3436        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3437          N->cloneDebugInfoFrom(&*SrcDbgCursor);
3438        SrcDbgCursor = std::next(BBI);
3439        // Clone debug-info on this instruction too.
3440        N->cloneDebugInfoFrom(&*BBI);
3441
3442        // Register the new instruction with the assumption cache if necessary.
3443        if (auto *Assume = dyn_cast<AssumeInst>(N))
3444          if (AC)
3445            AC->registerAssumption(Assume);
3446      }
3447    }
3448
3449    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3450      InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3451    InsertPt->cloneDebugInfoFrom(BI);
3452
3453    BB->removePredecessor(EdgeBB);
3454    BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3455    EdgeBI->setSuccessor(0, RealDest);
3456    EdgeBI->setDebugLoc(BI->getDebugLoc());
3457
3458    if (DTU) {
3460      Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3461      Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3462      DTU->applyUpdates(Updates);
3463    }
3464
3465    // For simplicity, we created a separate basic block for the edge. Merge
3466    // it back into the predecessor if possible. This not only avoids
3467    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3468    // bypass the check for trivial cycles above.
3469    MergeBlockIntoPredecessor(EdgeBB, DTU);
3470
3471    // Signal repeat, simplifying any other constants.
3472    return std::nullopt;
3473  }
3474
3475  return false;
3476}
3477
3479                                                    DomTreeUpdater *DTU,
3480                                                    const DataLayout &DL,
3481                                                    AssumptionCache *AC) {
3482  std::optional<bool> Result;
3483  bool EverChanged = false;
  // Re-run the impl until it stops reporting std::nullopt ("changed; retry").
3484  do {
3485    // Note that None means "we changed things, but recurse further."
3486    Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3487    EverChanged |= Result == std::nullopt || *Result;
3488  } while (Result == std::nullopt);
3489  return EverChanged;
3490}
3491
 3492/// Given a BB that starts with the specified two-entry PHI node,
 3493/// see if we can eliminate it.
// NOTE(review): the line carrying this function's name and leading parameters
// (presumably FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo *TTI,)
// is missing from this extract; the body below reads both PN and TTI.
 3495 DomTreeUpdater *DTU, const DataLayout &DL) {
 3496 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
 3497 // statement", which has a very simple dominance structure. Basically, we
 3498 // are trying to find the condition that is being branched on, which
 3499 // subsequently causes this merge to happen. We really want control
 3500 // dependence information for this check, but simplifycfg can't keep it up
 3501 // to date, and this catches most of the cases we care about anyway.
 3502 BasicBlock *BB = PN->getParent();
 3503
 3504 BasicBlock *IfTrue, *IfFalse;
 3505 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
 3506 if (!DomBI)
 3507 return false;
 3508 Value *IfCond = DomBI->getCondition();
 3509 // Don't bother if the branch will be constant folded trivially.
 3510 if (isa<ConstantInt>(IfCond))
 3511 return false;
 3512
 3513 BasicBlock *DomBlock = DomBI->getParent();
// NOTE(review): the declaration of IfBlocks (a small vector filled via the
// std::back_inserter below with the unconditionally-branching incoming
// blocks) is missing from this extract.
 3516 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
 3517 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
 3518 });
 3519 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
 3520 "Will have either one or two blocks to speculate.");
 3521
 3522 // If the branch is non-unpredictable, see if we either predictably jump to
 3523 // the merge bb (if we have only a single 'then' block), or if we predictably
 3524 // jump to one specific 'then' block (if we have two of them).
 3525 // It isn't beneficial to speculatively execute the code
 3526 // from the block that we know is predictably not entered.
 3527 if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
 3528 uint64_t TWeight, FWeight;
 3529 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
 3530 (TWeight + FWeight) != 0) {
 3531 BranchProbability BITrueProb =
 3532 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
// NOTE(review): the declaration/initialization of `Likely` (the predictable
// branch threshold) is elided from this extract.
 3534 BranchProbability BIFalseProb = BITrueProb.getCompl();
 3535 if (IfBlocks.size() == 1) {
 3536 BranchProbability BIBBProb =
 3537 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
 3538 if (BIBBProb >= Likely)
 3539 return false;
 3540 } else {
 3541 if (BITrueProb >= Likely || BIFalseProb >= Likely)
 3542 return false;
 3543 }
 3544 }
 3545 }
 3546
 3547 // Don't try to fold an unreachable block. For example, the phi node itself
 3548 // can't be the candidate if-condition for a select that we want to form.
 3549 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
 3550 if (IfCondPhiInst->getParent() == BB)
 3551 return false;
 3552
 3553 // Okay, we found that we can merge this two-entry phi node into a select.
 3554 // Doing so would require us to fold *all* two entry phi nodes in this block.
 3555 // At some point this becomes non-profitable (particularly if the target
 3556 // doesn't support cmov's). Only do this transformation if there are two or
 3557 // fewer PHI nodes in this block.
 3558 unsigned NumPhis = 0;
// NOTE(review): NumPhis is incremented in the for-increment clause, after the
// body's check runs, so this loop actually tolerates up to three PHIs before
// bailing out, despite the "two or fewer" comment above — confirm intent.
 3559 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
 3560 if (NumPhis > 2)
 3561 return false;
 3562
 3563 // Loop over the PHI's seeing if we can promote them all to select
 3564 // instructions. While we are at it, keep track of the instructions
 3565 // that need to be moved to the dominating block.
 3566 SmallPtrSet<Instruction *, 4> AggressiveInsts;
// NOTE(review): the declaration of the running `Cost` accumulator and the
// right-hand side of the Budget initializer are elided from this extract.
 3568 InstructionCost Budget =
 3570
 3571 bool Changed = false;
 3572 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
 3573 PHINode *PN = cast<PHINode>(II++);
// Trivially-simplifiable PHIs are folded away immediately rather than being
// turned into selects.
 3574 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
 3575 PN->replaceAllUsesWith(V);
 3576 PN->eraseFromParent();
 3577 Changed = true;
 3578 continue;
 3579 }
 3580
// Both incoming values must be computable above the merge point within the
// speculation budget, or the whole transform is abandoned.
 3581 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
 3582 Cost, Budget, TTI) ||
 3583 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
 3584 Cost, Budget, TTI))
 3585 return Changed;
 3586 }
 3587
 3588 // If we folded the first phi, PN dangles at this point. Refresh it. If
 3589 // we ran out of PHIs then we simplified them all.
 3590 PN = dyn_cast<PHINode>(BB->begin());
 3591 if (!PN)
 3592 return true;
 3593
 3594 // Return true if at least one of these is a 'not', and another is either
 3595 // a 'not' too, or a constant.
 3596 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
 3597 if (!match(V0, m_Not(m_Value())))
 3598 std::swap(V0, V1);
 3599 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
 3600 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
 3601 };
 3602
 3603 // Don't fold i1 branches on PHIs which contain binary operators or
 3604 // (possibly inverted) select form of or/ands, unless one of
 3605 // the incoming values is an 'not' and another one is freely invertible.
 3606 // These can often be turned into switches and other things.
 3607 auto IsBinOpOrAnd = [](Value *V) {
 3608 return match(
 3609 V, m_CombineOr(
 3610 m_BinOp(),
// NOTE(review): the select-form or/and matchers inside this m_CombineOr are
// elided from this extract.
 3613 };
 3614 if (PN->getType()->isIntegerTy(1) &&
 3615 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
 3616 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
 3617 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
 3618 PN->getIncomingValue(1)))
 3619 return Changed;
 3620
 3621 // If all PHI nodes are promotable, check to make sure that all instructions
 3622 // in the predecessor blocks can be promoted as well. If not, we won't be able
 3623 // to get rid of the control flow, so it's not worth promoting to select
 3624 // instructions.
 3625 for (BasicBlock *IfBlock : IfBlocks)
 3626 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
 3627 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
 3628 // This is not an aggressive instruction that we can promote.
 3629 // Because of this, we won't be able to get rid of the control flow, so
 3630 // the xform is not worth it.
 3631 return Changed;
 3632 }
 3633
 3634 // If either of the blocks has it's address taken, we can't do this fold.
 3635 if (any_of(IfBlocks,
 3636 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
 3637 return Changed;
 3638
 3639 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
 3640 << " T: " << IfTrue->getName()
 3641 << " F: " << IfFalse->getName() << "\n");
 3642
 3643 // If we can still promote the PHI nodes after this gauntlet of tests,
 3644 // do all of the PHI's now.
 3645
 3646 // Move all 'aggressive' instructions, which are defined in the
 3647 // conditional parts of the if's up to the dominating block.
 3648 for (BasicBlock *IfBlock : IfBlocks)
 3649 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
 3650
 3651 IRBuilder<NoFolder> Builder(DomBI);
 3652 // Propagate fast-math-flags from phi nodes to replacement selects.
 3653 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
// Each remaining PHI becomes a select on the dominating condition; the PHI's
// name is transferred to the select before the PHI is erased.
 3654 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
 3655 if (isa<FPMathOperator>(PN))
 3656 Builder.setFastMathFlags(PN->getFastMathFlags());
 3657
 3658 // Change the PHI node into a select instruction.
 3659 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
 3660 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
 3661
 3662 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
 3663 PN->replaceAllUsesWith(Sel);
 3664 Sel->takeName(PN);
 3665 PN->eraseFromParent();
 3666 }
 3667
 3668 // At this point, all IfBlocks are empty, so our if statement
 3669 // has been flattened. Change DomBlock to jump directly to our new block to
 3670 // avoid other simplifycfg's kicking in on the diamond.
 3671 Builder.CreateBr(BB);
 3672
// NOTE(review): the declaration of the `Updates` vector used below is elided
// from this extract.
 3674 if (DTU) {
 3675 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
 3676 for (auto *Successor : successors(DomBlock))
 3677 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
 3678 }
 3679
 3680 DomBI->eraseFromParent();
 3681 if (DTU)
 3682 DTU->applyUpdates(Updates);
 3683
 3684 return true;
 3685}
3686
// Build `LHS Opc RHS` for Opc in {And, Or}. When RHS implying poison of LHS
// makes short-circuiting unnecessary, a plain binary op is emitted; otherwise
// the poison-safe "logical" select form (CreateLogicalAnd/Or) is used.
// NOTE(review): the line carrying this function's name and the Builder
// parameter is missing from this extract.
 3688 Instruction::BinaryOps Opc, Value *LHS,
 3689 Value *RHS, const Twine &Name = "") {
 3690 // Try to relax logical op to binary op.
 3691 if (impliesPoison(RHS, LHS))
 3692 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
 3693 if (Opc == Instruction::And)
 3694 return Builder.CreateLogicalAnd(LHS, RHS, Name);
 3695 if (Opc == Instruction::Or)
 3696 return Builder.CreateLogicalOr(LHS, RHS, Name);
// Only And/Or are valid here; anything else is a caller bug.
 3697 llvm_unreachable("Invalid logical opcode")
3699
 3700/// Return true if either PBI or BI has branch weight available, and store
 3701/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
 3702/// not have branch weight, use 1:1 as its weight.
// NOTE(review): the line carrying this function's name and its two BranchInst*
// parameters is missing from this extract.
 3704 uint64_t &PredTrueWeight,
 3705 uint64_t &PredFalseWeight,
 3706 uint64_t &SuccTrueWeight,
 3707 uint64_t &SuccFalseWeight) {
 3708 bool PredHasWeights =
 3709 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
 3710 bool SuccHasWeights =
 3711 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
 3712 if (PredHasWeights || SuccHasWeights) {
// If only one branch carries !prof metadata, give the other a neutral 1:1
// split so downstream arithmetic still has defined inputs.
 3713 if (!PredHasWeights)
 3714 PredTrueWeight = PredFalseWeight = 1;
 3715 if (!SuccHasWeights)
 3716 SuccTrueWeight = SuccFalseWeight = 1;
 3717 return true;
 3718 } else {
// Neither branch had weights: outputs are left untouched.
 3719 return false;
 3720 }
 3721}
3722
 3723/// Determine if the two branches share a common destination and deduce a glue
 3724/// that joins the branches' conditions to arrive at the common destination if
 3725/// that would be profitable.
// Returns {common successor, And/Or opcode, whether PBI's condition must be
// inverted}, or std::nullopt when no profitable common destination exists.
 3726static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
// NOTE(review): the line carrying this function's name and its BranchInst*
// parameters is missing from this extract, as are the second assert line and
// the initialization of `Likely` (the predictable-branch threshold).
 3728 const TargetTransformInfo *TTI) {
 3729 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
 3730 "Both blocks must end with a conditional branches.");
 3732 "PredBB must be a predecessor of BB.");
 3733
 3734 // We have the potential to fold the conditions together, but if the
 3735 // predecessor branch is predictable, we may not want to merge them.
 3736 uint64_t PTWeight, PFWeight;
// PBITrueProb stays "unknown" unless real weights were extracted below; an
// unknown probability never blocks the fold.
 3737 BranchProbability PBITrueProb, Likely;
 3738 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
 3739 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
 3740 (PTWeight + PFWeight) != 0) {
 3741 PBITrueProb =
 3742 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
 3744 }
 3745
// Four cases, one per way the two conditional branches can share an edge.
// The bool in each result says whether PBI's condition needs inverting.
 3746 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
 3747 // Speculate the 2nd condition unless the 1st is probably true.
 3748 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
 3749 return {{BI->getSuccessor(0), Instruction::Or, false}};
 3750 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
 3751 // Speculate the 2nd condition unless the 1st is probably false.
 3752 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
 3753 return {{BI->getSuccessor(1), Instruction::And, false}};
 3754 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
 3755 // Speculate the 2nd condition unless the 1st is probably true.
 3756 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
 3757 return {{BI->getSuccessor(1), Instruction::And, true}};
 3758 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
 3759 // Speculate the 2nd condition unless the 1st is probably false.
 3760 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
 3761 return {{BI->getSuccessor(0), Instruction::Or, true}};
 3762 }
 3763 return std::nullopt;
 3764}
3765
// Fold BI's conditional branch into its predecessor PBI's branch by cloning
// BB's "bonus" instructions into PredBlock and combining the two conditions
// with And/Or, rewiring PBI straight to the unique successor.
// NOTE(review): the line carrying this function's name and the BI/PBI
// parameters is missing from this extract.
 3767 DomTreeUpdater *DTU,
 3768 MemorySSAUpdater *MSSAU,
 3769 const TargetTransformInfo *TTI) {
 3770 BasicBlock *BB = BI->getParent();
 3771 BasicBlock *PredBlock = PBI->getParent();
 3772
 3773 // Determine if the two branches share a common destination.
 3774 BasicBlock *CommonSucc;
// NOTE(review): the declaration of `Opc` and the call to
// shouldFoldCondBranchesToCommonDestination feeding this std::tie are elided
// from this extract.
 3776 bool InvertPredCond;
 3777 std::tie(CommonSucc, Opc, InvertPredCond) =
 3779
 3780 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
 3781
 3782 IRBuilder<> Builder(PBI);
 3783 // The builder is used to create instructions to eliminate the branch in BB.
 3784 // If BB's terminator has !annotation metadata, add it to the new
 3785 // instructions.
 3787 {LLVMContext::MD_annotation});
 3788
 3789 // If we need to invert the condition in the pred block to match, do so now.
 3790 if (InvertPredCond) {
 3791 InvertBranch(PBI, Builder);
 3792 }
 3793
// After any inversion above, the successor of PBI that is not BB pairs with
// the matching successor of BI to give the single target kept from BI.
 3794 BasicBlock *UniqueSucc =
 3795 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
 3796
 3797 // Before cloning instructions, notify the successor basic block that it
 3798 // is about to have a new predecessor. This will update PHI nodes,
 3799 // which will allow us to update live-out uses of bonus instructions.
 3800 AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
 3801
 3802 // Try to update branch weights.
 3803 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
 3804 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
 3805 SuccTrueWeight, SuccFalseWeight)) {
 3806 SmallVector<uint64_t, 8> NewWeights;
 3807
 3808 if (PBI->getSuccessor(0) == BB) {
 3809 // PBI: br i1 %x, BB, FalseDest
 3810 // BI: br i1 %y, UniqueSucc, FalseDest
 3811 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
 3812 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
 3813 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
 3814 // TrueWeight for PBI * FalseWeight for BI.
 3815 // We assume that total weights of a BranchInst can fit into 32 bits.
 3816 // Therefore, we will not have overflow using 64-bit arithmetic.
 3817 NewWeights.push_back(PredFalseWeight *
 3818 (SuccFalseWeight + SuccTrueWeight) +
 3819 PredTrueWeight * SuccFalseWeight);
 3820 } else {
 3821 // PBI: br i1 %x, TrueDest, BB
 3822 // BI: br i1 %y, TrueDest, UniqueSucc
 3823 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
 3824 // FalseWeight for PBI * TrueWeight for BI.
 3825 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
 3826 PredFalseWeight * SuccTrueWeight);
 3827 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
 3828 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
 3829 }
 3830
 3831 // Halve the weights if any of them cannot fit in an uint32_t
 3832 FitWeights(NewWeights);
 3833
 3834 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
 3835 setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
 3836
 3837 // TODO: If BB is reachable from all paths through PredBlock, then we
 3838 // could replace PBI's branch probabilities with BI's.
 3839 } else
// Stale profile data would be misleading after the merge; drop it.
 3840 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
 3841
 3842 // Now, update the CFG.
 3843 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
 3844
 3845 if (DTU)
 3846 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
 3847 {DominatorTree::Delete, PredBlock, BB}});
 3848
 3849 // If BI was a loop latch, it may have had associated loop metadata.
 3850 // We need to copy it to the new latch, that is, PBI.
 3851 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
 3852 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
 3853
 3854 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): the call that clones BB's bonus instructions into PredBlock
// and populates VMap is elided from this extract.
 3856
 3857 Module *M = BB->getModule();
 3858
 3859 if (PredBlock->IsNewDbgInfoFormat) {
 3860 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
// NOTE(review): the range expression of this loop and the trailing remap-flag
// argument are elided from this extract.
 3861 for (DbgVariableRecord &DVR :
 3863 RemapDbgVariableRecord(M, &DVR, VMap,
 3865 }
 3866 }
 3867
 3868 // Now that the Cond was cloned into the predecessor basic block,
 3869 // or/and the two conditions together.
 3870 Value *BICond = VMap[BI->getCondition()];
 3871 PBI->setCondition(
 3872 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
 3873
 3874 ++NumFoldBranchToCommonDest;
 3875 return true;
 3876}
3877
3878/// Return if an instruction's type or any of its operands' types are a vector
3879/// type.
3880static bool isVectorOp(Instruction &I) {
3881 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3882 return U->getType()->isVectorTy();
3883 });
3884}
3885
 3886/// If this basic block is simple enough, and if a predecessor branches to us
 3887/// and one of our successors, fold the block into the predecessor and use
 3888/// logical operations to pick the right destination.
// NOTE(review): the line carrying this function's name and its first
// parameters is missing from this extract, as are several declarations inside
// the body (the cost-kind, the Preds vector, Opc, the per-predecessor Cost
// accumulator and its threshold comparison, the speculation-safety check, and
// the TCC_Free constant) — all elided by the HTML export.
 3890 MemorySSAUpdater *MSSAU,
 3891 const TargetTransformInfo *TTI,
 3892 unsigned BonusInstThreshold) {
 3893 // If this block ends with an unconditional branch,
 3894 // let SpeculativelyExecuteBB() deal with it.
 3895 if (!BI->isConditional())
 3896 return false;
 3897
 3898 BasicBlock *BB = BI->getParent();
 3902
 3903 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
 3904
// Only fold when the condition is a single-use cmp/binop/select defined in
// this very block, so it can be cloned into predecessors cheaply.
 3905 if (!Cond ||
 3906 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
 3907 !isa<SelectInst>(Cond)) ||
 3908 Cond->getParent() != BB || !Cond->hasOneUse())
 3909 return false;
 3910
 3911 // Finally, don't infinitely unroll conditional loops.
 3912 if (is_contained(successors(BB), BB))
 3913 return false;
 3914
 3915 // With which predecessors will we want to deal with?
 3917 for (BasicBlock *PredBlock : predecessors(BB)) {
 3918 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
 3919
 3920 // Check that we have two conditional branches. If there is a PHI node in
 3921 // the common successor, verify that the same value flows in from both
 3922 // blocks.
 3923 if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
 3924 continue;
 3925
 3926 // Determine if the two branches share a common destination.
 3927 BasicBlock *CommonSucc;
 3929 bool InvertPredCond;
 3930 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
 3931 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
 3932 else
 3933 continue;
 3934
 3935 // Check the cost of inserting the necessary logic before performing the
 3936 // transformation.
 3937 if (TTI) {
 3938 Type *Ty = BI->getCondition()->getType();
// Inverting a predecessor condition that is multi-use or not a cmp costs an
// extra xor in that predecessor.
 3940 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
 3941 !isa<CmpInst>(PBI->getCondition())))
 3942 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
 3943
 3945 continue;
 3946 }
 3947
 3948 // Ok, we do want to deal with this predecessor. Record it.
 3949 Preds.emplace_back(PredBlock);
 3950 }
 3951
 3952 // If there aren't any predecessors into which we can fold,
 3953 // don't bother checking the cost.
 3954 if (Preds.empty())
 3955 return false;
 3956
 3957 // Only allow this transformation if computing the condition doesn't involve
 3958 // too many instructions and these involved instructions can be executed
 3959 // unconditionally. We denote all involved instructions except the condition
 3960 // as "bonus instructions", and only allow this transformation when the
 3961 // number of the bonus instructions we'll need to create when cloning into
 3962 // each predecessor does not exceed a certain threshold.
 3963 unsigned NumBonusInsts = 0;
 3964 bool SawVectorOp = false;
 3965 const unsigned PredCount = Preds.size();
 3966 for (Instruction &I : *BB) {
 3967 // Don't check the branch condition comparison itself.
 3968 if (&I == Cond)
 3969 continue;
 3970 // Ignore dbg intrinsics, and the terminator.
 3971 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
 3972 continue;
 3973 // I must be safe to execute unconditionally.
 3975 return false;
 3976 SawVectorOp |= isVectorOp(I);
 3977
 3978 // Account for the cost of duplicating this instruction into each
 3979 // predecessor. Ignore free instructions.
 3980 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
 3982 NumBonusInsts += PredCount;
 3983
 3984 // Early exits once we reach the limit.
 3985 if (NumBonusInsts >
 3986 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
 3987 return false;
 3988 }
 3989
// A use is "block-closed SSA" if it is a PHI incoming from BB or a later
// instruction within BB itself; other uses would need rewriting.
 3990 auto IsBCSSAUse = [BB, &I](Use &U) {
 3991 auto *UI = cast<Instruction>(U.getUser());
 3992 if (auto *PN = dyn_cast<PHINode>(UI))
 3993 return PN->getIncomingBlock(U) == BB;
 3994 return UI->getParent() == BB && I.comesBefore(UI);
 3995 };
 3996
 3997 // Does this instruction require rewriting of uses?
 3998 if (!all_of(I.uses(), IsBCSSAUse))
 3999 return false;
 4000 }
 4001 if (NumBonusInsts >
 4002 BonusInstThreshold *
 4003 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
 4004 return false;
 4005
 4006 // Ok, we have the budget. Perform the transformation.
// NOTE(review): this loop returns after folding into the FIRST recorded
// predecessor only; remaining predecessors are presumably handled on later
// SimplifyCFG iterations — confirm this is intentional.
 4007 for (BasicBlock *PredBlock : Preds) {
 4008 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
 4009 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
 4010 }
 4011 return false;
 4012}
4013
 4014// If there is only one store in BB1 and BB2, return it, otherwise return
 4015// nullptr.
// Either block pointer may be null (modeling a fallthrough edge); null blocks
// are skipped. The single store may live in either block.
// NOTE(review): the line carrying this function's name and its two
// BasicBlock* parameters is missing from this extract.
 4017 StoreInst *S = nullptr;
 4018 for (auto *BB : {BB1, BB2}) {
 4019 if (!BB)
 4020 continue;
 4021 for (auto &I : *BB)
 4022 if (auto *SI = dyn_cast<StoreInst>(&I)) {
 4023 if (S)
 4024 // Multiple stores seen.
 4025 return nullptr;
 4026 else
 4027 S = SI;
 4028 }
 4029 }
 4030 return S;
 4031}
4032
// Make the value V (defined in BB) referenceable in BB's single successor by
// reusing or creating a PHI there; returns the PHI (or V itself when no PHI
// is needed).
// NOTE(review): the line carrying this function's name and its leading
// parameters (V and BB) is missing from this extract.
 4034 Value *AlternativeV = nullptr) {
 4035 // PHI is going to be a PHI node that allows the value V that is defined in
 4036 // BB to be referenced in BB's only successor.
 4037 //
 4038 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
 4039 // doesn't matter to us what the other operand is (it'll never get used). We
 4040 // could just create a new PHI with an undef incoming value, but that could
 4041 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
 4042 // other PHI. So here we directly look for some PHI in BB's successor with V
 4043 // as an incoming operand. If we find one, we use it, else we create a new
 4044 // one.
 4045 //
 4046 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
 4047 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
 4048 // where OtherBB is the single other predecessor of BB's only successor.
 4049 PHINode *PHI = nullptr;
 4050 BasicBlock *Succ = BB->getSingleSuccessor();
 4051
 4052 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
 4053 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
 4054 PHI = cast<PHINode>(I);
 4055 if (!AlternativeV)
 4056 break;
 4057
 4058 assert(Succ->hasNPredecessors(2));
 4059 auto PredI = pred_begin(Succ);
 4060 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
 4061 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
 4062 break;
// Candidate PHI disagrees on the other edge; keep scanning.
 4063 PHI = nullptr;
 4064 }
 4065 if (PHI)
 4066 return PHI;
 4067
 4068 // If V is not an instruction defined in BB, just return it.
 4069 if (!AlternativeV &&
 4070 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
 4071 return V;
 4072
 4073 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
 4074 PHI->insertBefore(Succ->begin());
 4075 PHI->addIncoming(V, BB);
// Poison on the non-BB edges is fine when no AlternativeV was requested —
// those incoming values are never read.
 4076 for (BasicBlock *PredBB : predecessors(Succ))
 4077 if (PredBB != BB)
 4078 PHI->addIncoming(
 4079 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
 4080 return PHI;
 4081}
4082
// Sink the two conditional stores to `Address` (one from PTB/PFB, one from
// QTB/QFB) into PostBB as a single store guarded by the OR of both
// conditions. Returns true if the merge was performed.
// NOTE(review): the line carrying this function's name is missing from this
// extract.
 4084 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
 4085 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
 4086 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
 4087 // For every pointer, there must be exactly two stores, one coming from
 4088 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
 4089 // store (to any address) in PTB,PFB or QTB,QFB.
 4090 // FIXME: We could relax this restriction with a bit more work and performance
 4091 // testing.
 4092 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
 4093 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
 4094 if (!PStore || !QStore)
 4095 return false;
 4096
 4097 // Now check the stores are compatible.
 4098 if (!QStore->isUnordered() || !PStore->isUnordered() ||
 4099 PStore->getValueOperand()->getType() !=
 4100 QStore->getValueOperand()->getType())
 4101 return false;
 4102
 4103 // Check that sinking the store won't cause program behavior changes. Sinking
 4104 // the store out of the Q blocks won't change any behavior as we're sinking
 4105 // from a block to its unconditional successor. But we're moving a store from
 4106 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
 4107 // So we need to check that there are no aliasing loads or stores in
 4108 // QBI, QTB and QFB. We also need to check there are no conflicting memory
 4109 // operations between PStore and the end of its parent block.
 4110 //
 4111 // The ideal way to do this is to query AliasAnalysis, but we don't
 4112 // preserve AA currently so that is dangerous. Be super safe and just
 4113 // check there are no other memory operations at all.
 4114 for (auto &I : *QFB->getSinglePredecessor())
 4115 if (I.mayReadOrWriteMemory())
 4116 return false;
 4117 for (auto &I : *QFB)
 4118 if (&I != QStore && I.mayReadOrWriteMemory())
 4119 return false;
 4120 if (QTB)
 4121 for (auto &I : *QTB)
 4122 if (&I != QStore && I.mayReadOrWriteMemory())
 4123 return false;
 4124 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
 4125 I != E; ++I)
 4126 if (&*I != PStore && I->mayReadOrWriteMemory())
 4127 return false;
 4128
 4129 // If we're not in aggressive mode, we only optimize if we have some
 4130 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
 4131 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
 4132 if (!BB)
 4133 return true;
 4134 // Heuristic: if the block can be if-converted/phi-folded and the
 4135 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
 4136 // thread this store.
// NOTE(review): the declarations of the running Cost and the right-hand side
// of the Budget initializer are elided from this extract.
 4138 InstructionCost Budget =
 4140 for (auto &I : BB->instructionsWithoutDebug(false)) {
 4141 // Consider terminator instruction to be free.
 4142 if (I.isTerminator())
 4143 continue;
 4144 // If this is one the stores that we want to speculate out of this BB,
 4145 // then don't count it's cost, consider it to be free.
 4146 if (auto *S = dyn_cast<StoreInst>(&I))
// NOTE(review): llvm::find returns an iterator (a pointer into the std::array
// FreeStores), which converts to bool and is non-null even for end() — so
// this condition is effectively always true and every store is treated as
// free. Likely intended: llvm::is_contained(FreeStores, S).
 4147 if (llvm::find(FreeStores, S))
 4148 continue;
 4149 // Else, we have a white-list of instructions that we are ak speculating.
 4150 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
 4151 return false; // Not in white-list - not worthwhile folding.
 4152 // And finally, if this is a non-free instruction that we are okay
 4153 // speculating, ensure that we consider the speculation budget.
 4154 Cost +=
 4156 if (Cost > Budget)
 4157 return false; // Eagerly refuse to fold as soon as we're out of budget.
 4158 }
 4159 assert(Cost <= Budget &&
 4160 "When we run out of budget we will eagerly return from within the "
 4161 "per-instruction loop.");
 4162 return true;
 4163 };
 4164
 4165 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): the aggressive-mode flag guarding this profitability check is
// elided from this extract.
 4167 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
 4168 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
 4169 return false;
 4170
 4171 // If PostBB has more than two predecessors, we need to split it so we can
 4172 // sink the store.
 4173 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
 4174 // We know that QFB's only successor is PostBB. And QFB has a single
 4175 // predecessor. If QTB exists, then its only successor is also PostBB.
 4176 // If QTB does not exist, then QFB's only predecessor has a conditional
 4177 // branch to QFB and PostBB.
 4178 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
 4179 BasicBlock *NewBB =
 4180 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
 4181 if (!NewBB)
 4182 return false;
 4183 PostBB = NewBB;
 4184 }
 4185
 4186 // OK, we're going to sink the stores to PostBB. The store has to be
 4187 // conditional though, so first create the predicate.
 4188 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
 4189 ->getCondition();
 4190 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
 4191 ->getCondition();
 4192
// NOTE(review): the PPHI/QPHI declarations and the calls to
// ensureValueAvailableInSuccessor producing them are partially elided from
// this extract.
 4194 PStore->getParent());
 4196 QStore->getParent(), PPHI);
 4197
 4198 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
 4199 IRBuilder<> QB(PostBB, PostBBFirst);
 4200 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
 4201
// Normalize each predicate to "store happened": negate when the store lived
// in the false block.
 4202 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
 4203 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
 4204
 4205 if (InvertPCond)
 4206 PPred = QB.CreateNot(PPred);
 4207 if (InvertQCond)
 4208 QPred = QB.CreateNot(QPred);
 4209 Value *CombinedPred = QB.CreateOr(PPred, QPred);
 4210
 4211 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
 4212 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
 4213 /*Unreachable=*/false,
 4214 /*BranchWeights=*/nullptr, DTU);
 4215
 4216 QB.SetInsertPoint(T);
 4217 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
 4218 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
 4219 // Choose the minimum alignment. If we could prove both stores execute, we
 4220 // could use biggest one. In this case, though, we only know that one of the
 4221 // stores executes. And we don't know it's safe to take the alignment from a
 4222 // store that doesn't execute.
 4223 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
 4224
 4225 QStore->eraseFromParent();
 4226 PStore->eraseFromParent();
 4227
 4228 return true;
 4229}
4230
// Recognize a two-level diamond/triangle CFG (PBI feeding QBI) where both
// conditional regions store to the same address, and merge those stores via
// mergeConditionalStoreToAddress for every address stored on both sides.
// NOTE(review): the line carrying this function's name and the PBI/QBI
// parameters is missing from this extract.
 4232 DomTreeUpdater *DTU, const DataLayout &DL,
 4233 const TargetTransformInfo &TTI) {
 4234 // The intention here is to find diamonds or triangles (see below) where each
 4235 // conditional block contains a store to the same address. Both of these
 4236 // stores are conditional, so they can't be unconditionally sunk. But it may
 4237 // be profitable to speculatively sink the stores into one merged store at the
 4238 // end, and predicate the merged store on the union of the two conditions of
 4239 // PBI and QBI.
 4240 //
 4241 // This can reduce the number of stores executed if both of the conditions are
 4242 // true, and can allow the blocks to become small enough to be if-converted.
 4243 // This optimization will also chain, so that ladders of test-and-set
 4244 // sequences can be if-converted away.
 4245 //
 4246 // We only deal with simple diamonds or triangles:
 4247 //
 4248 // PBI or PBI or a combination of the two
 4249 // / \ | \
 4250 // PTB PFB | PFB
 4251 // \ / | /
 4252 // QBI QBI
 4253 // / \ | \
 4254 // QTB QFB | QFB
 4255 // \ / | /
 4256 // PostBB PostBB
 4257 //
 4258 // We model triangles as a type of diamond with a nullptr "true" block.
 4259 // Triangles are canonicalized so that the fallthrough edge is represented by
 4260 // a true condition, as in the diagram above.
 4261 BasicBlock *PTB = PBI->getSuccessor(0);
 4262 BasicBlock *PFB = PBI->getSuccessor(1);
 4263 BasicBlock *QTB = QBI->getSuccessor(0);
 4264 BasicBlock *QFB = QBI->getSuccessor(1);
 4265 BasicBlock *PostBB = QFB->getSingleSuccessor();
 4266
 4267 // Make sure we have a good guess for PostBB. If QTB's only successor is
 4268 // QFB, then QFB is a better PostBB.
 4269 if (QTB->getSingleSuccessor() == QFB)
 4270 PostBB = QFB;
 4271
 4272 // If we couldn't find a good PostBB, stop.
 4273 if (!PostBB)
 4274 return false;
 4275
 4276 bool InvertPCond = false, InvertQCond = false;
 4277 // Canonicalize fallthroughs to the true branches.
 4278 if (PFB == QBI->getParent()) {
 4279 std::swap(PFB, PTB);
 4280 InvertPCond = true;
 4281 }
 4282 if (QFB == PostBB) {
 4283 std::swap(QFB, QTB);
 4284 InvertQCond = true;
 4285 }
 4286
 4287 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
 4288 // and QFB may not. Model fallthroughs as a nullptr block.
 4289 if (PTB == QBI->getParent())
 4290 PTB = nullptr;
 4291 if (QTB == PostBB)
 4292 QTB = nullptr;
 4293
 4294 // Legality bailouts. We must have at least the non-fallthrough blocks and
 4295 // the post-dominating block, and the non-fallthroughs must only have one
 4296 // predecessor.
 4297 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
 4298 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
 4299 };
 4300 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
 4301 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
 4302 return false;
 4303 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
 4304 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
 4305 return false;
 4306 if (!QBI->getParent()->hasNUses(2))
 4307 return false;
 4308
 4309 // OK, this is a sequence of two diamonds or triangles.
 4310 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
 4311 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
 4312 for (auto *BB : {PTB, PFB}) {
 4313 if (!BB)
 4314 continue;
 4315 for (auto &I : *BB)
 4316 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
 4317 PStoreAddresses.insert(SI->getPointerOperand());
 4318 }
 4319 for (auto *BB : {QTB, QFB}) {
 4320 if (!BB)
 4321 continue;
 4322 for (auto &I : *BB)
 4323 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
 4324 QStoreAddresses.insert(SI->getPointerOperand());
 4325 }
 4326
 4327 set_intersect(PStoreAddresses, QStoreAddresses);
 4328 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
 4329 // clear what it contains.
 4330 auto &CommonAddresses = PStoreAddresses;
 4331
 4332 bool Changed = false;
 4333 for (auto *Address : CommonAddresses)
 4334 Changed |=
 4335 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
 4336 InvertPCond, InvertQCond, DTU, DL, TTI);
 4337 return Changed;
 4338}
4339
4340/// If the previous block ended with a widenable branch, determine if reusing
4341/// the target block is profitable and legal. This will have the effect of
4342/// "widening" PBI, but doesn't require us to reason about hoisting safety.
4343///
4344/// Returns true if BI was retargeted (and the DomTree updated via DTU).
4344 DomTreeUpdater *DTU) {
4345 // TODO: This can be generalized in two important ways:
4346 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4347 // values from the PBI edge.
4348 // 2) We can sink side effecting instructions into BI's fallthrough
4349 // successor provided they doesn't contribute to computation of
4350 // BI's condition.
 // PBI must be a widenable branch whose taken edge leads to BI's block.
 // (NOTE(review): one more clause of this condition is elided in this view —
 // confirm against upstream before relying on the exact legality test.)
4351 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4352 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4353 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4355 return false;
 // Phi nodes in the reused fallback block are not handled yet.
4356 if (!IfFalseBB->phis().empty())
4357 return false; // TODO
4358 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4359 // may undo the transform done here.
4360 // TODO: There might be a more fine-grained solution to this.
4361 if (!llvm::succ_empty(IfFalseBB))
4362 return false;
4363 // Use lambda to lazily compute expensive condition after cheap ones.
4364 auto NoSideEffects = [](BasicBlock &BB) {
4365 return llvm::none_of(BB, [](const Instruction &I) {
4366 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4367 });
4368 };
 // Case 1: BI's false successor is a deoptimizing block. Retarget that edge
 // to PBI's widenable-fallback block instead, which effectively widens PBI.
4369 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4370 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4371 NoSideEffects(*BI->getParent())) {
4372 auto *OldSuccessor = BI->getSuccessor(1);
4373 OldSuccessor->removePredecessor(BI->getParent());
4374 BI->setSuccessor(1, IfFalseBB);
4375 if (DTU)
4376 DTU->applyUpdates(
4377 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4378 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4379 return true;
4380 }
 // Case 2: symmetric handling when the deoptimizing block hangs off BI's
 // true successor instead.
4381 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4382 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4383 NoSideEffects(*BI->getParent())) {
4384 auto *OldSuccessor = BI->getSuccessor(0);
4385 OldSuccessor->removePredecessor(BI->getParent());
4386 BI->setSuccessor(0, IfFalseBB);
4387 if (DTU)
4388 DTU->applyUpdates(
4389 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4390 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4391 return true;
4392 }
4393 return false;
4394}
4395
4396/// If we have a conditional branch as a predecessor of another block,
4397/// this function tries to simplify it. We know
4398/// that PBI and BI are both conditional branches, and BI is in one of the
4399/// successor blocks of PBI - PBI branches to BI.
4400/// Returns true if any CFG change was made.
4401 DomTreeUpdater *DTU,
4402 const DataLayout &DL,
4403 const TargetTransformInfo &TTI) {
4404 assert(PBI->isConditional() && BI->isConditional());
4405 BasicBlock *BB = BI->getParent();
4406
4407 // If this block ends with a branch instruction, and if there is a
4408 // predecessor that ends on a branch of the same condition, make
4409 // this conditional branch redundant.
4410 if (PBI->getCondition() == BI->getCondition() &&
4411 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4412 // Okay, the outcome of this conditional branch is statically
4413 // knowable. If this block had a single pred, handle specially, otherwise
4414 // FoldCondBranchOnValueKnownInPredecessor() will handle it.
4415 if (BB->getSinglePredecessor()) {
4416 // Turn this into a branch on constant.
4417 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4418 BI->setCondition(
4419 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4420 return true; // Nuke the branch on constant.
4421 }
4422 }
4423
4424 // If the previous block ended with a widenable branch, determine if reusing
4425 // the target block is profitable and legal. This will have the effect of
4426 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4427 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4428 return true;
4429
4430 // If both branches are conditional and both contain stores to the same
4431 // address, remove the stores from the conditionals and create a conditional
4432 // merged store at the end.
4433 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4434 return true;
4435
4436 // If this is a conditional branch in an empty block, and if any
4437 // predecessors are a conditional branch to one of our destinations,
4438 // fold the conditions into logical ops and one cond br.
4439
4440 // Ignore dbg intrinsics.
4441 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4442 return false;
4443
 // PBIOp/BIOp record which successor index (0 = taken edge, 1 = not-taken
 // edge) of PBI and BI leads to the shared destination; bail out if the two
 // branches share no successor.
4444 int PBIOp, BIOp;
4445 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4446 PBIOp = 0;
4447 BIOp = 0;
4448 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4449 PBIOp = 0;
4450 BIOp = 1;
4451 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4452 PBIOp = 1;
4453 BIOp = 0;
4454 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4455 PBIOp = 1;
4456 BIOp = 1;
4457 } else {
4458 return false;
4459 }
4460
4461 // Check to make sure that the other destination of this branch
4462 // isn't BB itself. If so, this is an infinite loop that will
4463 // keep getting unwound.
4464 if (PBI->getSuccessor(PBIOp) == BB)
4465 return false;
4466
4467 // If predecessor's branch probability to BB is too low don't merge branches.
4468 SmallVector<uint32_t, 2> PredWeights;
4469 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4470 extractBranchWeights(*PBI, PredWeights) &&
4471 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4472
 // Probability of PBI taking the common-destination edge directly.
4474 PredWeights[PBIOp],
4475 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4476
4478 if (CommonDestProb >= Likely)
4479 return false;
4480 }
4481
4482 // Do not perform this transformation if it would require
4483 // insertion of a large number of select instructions. For targets
4484 // without predication/cmovs, this is a big pessimization.
4485
4486 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4487 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4488 unsigned NumPhis = 0;
 // Each PHI in CommonDest may need a select below; cap how many we allow.
4489 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4490 ++II, ++NumPhis) {
4491 if (NumPhis > 2) // Disable this xform.
4492 return false;
4493 }
4494
4495 // Finally, if everything is ok, fold the branches to logical ops.
4496 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4497
4498 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4499 << "AND: " << *BI->getParent());
4500
4502
4503 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4504 // branch in it, where one edge (OtherDest) goes back to itself but the other
4505 // exits. We don't *know* that the program avoids the infinite loop
4506 // (even though that seems likely). If we do this xform naively, we'll end up
4507 // recursively unpeeling the loop. Since we know that (after the xform is
4508 // done) that the block *is* infinite if reached, we just make it an obviously
4509 // infinite loop with no cond branch.
4510 if (OtherDest == BB) {
4511 // Insert it at the end of the function, because it's either code,
4512 // or it won't matter if it's hot. :)
4513 BasicBlock *InfLoopBlock =
4514 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4515 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4516 if (DTU)
4517 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4518 OtherDest = InfLoopBlock;
4519 }
4520
4521 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4522
4523 // BI may have other predecessors. Because of this, we leave
4524 // it alone, but modify PBI.
4525
4526 // Make sure we get to CommonDest on True&True directions.
 // Normalize both conditions so that "true" means "go to CommonDest".
4527 Value *PBICond = PBI->getCondition();
4528 IRBuilder<NoFolder> Builder(PBI);
4529 if (PBIOp)
4530 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4531
4532 Value *BICond = BI->getCondition();
4533 if (BIOp)
4534 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4535
4536 // Merge the conditions.
4537 Value *Cond =
4538 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4539
4540 // Modify PBI to branch on the new condition to the new dests.
4541 PBI->setCondition(Cond);
4542 PBI->setSuccessor(0, CommonDest);
4543 PBI->setSuccessor(1, OtherDest);
4544
4545 if (DTU) {
4546 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4547 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4548
4549 DTU->applyUpdates(Updates);
4550 }
4551
4552 // Update branch weight for PBI.
4553 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4554 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4555 bool HasWeights =
4556 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4557 SuccTrueWeight, SuccFalseWeight);
4558 if (HasWeights) {
4559 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4560 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4561 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4562 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4563 // The weight to CommonDest should be PredCommon * SuccTotal +
4564 // PredOther * SuccCommon.
4565 // The weight to OtherDest should be PredOther * SuccOther.
4566 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4567 PredOther * SuccCommon,
4568 PredOther * SuccOther};
4569 // Halve the weights if any of them cannot fit in an uint32_t
4570 FitWeights(NewWeights);
4571
4572 setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
4573 }
4574
4575 // OtherDest may have phi nodes. If so, add an entry from PBI's
4576 // block that are identical to the entries for BI's block.
4577 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4578
4579 // We know that the CommonDest already had an edge from PBI to
4580 // it. If it has PHIs though, the PHIs may have different
4581 // entries for BB and PBI's BB. If so, insert a select to make
4582 // them agree.
4583 for (PHINode &PN : CommonDest->phis()) {
4584 Value *BIV = PN.getIncomingValueForBlock(BB);
4585 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4586 Value *PBIV = PN.getIncomingValue(PBBIdx);
4587 if (BIV != PBIV) {
4588 // Insert a select in PBI to pick the right value.
4589 SelectInst *NV = cast<SelectInst>(
4590 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4591 PN.setIncomingValue(PBBIdx, NV);
4592 // Although the select has the same condition as PBI, the original branch
4593 // weights for PBI do not apply to the new select because the select's
4594 // 'logical' edges are incoming edges of the phi that is eliminated, not
4595 // the outgoing edges of PBI.
4596 if (HasWeights) {
4597 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4598 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4599 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4600 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4601 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4602 // The weight to PredOtherDest should be PredOther * SuccCommon.
4603 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4604 PredOther * SuccCommon};
4605
4606 FitWeights(NewWeights);
4607
4608 setBranchWeights(NV, NewWeights[0], NewWeights[1]);
4609 }
4610 }
4611 }
4612
4613 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4614 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4615
4616 // This basic block is probably dead. We know it has at least
4617 // one fewer predecessor.
4618 return true;
4619}
4620
4621// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4622// true or to FalseBB if Cond is false.
4623// Takes care of updating the successors and removing the old terminator.
4624// Also makes sure not to introduce new successors by assuming that edges to
4625// non-successor TrueBBs and FalseBBs aren't reachable.
4626//
4627// TrueWeight/FalseWeight are branch weights for the new conditional branch;
4628// a pair of equal weights (e.g. 0/0) means "no weight metadata". Always
4629// returns true (the terminator is always rewritten).
4626bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
4627 Value *Cond, BasicBlock *TrueBB,
4628 BasicBlock *FalseBB,
4629 uint32_t TrueWeight,
4630 uint32_t FalseWeight) {
4631 auto *BB = OldTerm->getParent();
4632 // Remove any superfluous successor edges from the CFG.
4633 // First, figure out which successors to preserve.
4634 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4635 // successor.
4636 BasicBlock *KeepEdge1 = TrueBB;
4637 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4638
4639 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4640
4641 // Then remove the rest.
 // KeepEdge1/KeepEdge2 are nulled out as the corresponding successors are
 // found; whatever remains non-null afterwards was NOT a successor.
4642 for (BasicBlock *Succ : successors(OldTerm)) {
4643 // Make sure only to keep exactly one copy of each edge.
4644 if (Succ == KeepEdge1)
4645 KeepEdge1 = nullptr;
4646 else if (Succ == KeepEdge2)
4647 KeepEdge2 = nullptr;
4648 else {
4649 Succ->removePredecessor(BB,
4650 /*KeepOneInputPHIs=*/true);
4651
4652 if (Succ != TrueBB && Succ != FalseBB)
4653 RemovedSuccessors.insert(Succ);
4654 }
4655 }
4656
4657 IRBuilder<> Builder(OldTerm);
4658 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4659
4660 // Insert an appropriate new terminator.
4661 if (!KeepEdge1 && !KeepEdge2) {
4662 if (TrueBB == FalseBB) {
4663 // We were only looking for one successor, and it was present.
4664 // Create an unconditional branch to it.
4665 Builder.CreateBr(TrueBB);
4666 } else {
4667 // We found both of the successors we were looking for.
4668 // Create a conditional branch sharing the condition of the select.
4669 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4670 if (TrueWeight != FalseWeight)
4671 setBranchWeights(NewBI, TrueWeight, FalseWeight);
4672 }
4673 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4674 // Neither of the selected blocks were successors, so this
4675 // terminator must be unreachable.
4676 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4677 } else {
4678 // One of the selected values was a successor, but the other wasn't.
4679 // Insert an unconditional branch to the one that was found;
4680 // the edge to the one that wasn't must be unreachable.
4681 if (!KeepEdge1) {
4682 // Only TrueBB was found.
4683 Builder.CreateBr(TrueBB);
4684 } else {
4685 // Only FalseBB was found.
4686 Builder.CreateBr(FalseBB);
4687 }
4688 }
4689
4691
4692 if (DTU) {
4694 Updates.reserve(RemovedSuccessors.size());
4695 for (auto *RemovedSuccessor : RemovedSuccessors)
4696 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4697 DTU->applyUpdates(Updates);
4698 }
4699
4700 return true;
4701}
4702
4703// Replaces
4704// (switch (select cond, X, Y)) on constant X, Y
4705// with a branch - conditional if X and Y lead to distinct BBs,
4706// unconditional otherwise.
4707//
4708// Returns true if the switch was rewritten. Branch weights from the switch's
4709// profile metadata are carried over to the new branch when available.
4707bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
4708 SelectInst *Select) {
4709 // Check for constant integer values in the select.
4710 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4711 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4712 if (!TrueVal || !FalseVal)
4713 return false;
4714
4715 // Find the relevant condition and destinations.
 // Note: findCaseValue yields the default case iterator when the constant is
 // not an explicit case, so TrueBB/FalseBB may be the default destination.
4716 Value *Condition = Select->getCondition();
4717 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4718 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4719
4720 // Get weight for TrueBB and FalseBB.
4721 uint32_t TrueWeight = 0, FalseWeight = 0;
4723 bool HasWeights = hasBranchWeightMD(*SI);
4724 if (HasWeights) {
4725 GetBranchWeights(SI, Weights);
 // Only use the weights if there is exactly one per successor
 // (default + each case); otherwise leave both weights at 0.
4726 if (Weights.size() == 1 + SI->getNumCases()) {
4727 TrueWeight =
4728 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4729 FalseWeight =
4730 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4731 }
4732 }
4733
4734 // Perform the actual simplification.
4735 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4736 FalseWeight);
4737}
4738
4739// Replaces
4740// (indirectbr (select cond, blockaddress(@fn, BlockA),
4741// blockaddress(@fn, BlockB)))
4742// with
4743// (br cond, BlockA, BlockB).
4744bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4745 SelectInst *SI) {
4746 // Check that both operands of the select are block addresses.
4747 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4748 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4749 if (!TBA || !FBA)
4750 return false;
4751
4752 // Extract the actual blocks.
4753 BasicBlock *TrueBB = TBA->getBasicBlock();
4754 BasicBlock *FalseBB = FBA->getBasicBlock();
4755
4756 // Perform the actual simplification.
4757 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4758 0);
4759}
4760
4761/// This is called when we find an icmp instruction
4762/// (a seteq/setne with a constant) as the only instruction in a
4763/// block that ends with an uncond branch. We are looking for a very specific
4764/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4765/// this case, we merge the first two "or's of icmp" into a switch, but then the
4766/// default value goes to an uncond block with a seteq in it, we get something
4767/// like:
4768///
4769/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4770/// DEFAULT:
4771/// %tmp = icmp eq i8 %A, 92
4772/// br label %end
4773/// end:
4774/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4775///
4776/// We prefer to split the edge to 'end' so that there is a true/false entry to
4777/// the PHI, merging the third icmp into the switch.
4778bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4779 ICmpInst *ICI, IRBuilder<> &Builder) {
4780 BasicBlock *BB = ICI->getParent();
4781
4782 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4783 // complex.
4784 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4785 return false;
4786
4787 Value *V = ICI->getOperand(0);
4788 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4789
4790 // The pattern we're looking for is where our only predecessor is a switch on
4791 // 'V' and this block is the default case for the switch. In this case we can
4792 // fold the compared value into the switch to simplify things.
4793 BasicBlock *Pred = BB->getSinglePredecessor();
4794 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4795 return false;
4796
4797 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4798 if (SI->getCondition() != V)
4799 return false;
4800
4801 // If BB is reachable on a non-default case, then we simply know the value of
4802 // V in this block. Substitute it and constant fold the icmp instruction
4803 // away.
4804 if (SI->getDefaultDest() != BB) {
4805 ConstantInt *VVal = SI->findCaseDest(BB);
4806 assert(VVal && "Should have a unique destination value");
4807 ICI->setOperand(0, VVal);
4808
4809 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
4810 ICI->replaceAllUsesWith(V);
4811 ICI->eraseFromParent();
4812 }
4813 // BB is now empty, so it is likely to simplify away.
4814 return requestResimplify();
4815 }
4816
4817 // Ok, the block is reachable from the default dest. If the constant we're
4818 // comparing exists in one of the other edges, then we can constant fold ICI
4819 // and zap it.
 // On the default path V cannot equal any explicit case value, so the
 // compare folds to a constant (false for EQ, true for NE).
4820 if (SI->findCaseValue(Cst) != SI->case_default()) {
4821 Value *V;
4822 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4824 else
4826
4827 ICI->replaceAllUsesWith(V);
4828 ICI->eraseFromParent();
4829 // BB is now empty, so it is likely to simplify away.
4830 return requestResimplify();
4831 }
4832
4833 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4834 // the block.
4835 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4836 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4837 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4838 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4839 return false;
4840
4841 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4842 // true in the PHI.
4843 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4844 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4845
4846 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4847 std::swap(DefaultCst, NewCst);
4848
4849 // Replace ICI (which is used by the PHI for the default value) with true or
4850 // false depending on if it is EQ or NE.
4851 ICI->replaceAllUsesWith(DefaultCst);
4852 ICI->eraseFromParent();
4853
4855
4856 // Okay, the switch goes to this block on a default value. Add an edge from
4857 // the switch to the merge point on the compared value.
4858 BasicBlock *NewBB =
4859 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4860 {
 // The new case steals roughly half of the old default-edge weight
 // (rounded up), keeping the profile totals approximately balanced.
4862 auto W0 = SIW.getSuccessorWeight(0);
4864 if (W0) {
4865 NewW = ((uint64_t(*W0) + 1) >> 1);
4866 SIW.setSuccessorWeight(0, *NewW);
4867 }
4868 SIW.addCase(Cst, NewBB, NewW);
4869 if (DTU)
4870 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4871 }
4872
4873 // NewBB branches to the phi block, add the uncond branch and the phi entry.
4874 Builder.SetInsertPoint(NewBB);
4875 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4876 Builder.CreateBr(SuccBlock);
4877 PHIUse->addIncoming(NewCst, NewBB);
4878 if (DTU) {
4879 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4880 DTU->applyUpdates(Updates);
4881 }
4882 return true;
4883}
4884
4885/// The specified branch is a conditional branch.
4886/// Check to see if it is branching on an or/and chain of icmp instructions, and
4887/// fold it into a switch instruction if so.
4888/// Returns true if the branch was replaced with a switch (plus, when some
4889/// operand could not be folded, one extra "early test" conditional branch).
4888bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
4889 IRBuilder<> &Builder,
4890 const DataLayout &DL) {
4891 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4892 if (!Cond)
4893 return false;
4894
4895 // Change br (X == 0 | X == 1), T, F into a switch instruction.
4896 // If this is a bunch of seteq's or'd together, or if it's a bunch of
4897 // 'setne's and'ed together, collect them.
4898
4899 // Try to gather values from a chain of and/or to be turned into a switch
4900 ConstantComparesGatherer ConstantCompare(Cond, DL);
4901 // Unpack the result
4902 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4903 Value *CompVal = ConstantCompare.CompValue;
4904 unsigned UsedICmps = ConstantCompare.UsedICmps;
4905 Value *ExtraCase = ConstantCompare.Extra;
4906
4907 // If we didn't have a multiply compared value, fail.
4908 if (!CompVal)
4909 return false;
4910
4911 // Avoid turning single icmps into a switch.
4912 if (UsedICmps <= 1)
4913 return false;
4914
 // "or" chains are equality tests (take the edge when equal); "and" chains
 // are inequality tests (take the edge when not equal).
4915 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4916
4917 // There might be duplicate constants in the list, which the switch
4918 // instruction can't handle, remove them now.
4919 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
4920 Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
4921
4922 // If Extra was used, we require at least two switch values to do the
4923 // transformation. A switch with one value is just a conditional branch.
4924 if (ExtraCase && Values.size() < 2)
4925 return false;
4926
4927 // TODO: Preserve branch weight metadata, similarly to how
4928 // FoldValueComparisonIntoPredecessors preserves it.
4929
4930 // Figure out which block is which destination.
4931 BasicBlock *DefaultBB = BI->getSuccessor(1);
4932 BasicBlock *EdgeBB = BI->getSuccessor(0);
4933 if (!TrueWhenEqual)
4934 std::swap(DefaultBB, EdgeBB);
4935
4936 BasicBlock *BB = BI->getParent();
4937
4938 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
4939 << " cases into SWITCH. BB is:\n"
4940 << *BB);
4941
4943
4944 // If there are any extra values that couldn't be folded into the switch
4945 // then we evaluate them with an explicit branch first. Split the block
4946 // right before the condbr to handle it.
4947 if (ExtraCase) {
4948 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4949 /*MSSAU=*/nullptr, "switch.early.test");
4950
4951 // Remove the uncond branch added to the old block.
4952 Instruction *OldTI = BB->getTerminator();
4953 Builder.SetInsertPoint(OldTI);
4954
4955 // There can be an unintended UB if extra values are Poison. Before the
4956 // transformation, extra values may not be evaluated according to the
4957 // condition, and it will not raise UB. But after transformation, we are
4958 // evaluating extra values before checking the condition, and it will raise
4959 // UB. It can be solved by adding freeze instruction to extra values.
4960 AssumptionCache *AC = Options.AC;
4961
4962 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4963 ExtraCase = Builder.CreateFreeze(ExtraCase);
4964
4965 if (TrueWhenEqual)
4966 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4967 else
4968 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4969
4970 OldTI->eraseFromParent();
4971
4972 if (DTU)
4973 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4974
4975 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4976 // for the edge we just added.
4977 AddPredecessorToBlock(EdgeBB, BB, NewBB);
4978
4979 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
4980 << "\nEXTRABB = " << *BB);
4981 BB = NewBB;
4982 }
4983
4984 Builder.SetInsertPoint(BI);
4985 // Convert pointer to int before we switch.
4986 if (CompVal->getType()->isPointerTy()) {
4987 CompVal = Builder.CreatePtrToInt(
4988 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
4989 }
4990
4991 // Create the new switch instruction now.
4992 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
4993
4994 // Add all of the 'cases' to the switch instruction.
4995 for (unsigned i = 0, e = Values.size(); i != e; ++i)
4996 New->addCase(Values[i], EdgeBB);
4997
4998 // We added edges from PI to the EdgeBB. As such, if there were any
4999 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5000 // the number of edges added.
 // One PHI entry for BB already exists, hence Values.size() - 1 new entries.
5001 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5002 PHINode *PN = cast<PHINode>(BBI);
5003 Value *InVal = PN->getIncomingValueForBlock(BB);
5004 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5005 PN->addIncoming(InVal, BB);
5006 }
5007
5008 // Erase the old branch instruction.
5010 if (DTU)
5011 DTU->applyUpdates(Updates);
5012
5013 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5014 return true;
5015}
5016
5017bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5018 if (isa<PHINode>(RI->getValue()))
5019 return simplifyCommonResume(RI);
5020 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5021 RI->getValue() == RI->getParent()->getFirstNonPHI())
5022 // The resume must unwind the exception that caused control to branch here.
5023 return simplifySingleResume(RI);
5024
5025 return false;
5026}
5027
5028// Check if cleanup block is empty
5029// Returns true if every instruction in range R is a benign intrinsic —
5030// debug info (dbg.declare/dbg.value/dbg.label) or lifetime.end — so the
5031// range can be treated as empty for cleanup-simplification purposes. Any
5032// non-intrinsic instruction or other intrinsic makes the range non-empty.
5030 for (Instruction &I : R) {
5031 auto *II = dyn_cast<IntrinsicInst>(&I);
5032 if (!II)
5033 return false;
5034
5035 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5036 switch (IntrinsicID) {
5037 case Intrinsic::dbg_declare:
5038 case Intrinsic::dbg_value:
5039 case Intrinsic::dbg_label:
5040 case Intrinsic::lifetime_end:
5041 break;
5042 default:
5043 return false;
5044 }
5045 }
5046 return true;
5047}
5048
5049// Simplify resume that is shared by several landing pads (phi of landing pad).
5050// For each incoming landing-pad block that is "trivial" (its only successor
5051// is this resume block, it resumes its own landingpad value, and it contains
5052// nothing but benign intrinsics), rewrite the invokes unwinding to it into
5053// calls and cut its edge to the resume block. Returns true if any incoming
5054// block was simplified.
5050bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5051 BasicBlock *BB = RI->getParent();
5052
5053 // Check that there are no other instructions except for debug and lifetime
5054 // intrinsics between the phi's and resume instruction.
5057 return false;
5058
5059 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5060 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5061
5062 // Check incoming blocks to see if any of them are trivial.
5063 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5064 Idx++) {
5065 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5066 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5067
5068 // If the block has other successors, we can not delete it because
5069 // it has other dependents.
5070 if (IncomingBB->getUniqueSuccessor() != BB)
5071 continue;
5072
5073 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5074 // Not the landing pad that caused the control to branch here.
5075 if (IncomingValue != LandingPad)
5076 continue;
5077
5079 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5080 TrivialUnwindBlocks.insert(IncomingBB);
5081 }
5082
5083 // If no trivial unwind blocks, don't do any simplifications.
5084 if (TrivialUnwindBlocks.empty())
5085 return false;
5086
5087 // Turn all invokes that unwind here into calls.
5088 for (auto *TrivialBB : TrivialUnwindBlocks) {
5089 // Blocks that will be simplified should be removed from the phi node.
5090 // Note there could be multiple edges to the resume block, and we need
5091 // to remove them all.
5092 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5093 BB->removePredecessor(TrivialBB, true);
5094
5095 for (BasicBlock *Pred :
5097 removeUnwindEdge(Pred, DTU);
5098 ++NumInvokes;
5099 }
5100
5101 // In each SimplifyCFG run, only the current processed block can be erased.
5102 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5103 // of erasing TrivialBB, we only remove the branch to the common resume
5104 // block so that we can later erase the resume block since it has no
5105 // predecessors.
5106 TrivialBB->getTerminator()->eraseFromParent();
5107 new UnreachableInst(RI->getContext(), TrivialBB);
5108 if (DTU)
5109 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5110 }
5111
5112 // Delete the resume block if all its predecessors have been removed.
5113 if (pred_empty(BB))
5114 DeleteDeadBlock(BB, DTU);
5115
 // Non-empty set here implies at least one simplification was performed.
5116 return !TrivialUnwindBlocks.empty();
5117}
5118
5119// Simplify resume that is only used by a single (non-phi) landing pad.
5120// If the block contains nothing but the landingpad, benign intrinsics, and
5121// the resume itself, every invoke unwinding here is converted to a call and
5122// the now-unreachable landing pad block is deleted. Returns true on change.
5120bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5121 BasicBlock *BB = RI->getParent();
5122 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5123 assert(RI->getValue() == LPInst &&
5124 "Resume must unwind the exception that caused control to here");
5125
5126 // Check that there are no other instructions except for debug intrinsics.
5128 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5129 return false;
5130
5131 // Turn all invokes that unwind here into calls and delete the basic block.
5133 removeUnwindEdge(Pred, DTU);
5134 ++NumInvokes;
5135 }
5136
5137 // The landingpad is now unreachable. Zap it.
5138 DeleteDeadBlock(BB, DTU);
5139 return true;
5140}
5141
// NOTE(review): the signature line of this helper (removeEmptyCleanup taking
// the CleanupReturnInst and a DomTreeUpdater) appears to be missing from
// this copy -- confirm against upstream.
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  // NOTE(review): the `if (!...)` line wrapping the range check below appears
  // to be missing from this copy -- confirm against upstream.
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // Redirect each predecessor of BB straight into DestPN, translating
      // through SrcPN when the incoming value lived inside BB.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  // NOTE(review): the loop header iterating over predecessors(BB) (binding
  // PredBB) appears to be missing from this copy -- confirm against upstream.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush queued updates before removeUnwindEdge mutates the CFG.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5258
// Try to merge two cleanuppads together.
// NOTE(review): the signature line of this helper (mergeCleanupPad taking
// the CleanupReturnInst) appears to be missing from this copy -- confirm
// against upstream.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5291
5292bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5293 // It is possible to transiantly have an undef cleanuppad operand because we
5294 // have deleted some, but not all, dead blocks.
5295 // Eventually, this block will be deleted.
5296 if (isa<UndefValue>(RI->getOperand(0)))
5297 return false;
5298
5299 if (mergeCleanupPad(RI))
5300 return true;
5301
5302 if (removeEmptyCleanup(RI, DTU))
5303 return true;
5304
5305 return false;
5306}
5307
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  // NOTE(review): the statement performing this move appears to be missing
  // from this copy -- confirm against upstream.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    // NOTE(review): the declaration of the iterator BBI (initialized from
    // UI's position) appears to be missing here -- confirm against upstream.
    --BBI;

    // NOTE(review): the guard condition that stops the erasure walk (an
    // `if (...)` line) appears to be missing here -- confirm against upstream.
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // NOTE(review): the declaration of Preds (a snapshot of predecessors(BB),
  // taken because the loop mutates the CFG) appears to be missing here --
  // confirm against upstream.
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    auto *Predecessor = Preds[i];
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        // Every successor is unreachable: the predecessor itself can end in
        // an unreachable.
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        if (BI->getSuccessor(0) == BB) {
          // Taking the edge to BB is impossible, so the condition must be
          // false; record that as an assumption and branch to the other arm.
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        // NOTE(review): the erasure of the old conditional branch appears to
        // be missing here -- confirm against upstream.
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      // NOTE(review): the declaration of SU (a wrapper around SI used for
      // case removal below) appears to be missing here -- confirm upstream.
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          // Flush queued updates before removeUnwindEdge mutates the CFG.
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The unwind destination is unreachable, so the resulting call
        // cannot actually throw.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB may also appear among the catchswitch's handlers; drop those
      // handler entries.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5492
// NOTE(review): the signature line of this helper (CasesAreContiguous over
// the vector of case values) appears to be missing from this copy --
// confirm against upstream.
  assert(Cases.size() >= 1);

  // NOTE(review): a sort of Cases appears to be missing here; the loop below
  // relies on the values forming a strictly decreasing sequence.
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    // Each value must be exactly one less than its predecessor.
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5503
// NOTE(review): the first line of this helper's signature
// (createUnreachableSwitchDefault taking the SwitchInst) appears to be
// missing from this copy -- confirm against upstream.
                                           DomTreeUpdater *DTU) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  // Detach the switch's block from the old default destination's PHIs.
  OrigDefaultBlock->removePredecessor(BB);
  // Create a fresh block holding only an `unreachable` and make it the new
  // default destination.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    // NOTE(review): the declaration of the Updates vector appears to be
    // missing here -- confirm against upstream.
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no case still targets the old default.
    if (!is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5523
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  // The default destination counts as live unless it is just an
  // `unreachable` (possibly after PHIs/debug instructions).
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  // NOTE(review): the declarations of the two case-value vectors CasesA and
  // CasesB appear to be missing from this copy -- confirm against upstream.

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (CasesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Bias the condition so the contiguous range begins at zero; then a single
  // unsigned less-than against the range size picks the destination.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    // NOTE(review): the declaration of the Weights vector appears to be
    // missing from this copy -- confirm against upstream.
    GetBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both sums down together until each fits in 32 bits.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    // Keep exactly one incoming edge from BB (for the new branch).
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    // NOTE(review): the statement guarded by this `if` (presumably rewriting
    // the dead default destination) appears to be missing from this copy.

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5653
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
// NOTE(review): the first line of this helper's signature
// (eliminateDeadSwitchCases taking the SwitchInst and a DomTreeUpdater)
// appears to be missing from this copy -- confirm against upstream.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);

  // Gather dead cases.
  // NOTE(review): the declaration of the DeadCases vector appears to be
  // missing here -- confirm against upstream.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    // Per-successor counting is only needed to emit domtree updates later.
    if (DTU) {
      if (!NumPerSuccessorCases.count(Successor))
        UniqueSuccessors.push_back(Successor);
      ++NumPerSuccessorCases[Successor];
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it conflicts with a bit known to be zero/one in the
    // condition, or requires more significant bits than the condition has.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */ &&
      SI->getNumCases() == (1ULL << NumUnknownBits)) {
    // NOTE(review): the statement that replaces the now-dead default
    // destination appears to be missing here -- confirm against upstream.
    return true;
  }

  if (DeadCases.empty())
    return false;

  // NOTE(review): the declaration of SIW (the object whose removeCase is
  // called below) appears to be missing here -- confirm against upstream.
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    // Delete CFG edges for successors that lost all of their cases.
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
5729
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): the first line of this helper's signature
// (FindPHIForConditionForwarding taking the case value) appears to be
// missing from this copy -- confirm against upstream.
                                             BasicBlock *BB, int *PhiIndex) {
  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Look for a PHI in the successor whose incoming value from BB is exactly
  // the case constant.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
5762
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): the signature line of this helper
// (forwardSwitchConditionToPHI taking the SwitchInst) appears to be missing
// from this copy -- confirm against upstream.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    // switch i32 %x, label %default [
    // i32 17, label %succ
    // ...
    // succ:
    // %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    // %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Only forward when at least two slots benefit; a single forwarded slot
    // would not let any destination block be folded away.
    if (Indexes.size() < 2)
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
5820
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
// NOTE(review): the signature line of this helper (ValidLookupTableConstant
// taking the Constant and a TargetTransformInfo) appears to be missing from
// this copy -- confirm against upstream.
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  // Only simple, directly-materializable constant kinds are accepted.
  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
      return false;
  }

  // NOTE(review): the condition guarding this `return false` (presumably a
  // target query on TTI) appears to be missing from this copy -- confirm
  // against upstream.
    return false;

  return true;
}
5847
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
// NOTE(review): the parameter list of this helper (the Value and the
// ConstantPool map) appears to be missing from this copy -- confirm against
// upstream.
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // lookup() returns a default-constructed (null) pointer when V is absent.
  return ConstantPool.lookup(V);
}
5857
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
// NOTE(review): the parameter list of this helper (the Instruction, the
// DataLayout and the ConstantPool map) appears to be missing from this copy
// -- confirm against upstream.
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    // A known-true condition folds to the true arm, known-false to the false
    // arm; any other constant condition is not foldable.
    if (A->isAllOnesValue())
      return LookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return LookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // Every operand must resolve to a constant for the fold to proceed.
  // NOTE(review): the declaration of the COps operand vector appears to be
  // missing here -- confirm against upstream.
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
5886
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
// NOTE(review): the first line of the parameter list (taking SI, CaseVal and
// CaseDest) appears to be missing from this copy -- confirm against
// upstream.
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  // NOTE(review): the declaration of the ConstantPool map appears to be
  // missing here -- confirm against upstream.
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      // First non-foldable, non-terminator instruction: stop scanning.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!ValidLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}
5960
5961// Helper function used to add CaseVal to the list of cases that generate
5962// Result. Returns the updated number of cases that generate this result.
5963static size_t mapCaseToResult(ConstantInt *CaseVal,
5964 SwitchCaseResultVectorTy &UniqueResults,
5965 Constant *Result) {
5966 for (auto &I : UniqueResults) {
5967 if (I.first == Result) {
5968 I.second.push_back(CaseVal);
5969 return I.second.size();
5970 }
5971 }
5972 UniqueResults.push_back(
5973 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5974 return 1;
5975}
5976
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
// NOTE(review): the first line of this helper's signature
// (initializeUniqueCases taking the SwitchInst and a PHINode reference)
// appears to be missing from this copy -- confirm against upstream.
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  // NOTE(review): the declaration of the DefaultResults vector appears to be
  // missing here -- confirm against upstream.
  BasicBlock *DefaultDest = SI->getDefaultDest();
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
  if ((!DefaultResult &&
       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
    return false;

  return true;
}
6034
6035// Helper function that checks if it is possible to transform a switch with only
6036// two cases (or two cases + default) that produces a result into a select.
6037// TODO: Handle switches with more than 2 cases that map to the same result.
6038static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6039 Constant *DefaultResult, Value *Condition,
6040 IRBuilder<> &Builder) {
6041 // If we are selecting between only two cases transform into a simple
6042 // select or a two-way select if default is possible.
6043 // Example:
6044 // switch (a) { %0 = icmp eq i32 %a, 10
6045 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6046 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6047 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6048 // }
6049 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6050 ResultVector[1].second.size() == 1) {
6051 ConstantInt *FirstCase = ResultVector[0].second[0];
6052 ConstantInt *SecondCase = ResultVector[1].second[0];
6053 Value *SelectValue = ResultVector[1].first;
6054 if (DefaultResult) {
6055 Value *ValueCompare =
6056 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6057 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6058 DefaultResult, "switch.select");
6059 }
6060 Value *ValueCompare =
6061 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6062 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6063 SelectValue, "switch.select");
6064 }
6065
6066 // Handle the degenerate case where two cases have the same result value.
6067 if (ResultVector.size() == 1 && DefaultResult) {
6068 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6069 unsigned CaseCount = CaseValues.size();
6070 // n bits group cases map to the same result:
6071 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6072 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6073 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6074 if (isPowerOf2_32(CaseCount)) {
6075 ConstantInt *MinCaseVal = CaseValues[0];
6076 // Find mininal value.
6077 for (auto *Case : CaseValues)
6078 if (Case->getValue().slt(MinCaseVal->getValue()))
6079 MinCaseVal = Case;
6080
6081 // Mark the bits case number touched.
6082 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6083 for (auto *Case : CaseValues)
6084 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6085
6086 // Check if cases with the same result can cover all number
6087 // in touched bits.
6088 if (BitMask.popcount() == Log2_32(CaseCount)) {
6089 if (!MinCaseVal->isNullValue())
6090 Condition = Builder.CreateSub(Condition, MinCaseVal);
6091 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6092 Value *Cmp = Builder.CreateICmpEQ(
6093 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6094 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6095 }
6096 }
6097
6098 // Handle the degenerate case where two cases have the same value.
6099 if (CaseValues.size() == 2) {
6100 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6101 "switch.selectcmp.case1");
6102 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6103 "switch.selectcmp.case2");
6104 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6105 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6106 }
6107 }
6108
6109 return nullptr;
6110}
6111
6112// Helper function to cleanup a switch instruction that has been converted into
6113// a select, fixing up PHI nodes and basic blocks.
6115 Value *SelectValue,
6116 IRBuilder<> &Builder,
6117 DomTreeUpdater *DTU) {
6118 std::vector<DominatorTree::UpdateType> Updates;
6119
6120 BasicBlock *SelectBB = SI->getParent();
6121 BasicBlock *DestBB = PHI->getParent();
6122
6123 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6124 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6125 Builder.CreateBr(DestBB);
6126
6127 // Remove the switch.
6128
6129 PHI->removeIncomingValueIf(
6130 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6131 PHI->addIncoming(SelectValue, SelectBB);
6132
6133 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6134 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6135 BasicBlock *Succ = SI->getSuccessor(i);
6136
6137 if (Succ == DestBB)
6138 continue;
6139 Succ->removePredecessor(SelectBB);
6140 if (DTU && RemovedSuccessors.insert(Succ).second)
6141 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6142 }
6143 SI->eraseFromParent();
6144 if (DTU)
6145 DTU->applyUpdates(Updates);
6146}
6147
6148/// If a switch is only used to initialize one or more phi nodes in a common
6149/// successor block with only two different constant values, try to replace the
6150/// switch with a select. Returns true if the fold was made.
6151static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6152 DomTreeUpdater *DTU, const DataLayout &DL,
6153 const TargetTransformInfo &TTI) {
6154 Value *const Cond = SI->getCondition();
6155 PHINode *PHI = nullptr;
6156 BasicBlock *CommonDest = nullptr;
6157 Constant *DefaultResult;
6158 SwitchCaseResultVectorTy UniqueResults;
6159 // Collect all the cases that will deliver the same value from the switch.
6160 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6161 DL, TTI, /*MaxUniqueResults*/ 2))
6162 return false;
6163
6164 assert(PHI != nullptr && "PHI for value select not found");
6165 Builder.SetInsertPoint(SI);
6166 Value *SelectValue =
6167 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6168 if (!SelectValue)
6169 return false;
6170
6171 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6172 return true;
6173}
6174
6175namespace {
6176
6177/// This class represents a lookup table that can be used to replace a switch.
6178class SwitchLookupTable {
6179public:
6180 /// Create a lookup table to use as a switch replacement with the contents
6181 /// of Values, using DefaultValue to fill any holes in the table.
6182 SwitchLookupTable(
6183 Module &M, uint64_t TableSize, ConstantInt *Offset,
6184 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6185 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6186
6187 /// Build instructions with Builder to retrieve the value at
6188 /// the position given by Index in the lookup table.
6189 Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
6190
6191 /// Return true if a table with TableSize elements of
6192 /// type ElementType would fit in a target-legal register.
6193 static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6194 Type *ElementType);
6195
6196private:
6197 // Depending on the contents of the table, it can be represented in
6198 // different ways.
6199 enum {
6200 // For tables where each element contains the same value, we just have to
6201 // store that single value and return it for each lookup.
6202 SingleValueKind,
6203
6204 // For tables where there is a linear relationship between table index
6205 // and values. We calculate the result with a simple multiplication
6206 // and addition instead of a table lookup.
6207 LinearMapKind,
6208
6209 // For small tables with integer elements, we can pack them into a bitmap
6210 // that fits into a target-legal register. Values are retrieved by
6211 // shift and mask operations.
6212 BitMapKind,
6213
6214 // The table is stored as an array of values. Values are retrieved by load
6215 // instructions from the table.
6216 ArrayKind
6217 } Kind;
6218
6219 // For SingleValueKind, this is the single value.
6220 Constant *SingleValue = nullptr;
6221
6222 // For BitMapKind, this is the bitmap.
6223 ConstantInt *BitMap = nullptr;
6224 IntegerType *BitMapElementTy = nullptr;
6225
6226 // For LinearMapKind, these are the constants used to derive the value.
6227 ConstantInt *LinearOffset = nullptr;
6228 ConstantInt *LinearMultiplier = nullptr;
6229 bool LinearMapValWrapped = false;
6230
6231 // For ArrayKind, this is the array.
6232 GlobalVariable *Array = nullptr;
6233};
6234
6235} // end anonymous namespace
6236
6237SwitchLookupTable::SwitchLookupTable(
6238 Module &M, uint64_t TableSize, ConstantInt *Offset,
6239 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6240 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6241 assert(Values.size() && "Can't build lookup table without values!");
6242 assert(TableSize >= Values.size() && "Can't fit values in table!");
6243
6244 // If all values in the table are equal, this is that value.
6245 SingleValue = Values.begin()->second;
6246
6247 Type *ValueType = Values.begin()->second->getType();
6248
6249 // Build up the table contents.
6250 SmallVector<Constant *, 64> TableContents(TableSize);
6251 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6252 ConstantInt *CaseVal = Values[I].first;
6253 Constant *CaseRes = Values[I].second;
6254 assert(CaseRes->getType() == ValueType);
6255
6256 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6257 TableContents[Idx] = CaseRes;
6258
6259 if (CaseRes != SingleValue)
6260 SingleValue = nullptr;
6261 }
6262
6263 // Fill in any holes in the table with the default result.
6264 if (Values.size() < TableSize) {
6265 assert(DefaultValue &&
6266 "Need a default value to fill the lookup table holes.");
6267 assert(DefaultValue->getType() == ValueType);
6268 for (uint64_t I = 0; I < TableSize; ++I) {
6269 if (!TableContents[I])
6270 TableContents[I] = DefaultValue;
6271 }
6272
6273 if (DefaultValue != SingleValue)
6274 SingleValue = nullptr;
6275 }
6276
6277 // If each element in the table contains the same value, we only need to store
6278 // that single value.
6279 if (SingleValue) {
6280 Kind = SingleValueKind;
6281 return;
6282 }
6283
6284 // Check if we can derive the value with a linear transformation from the
6285 // table index.
6286 if (isa<IntegerType>(ValueType)) {
6287 bool LinearMappingPossible = true;
6288 APInt PrevVal;
6289 APInt DistToPrev;
6290 // When linear map is monotonic and signed overflow doesn't happen on
6291 // maximum index, we can attach nsw on Add and Mul.
6292 bool NonMonotonic = false;
6293 assert(TableSize >= 2 && "Should be a SingleValue table.");
6294 // Check if there is the same distance between two consecutive values.
6295 for (uint64_t I = 0; I < TableSize; ++I) {
6296 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6297 if (!ConstVal) {
6298 // This is an undef. We could deal with it, but undefs in lookup tables
6299 // are very seldom. It's probably not worth the additional complexity.
6300 LinearMappingPossible = false;
6301 break;
6302 }
6303 const APInt &Val = ConstVal->getValue();
6304 if (I != 0) {
6305 APInt Dist = Val - PrevVal;
6306 if (I == 1) {
6307 DistToPrev = Dist;
6308 } else if (Dist != DistToPrev) {
6309 LinearMappingPossible = false;
6310 break;
6311 }
6312 NonMonotonic |=
6313 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6314 }
6315 PrevVal = Val;
6316 }
6317 if (LinearMappingPossible) {
6318 LinearOffset = cast<ConstantInt>(TableContents[0]);
6319 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6320 bool MayWrap = false;
6321 APInt M = LinearMultiplier->getValue();
6322 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6323 LinearMapValWrapped = NonMonotonic || MayWrap;
6324 Kind = LinearMapKind;
6325 ++NumLinearMaps;
6326 return;
6327 }
6328 }
6329
6330 // If the type is integer and the table fits in a register, build a bitmap.
6331 if (WouldFitInRegister(DL, TableSize, ValueType)) {
6332 IntegerType *IT = cast<IntegerType>(ValueType);
6333 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6334 for (uint64_t I = TableSize; I > 0; --I) {
6335 TableInt <<= IT->getBitWidth();
6336 // Insert values into the bitmap. Undef values are set to zero.
6337 if (!isa<UndefValue>(TableContents[I - 1])) {
6338 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6339 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6340 }
6341 }
6342 BitMap = ConstantInt::get(M.getContext(), TableInt);
6343 BitMapElementTy = IT;
6344 Kind = BitMapKind;
6345 ++NumBitMaps;
6346 return;
6347 }
6348
6349 // Store the table in an array.
6350 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6351 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6352
6353 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6354 GlobalVariable::PrivateLinkage, Initializer,
6355 "switch.table." + FuncName);
6356 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6357 // Set the alignment to that of an array items. We will be only loading one
6358 // value out of it.
6359 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6360 Kind = ArrayKind;
6361}
6362
6363Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
6364 switch (Kind) {
6365 case SingleValueKind:
6366 return SingleValue;
6367 case LinearMapKind: {
6368 // Derive the result value from the input value.
6369 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6370 false, "switch.idx.cast");
6371 if (!LinearMultiplier->isOne())
6372 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6373 /*HasNUW = */ false,
6374 /*HasNSW = */ !LinearMapValWrapped);
6375
6376 if (!LinearOffset->isZero())
6377 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6378 /*HasNUW = */ false,
6379 /*HasNSW = */ !LinearMapValWrapped);
6380 return Result;
6381 }
6382 case BitMapKind: {
6383 // Type of the bitmap (e.g. i59).
6384 IntegerType *MapTy = BitMap->getIntegerType();
6385
6386 // Cast Index to the same type as the bitmap.
6387 // Note: The Index is <= the number of elements in the table, so
6388 // truncating it to the width of the bitmask is safe.
6389 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6390
6391 // Multiply the shift amount by the element width. NUW/NSW can always be
6392 // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
6393 // BitMap's bit width.
6394 ShiftAmt = Builder.CreateMul(
6395 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6396 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6397
6398 // Shift down.
6399 Value *DownShifted =
6400 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6401 // Mask off.
6402 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6403 }
6404 case ArrayKind: {
6405 // Make sure the table index will not overflow when treated as signed.
6406 IntegerType *IT = cast<IntegerType>(Index->getType());
6407 uint64_t TableSize =
6408 Array->getInitializer()->getType()->getArrayNumElements();
6409 if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6410 Index = Builder.CreateZExt(
6411 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6412 "switch.tableidx.zext");
6413
6414 Value *GEPIndices[] = {Builder.getInt32(0), Index};
6415 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6416 GEPIndices, "switch.gep");
6417 return Builder.CreateLoad(
6418 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6419 "switch.load");
6420 }
6421 }
6422 llvm_unreachable("Unknown lookup table kind!");
6423}
6424
6425bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6426 uint64_t TableSize,
6427 Type *ElementType) {
6428 auto *IT = dyn_cast<IntegerType>(ElementType);
6429 if (!IT)
6430 return false;
6431 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6432 // are <= 15, we could try to narrow the type.
6433
6434 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6435 if (TableSize >= UINT_MAX / IT->getBitWidth())
6436 return false;
6437 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6438}
6439
6441 const DataLayout &DL) {
6442 // Allow any legal type.
6443 if (TTI.isTypeLegal(Ty))
6444 return true;
6445
6446 auto *IT = dyn_cast<IntegerType>(Ty);
6447 if (!IT)
6448 return false;
6449
6450 // Also allow power of 2 integer types that have at least 8 bits and fit in
6451 // a register. These types are common in frontend languages and targets
6452 // usually support loads of these types.
6453 // TODO: We could relax this to any integer that fits in a register and rely
6454 // on ABI alignment and padding in the table to allow the load to be widened.
6455 // Or we could widen the constants and truncate the load.
6456 unsigned BitWidth = IT->getBitWidth();
6457 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6458 DL.fitsInLegalInteger(IT->getBitWidth());
6459}
6460
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // Require at least 40% of the covered range to be populated. This mirrors
  // the default jump-table density used in optsize/minsize mode; see also
  // TargetLoweringBase::isSuitableForJumpTable(), which this function was
  // based on.
  const uint64_t MinDensity = 40;

  // Reject huge ranges up front so neither product below can overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  return NumCases * 100 >= CaseRange * MinDensity;
}
6472
6474 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6475 uint64_t Range = Diff + 1;
6476 if (Range < Diff)
6477 return false; // Overflow.
6478
6479 return isSwitchDense(Values.size(), Range);
6480}
6481
6482/// Determine whether a lookup table should be built for this switch, based on
6483/// the number of cases, size of the table, and the types of the results.
6484// TODO: We could support larger than legal types by limiting based on the
6485// number of loads required and/or table size. If the constants are small we
6486// could use smaller table entries and extend after the load.
6487static bool
6489 const TargetTransformInfo &TTI, const DataLayout &DL,
6490 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6491 if (SI->getNumCases() > TableSize)
6492 return false; // TableSize overflowed.
6493
6494 bool AllTablesFitInRegister = true;
6495 bool HasIllegalType = false;
6496 for (const auto &I : ResultTypes) {
6497 Type *Ty = I.second;
6498
6499 // Saturate this flag to true.
6500 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6501
6502 // Saturate this flag to false.
6503 AllTablesFitInRegister =
6504 AllTablesFitInRegister &&
6505 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
6506
6507 // If both flags saturate, we're done. NOTE: This *only* works with
6508 // saturating flags, and all flags have to saturate first due to the
6509 // non-deterministic behavior of iterating over a dense map.
6510 if (HasIllegalType && !AllTablesFitInRegister)
6511 break;
6512 }
6513
6514 // If each table would fit in a register, we should build it anyway.
6515 if (AllTablesFitInRegister)
6516 return true;
6517
6518 // Don't build a table that doesn't fit in-register if it has illegal types.
6519 if (HasIllegalType)
6520 return false;
6521
6522 return isSwitchDense(SI->getNumCases(), TableSize);
6523}
6524
6526 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6527 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6528 const DataLayout &DL, const TargetTransformInfo &TTI) {
6529 if (MinCaseVal.isNullValue())
6530 return true;
6531 if (MinCaseVal.isNegative() ||
6532 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6533 !HasDefaultResults)
6534 return false;
6535 return all_of(ResultTypes, [&](const auto &KV) {
6536 return SwitchLookupTable::WouldFitInRegister(
6537 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6538 KV.second /* ResultType */);
6539 });
6540}
6541
6542/// Try to reuse the switch table index compare. Following pattern:
6543/// \code
6544/// if (idx < tablesize)
6545/// r = table[idx]; // table does not contain default_value
6546/// else
6547/// r = default_value;
6548/// if (r != default_value)
6549/// ...
6550/// \endcode
6551/// Is optimized to:
6552/// \code
6553/// cond = idx < tablesize;
6554/// if (cond)
6555/// r = table[idx];
6556/// else
6557/// r = default_value;
6558/// if (cond)
6559/// ...
6560/// \endcode
6561/// Jump threading will then eliminate the second if(cond).
6563 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6564 Constant *DefaultValue,
6565 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6566 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6567 if (!CmpInst)
6568 return;
6569
6570 // We require that the compare is in the same block as the phi so that jump
6571 // threading can do its work afterwards.
6572 if (CmpInst->getParent() != PhiBlock)
6573 return;
6574
6575 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6576 if (!CmpOp1)
6577 return;
6578
6579 Value *RangeCmp = RangeCheckBranch->getCondition();
6580 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6581 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6582
6583 // Check if the compare with the default value is constant true or false.
6585 DefaultValue, CmpOp1, true);
6586 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6587 return;
6588
6589 // Check if the compare with the case values is distinct from the default
6590 // compare result.
6591 for (auto ValuePair : Values) {
6593 ValuePair.second, CmpOp1, true);
6594 if (!CaseConst || CaseConst == DefaultConst ||
6595 (CaseConst != TrueConst && CaseConst != FalseConst))
6596 return;
6597 }
6598
6599 // Check if the branch instruction dominates the phi node. It's a simple
6600 // dominance check, but sufficient for our needs.
6601 // Although this check is invariant in the calling loops, it's better to do it
6602 // at this late stage. Practically we do it at most once for a switch.
6603 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6604 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6605 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6606 return;
6607 }
6608
6609 if (DefaultConst == FalseConst) {
6610 // The compare yields the same result. We can replace it.
6611 CmpInst->replaceAllUsesWith(RangeCmp);
6612 ++NumTableCmpReuses;
6613 } else {
6614 // The compare yields the same result, just inverted. We can replace it.
6615 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6616 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6617 RangeCheckBranch->getIterator());
6618 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6619 ++NumTableCmpReuses;
6620 }
6621}
6622
6623/// If the switch is only used to initialize one or more phi nodes in a common
6624/// successor block with different constant values, replace the switch with
6625/// lookup tables.
6627 DomTreeUpdater *DTU, const DataLayout &DL,
6628 const TargetTransformInfo &TTI) {
6629 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6630
6631 BasicBlock *BB = SI->getParent();
6632 Function *Fn = BB->getParent();
6633 // Only build lookup table when we have a target that supports it or the
6634 // attribute is not set.
6636 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6637 return false;
6638
6639 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6640 // split off a dense part and build a lookup table for that.
6641
6642 // FIXME: This creates arrays of GEPs to constant strings, which means each
6643 // GEP needs a runtime relocation in PIC code. We should just build one big
6644 // string and lookup indices into that.
6645
6646 // Ignore switches with less than three cases. Lookup tables will not make
6647 // them faster, so we don't analyze them.
6648 if (SI->getNumCases() < 3)
6649 return false;
6650
6651 // Figure out the corresponding result for each case value and phi node in the
6652 // common destination, as well as the min and max case values.
6653 assert(!SI->cases().empty());
6654 SwitchInst::CaseIt CI = SI->case_begin();
6655 ConstantInt *MinCaseVal = CI->getCaseValue();
6656 ConstantInt *MaxCaseVal = CI->getCaseValue();
6657
6658 BasicBlock *CommonDest = nullptr;
6659
6660 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6662
6666
6667 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6668 ConstantInt *CaseVal = CI->getCaseValue();
6669 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6670 MinCaseVal = CaseVal;
6671 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6672 MaxCaseVal = CaseVal;
6673
6674 // Resulting value at phi nodes for this case value.
6676 ResultsTy Results;
6677 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6678 Results, DL, TTI))
6679 return false;
6680
6681 // Append the result from this case to the list for each phi.
6682 for (const auto &I : Results) {
6683 PHINode *PHI = I.first;
6684 Constant *Value = I.second;
6685 if (!ResultLists.count(PHI))
6686 PHIs.push_back(PHI);
6687 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6688 }
6689 }
6690
6691 // Keep track of the result types.
6692 for (PHINode *PHI : PHIs) {
6693 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6694 }
6695
6696 uint64_t NumResults = ResultLists[PHIs[0]].size();
6697
6698 // If the table has holes, we need a constant result for the default case
6699 // or a bitmask that fits in a register.
6700 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6701 bool HasDefaultResults =
6702 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6703 DefaultResultsList, DL, TTI);
6704
6705 for (const auto &I : DefaultResultsList) {
6706 PHINode *PHI = I.first;
6707 Constant *Result = I.second;
6708 DefaultResults[PHI] = Result;
6709 }
6710
6711 bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
6712 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6713 uint64_t TableSize;
6714 if (UseSwitchConditionAsTableIndex)
6715 TableSize = MaxCaseVal->getLimitedValue() + 1;
6716 else
6717 TableSize =
6718 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6719
6720 bool TableHasHoles = (NumResults < TableSize);
6721 bool NeedMask = (TableHasHoles && !HasDefaultResults);
6722 if (NeedMask) {
6723 // As an extra penalty for the validity test we require more cases.
6724 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6725 return false;
6726 if (!DL.fitsInLegalInteger(TableSize))
6727 return false;
6728 }
6729
6730 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6731 return false;
6732
6733 std::vector<DominatorTree::UpdateType> Updates;
6734
6735 // Compute the maximum table size representable by the integer type we are
6736 // switching upon.
6737 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6738 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6739 assert(MaxTableSize >= TableSize &&
6740 "It is impossible for a switch to have more entries than the max "
6741 "representable value of its input integer type's size.");
6742
6743 // If the default destination is unreachable, or if the lookup table covers
6744 // all values of the conditional variable, branch directly to the lookup table
6745 // BB. Otherwise, check that the condition is within the case range.
6746 bool DefaultIsReachable =
6747 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
6748
6749 // Create the BB that does the lookups.
6750 Module &Mod = *CommonDest->getParent()->getParent();
6751 BasicBlock *LookupBB = BasicBlock::Create(
6752 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6753
6754 // Compute the table index value.
6755 Builder.SetInsertPoint(SI);
6756 Value *TableIndex;
6757 ConstantInt *TableIndexOffset;
6758 if (UseSwitchConditionAsTableIndex) {
6759 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6760 TableIndex = SI->getCondition();
6761 } else {
6762 TableIndexOffset = MinCaseVal;
6763 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6764 // we can try to attach nsw.
6765 bool MayWrap = true;
6766 if (!DefaultIsReachable) {
6767 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6768 (void)Res;
6769 }
6770
6771 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6772 "switch.tableidx", /*HasNUW =*/false,
6773 /*HasNSW =*/!MayWrap);
6774 }
6775
6776 BranchInst *RangeCheckBranch = nullptr;
6777
6778 // Grow the table to cover all possible index values to avoid the range check.
6779 // It will use the default result to fill in the table hole later, so make
6780 // sure it exist.
6781 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6782 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6783 // Grow the table shouldn't have any size impact by checking
6784 // WouldFitInRegister.
6785 // TODO: Consider growing the table also when it doesn't fit in a register
6786 // if no optsize is specified.
6787 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6788 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6789 return SwitchLookupTable::WouldFitInRegister(
6790 DL, UpperBound, KV.second /* ResultType */);
6791 })) {
6792 // There may be some case index larger than the UpperBound (unreachable
6793 // case), so make sure the table size does not get smaller.
6794 TableSize = std::max(UpperBound, TableSize);
6795 // The default branch is unreachable after we enlarge the lookup table.
6796 // Adjust DefaultIsReachable to reuse code path.
6797 DefaultIsReachable = false;
6798 }
6799 }
6800
6801 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6802 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6803 Builder.CreateBr(LookupBB);
6804 if (DTU)
6805 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6806 // Note: We call removeProdecessor later since we need to be able to get the
6807 // PHI value for the default case in case we're using a bit mask.
6808 } else {
6809 Value *Cmp = Builder.CreateICmpULT(
6810 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6811 RangeCheckBranch =
6812 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6813 if (DTU)
6814 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6815 }
6816
6817 // Populate the BB that does the lookups.
6818 Builder.SetInsertPoint(LookupBB);
6819
6820 if (NeedMask) {
6821 // Before doing the lookup, we do the hole check. The LookupBB is therefore
6822 // re-purposed to do the hole check, and we create a new LookupBB.
6823 BasicBlock *MaskBB = LookupBB;
6824 MaskBB->setName("switch.hole_check");
6825 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6826 CommonDest->getParent(), CommonDest);
6827
6828 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6829 // unnecessary illegal types.
6830 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6831 APInt MaskInt(TableSizePowOf2, 0);
6832 APInt One(TableSizePowOf2, 1);
6833 // Build bitmask; fill in a 1 bit for every case.
6834 const ResultListTy &ResultList = ResultLists[PHIs[0]];
6835 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6836 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6837 .getLimitedValue();
6838 MaskInt |= One << Idx;
6839 }
6840 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6841
6842 // Get the TableIndex'th bit of the bitmask.
6843 // If this bit is 0 (meaning hole) jump to the default destination,
6844 // else continue with table lookup.
6845 IntegerType *MapTy = TableMask->getIntegerType();
6846 Value *MaskIndex =
6847 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6848 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6849 Value *LoBit = Builder.CreateTrunc(
6850 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6851 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6852 if (DTU) {
6853 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6854 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6855 }
6856 Builder.SetInsertPoint(LookupBB);
6857 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6858 }
6859
6860 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6861 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6862 // do not delete PHINodes here.
6863 SI->getDefaultDest()->removePredecessor(BB,
6864 /*KeepOneInputPHIs=*/true);
6865 if (DTU)
6866 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6867 }
6868
6869 for (PHINode *PHI : PHIs) {
6870 const ResultListTy &ResultList = ResultLists[PHI];
6871
6872 // If using a bitmask, use any value to fill the lookup table holes.
6873 Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
6874 StringRef FuncName = Fn->getName();
6875 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6876 DL, FuncName);
6877
6878 Value *Result = Table.BuildLookup(TableIndex, Builder);
6879
6880 // Do a small peephole optimization: re-use the switch table compare if
6881 // possible.
6882 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6883 BasicBlock *PhiBlock = PHI->getParent();
6884 // Search for compare instructions which use the phi.
6885 for (auto *User : PHI->users()) {
6886 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6887 }
6888 }
6889
6890 PHI->addIncoming(Result, LookupBB);
6891 }
6892
6893 Builder.CreateBr(CommonDest);
6894 if (DTU)
6895 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6896
6897 // Remove the switch.
6898 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6899 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6900 BasicBlock *Succ = SI->getSuccessor(i);
6901
6902 if (Succ == SI->getDefaultDest())
6903 continue;
6904 Succ->removePredecessor(BB);
6905 if (DTU && RemovedSuccessors.insert(Succ).second)
6906 Updates.push_back({DominatorTree::Delete, BB, Succ});
6907 }
6908 SI->eraseFromParent();
6909
6910 if (DTU)
6911 DTU->applyUpdates(Updates);
6912
6913 ++NumLookupTables;
6914 if (NeedMask)
6915 ++NumLookupTablesHoles;
6916 return true;
6917}
6918
6919/// Try to transform a switch that has "holes" in it to a contiguous sequence
6920/// of cases.
6921///
6922/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6923/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6924///
6925/// This converts a sparse switch into a dense switch which allows better
6926/// lowering and could also allow transforming into a lookup table.
6927static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6928 const DataLayout &DL,
6929 const TargetTransformInfo &TTI) {
 // Bail out for conditions wider than 64 bits or not legal for the target.
6930 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6931 if (CondTy->getIntegerBitWidth() > 64 ||
6932 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6933 return false;
6934 // Only bother with this optimization if there are more than 3 switch cases;
6935 // SDAG will only bother creating jump tables for 4 or more cases.
6936 if (SI->getNumCases() < 4)
6937 return false;
6938
6939 // This transform is agnostic to the signedness of the input or case values. We
6940 // can treat the case values as signed or unsigned. We can optimize more common
6941 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6942 // as signed.
 // NOTE(review): the declaration of the `Values` container (original line
 // 6943) is missing from this extraction — verify against upstream.
6944 for (const auto &C : SI->cases())
6945 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6946 llvm::sort(Values);
6947
6948 // If the switch is already dense, there's nothing useful to do here.
6949 if (isSwitchDense(Values))
6950 return false;
6951
6952 // First, transform the values such that they start at zero and ascend.
6953 int64_t Base = Values[0];
6954 for (auto &V : Values)
6955 V -= (uint64_t)(Base);
6956
6957 // Now we have signed numbers that have been shifted so that, given enough
6958 // precision, there are no negative values. Since the rest of the transform
6959 // is bitwise only, we switch now to an unsigned representation.
6960
6961 // This transform can be done speculatively because it is so cheap - it
6962 // results in a single rotate operation being inserted.
6963
6964 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6965 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6966 // less than 64.
6967 unsigned Shift = 64;
6968 for (auto &V : Values)
6969 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6970 assert(Shift < 64);
6971 if (Shift > 0)
6972 for (auto &V : Values)
6973 V = (int64_t)((uint64_t)V >> Shift);
6974
6975 if (!isSwitchDense(Values))
6976 // Transform didn't create a dense switch.
6977 return false;
6978
6979 // The obvious transform is to shift the switch condition right and emit a
6980 // check that the condition actually cleanly divided by GCD, i.e.
6981 // C & (1 << Shift - 1) == 0
6982 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6983 //
6984 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6985 // shift and puts the shifted-off bits in the uppermost bits. If any of these
6986 // are nonzero then the switch condition will be very large and will hit the
6987 // default case.
6988
6989 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6990 Builder.SetInsertPoint(SI);
6991 Value *Sub =
6992 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
 // ROTR(Sub, Shift) is expressed as fshl(Sub, Sub, BitWidth - Shift): a
 // funnel shift with identical operands is a rotate.
6993 Value *Rot = Builder.CreateIntrinsic(
6994 Ty, Intrinsic::fshl,
6995 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
6996 SI->replaceUsesOfWith(SI->getCondition(), Rot);
6997
 // Rewrite each case value to match the rebased, rotated condition.
6998 for (auto Case : SI->cases()) {
6999 auto *Orig = Case.getCaseValue();
7000 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
7001 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7002 }
7003 return true;
7004}
7005
7006/// Tries to transform switch of powers of two to reduce switch range.
7007/// For example, switch like:
7008/// switch (C) { case 1: case 2: case 64: case 128: }
7009/// will be transformed to:
7010/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7011///
7012/// This transformation allows better lowering and could allow transforming into
7013/// a lookup table.
// NOTE(review): the first line of this function's signature (original line
// 7014, declaring `simplifySwitchOfPowersOfTwo(SwitchInst *SI, ...)`) is
// missing from this extraction — verify against upstream.
7015 const DataLayout &DL,
7016 const TargetTransformInfo &TTI) {
7017 Value *Condition = SI->getCondition();
7018 LLVMContext &Context = SI->getContext();
7019 auto *CondTy = cast<IntegerType>(Condition->getType());
7020
 // Bail out for conditions wider than 64 bits or not legal for the target.
7021 if (CondTy->getIntegerBitWidth() > 64 ||
7022 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7023 return false;
7024
 // Cost of the cttz intrinsic we would insert on the condition.
7025 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7026 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7027 {Condition, ConstantInt::getTrue(Context)}),
 // NOTE(review): the cost-kind argument line (original line 7028) is
 // missing from this extraction; only its closing fragment remains below.
7029
7030 if (CttzIntrinsicCost > TTI::TCC_Basic)
7031 // Inserting intrinsic is too expensive.
7032 return false;
7033
7034 // Only bother with this optimization if there are more than 3 switch cases.
7035 // SDAG will only bother creating jump tables for 4 or more cases.
7036 if (SI->getNumCases() < 4)
7037 return false;
7038
7039 // We perform this optimization only for switches with
7040 // unreachable default case.
7041 // This assumption will save us from checking if `Condition` is a power of two.
7042 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7043 return false;
7044
7045 // Check that switch cases are powers of two.
 // NOTE(review): the declaration of the `Values` container (original line
 // 7046) is missing from this extraction — verify against upstream.
7047 for (const auto &Case : SI->cases()) {
7048 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7049 if (llvm::has_single_bit(CaseValue))
7050 Values.push_back(CaseValue);
7051 else
7052 return false;
7053 }
7054
7055 // isSwitchDense requires case values to be sorted.
7056 llvm::sort(Values);
7057 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7058 llvm::countr_zero(Values.front()) + 1))
7059 // Transform is unable to generate dense switch.
7060 return false;
7061
7062 Builder.SetInsertPoint(SI);
7063
7064 // Replace each case with its trailing zeros number.
7065 for (auto &Case : SI->cases()) {
7066 auto *OrigValue = Case.getCaseValue();
7067 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7068 OrigValue->getValue().countr_zero()));
7069 }
7070
7071 // Replace condition with its trailing zeros number.
7072 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7073 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7074
7075 SI->setCondition(ConditionTrailingZeros);
7076
7077 return true;
7078}
7079
7080bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7081 BasicBlock *BB = SI->getParent();
7082
7083 if (isValueEqualityComparison(SI)) {
7084 // If we only have one predecessor, and if it is a branch on this value,
7085 // see if that predecessor totally determines the outcome of this switch.
7086 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7087 if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7088 return requestResimplify();
7089
7090 Value *Cond = SI->getCondition();
7091 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7092 if (SimplifySwitchOnSelect(SI, Select))
7093 return requestResimplify();
7094
7095 // If the block only contains the switch, see if we can fold the block
7096 // away into any preds.
7097 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7098 if (FoldValueComparisonIntoPredecessors(SI, Builder))
7099 return requestResimplify();
7100 }
7101
7102 // Try to transform the switch into an icmp and a branch.
7103 // The conversion from switch to comparison may lose information on
7104 // impossible switch values, so disable it early in the pipeline.
7105 if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
7106 return requestResimplify();
7107
7108 // Remove unreachable cases.
7109 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7110 return requestResimplify();
7111
7112 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7113 return requestResimplify();
7114
7115 if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
7116 return requestResimplify();
7117
7118 // The conversion from switch to lookup tables results in difficult-to-analyze
7119 // code and makes pruning branches much harder. This is a problem if the
7120 // switch expression itself can still be restricted as a result of inlining or
7121 // CVP. Therefore, only apply this transformation during late stages of the
7122 // optimisation pipeline.
7123 if (Options.ConvertSwitchToLookupTable &&
7124 SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
7125 return requestResimplify();
7126
7127 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7128 return requestResimplify();
7129
7130 if (ReduceSwitchRange(SI, Builder, DL, TTI))
7131 return requestResimplify();
7132
7133 if (HoistCommon &&
7134 hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
7135 return requestResimplify();
7136
7137 return false;
7138}
7139
// Simplify an indirectbr: drop destinations that are duplicates or whose
// address is no longer taken, then degrade the terminator when 0 or 1
// destinations remain, and finally try simplifying an indirectbr on a select.
7140bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7141 BasicBlock *BB = IBI->getParent();
7142 bool Changed = false;
7143
7144 // Eliminate redundant destinations.
 // NOTE(review): the declarations of the `Succs` and `RemovedSuccs` sets
 // (original lines 7145-7146) are missing from this extraction — verify
 // against upstream.
7147 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7148 BasicBlock *Dest = IBI->getDestination(i);
7149 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7150 if (!Dest->hasAddressTaken())
7151 RemovedSuccs.insert(Dest);
7152 Dest->removePredecessor(BB);
7153 IBI->removeDestination(i);
 // Deleting shifts the remaining destinations down; re-test this index.
7154 --i;
7155 --e;
7156 Changed = true;
7157 }
7158 }
7159
 // Record the deleted CFG edges for the dominator tree updater.
7160 if (DTU) {
7161 std::vector<DominatorTree::UpdateType> Updates;
7162 Updates.reserve(RemovedSuccs.size());
7163 for (auto *RemovedSucc : RemovedSuccs)
7164 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7165 DTU->applyUpdates(Updates);
7166 }
7167
7168 if (IBI->getNumDestinations() == 0) {
7169 // If the indirectbr has no successors, change it to unreachable.
7170 new UnreachableInst(IBI->getContext(), IBI->getIterator());
 // NOTE(review): a statement (original line 7171, presumably erasing the
 // now-replaced indirectbr) is missing from this extraction.
7172 return true;
7173 }
7174
7175 if (IBI->getNumDestinations() == 1) {
7176 // If the indirectbr has one successor, change it to a direct branch.
 // NOTE(review): the statements performing that rewrite (original lines
 // 7177-7178) are missing from this extraction — verify against upstream.
7179 return true;
7180 }
7181
 // An indirectbr on a select can be turned into a conditional branch.
7182 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7183 if (SimplifyIndirectBrOnSelect(IBI, SI))
7184 return requestResimplify();
7185 }
7186 return Changed;
7187}
7188
7189/// Given an block with only a single landing pad and a unconditional branch
7190/// try to find another basic block which this one can be merged with. This
7191/// handles cases where we have multiple invokes with unique landing pads, but
7192/// a shared handler.
7193///
7194/// We specifically choose to not worry about merging non-empty blocks
7195/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7196/// practice, the optimizer produces empty landing pad blocks quite frequently
7197/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7198/// sinking in this file)
7199///
7200/// This is primarily a code size optimization. We need to avoid performing
7201/// any transform which might inhibit optimization (such as our ability to
7202/// specialize a particular handler via tail commoning). We do this by not
7203/// merging any blocks which require us to introduce a phi. Since the same
7204/// values are flowing through both blocks, we don't lose any ability to
7205/// specialize. If anything, we make such specialization more likely.
7206///
7207/// TODO - This transformation could remove entries from a phi in the target
7208/// block when the inputs in the phi are the same for the two blocks being
7209/// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): the first line of this function's signature (original line
// 7210, `TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, ...)`)
// is missing from this extraction — verify against upstream.
7211 BasicBlock *BB, DomTreeUpdater *DTU) {
7212 auto Succ = BB->getUniqueSuccessor();
7213 assert(Succ);
7214 // If there's a phi in the successor block, we'd likely have to introduce
7215 // a phi into the merged landing pad block.
7216 if (isa<PHINode>(*Succ->begin()))
7217 return false;
7218
 // Look among the successor's other predecessors for a block identical to BB:
 // same landing pad followed (modulo debug intrinsics) by the same branch.
7219 for (BasicBlock *OtherPred : predecessors(Succ)) {
7220 if (BB == OtherPred)
7221 continue;
7222 BasicBlock::iterator I = OtherPred->begin();
7223 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7224 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7225 continue;
7226 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7227 ;
7228 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7229 if (!BI2 || !BI2->isIdenticalTo(BI))
7230 continue;
7231
7232 std::vector<DominatorTree::UpdateType> Updates;
7233
7234 // We've found an identical block. Update our predecessors to take that
7235 // path instead and make ourselves dead.
 // NOTE(review): the declaration of `UniquePreds` (original line 7236) is
 // missing from this extraction — verify against upstream.
7237 for (BasicBlock *Pred : UniquePreds) {
7238 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7239 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7240 "unexpected successor");
7241 II->setUnwindDest(OtherPred);
7242 if (DTU) {
7243 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7244 Updates.push_back({DominatorTree::Delete, Pred, BB});
7245 }
7246 }
7247
7248 // The debug info in OtherPred doesn't cover the merged control flow that
7249 // used to go through BB. We need to delete it or update it.
7250 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7251 if (isa<DbgInfoIntrinsic>(Inst))
7252 Inst.eraseFromParent();
7253
 // NOTE(review): the declaration of `UniqueSuccs` (original line 7254) is
 // missing from this extraction — verify against upstream.
7255 for (BasicBlock *Succ : UniqueSuccs) {
7256 Succ->removePredecessor(BB);
7257 if (DTU)
7258 Updates.push_back({DominatorTree::Delete, BB, Succ});
7259 }
7260
 // BB is now dead: replace its branch with unreachable.
7261 IRBuilder<> Builder(BI);
7262 Builder.CreateUnreachable();
7263 BI->eraseFromParent();
7264 if (DTU)
7265 DTU->applyUpdates(Updates);
7266 return true;
7267 }
7268 return false;
7269}
7270
7271bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7272 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7273 : simplifyCondBranch(Branch, Builder);
7274}
7275
// Simplify a block ending in an unconditional branch: fold away empty blocks,
// blocks containing only an equality icmp, and empty landing-pad blocks, then
// try folding the branch into a common-destination predecessor.
7276bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7277 IRBuilder<> &Builder) {
7278 BasicBlock *BB = BI->getParent();
7279 BasicBlock *Succ = BI->getSuccessor(0);
7280
7281 // If the Terminator is the only non-phi instruction, simplify the block.
7282 // If LoopHeader is provided, check if the block or its successor is a loop
7283 // header. (This is for early invocations before loop simplify and
7284 // vectorization to keep canonical loop forms for nested loops. These blocks
7285 // can be eliminated when the pass is invoked later in the back-end.)
7286 // Note that if BB has only one predecessor then we do not introduce new
7287 // backedge, so we can eliminate BB.
7288 bool NeedCanonicalLoop =
7289 Options.NeedCanonicalLoop &&
7290 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7291 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
 // NOTE(review): the declaration of the iterator `I` (original line 7292,
 // presumably the first non-PHI/debug instruction of BB) is missing from
 // this extraction — verify against upstream.
7293 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7294 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7295 return true;
7296
7297 // If the only instruction in the block is a seteq/setne comparison against a
7298 // constant, try to simplify the block.
7299 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7300 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
 // Skip debug intrinsics between the icmp and the terminator.
7301 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7302 ;
7303 if (I->isTerminator() &&
7304 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7305 return true;
7306 }
7307
7308 // See if we can merge an empty landing pad block with another which is
7309 // equivalent.
7310 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7311 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7312 ;
7313 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7314 return true;
7315 }
7316
7317 // If this basic block is ONLY a compare and a branch, and if a predecessor
7318 // branches to us and our successor, fold the comparison into the
7319 // predecessor and use logical operations to update the incoming value
7320 // for PHI nodes in common successor.
7321 if (Options.SpeculateBlocks &&
7322 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7323 Options.BonusInstThreshold))
7324 return requestResimplify();
7325 return false;
7326}
7327
// NOTE(review): the signature line of this helper (original line 7328) is
// missing from this extraction — verify against upstream. From the visible
// body: it returns the unique block that is the single predecessor of every
// predecessor of BB, or nullptr if the predecessors do not all share one.
7329 BasicBlock *PredPred = nullptr;
7330 for (auto *P : predecessors(BB)) {
7331 BasicBlock *PPred = P->getSinglePredecessor();
 // Fail if a predecessor has multiple predecessors, or if two predecessors
 // disagree about their (single) predecessor.
7332 if (!PPred || (PredPred && PredPred != PPred))
7333 return nullptr;
7334 PredPred = PPred;
7335 }
7336 return PredPred;
7337}
7338
// Simplify a block ending in a conditional branch: value-equality folding,
// icmp-chain-to-switch, dominating-condition implication, folding into a
// predecessor, hoisting/speculating successor code, and condbr-to-condbr and
// conditional-store merging.
7339bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7340 assert(
7341 !isa<ConstantInt>(BI->getCondition()) &&
7342 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7343 "Tautological conditional branch should have been eliminated already.");
7344
7345 BasicBlock *BB = BI->getParent();
7346 if (!Options.SimplifyCondBranch ||
7347 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7348 return false;
7349
7350 // Conditional branch
7351 if (isValueEqualityComparison(BI)) {
7352 // If we only have one predecessor, and if it is a branch on this value,
7353 // see if that predecessor totally determines the outcome of this
7354 // switch.
7355 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7356 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7357 return requestResimplify();
7358
7359 // This block must be empty, except for the setcond inst, if it exists.
7360 // Ignore dbg and pseudo intrinsics.
7361 auto I = BB->instructionsWithoutDebug(true).begin();
7362 if (&*I == BI) {
7363 if (FoldValueComparisonIntoPredecessors(BI, Builder))
7364 return requestResimplify();
7365 } else if (&*I == cast<Instruction>(BI->getCondition())) {
7366 ++I;
7367 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7368 return requestResimplify();
7369 }
7370 }
7371
7372 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7373 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7374 return true;
7375
7376 // If this basic block has dominating predecessor blocks and the dominating
7377 // blocks' conditions imply BI's condition, we know the direction of BI.
7378 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7379 if (Imp) {
7380 // Turn this into a branch on constant.
7381 auto *OldCond = BI->getCondition();
7382 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7383 : ConstantInt::getFalse(BB->getContext());
7384 BI->setCondition(TorF);
 // NOTE(review): a statement (original line 7385, presumably cleaning up
 // the now-dead `OldCond`) is missing from this extraction — verify
 // against upstream.
7386 return requestResimplify();
7387 }
7388
7389 // If this basic block is ONLY a compare and a branch, and if a predecessor
7390 // branches to us and one of our successors, fold the comparison into the
7391 // predecessor and use logical operations to pick the right destination.
7392 if (Options.SpeculateBlocks &&
7393 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7394 Options.BonusInstThreshold))
7395 return requestResimplify();
7396
7397 // We have a conditional branch to two blocks that are only reachable
7398 // from BI. We know that the condbr dominates the two blocks, so see if
7399 // there is any identical code in the "then" and "else" blocks. If so, we
7400 // can hoist it up to the branching block.
7401 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7402 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7403 if (HoistCommon && hoistCommonCodeFromSuccessors(
7404 BI->getParent(), !Options.HoistCommonInsts))
7405 return requestResimplify();
7406 } else {
7407 // If Successor #1 has multiple preds, we may be able to conditionally
7408 // execute Successor #0 if it branches to Successor #1.
7409 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7410 if (Succ0TI->getNumSuccessors() == 1 &&
7411 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7412 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7413 return requestResimplify();
7414 }
7415 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7416 // If Successor #0 has multiple preds, we may be able to conditionally
7417 // execute Successor #1 if it branches to Successor #0.
7418 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7419 if (Succ1TI->getNumSuccessors() == 1 &&
7420 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7421 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7422 return requestResimplify();
7423 }
7424
7425 // If this is a branch on something for which we know the constant value in
7426 // predecessors (e.g. a phi node in the current block), thread control
7427 // through this block.
 // NOTE(review): the guarding `if (...)` line (original line 7428) is missing
 // from this extraction — the bare `return` below is its then-branch, not an
 // unconditional return. Verify against upstream.
7429 return requestResimplify();
7430
7431 // Scan predecessor blocks for conditional branches.
7432 for (BasicBlock *Pred : predecessors(BB))
7433 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7434 if (PBI != BI && PBI->isConditional())
7435 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7436 return requestResimplify();
7437
7438 // Look for diamond patterns.
7439 if (MergeCondStores)
 // NOTE(review): the line declaring `PrevBB` (original line 7440) is
 // missing from this extraction — verify against upstream.
7441 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7442 if (PBI != BI && PBI->isConditional())
7443 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7444 return requestResimplify();
7445
7446 return false;
7447}
7448
7449/// Check if passing a value to an instruction will cause undefined behavior.
// Walks the first use of I (looking through GEPs, bitcasts and returns) to
// decide whether a null/undef incoming value makes execution of that use UB.
// PtrValueMayBeModified tracks whether the pointer could have been changed
// from null along the way, which weakens the nonnull-based conclusions.
7450static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7451 Constant *C = dyn_cast<Constant>(V);
7452 if (!C)
7453 return false;
7454
7455 if (I->use_empty())
7456 return false;
7457
7458 if (C->isNullValue() || isa<UndefValue>(C)) {
7459 // Only look at the first use, avoid hurting compile time with long uselists
7460 auto *Use = cast<Instruction>(*I->user_begin());
7461 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7462 // before I in the block. The latter two can be the case if Use is a PHI
7463 // node.
7464 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7465 return false;
7466
7467 // Now make sure that there are no instructions in between that can alter
7468 // control flow (eg. calls)
7469 auto InstrRange =
7470 make_range(std::next(I->getIterator()), Use->getIterator());
7471 if (any_of(InstrRange, [](Instruction &I) {
 // NOTE(review): the lambda body (original line 7472, presumably the
 // guaranteed-to-transfer-execution check) is missing from this
 // extraction — verify against upstream.
7473 }))
7474 return false;
7475
7476 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7477 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7478 if (GEP->getPointerOperand() == I) {
7479 // The current base address is null, there are four cases to consider:
7480 // getelementptr (TY, null, 0) -> null
7481 // getelementptr (TY, null, not zero) -> may be modified
7482 // getelementptr inbounds (TY, null, 0) -> null
7483 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7484 // undefined?
7485 if (!GEP->hasAllZeroIndices() &&
7486 (!GEP->isInBounds() ||
7487 NullPointerIsDefined(GEP->getFunction(),
7488 GEP->getPointerAddressSpace())))
7489 PtrValueMayBeModified = true;
7490 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7491 }
7492
7493 // Look through return.
7494 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7495 bool HasNoUndefAttr =
7496 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7497 // Return undefined to a noundef return value is undefined.
7498 if (isa<UndefValue>(C) && HasNoUndefAttr)
7499 return true;
7500 // Return null to a nonnull+noundef return value is undefined.
7501 if (C->isNullValue() && HasNoUndefAttr &&
7502 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7503 return !PtrValueMayBeModified;
7504 }
7505 }
7506
7507 // Look through bitcasts.
7508 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7509 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7510
7511 // Load from null is undefined.
7512 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7513 if (!LI->isVolatile())
7514 return !NullPointerIsDefined(LI->getFunction(),
7515 LI->getPointerAddressSpace());
7516
7517 // Store to null is undefined.
7518 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7519 if (!SI->isVolatile())
7520 return (!NullPointerIsDefined(SI->getFunction(),
7521 SI->getPointerAddressSpace())) &&
7522 SI->getPointerOperand() == I;
7523
7524 // llvm.assume(false/undef) always triggers immediate UB.
7525 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
7526 // Ignore assume operand bundles.
7527 if (I == Assume->getArgOperand(0))
7528 return true;
7529 }
7530
7531 if (auto *CB = dyn_cast<CallBase>(Use)) {
7532 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7533 return false;
7534 // A call to null is undefined.
7535 if (CB->getCalledOperand() == I)
7536 return true;
7537
7538 if (C->isNullValue()) {
7539 for (const llvm::Use &Arg : CB->args())
7540 if (Arg == I) {
7541 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7542 if (CB->isPassingUndefUB(ArgIdx) &&
7543 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7544 // Passing null to a nonnnull+noundef argument is undefined.
7545 return !PtrValueMayBeModified;
7546 }
7547 }
7548 } else if (isa<UndefValue>(C)) {
7549 // Passing undef to a noundef argument is undefined.
7550 for (const llvm::Use &Arg : CB->args())
7551 if (Arg == I) {
7552 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7553 if (CB->isPassingUndefUB(ArgIdx)) {
7554 // Passing undef to a noundef argument is undefined.
7555 return true;
7556 }
7557 }
7558 }
7559 }
7560 }
7561 return false;
7562}
7563
7564/// If BB has an incoming value that will always trigger undefined behavior
7565/// (eg. null pointer dereference), remove the branch leading here.
// NOTE(review): the first line of this function's signature (original line
// 7566, declaring the BasicBlock parameter) is missing from this extraction
// — verify against upstream.
7567 DomTreeUpdater *DTU,
7568 AssumptionCache *AC) {
7569 for (PHINode &PHI : BB->phis())
7570 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7571 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7572 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7573 Instruction *T = Predecessor->getTerminator();
7574 IRBuilder<> Builder(T);
7575 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7576 BB->removePredecessor(Predecessor);
7577 // Turn unconditional branches into unreachables and remove the dead
7578 // destination from conditional branches.
7579 if (BI->isUnconditional())
7580 Builder.CreateUnreachable();
7581 else {
7582 // Preserve guarding condition in assume, because it might not be
7583 // inferrable from any dominating condition.
7584 Value *Cond = BI->getCondition();
7585 CallInst *Assumption;
 // The surviving edge is the one NOT leading to BB, so assume the
 // condition value that takes that edge.
7586 if (BI->getSuccessor(0) == BB)
7587 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7588 else
7589 Assumption = Builder.CreateAssumption(Cond);
7590 if (AC)
7591 AC->registerAssumption(cast<AssumeInst>(Assumption));
7592 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7593 : BI->getSuccessor(0));
7594 }
7595 BI->eraseFromParent();
7596 if (DTU)
7597 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7598 return true;
7599 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7600 // Redirect all branches leading to UB into
7601 // a newly created unreachable block.
7602 BasicBlock *Unreachable = BasicBlock::Create(
7603 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7604 Builder.SetInsertPoint(Unreachable);
7605 // The new block contains only one instruction: Unreachable
7606 Builder.CreateUnreachable();
7607 for (const auto &Case : SI->cases())
7608 if (Case.getCaseSuccessor() == BB) {
7609 BB->removePredecessor(Predecessor);
7610 Case.setSuccessor(Unreachable);
7611 }
7612 if (SI->getDefaultDest() == BB) {
7613 BB->removePredecessor(Predecessor);
7614 SI->setDefaultDest(Unreachable);
7615 }
7616
7617 if (DTU)
7618 DTU->applyUpdates(
7619 { { DominatorTree::Insert, Predecessor, Unreachable },
7620 { DominatorTree::Delete, Predecessor, BB } });
7621 return true;
7622 }
7623 }
7624
7625 return false;
7626}
7627
// Perform one round of CFG simplification on BB: block-level cleanups first
// (dead block removal, terminator constant folding, duplicate PHIs, merging,
// sinking), then dispatch on the terminator kind.
7628bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7629 bool Changed = false;
7630
7631 assert(BB && BB->getParent() && "Block not embedded in function!");
7632 assert(BB->getTerminator() && "Degenerate basic block encountered!");
7633
7634 // Remove basic blocks that have no predecessors (except the entry block)...
7635 // or that just have themself as a predecessor. These are unreachable.
7636 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7637 BB->getSinglePredecessor() == BB) {
7638 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7639 DeleteDeadBlock(BB, DTU);
7640 return true;
7641 }
7642
7643 // Check to see if we can constant propagate this terminator instruction
7644 // away...
7645 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7646 /*TLI=*/nullptr, DTU);
7647
7648 // Check for and eliminate duplicate PHI nodes in this block.
7649 Changed |= EliminateDuplicatePHINodes(BB);
7650
7651 // Check for and remove branches that will always cause undefined behavior.
 // NOTE(review): the guarding `if (...)` line (original line 7652) is missing
 // from this extraction — the bare `return` below is its then-branch, not an
 // unconditional return. Verify against upstream.
7653 return requestResimplify();
7654
7655 // Merge basic blocks into their predecessor if there is only one distinct
7656 // pred, and if there is only one distinct successor of the predecessor, and
7657 // if there are no PHI nodes.
7658 if (MergeBlockIntoPredecessor(BB, DTU))
7659 return true;
7660
7661 if (SinkCommon && Options.SinkCommonInsts)
7662 if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7663 MergeCompatibleInvokes(BB, DTU)) {
7664 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7665 // so we may now have duplicate PHI's.
7666 // Let's rerun EliminateDuplicatePHINodes() first,
7667 // before FoldTwoEntryPHINode() potentially converts them into select's,
7668 // after which we'd need a whole EarlyCSE pass run to cleanup them.
7669 return true;
7670 }
7671
7672 IRBuilder<> Builder(BB);
7673
7674 if (Options.SpeculateBlocks &&
7675 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7676 // If there is a trivial two-entry PHI node in this basic block, and we can
7677 // eliminate it, do so now.
7678 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7679 if (PN->getNumIncomingValues() == 2)
7680 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7681 return true;
7682 }
7683
 // NOTE(review): the line declaring `Terminator` (original line 7684,
 // presumably BB->getTerminator()) is missing from this extraction — verify
 // against upstream.
7685 Builder.SetInsertPoint(Terminator);
 // Dispatch terminator-specific simplifications.
7686 switch (Terminator->getOpcode()) {
7687 case Instruction::Br:
7688 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7689 break;
7690 case Instruction::Resume:
7691 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7692 break;
7693 case Instruction::CleanupRet:
7694 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7695 break;
7696 case Instruction::Switch:
7697 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7698 break;
7699 case Instruction::Unreachable:
7700 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7701 break;
7702 case Instruction::IndirectBr:
7703 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7704 break;
7705 }
7706
7707 return Changed;
7708}
7709
7710bool SimplifyCFGOpt::run(BasicBlock *BB) {
7711 bool Changed = false;
7712
7713 // Repeated simplify BB as long as resimplification is requested.
7714 do {
7715 Resimplify = false;
7716
7717 // Perform one round of simplifcation. Resimplify flag will be set if
7718 // another iteration is requested.
7719 Changed |= simplifyOnce(BB);
7720 } while (Resimplify);
7721
7722 return Changed;
7723}
7724
// NOTE(review): the opening signature lines of this definition (original
// lines 7725-7726, presumably the public `llvm::simplifyCFG` entry point)
// are missing from this extraction — verify against upstream.
7727 ArrayRef<WeakVH> LoopHeaders) {
 // Build a SimplifyCFGOpt over this block's module data layout and run the
 // iterate-to-fixpoint driver on BB.
7728 return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7729 Options)
7730 .run(BB);
7731}
#define Fail
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static Constant * LookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static void GetBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static void EliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static std::optional< bool > FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static PHINode * FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool IncomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool ForwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static void FitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static unsigned skippedInstrFlags(Instruction *I)
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static bool ValuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< Instruction *, SmallVector< Value *, 4 > > &PHIOperands)
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static void MergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool ShouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool CasesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:335
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:640
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
const Instruction & front() const
Definition: BasicBlock.h:453
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:474
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:490
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:324
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:712
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:672
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:478
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:613
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:509
This class represents a no-op cast from one type to another.
The address of a basic block.
Definition: Constants.h:889
BasicBlock * getBasicBlock() const
Definition: Constants.h:918
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1809
bool cannotMerge() const
Determine if the call cannot be tail merged.
Definition: InstrTypes.h:2285
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2402
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isNegative() const
Definition: Constants.h:200
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:255
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:184
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
bool hasPostDomTree() const
Returns true if it holds a PostDominatorTree.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
iterator begin()
Definition: Function.h:803
size_t size() const
Definition: Function.h:808
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2257
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1263
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1143
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:551
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1803
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1114
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:84
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1706
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:932
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
BasicBlock * getUnwindDest() const
void setNormalDest(BasicBlock *B)
void setUnwindDest(BasicBlock *B)
BasicBlock * getNormalDest() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:301
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Align getAlign() const
Definition: Instructions.h:369
bool isSimple() const
Definition: Instructions.h:406
Value * getValueOperand()
Definition: Instructions.h:414
bool isUnordered() const
Definition: Instructions.h:408
Value * getPointerOperand()
Definition: Instructions.h:417
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
bool user_empty() const
Definition: Value.h:385
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:501
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:848
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1895
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void RemapDbgVariableRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:285
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:693
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1120
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2048
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapDbgVariableRecord(Module *M, DbgVariableRecord *V, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:273
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1422
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3160
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:264
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3341
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3607
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4109
auto max_element(R &&Range)
Definition: STLExtras.h:1986
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C's type.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1487
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weights" metadata nodes.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Incoming for lane masks phi as machine instruction, incoming register Reg and incoming block Block are taken from machine instruction.
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254