//===-- Non-source doxygen navigation residue (kept as a comment) ---------===//
// LLVM 20.0.0git — SimplifyCFG.cpp
// (This copy was extracted from generated documentation; some lines of the
//  original source are missing below.)
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/NoFolder.h"
58#include "llvm/IR/Operator.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/User.h"
64#include "llvm/IR/Value.h"
65#include "llvm/IR/ValueHandle.h"
69#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <climits>
80#include <cstddef>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <set>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "simplifycfg"
94
96 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
97
98 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
99 "into preserving DomTree,"));
100
101// Chosen as 2 so as to be cheap, but still to have enough power to fold
102// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
103// To catch this, we need to fold a compare and a select, hence '2' being the
104// minimum reasonable default.
106 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
107 cl::desc(
108 "Control the amount of phi node folding to perform (default = 2)"));
109
111 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
112 cl::desc("Control the maximal total instruction cost that we are willing "
113 "to speculatively execute to fold a 2-entry PHI node into a "
114 "select (default = 4)"));
115
116static cl::opt<bool>
117 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
118 cl::desc("Hoist common instructions up to the parent block"));
119
121 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
122 cl::init(20),
123 cl::desc("Allow reordering across at most this many "
124 "instructions when hoisting"));
125
126static cl::opt<bool>
127 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
128 cl::desc("Sink common instructions down to the end block"));
129
131 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
132 cl::desc("Hoist conditional stores if an unconditional store precedes"));
133
135 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
136 cl::desc("Hoist conditional stores even if an unconditional store does not "
137 "precede - hoist multiple conditional stores into a single "
138 "predicated store"));
139
141 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
142 cl::desc("When merging conditional stores, do so even if the resultant "
143 "basic blocks are unlikely to be if-converted as a result"));
144
146 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
147 cl::desc("Allow exactly one expensive instruction to be speculatively "
148 "executed"));
149
151 "max-speculation-depth", cl::Hidden, cl::init(10),
152 cl::desc("Limit maximum recursion depth when calculating costs of "
153 "speculatively executed instructions"));
154
155static cl::opt<int>
156 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
157 cl::init(10),
158 cl::desc("Max size of a block which is still considered "
159 "small enough to thread through"));
160
161// Two is chosen to allow one negation and a logical combine.
163 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
164 cl::init(2),
165 cl::desc("Maximum cost of combining conditions when "
166 "folding branches"));
167
169 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
170 cl::init(2),
171 cl::desc("Multiplier to apply to threshold when determining whether or not "
172 "to fold branch to common destination when vector operations are "
173 "present"));
174
176 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
177 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
178
180 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
181 cl::desc("Limit cases to analyze when converting a switch to select"));
182
183STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
184STATISTIC(NumLinearMaps,
185 "Number of switch instructions turned into linear mapping");
186STATISTIC(NumLookupTables,
187 "Number of switch instructions turned into lookup tables");
189 NumLookupTablesHoles,
190 "Number of switch instructions turned into lookup tables (holes checked)");
191STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
192STATISTIC(NumFoldValueComparisonIntoPredecessors,
193 "Number of value comparisons folded into predecessor basic blocks");
194STATISTIC(NumFoldBranchToCommonDest,
195 "Number of branches folded into predecessor basic block");
197 NumHoistCommonCode,
198 "Number of common instruction 'blocks' hoisted up to the begin block");
199STATISTIC(NumHoistCommonInstrs,
200 "Number of common instructions hoisted up to the begin block");
201STATISTIC(NumSinkCommonCode,
202 "Number of common instruction 'blocks' sunk down to the end block");
203STATISTIC(NumSinkCommonInstrs,
204 "Number of common instructions sunk down to the end block");
205STATISTIC(NumSpeculations, "Number of speculative executed instructions");
206STATISTIC(NumInvokes,
207 "Number of invokes with empty resume blocks simplified into calls");
208STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
209STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
210
211namespace {
212
213// The first field contains the value that the switch produces when a certain
214// case group is selected, and the second field is a vector containing the
215// cases composing the case group.
216using SwitchCaseResultVectorTy =
218
219// The first field contains the phi node that generates a result of the switch
220// and the second field contains the value generated for a certain case in the
221// switch for that PHI.
222using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
223
224/// ValueEqualityComparisonCase - Represents a case of a switch.
225struct ValueEqualityComparisonCase {
227 BasicBlock *Dest;
228
229 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
230 : Value(Value), Dest(Dest) {}
231
232 bool operator<(ValueEqualityComparisonCase RHS) const {
233 // Comparing pointers is ok as we only rely on the order for uniquing.
234 return Value < RHS.Value;
235 }
236
237 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
238};
239
240class SimplifyCFGOpt {
242 DomTreeUpdater *DTU;
243 const DataLayout &DL;
244 ArrayRef<WeakVH> LoopHeaders;
246 bool Resimplify;
247
248 Value *isValueEqualityComparison(Instruction *TI);
249 BasicBlock *GetValueEqualityComparisonCases(
250 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
251 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
252 BasicBlock *Pred,
253 IRBuilder<> &Builder);
254 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
255 Instruction *PTI,
256 IRBuilder<> &Builder);
257 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
258 IRBuilder<> &Builder);
259
260 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
261 bool simplifySingleResume(ResumeInst *RI);
262 bool simplifyCommonResume(ResumeInst *RI);
263 bool simplifyCleanupReturn(CleanupReturnInst *RI);
264 bool simplifyUnreachable(UnreachableInst *UI);
265 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
266 bool simplifyIndirectBr(IndirectBrInst *IBI);
267 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
268 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
269 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
270
271 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
272 IRBuilder<> &Builder);
273
274 bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
275 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
276 Instruction *TI, Instruction *I1,
277 SmallVectorImpl<Instruction *> &OtherSuccTIs);
278 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
279 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
280 BasicBlock *TrueBB, BasicBlock *FalseBB,
281 uint32_t TrueWeight, uint32_t FalseWeight);
282 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
283 const DataLayout &DL);
284 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
285 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
286 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
287
288public:
289 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
290 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
291 const SimplifyCFGOptions &Opts)
292 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
293 assert((!DTU || !DTU->hasPostDomTree()) &&
294 "SimplifyCFG is not yet capable of maintaining validity of a "
295 "PostDomTree, so don't ask for it.");
296 }
297
298 bool simplifyOnce(BasicBlock *BB);
299 bool run(BasicBlock *BB);
300
301 // Helper to set Resimplify and return change indication.
302 bool requestResimplify() {
303 Resimplify = true;
304 return true;
305 }
306};
307
308} // end anonymous namespace
309
310/// Return true if all the PHI nodes in the basic block \p BB
311/// receive compatible (identical) incoming values when coming from
312/// all of the predecessor blocks that are specified in \p IncomingBlocks.
313///
314/// Note that if the values aren't exactly identical, but \p EquivalenceSet
315/// is provided, and *both* of the values are present in the set,
316/// then they are considered equal.
318 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
319 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
320 assert(IncomingBlocks.size() == 2 &&
321 "Only for a pair of incoming blocks at the time!");
322
323 // FIXME: it is okay if one of the incoming values is an `undef` value,
324 // iff the other incoming value is guaranteed to be a non-poison value.
325 // FIXME: it is okay if one of the incoming values is a `poison` value.
326 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
327 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
328 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
329 if (IV0 == IV1)
330 return true;
331 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
332 EquivalenceSet->contains(IV1))
333 return true;
334 return false;
335 });
336}
337
338/// Return true if it is safe to merge these two
339/// terminator instructions together.
340static bool
342 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
343 if (SI1 == SI2)
344 return false; // Can't merge with self!
345
346 // It is not safe to merge these two switch instructions if they have a common
347 // successor, and if that successor has a PHI node, and if *that* PHI node has
348 // conflicting incoming values from the two switch blocks.
349 BasicBlock *SI1BB = SI1->getParent();
350 BasicBlock *SI2BB = SI2->getParent();
351
352 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
353 bool Fail = false;
354 for (BasicBlock *Succ : successors(SI2BB)) {
355 if (!SI1Succs.count(Succ))
356 continue;
357 if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
358 continue;
359 Fail = true;
360 if (FailBlocks)
361 FailBlocks->insert(Succ);
362 else
363 break;
364 }
365
366 return !Fail;
367}
368
369/// Update PHI nodes in Succ to indicate that there will now be entries in it
370/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
371/// will be the same as those coming in from ExistPred, an existing predecessor
372/// of Succ.
373static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
374 BasicBlock *ExistPred,
375 MemorySSAUpdater *MSSAU = nullptr) {
376 for (PHINode &PN : Succ->phis())
377 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
378 if (MSSAU)
379 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
380 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
381}
382
383/// Compute an abstract "cost" of speculating the given instruction,
384/// which is assumed to be safe to speculate. TCC_Free means cheap,
385/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
386/// expensive.
388 const TargetTransformInfo &TTI) {
389 assert((!isa<Instruction>(I) ||
390 isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
391 "Instruction is not safe to speculatively execute!");
393}
394
395/// If we have a merge point of an "if condition" as accepted above,
396/// return true if the specified value dominates the block. We
397/// don't handle the true generality of domination here, just a special case
398/// which works well enough for us.
399///
400/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
401/// see if V (which must be an instruction) and its recursive operands
402/// that do not dominate BB have a combined cost lower than Budget and
403/// are non-trapping. If both are true, the instruction is inserted into the
404/// set and true is returned.
405///
406/// The cost for most non-trapping instructions is defined as 1 except for
407/// Select whose cost is 2.
408///
409/// After this function returns, Cost is increased by the cost of
410/// V plus its non-dominating operands. If that cost is greater than
411/// Budget, false is returned and Cost is undefined.
413 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
415 InstructionCost Budget,
417 unsigned Depth = 0) {
418 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
419 // so limit the recursion depth.
420 // TODO: While this recursion limit does prevent pathological behavior, it
421 // would be better to track visited instructions to avoid cycles.
423 return false;
424
425 Instruction *I = dyn_cast<Instruction>(V);
426 if (!I) {
427 // Non-instructions dominate all instructions and can be executed
428 // unconditionally.
429 return true;
430 }
431 BasicBlock *PBB = I->getParent();
432
433 // We don't want to allow weird loops that might have the "if condition" in
434 // the bottom of this block.
435 if (PBB == BB)
436 return false;
437
438 // If this instruction is defined in a block that contains an unconditional
439 // branch to BB, then it must be in the 'conditional' part of the "if
440 // statement". If not, it definitely dominates the region.
441 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
442 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
443 return true;
444
445 // If we have seen this instruction before, don't count it again.
446 if (AggressiveInsts.count(I))
447 return true;
448
449 // Okay, it looks like the instruction IS in the "condition". Check to
450 // see if it's a cheap instruction to unconditionally compute, and if it
451 // only uses stuff defined outside of the condition. If so, hoist it out.
453 return false;
454
456
457 // Allow exactly one instruction to be speculated regardless of its cost
458 // (as long as it is safe to do so).
459 // This is intended to flatten the CFG even if the instruction is a division
460 // or other expensive operation. The speculation of an expensive instruction
461 // is expected to be undone in CodeGenPrepare if the speculation has not
462 // enabled further IR optimizations.
463 if (Cost > Budget &&
464 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
465 !Cost.isValid()))
466 return false;
467
468 // Okay, we can only really hoist these out if their operands do
469 // not take us over the cost threshold.
470 for (Use &Op : I->operands())
471 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
472 Depth + 1))
473 return false;
474 // Okay, it's safe to do this! Remember this instruction.
475 AggressiveInsts.insert(I);
476 return true;
477}
478
479/// Extract ConstantInt from value, looking through IntToPtr
480/// and PointerNullValue. Return NULL if value is not a constant int.
482 // Normal constant int.
483 ConstantInt *CI = dyn_cast<ConstantInt>(V);
484 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
485 DL.isNonIntegralPointerType(V->getType()))
486 return CI;
487
488 // This is some kind of pointer constant. Turn it into a pointer-sized
489 // ConstantInt if possible.
490 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
491
492 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
493 if (isa<ConstantPointerNull>(V))
494 return ConstantInt::get(PtrTy, 0);
495
496 // IntToPtr const int.
497 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
498 if (CE->getOpcode() == Instruction::IntToPtr)
499 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
500 // The constant is very likely to have the right type already.
501 if (CI->getType() == PtrTy)
502 return CI;
503 else
504 return cast<ConstantInt>(
505 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
506 }
507 return nullptr;
508}
509
510namespace {
511
512/// Given a chain of or (||) or and (&&) comparison of a value against a
513/// constant, this will try to recover the information required for a switch
514/// structure.
515/// It will depth-first traverse the chain of comparison, seeking for patterns
516/// like %a == 12 or %a < 4 and combine them to produce a set of integer
517/// representing the different cases for the switch.
518/// Note that if the chain is composed of '||' it will build the set of elements
519/// that matches the comparisons (i.e. any of this value validate the chain)
520/// while for a chain of '&&' it will build the set elements that make the test
521/// fail.
522struct ConstantComparesGatherer {
523 const DataLayout &DL;
524
525 /// Value found for the switch comparison
526 Value *CompValue = nullptr;
527
528 /// Extra clause to be checked before the switch
529 Value *Extra = nullptr;
530
531 /// Set of integers to match in switch
533
534 /// Number of comparisons matched in the and/or chain
535 unsigned UsedICmps = 0;
536
537 /// Construct and compute the result for the comparison instruction Cond
538 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
539 gather(Cond);
540 }
541
542 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
543 ConstantComparesGatherer &
544 operator=(const ConstantComparesGatherer &) = delete;
545
546private:
547 /// Try to set the current value used for the comparison, it succeeds only if
548 /// it wasn't set before or if the new value is the same as the old one
549 bool setValueOnce(Value *NewVal) {
550 if (CompValue && CompValue != NewVal)
551 return false;
552 CompValue = NewVal;
553 return (CompValue != nullptr);
554 }
555
556 /// Try to match Instruction "I" as a comparison against a constant and
557 /// populates the array Vals with the set of values that match (or do not
558 /// match depending on isEQ).
559 /// Return false on failure. On success, the Value the comparison matched
560 /// against is placed in CompValue.
561 /// If CompValue is already set, the function is expected to fail if a match
562 /// is found but the value compared to is different.
563 bool matchInstruction(Instruction *I, bool isEQ) {
564 // If this is an icmp against a constant, handle this as one of the cases.
565 ICmpInst *ICI;
566 ConstantInt *C;
567 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
568 (C = GetConstantInt(I->getOperand(1), DL)))) {
569 return false;
570 }
571
572 Value *RHSVal;
573 const APInt *RHSC;
574
575 // Pattern match a special case
576 // (x & ~2^z) == y --> x == y || x == y|2^z
577 // This undoes a transformation done by instcombine to fuse 2 compares.
578 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
579 // It's a little bit hard to see why the following transformations are
580 // correct. Here is a CVC3 program to verify them for 64-bit values:
581
582 /*
583 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
584 x : BITVECTOR(64);
585 y : BITVECTOR(64);
586 z : BITVECTOR(64);
587 mask : BITVECTOR(64) = BVSHL(ONE, z);
588 QUERY( (y & ~mask = y) =>
589 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
590 );
591 QUERY( (y | mask = y) =>
592 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
593 );
594 */
595
596 // Please note that each pattern must be a dual implication (<--> or
597 // iff). One directional implication can create spurious matches. If the
598 // implication is only one-way, an unsatisfiable condition on the left
599 // side can imply a satisfiable condition on the right side. Dual
600 // implication ensures that satisfiable conditions are transformed to
601 // other satisfiable conditions and unsatisfiable conditions are
602 // transformed to other unsatisfiable conditions.
603
604 // Here is a concrete example of a unsatisfiable condition on the left
605 // implying a satisfiable condition on the right:
606 //
607 // mask = (1 << z)
608 // (x & ~mask) == y --> (x == y || x == (y | mask))
609 //
610 // Substituting y = 3, z = 0 yields:
611 // (x & -2) == 3 --> (x == 3 || x == 2)
612
613 // Pattern match a special case:
614 /*
615 QUERY( (y & ~mask = y) =>
616 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
617 );
618 */
619 if (match(ICI->getOperand(0),
620 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
621 APInt Mask = ~*RHSC;
622 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
623 // If we already have a value for the switch, it has to match!
624 if (!setValueOnce(RHSVal))
625 return false;
626
627 Vals.push_back(C);
628 Vals.push_back(
629 ConstantInt::get(C->getContext(),
630 C->getValue() | Mask));
631 UsedICmps++;
632 return true;
633 }
634 }
635
636 // Pattern match a special case:
637 /*
638 QUERY( (y | mask = y) =>
639 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
640 );
641 */
642 if (match(ICI->getOperand(0),
643 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
644 APInt Mask = *RHSC;
645 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(RHSVal))
648 return false;
649
650 Vals.push_back(C);
651 Vals.push_back(ConstantInt::get(C->getContext(),
652 C->getValue() & ~Mask));
653 UsedICmps++;
654 return true;
655 }
656 }
657
658 // If we already have a value for the switch, it has to match!
659 if (!setValueOnce(ICI->getOperand(0)))
660 return false;
661
662 UsedICmps++;
663 Vals.push_back(C);
664 return ICI->getOperand(0);
665 }
666
667 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
668 ConstantRange Span =
670
671 // Shift the range if the compare is fed by an add. This is the range
672 // compare idiom as emitted by instcombine.
673 Value *CandidateVal = I->getOperand(0);
674 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
675 Span = Span.subtract(*RHSC);
676 CandidateVal = RHSVal;
677 }
678
679 // If this is an and/!= check, then we are looking to build the set of
680 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
681 // x != 0 && x != 1.
682 if (!isEQ)
683 Span = Span.inverse();
684
685 // If there are a ton of values, we don't want to make a ginormous switch.
686 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
687 return false;
688 }
689
690 // If we already have a value for the switch, it has to match!
691 if (!setValueOnce(CandidateVal))
692 return false;
693
694 // Add all values from the range to the set
695 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
696 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
697
698 UsedICmps++;
699 return true;
700 }
701
702 /// Given a potentially 'or'd or 'and'd together collection of icmp
703 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
704 /// the value being compared, and stick the list constants into the Vals
705 /// vector.
706 /// One "Extra" case is allowed to differ from the other.
707 void gather(Value *V) {
708 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
709
710 // Keep a stack (SmallVector for efficiency) for depth-first traversal
713
714 // Initialize
715 Visited.insert(V);
716 DFT.push_back(V);
717
718 while (!DFT.empty()) {
719 V = DFT.pop_back_val();
720
721 if (Instruction *I = dyn_cast<Instruction>(V)) {
722 // If it is a || (or && depending on isEQ), process the operands.
723 Value *Op0, *Op1;
724 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
725 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
726 if (Visited.insert(Op1).second)
727 DFT.push_back(Op1);
728 if (Visited.insert(Op0).second)
729 DFT.push_back(Op0);
730
731 continue;
732 }
733
734 // Try to match the current instruction
735 if (matchInstruction(I, isEQ))
736 // Match succeed, continue the loop
737 continue;
738 }
739
740 // One element of the sequence of || (or &&) could not be match as a
741 // comparison against the same value as the others.
742 // We allow only one "Extra" case to be checked before the switch
743 if (!Extra) {
744 Extra = V;
745 continue;
746 }
747 // Failed to parse a proper sequence, abort now
748 CompValue = nullptr;
749 break;
750 }
751 }
752};
753
754} // end anonymous namespace
755
757 MemorySSAUpdater *MSSAU = nullptr) {
758 Instruction *Cond = nullptr;
759 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
760 Cond = dyn_cast<Instruction>(SI->getCondition());
761 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
762 if (BI->isConditional())
763 Cond = dyn_cast<Instruction>(BI->getCondition());
764 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
765 Cond = dyn_cast<Instruction>(IBI->getAddress());
766 }
767
768 TI->eraseFromParent();
769 if (Cond)
771}
772
773/// Return true if the specified terminator checks
774/// to see if a value is equal to constant integer value.
775Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
776 Value *CV = nullptr;
777 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
778 // Do not permit merging of large switch instructions into their
779 // predecessors unless there is only one predecessor.
780 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
781 CV = SI->getCondition();
782 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
783 if (BI->isConditional() && BI->getCondition()->hasOneUse())
784 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
785 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
786 CV = ICI->getOperand(0);
787 }
788
789 // Unwrap any lossless ptrtoint cast.
790 if (CV) {
791 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
792 Value *Ptr = PTII->getPointerOperand();
793 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
794 CV = Ptr;
795 }
796 }
797 return CV;
798}
799
800/// Given a value comparison instruction,
801/// decode all of the 'cases' that it represents and return the 'default' block.
802BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
803 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
804 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
805 Cases.reserve(SI->getNumCases());
806 for (auto Case : SI->cases())
807 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
808 Case.getCaseSuccessor()));
809 return SI->getDefaultDest();
810 }
811
812 BranchInst *BI = cast<BranchInst>(TI);
813 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
814 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
815 Cases.push_back(ValueEqualityComparisonCase(
816 GetConstantInt(ICI->getOperand(1), DL), Succ));
817 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
818}
819
820/// Given a vector of bb/value pairs, remove any entries
821/// in the list that match the specified block.
822static void
824 std::vector<ValueEqualityComparisonCase> &Cases) {
825 llvm::erase(Cases, BB);
826}
827
828/// Return true if there are any keys in C1 that exist in C2 as well.
829static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
830 std::vector<ValueEqualityComparisonCase> &C2) {
831 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
832
833 // Make V1 be smaller than V2.
834 if (V1->size() > V2->size())
835 std::swap(V1, V2);
836
837 if (V1->empty())
838 return false;
839 if (V1->size() == 1) {
840 // Just scan V2.
841 ConstantInt *TheVal = (*V1)[0].Value;
842 for (const ValueEqualityComparisonCase &VECC : *V2)
843 if (TheVal == VECC.Value)
844 return true;
845 }
846
847 // Otherwise, just sort both lists and compare element by element.
848 array_pod_sort(V1->begin(), V1->end());
849 array_pod_sort(V2->begin(), V2->end());
850 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
851 while (i1 != e1 && i2 != e2) {
852 if ((*V1)[i1].Value == (*V2)[i2].Value)
853 return true;
854 if ((*V1)[i1].Value < (*V2)[i2].Value)
855 ++i1;
856 else
857 ++i2;
858 }
859 return false;
860}
861
862// Set branch weights on SwitchInst. This sets the metadata if there is at
863// least one non-zero weight.
865 bool IsExpected) {
866 // Check that there is at least one non-zero weight. Otherwise, pass
867 // nullptr to setMetadata which will erase the existing metadata.
868 MDNode *N = nullptr;
869 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
870 N = MDBuilder(SI->getParent()->getContext())
871 .createBranchWeights(Weights, IsExpected);
872 SI->setMetadata(LLVMContext::MD_prof, N);
873}
874
875// Similar to the above, but for branch and select instructions that take
876// exactly 2 weights.
877static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
878 uint32_t FalseWeight, bool IsExpected) {
879 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
880 // Check that there is at least one non-zero weight. Otherwise, pass
881 // nullptr to setMetadata which will erase the existing metadata.
882 MDNode *N = nullptr;
883 if (TrueWeight || FalseWeight)
884 N = MDBuilder(I->getParent()->getContext())
885 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
886 I->setMetadata(LLVMContext::MD_prof, N);
887}
888
889/// If TI is known to be a terminator instruction and its block is known to
890/// only have a single predecessor block, check to see if that predecessor is
891/// also a value comparison with the same value, and if that comparison
892/// determines the outcome of this comparison. If so, simplify TI. This does a
893/// very limited form of jump threading.
894bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
895 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
896 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
897 if (!PredVal)
898 return false; // Not a value comparison in predecessor.
899
900 Value *ThisVal = isValueEqualityComparison(TI);
901 assert(ThisVal && "This isn't a value comparison!!");
902 if (ThisVal != PredVal)
903 return false; // Different predicates.
904
905 // TODO: Preserve branch weight metadata, similarly to how
906 // FoldValueComparisonIntoPredecessors preserves it.
907
908 // Find out information about when control will move from Pred to TI's block.
909 std::vector<ValueEqualityComparisonCase> PredCases;
910 BasicBlock *PredDef =
911 GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
912 EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
913
914 // Find information about how control leaves this block.
915 std::vector<ValueEqualityComparisonCase> ThisCases;
916 BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
917 EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
918
919 // If TI's block is the default block from Pred's comparison, potentially
920 // simplify TI based on this knowledge.
921 if (PredDef == TI->getParent()) {
922 // If we are here, we know that the value is none of those cases listed in
923 // PredCases. If there are any cases in ThisCases that are in PredCases, we
924 // can simplify TI.
925 if (!ValuesOverlap(PredCases, ThisCases))
926 return false;
927
928 if (isa<BranchInst>(TI)) {
929 // Okay, one of the successors of this condbr is dead. Convert it to a
930 // uncond br.
931 assert(ThisCases.size() == 1 && "Branch can only have one case!");
932 // Insert the new branch.
933 Instruction *NI = Builder.CreateBr(ThisDef);
934 (void)NI;
935
936 // Remove PHI node entries for the dead edge.
937 ThisCases[0].Dest->removePredecessor(PredDef);
938
939 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
940 << "Through successor TI: " << *TI << "Leaving: " << *NI
941 << "\n");
942
944
945 if (DTU)
946 DTU->applyUpdates(
947 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
948
949 return true;
950 }
951
952 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
953 // Okay, TI has cases that are statically dead, prune them away.
955 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
956 DeadCases.insert(PredCases[i].Value);
957
958 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
959 << "Through successor TI: " << *TI);
960
961 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
962 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
963 --i;
964 auto *Successor = i->getCaseSuccessor();
965 if (DTU)
966 ++NumPerSuccessorCases[Successor];
967 if (DeadCases.count(i->getCaseValue())) {
968 Successor->removePredecessor(PredDef);
969 SI.removeCase(i);
970 if (DTU)
971 --NumPerSuccessorCases[Successor];
972 }
973 }
974
975 if (DTU) {
976 std::vector<DominatorTree::UpdateType> Updates;
977 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
978 if (I.second == 0)
979 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
980 DTU->applyUpdates(Updates);
981 }
982
983 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
984 return true;
985 }
986
987 // Otherwise, TI's block must correspond to some matched value. Find out
988 // which value (or set of values) this is.
989 ConstantInt *TIV = nullptr;
990 BasicBlock *TIBB = TI->getParent();
991 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
992 if (PredCases[i].Dest == TIBB) {
993 if (TIV)
994 return false; // Cannot handle multiple values coming to this block.
995 TIV = PredCases[i].Value;
996 }
997 assert(TIV && "No edge from pred to succ?");
998
999 // Okay, we found the one constant that our value can be if we get into TI's
1000 // BB. Find out which successor will unconditionally be branched to.
1001 BasicBlock *TheRealDest = nullptr;
1002 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
1003 if (ThisCases[i].Value == TIV) {
1004 TheRealDest = ThisCases[i].Dest;
1005 break;
1006 }
1007
1008 // If not handled by any explicit cases, it is handled by the default case.
1009 if (!TheRealDest)
1010 TheRealDest = ThisDef;
1011
1012 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1013
1014 // Remove PHI node entries for dead edges.
1015 BasicBlock *CheckEdge = TheRealDest;
1016 for (BasicBlock *Succ : successors(TIBB))
1017 if (Succ != CheckEdge) {
1018 if (Succ != TheRealDest)
1019 RemovedSuccs.insert(Succ);
1020 Succ->removePredecessor(TIBB);
1021 } else
1022 CheckEdge = nullptr;
1023
1024 // Insert the new branch.
1025 Instruction *NI = Builder.CreateBr(TheRealDest);
1026 (void)NI;
1027
1028 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1029 << "Through successor TI: " << *TI << "Leaving: " << *NI
1030 << "\n");
1031
1033 if (DTU) {
1035 Updates.reserve(RemovedSuccs.size());
1036 for (auto *RemovedSucc : RemovedSuccs)
1037 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1038 DTU->applyUpdates(Updates);
1039 }
1040 return true;
1041}
1042
1043namespace {
1044
1045/// This class implements a stable ordering of constant
1046/// integers that does not depend on their address. This is important for
1047/// applications that sort ConstantInt's to ensure uniqueness.
1048struct ConstantIntOrdering {
1049 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1050 return LHS->getValue().ult(RHS->getValue());
1051 }
1052};
1053
1054} // end anonymous namespace
1055
1057 ConstantInt *const *P2) {
1058 const ConstantInt *LHS = *P1;
1059 const ConstantInt *RHS = *P2;
1060 if (LHS == RHS)
1061 return 0;
1062 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1063}
1064
1065/// Get Weights of a given terminator, the default weight is at the front
1066/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1067/// metadata.
1069 SmallVectorImpl<uint64_t> &Weights) {
1070 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1071 assert(MD && "Invalid branch-weight metadata");
1072 extractFromBranchWeightMD64(MD, Weights);
1073
1074 // If TI is a conditional eq, the default case is the false case,
1075 // and the corresponding branch-weight data is at index 2. We swap the
1076 // default weight to be the first entry.
1077 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1078 assert(Weights.size() == 2);
1079 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1080 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1081 std::swap(Weights.front(), Weights.back());
1082 }
1083}
1084
1085/// Keep halving the weights until all can fit in uint32_t.
1087 uint64_t Max = *llvm::max_element(Weights);
1088 if (Max > UINT_MAX) {
1089 unsigned Offset = 32 - llvm::countl_zero(Max);
1090 for (uint64_t &I : Weights)
1091 I >>= Offset;
1092 }
1093}
1094
1096 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1097 Instruction *PTI = PredBlock->getTerminator();
1098
1099 // If we have bonus instructions, clone them into the predecessor block.
1100 // Note that there may be multiple predecessor blocks, so we cannot move
1101 // bonus instructions to a predecessor block.
1102 for (Instruction &BonusInst : *BB) {
1103 if (BonusInst.isTerminator())
1104 continue;
1105
1106 Instruction *NewBonusInst = BonusInst.clone();
1107
1108 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1109 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1110 // Unless the instruction has the same !dbg location as the original
1111 // branch, drop it. When we fold the bonus instructions we want to make
1112 // sure we reset their debug locations in order to avoid stepping on
1113 // dead code caused by folding dead branches.
1114 NewBonusInst->setDebugLoc(DebugLoc());
1115 }
1116
1117 RemapInstruction(NewBonusInst, VMap,
1119
1120 // If we speculated an instruction, we need to drop any metadata that may
1121 // result in undefined behavior, as the metadata might have been valid
1122 // only given the branch precondition.
1123 // Similarly strip attributes on call parameters that may cause UB in
1124 // location the call is moved to.
1125 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1126
1127 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1128 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1129 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1131
1132 if (isa<DbgInfoIntrinsic>(BonusInst))
1133 continue;
1134
1135 NewBonusInst->takeName(&BonusInst);
1136 BonusInst.setName(NewBonusInst->getName() + ".old");
1137 VMap[&BonusInst] = NewBonusInst;
1138
1139 // Update (liveout) uses of bonus instructions,
1140 // now that the bonus instruction has been cloned into predecessor.
1141 // Note that we expect to be in a block-closed SSA form for this to work!
1142 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1143 auto *UI = cast<Instruction>(U.getUser());
1144 auto *PN = dyn_cast<PHINode>(UI);
1145 if (!PN) {
1146 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1147 "If the user is not a PHI node, then it should be in the same "
1148 "block as, and come after, the original bonus instruction.");
1149 continue; // Keep using the original bonus instruction.
1150 }
1151 // Is this the block-closed SSA form PHI node?
1152 if (PN->getIncomingBlock(U) == BB)
1153 continue; // Great, keep using the original bonus instruction.
1154 // The only other alternative is an "use" when coming from
1155 // the predecessor block - here we should refer to the cloned bonus instr.
1156 assert(PN->getIncomingBlock(U) == PredBlock &&
1157 "Not in block-closed SSA form?");
1158 U.set(NewBonusInst);
1159 }
1160 }
1161}
1162
1163bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
1164 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1165 BasicBlock *BB = TI->getParent();
1166 BasicBlock *Pred = PTI->getParent();
1167
1169
1170 // Figure out which 'cases' to copy from SI to PSI.
1171 std::vector<ValueEqualityComparisonCase> BBCases;
1172 BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1173
1174 std::vector<ValueEqualityComparisonCase> PredCases;
1175 BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1176
1177 // Based on whether the default edge from PTI goes to BB or not, fill in
1178 // PredCases and PredDefault with the new switch cases we would like to
1179 // build.
1181
1182 // Update the branch weight metadata along the way
1184 bool PredHasWeights = hasBranchWeightMD(*PTI);
1185 bool SuccHasWeights = hasBranchWeightMD(*TI);
1186
1187 if (PredHasWeights) {
1188 GetBranchWeights(PTI, Weights);
1189 // branch-weight metadata is inconsistent here.
1190 if (Weights.size() != 1 + PredCases.size())
1191 PredHasWeights = SuccHasWeights = false;
1192 } else if (SuccHasWeights)
1193 // If there are no predecessor weights but there are successor weights,
1194 // populate Weights with 1, which will later be scaled to the sum of
1195 // successor's weights
1196 Weights.assign(1 + PredCases.size(), 1);
1197
1198 SmallVector<uint64_t, 8> SuccWeights;
1199 if (SuccHasWeights) {
1200 GetBranchWeights(TI, SuccWeights);
1201 // branch-weight metadata is inconsistent here.
1202 if (SuccWeights.size() != 1 + BBCases.size())
1203 PredHasWeights = SuccHasWeights = false;
1204 } else if (PredHasWeights)
1205 SuccWeights.assign(1 + BBCases.size(), 1);
1206
1207 if (PredDefault == BB) {
1208 // If this is the default destination from PTI, only the edges in TI
1209 // that don't occur in PTI, or that branch to BB will be activated.
1210 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1211 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1212 if (PredCases[i].Dest != BB)
1213 PTIHandled.insert(PredCases[i].Value);
1214 else {
1215 // The default destination is BB, we don't need explicit targets.
1216 std::swap(PredCases[i], PredCases.back());
1217
1218 if (PredHasWeights || SuccHasWeights) {
1219 // Increase weight for the default case.
1220 Weights[0] += Weights[i + 1];
1221 std::swap(Weights[i + 1], Weights.back());
1222 Weights.pop_back();
1223 }
1224
1225 PredCases.pop_back();
1226 --i;
1227 --e;
1228 }
1229
1230 // Reconstruct the new switch statement we will be building.
1231 if (PredDefault != BBDefault) {
1232 PredDefault->removePredecessor(Pred);
1233 if (DTU && PredDefault != BB)
1234 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1235 PredDefault = BBDefault;
1236 ++NewSuccessors[BBDefault];
1237 }
1238
1239 unsigned CasesFromPred = Weights.size();
1240 uint64_t ValidTotalSuccWeight = 0;
1241 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1242 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1243 PredCases.push_back(BBCases[i]);
1244 ++NewSuccessors[BBCases[i].Dest];
1245 if (SuccHasWeights || PredHasWeights) {
1246 // The default weight is at index 0, so weight for the ith case
1247 // should be at index i+1. Scale the cases from successor by
1248 // PredDefaultWeight (Weights[0]).
1249 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1250 ValidTotalSuccWeight += SuccWeights[i + 1];
1251 }
1252 }
1253
1254 if (SuccHasWeights || PredHasWeights) {
1255 ValidTotalSuccWeight += SuccWeights[0];
1256 // Scale the cases from predecessor by ValidTotalSuccWeight.
1257 for (unsigned i = 1; i < CasesFromPred; ++i)
1258 Weights[i] *= ValidTotalSuccWeight;
1259 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1260 Weights[0] *= SuccWeights[0];
1261 }
1262 } else {
1263 // If this is not the default destination from PSI, only the edges
1264 // in SI that occur in PSI with a destination of BB will be
1265 // activated.
1266 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1267 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1268 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1269 if (PredCases[i].Dest == BB) {
1270 PTIHandled.insert(PredCases[i].Value);
1271
1272 if (PredHasWeights || SuccHasWeights) {
1273 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1274 std::swap(Weights[i + 1], Weights.back());
1275 Weights.pop_back();
1276 }
1277
1278 std::swap(PredCases[i], PredCases.back());
1279 PredCases.pop_back();
1280 --i;
1281 --e;
1282 }
1283
1284 // Okay, now we know which constants were sent to BB from the
1285 // predecessor. Figure out where they will all go now.
1286 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1287 if (PTIHandled.count(BBCases[i].Value)) {
1288 // If this is one we are capable of getting...
1289 if (PredHasWeights || SuccHasWeights)
1290 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1291 PredCases.push_back(BBCases[i]);
1292 ++NewSuccessors[BBCases[i].Dest];
1293 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1294 }
1295
1296 // If there are any constants vectored to BB that TI doesn't handle,
1297 // they must go to the default destination of TI.
1298 for (ConstantInt *I : PTIHandled) {
1299 if (PredHasWeights || SuccHasWeights)
1300 Weights.push_back(WeightsForHandled[I]);
1301 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1302 ++NewSuccessors[BBDefault];
1303 }
1304 }
1305
1306 // Okay, at this point, we know which new successor Pred will get. Make
1307 // sure we update the number of entries in the PHI nodes for these
1308 // successors.
1309 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1310 if (DTU) {
1311 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1312 Updates.reserve(Updates.size() + NewSuccessors.size());
1313 }
1314 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1315 NewSuccessors) {
1316 for (auto I : seq(NewSuccessor.second)) {
1317 (void)I;
1318 AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
1319 }
1320 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1321 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1322 }
1323
1324 Builder.SetInsertPoint(PTI);
1325 // Convert pointer to int before we switch.
1326 if (CV->getType()->isPointerTy()) {
1327 CV =
1328 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1329 }
1330
1331 // Now that the successors are updated, create the new Switch instruction.
1332 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1333 NewSI->setDebugLoc(PTI->getDebugLoc());
1334 for (ValueEqualityComparisonCase &V : PredCases)
1335 NewSI->addCase(V.Value, V.Dest);
1336
1337 if (PredHasWeights || SuccHasWeights) {
1338 // Halve the weights if any of them cannot fit in an uint32_t
1339 FitWeights(Weights);
1340
1341 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1342
1343 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1344 }
1345
1347
1348 // Okay, last check. If BB is still a successor of PSI, then we must
1349 // have an infinite loop case. If so, add an infinitely looping block
1350 // to handle the case to preserve the behavior of the code.
1351 BasicBlock *InfLoopBlock = nullptr;
1352 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1353 if (NewSI->getSuccessor(i) == BB) {
1354 if (!InfLoopBlock) {
1355 // Insert it at the end of the function, because it's either code,
1356 // or it won't matter if it's hot. :)
1357 InfLoopBlock =
1358 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1359 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1360 if (DTU)
1361 Updates.push_back(
1362 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1363 }
1364 NewSI->setSuccessor(i, InfLoopBlock);
1365 }
1366
1367 if (DTU) {
1368 if (InfLoopBlock)
1369 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1370
1371 Updates.push_back({DominatorTree::Delete, Pred, BB});
1372
1373 DTU->applyUpdates(Updates);
1374 }
1375
1376 ++NumFoldValueComparisonIntoPredecessors;
1377 return true;
1378}
1379
1380/// The specified terminator is a value equality comparison instruction
1381/// (either a switch or a branch on "X == c").
1382/// See if any of the predecessors of the terminator block are value comparisons
1383/// on the same value. If so, and if safe to do so, fold them together.
1384bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1385 IRBuilder<> &Builder) {
1386 BasicBlock *BB = TI->getParent();
1387 Value *CV = isValueEqualityComparison(TI); // CondVal
1388 assert(CV && "Not a comparison?");
1389
1390 bool Changed = false;
1391
1393 while (!Preds.empty()) {
1394 BasicBlock *Pred = Preds.pop_back_val();
1395 Instruction *PTI = Pred->getTerminator();
1396
1397 // Don't try to fold into itself.
1398 if (Pred == BB)
1399 continue;
1400
1401 // See if the predecessor is a comparison with the same value.
1402 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1403 if (PCV != CV)
1404 continue;
1405
1407 if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1408 for (auto *Succ : FailBlocks) {
1409 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1410 return false;
1411 }
1412 }
1413
1414 PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1415 Changed = true;
1416 }
1417 return Changed;
1418}
1419
1420// If we would need to insert a select that uses the value of this invoke
1421// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1422// need to do this), we can't hoist the invoke, as there is nowhere to put the
1423// select in this case.
1425 Instruction *I1, Instruction *I2) {
1426 for (BasicBlock *Succ : successors(BB1)) {
1427 for (const PHINode &PN : Succ->phis()) {
1428 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1429 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1430 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1431 return false;
1432 }
1433 }
1434 }
1435 return true;
1436}
1437
1438// Get interesting characteristics of instructions that
1439// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1440// instructions can be reordered across.
1446
1448 unsigned Flags = 0;
1449 if (I->mayReadFromMemory())
1450 Flags |= SkipReadMem;
1451 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1452 // inalloca) across stacksave/stackrestore boundaries.
1453 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1454 Flags |= SkipSideEffect;
1456 Flags |= SkipImplicitControlFlow;
1457 return Flags;
1458}
1459
1460// Returns true if it is safe to reorder an instruction across preceding
1461// instructions in a basic block.
1462static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1463 // Don't reorder a store over a load.
1464 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1465 return false;
1466
1467 // If we have seen an instruction with side effects, it's unsafe to reorder an
1468 // instruction which reads memory or itself has side effects.
1469 if ((Flags & SkipSideEffect) &&
1470 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1471 return false;
1472
1473 // Reordering across an instruction which does not necessarily transfer
1474 // control to the next instruction is speculation.
1476 return false;
1477
1478 // Hoisting of llvm.deoptimize is only legal together with the next return
1479 // instruction, which this pass is not always able to do.
1480 if (auto *CB = dyn_cast<CallBase>(I))
1481 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1482 return false;
1483
1484 // It's also unsafe/illegal to hoist an instruction above its instruction
1485 // operands
1486 BasicBlock *BB = I->getParent();
1487 for (Value *Op : I->operands()) {
1488 if (auto *J = dyn_cast<Instruction>(Op))
1489 if (J->getParent() == BB)
1490 return false;
1491 }
1492
1493 return true;
1494}
1495
1496static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1497
1498/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1499/// instructions \p I1 and \p I2 can and should be hoisted.
1501 const TargetTransformInfo &TTI) {
1502 // If we're going to hoist a call, make sure that the two instructions
1503 // we're commoning/hoisting are both marked with musttail, or neither of
1504 // them is marked as such. Otherwise, we might end up in a situation where
1505 // we hoist from a block where the terminator is a `ret` to a block where
1506 // the terminator is a `br`, and `musttail` calls expect to be followed by
1507 // a return.
1508 auto *C1 = dyn_cast<CallInst>(I1);
1509 auto *C2 = dyn_cast<CallInst>(I2);
1510 if (C1 && C2)
1511 if (C1->isMustTailCall() != C2->isMustTailCall())
1512 return false;
1513
1515 return false;
1516
1517 // If any of the two call sites has nomerge or convergent attribute, stop
1518 // hoisting.
1519 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1520 if (CB1->cannotMerge() || CB1->isConvergent())
1521 return false;
1522 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1523 if (CB2->cannotMerge() || CB2->isConvergent())
1524 return false;
1525
1526 return true;
1527}
1528
1529/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1530/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1531/// hoistCommonCodeFromSuccessors. e.g. The input:
1532/// I1 DVRs: { x, z },
1533/// OtherInsts: { I2 DVRs: { x, y, z } }
1534/// would result in hoisting only DbgVariableRecord x.
1536 Instruction *TI, Instruction *I1,
1537 SmallVectorImpl<Instruction *> &OtherInsts) {
1538 if (!I1->hasDbgRecords())
1539 return;
1540 using CurrentAndEndIt =
1541 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1542 // Vector of {Current, End} iterators.
1544 Itrs.reserve(OtherInsts.size() + 1);
1545 // Helper lambdas for lock-step checks:
1546 // Return true if this Current == End.
1547 auto atEnd = [](const CurrentAndEndIt &Pair) {
1548 return Pair.first == Pair.second;
1549 };
1550 // Return true if all Current are identical.
1551 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1552 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1554 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1555 });
1556 };
1557
1558 // Collect the iterators.
1559 Itrs.push_back(
1560 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1561 for (Instruction *Other : OtherInsts) {
1562 if (!Other->hasDbgRecords())
1563 return;
1564 Itrs.push_back(
1565 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1566 }
1567
1568 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1569 // the lock-step DbgRecord are identical, hoist all of them to TI.
1570 // This replicates the dbg.* intrinsic behaviour in
1571 // hoistCommonCodeFromSuccessors.
1572 while (none_of(Itrs, atEnd)) {
1573 bool HoistDVRs = allIdentical(Itrs);
1574 for (CurrentAndEndIt &Pair : Itrs) {
1575 // Increment Current iterator now as we may be about to move the
1576 // DbgRecord.
1577 DbgRecord &DR = *Pair.first++;
1578 if (HoistDVRs) {
1579 DR.removeFromParent();
1580 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1581 }
1582 }
1583 }
1584}
1585
1586/// Hoist any common code in the successor blocks up into the block. This
1587/// function guarantees that BB dominates all successors. If EqTermsOnly is
1588/// given, only perform hoisting in case both blocks only contain a terminator.
1589/// In that case, only the original BI will be replaced and selects for PHIs are
1590/// added.
1591bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1592 bool EqTermsOnly) {
1593 // This does very trivial matching, with limited scanning, to find identical
1594 // instructions in the two blocks. In particular, we don't want to get into
1595 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1596 // such, we currently just scan for obviously identical instructions in an
1597 // identical order, possibly separated by the same number of non-identical
1598 // instructions.
1599 unsigned int SuccSize = succ_size(BB);
1600 if (SuccSize < 2)
1601 return false;
1602
1603 // If either of the blocks has it's address taken, then we can't do this fold,
1604 // because the code we'd hoist would no longer run when we jump into the block
1605 // by it's address.
1606 for (auto *Succ : successors(BB))
1607 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1608 return false;
1609
1610 auto *TI = BB->getTerminator();
1611
1612 // The second of pair is a SkipFlags bitmask.
1613 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1614 SmallVector<SuccIterPair, 8> SuccIterPairs;
1615 for (auto *Succ : successors(BB)) {
1616 BasicBlock::iterator SuccItr = Succ->begin();
1617 if (isa<PHINode>(*SuccItr))
1618 return false;
1619 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1620 }
1621
1622 // Check if only hoisting terminators is allowed. This does not add new
1623 // instructions to the hoist location.
1624 if (EqTermsOnly) {
1625 // Skip any debug intrinsics, as they are free to hoist.
1626 for (auto &SuccIter : make_first_range(SuccIterPairs)) {
1627 auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
1628 if (!INonDbg->isTerminator())
1629 return false;
1630 }
1631 // Now we know that we only need to hoist debug intrinsics and the
1632 // terminator. Let the loop below handle those 2 cases.
1633 }
1634
1635 // Count how many instructions were not hoisted so far. There's a limit on how
1636 // many instructions we skip, serving as a compilation time control as well as
1637 // preventing excessive increase of life ranges.
1638 unsigned NumSkipped = 0;
1639 // If we find an unreachable instruction at the beginning of a basic block, we
1640 // can still hoist instructions from the rest of the basic blocks.
1641 if (SuccIterPairs.size() > 2) {
1642 erase_if(SuccIterPairs,
1643 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1644 if (SuccIterPairs.size() < 2)
1645 return false;
1646 }
1647
1648 bool Changed = false;
1649
1650 for (;;) {
1651 auto *SuccIterPairBegin = SuccIterPairs.begin();
1652 auto &BB1ItrPair = *SuccIterPairBegin++;
1653 auto OtherSuccIterPairRange =
1654 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1655 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1656
1657 Instruction *I1 = &*BB1ItrPair.first;
1658
1659 // Skip debug info if it is not identical.
1660 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1661 Instruction *I2 = &*Iter;
1662 return I1->isIdenticalToWhenDefined(I2);
1663 });
1664 if (!AllDbgInstsAreIdentical) {
1665 while (isa<DbgInfoIntrinsic>(I1))
1666 I1 = &*++BB1ItrPair.first;
1667 for (auto &SuccIter : OtherSuccIterRange) {
1668 Instruction *I2 = &*SuccIter;
1669 while (isa<DbgInfoIntrinsic>(I2))
1670 I2 = &*++SuccIter;
1671 }
1672 }
1673
1674 bool AllInstsAreIdentical = true;
1675 bool HasTerminator = I1->isTerminator();
1676 for (auto &SuccIter : OtherSuccIterRange) {
1677 Instruction *I2 = &*SuccIter;
1678 HasTerminator |= I2->isTerminator();
1679 if (AllInstsAreIdentical && (!I1->isIdenticalToWhenDefined(I2) ||
1680 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1681 AllInstsAreIdentical = false;
1682 }
1683
1685 for (auto &SuccIter : OtherSuccIterRange)
1686 OtherInsts.push_back(&*SuccIter);
1687
1688 // If we are hoisting the terminator instruction, don't move one (making a
1689 // broken BB), instead clone it, and remove BI.
1690 if (HasTerminator) {
1691 // Even if BB, which contains only one unreachable instruction, is ignored
1692 // at the beginning of the loop, we can hoist the terminator instruction.
1693 // If any instructions remain in the block, we cannot hoist terminators.
1694 if (NumSkipped || !AllInstsAreIdentical) {
1695 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1696 return Changed;
1697 }
1698
1699 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1700 Changed;
1701 }
1702
1703 if (AllInstsAreIdentical) {
1704 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1705 AllInstsAreIdentical =
1706 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1707 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1708 Instruction *I2 = &*Pair.first;
1709 unsigned SkipFlagsBB2 = Pair.second;
1710 // Even if the instructions are identical, it may not
1711 // be safe to hoist them if we have skipped over
1712 // instructions with side effects or their operands
1713 // weren't hoisted.
1714 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1716 });
1717 }
1718
1719 if (AllInstsAreIdentical) {
1720 BB1ItrPair.first++;
1721 if (isa<DbgInfoIntrinsic>(I1)) {
1722 // The debug location is an integral part of a debug info intrinsic
1723 // and can't be separated from it or replaced. Instead of attempting
1724 // to merge locations, simply hoist both copies of the intrinsic.
1725 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1726 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1727 // and leave any that were not hoisted behind (by calling moveBefore
1728 // rather than moveBeforePreserving).
1729 I1->moveBefore(TI);
1730 for (auto &SuccIter : OtherSuccIterRange) {
1731 auto *I2 = &*SuccIter++;
1732 assert(isa<DbgInfoIntrinsic>(I2));
1733 I2->moveBefore(TI);
1734 }
1735 } else {
1736 // For a normal instruction, we just move one to right before the
1737 // branch, then replace all uses of the other with the first. Finally,
1738 // we remove the now redundant second instruction.
1739 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1740 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1741 // and leave any that were not hoisted behind (by calling moveBefore
1742 // rather than moveBeforePreserving).
1743 I1->moveBefore(TI);
1744 for (auto &SuccIter : OtherSuccIterRange) {
1745 Instruction *I2 = &*SuccIter++;
1746 assert(I2 != I1);
1747 if (!I2->use_empty())
1748 I2->replaceAllUsesWith(I1);
1749 I1->andIRFlags(I2);
1750 combineMetadataForCSE(I1, I2, true);
1751 // I1 and I2 are being combined into a single instruction. Its debug
1752 // location is the merged locations of the original instructions.
1753 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1754 I2->eraseFromParent();
1755 }
1756 }
1757 if (!Changed)
1758 NumHoistCommonCode += SuccIterPairs.size();
1759 Changed = true;
1760 NumHoistCommonInstrs += SuccIterPairs.size();
1761 } else {
1762 if (NumSkipped >= HoistCommonSkipLimit) {
1763 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1764 return Changed;
1765 }
1766 // We are about to skip over a pair of non-identical instructions. Record
1767 // if any have characteristics that would prevent reordering instructions
1768 // across them.
1769 for (auto &SuccIterPair : SuccIterPairs) {
1770 Instruction *I = &*SuccIterPair.first++;
1771 SuccIterPair.second |= skippedInstrFlags(I);
1772 }
1773 ++NumSkipped;
1774 }
1775 }
1776}
1777
/// Hoist the identical terminator I1 (from TI's first successor) and the
/// terminators in OtherSuccTIs (from TI's remaining successors) up into TI's
/// block, replacing TI. For a two-way conditional branch, PHI entries in the
/// common successors that disagree between the two hoisted-from blocks are
/// reconciled by inserting select instructions keyed on the branch condition.
/// Returns true if the CFG was changed.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  // Non-null only when TI is a two-way conditional branch ("if" shape);
  // null for switches.
  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  // Bail out if any PHI in a common successor would need a select but we
  // cannot (or should not) build one.
  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow then converting it to a select.
        // NOTE(review): a continuation line of this condition (upstream:
        // `passingValueIsAlwaysUndefined(BB2V, &PN))`) appears to have been
        // dropped in this excerpt — confirm against the original file.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    // The hoisted terminator produces a value (e.g. an invoke); forward all
    // uses of the per-successor copies to the single hoisted copy.
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  // NOTE(review): the declaration of Locs (upstream:
  // `SmallVector<DebugLoc, 4> Locs;`) appears to have been dropped in this
  // excerpt — confirm against the original file.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DILocation::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache one select per distinct (BB1V, BB2V) pair so repeated PHI
    // disagreements reuse the same select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
          if (isa<FPMathOperator>(PN))
            Builder.setFastMathFlags(PN.getFastMathFlags());

          SI = cast<SelectInst>(Builder.CreateSelect(
              BI->getCondition(), BB1V, BB2V,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  // NOTE(review): the declaration of Updates (upstream:
  // `SmallVector<DominatorTree::UpdateType, 4> Updates;`) appears to have
  // been dropped in this excerpt — confirm against the original file.

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    AddPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  // NOTE(review): a statement that removes the now-dead original terminator
  // TI (upstream: a call like `eraseTerminatorAndDCECond(TI);`) appears to
  // have been dropped in this excerpt — confirm against the original file.
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
1906
1907// Check lifetime markers.
1908static bool isLifeTimeMarker(const Instruction *I) {
1909 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1910 switch (II->getIntrinsicID()) {
1911 default:
1912 break;
1913 case Intrinsic::lifetime_start:
1914 case Intrinsic::lifetime_end:
1915 return true;
1916 }
1917 }
1918 return false;
1919}
1920
// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
// into variables.
// NOTE(review): the opening of this function's signature (upstream:
// `static bool replacingOperandWithVariableIsCheap(const Instruction *I,`)
// appears to have been dropped in this excerpt — confirm against the
// original file.
    int OpIdx) {
  // Conservatively treat any non-intrinsic instruction as cheap to rewrite;
  // intrinsic operands (e.g. constant memcpy lengths) are left untouched.
  return !isa<IntrinsicInst>(I);
}
1927
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the opening of this function's signature (upstream:
// `static bool canSinkInstructions(ArrayRef<Instruction *> Insts,`) appears
// to have been dropped in this excerpt — confirm against the original file.
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // All candidates must have the same use count, since they will all be
    // replaced by uses of one common instruction.
    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0))
      return false;

    // swifterror pointers can only be used by a load or store; sinking a load
    // or store would require introducing a select for the pointer operand,
    // which isn't allowed for swifterror pointers.
    if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
      return false;
    if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // Because SROA can't handle speculating stores of selects, try not to sink
  // loads, stores or lifetime markers of allocas when we'd have to create a
  // PHI for the address operand. Also, because it is likely that loads or
  // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
  // them.
  // This can cause code churn which can have unintended consequences down
  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
  // FIXME: This is a workaround for a deficiency in SROA - see
  // https://llvm.org/bugs/show_bug.cgi?id=30188
  if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
      }))
    return false;
  if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
      }))
    return false;
  if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
      }))
    return false;

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Examine every operand position: identical operands are fine; differing
  // operands must be representable by a PHI, and are recorded in PHIOperands.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    if (Op->getType()->isTokenTy())
      // Don't touch any operand of token type.
      return false;

    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      // NOTE(review): a continuation line of this condition (upstream:
      // `!canReplaceOperandWithVariable(I0, OI))`) appears to have been
      // dropped in this excerpt — confirm against the original file.
      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
          // We can't create a PHI from this GEP.
          return false;
      // Record the per-block operand values for the PHI that sinking would
      // have to create at this operand position.
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2067
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
// NOTE(review): this function's signature line (upstream:
// `static void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks) {`)
// appears to have been dropped in this excerpt — confirm against the
// original file.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  // NOTE(review): the declaration of Insts (upstream:
  // `SmallVector<Instruction *, 4> Insts;`) appears to have been dropped in
  // this excerpt — confirm against the original file.
  for (auto *BB : Blocks) {
    // Walk backwards from the terminator, skipping debug intrinsics, to find
    // the last "real" instruction of each block.
    Instruction *I = BB->getTerminator();
    do {
      I = I->getPrevNode();
    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
    if (!isa<DbgInfoIntrinsic>(I))
      Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2158
2159namespace {
2160
  // LockstepReverseIterator - Iterates through instructions
  // in a set of blocks in reverse order from the first non-terminator.
  // For example (assume all blocks have size n):
  //   LockstepReverseIterator I([B1, B2, B3]);
  //   *I-- = [B1[n], B2[n], B3[n]];
  //   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
  //   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
  //   ...
  class LockstepReverseIterator {
    // NOTE(review): the member declarations that belong here (upstream:
    // `ArrayRef<BasicBlock *> Blocks;` and
    // `SmallVector<Instruction *, 4> Insts;`) appear to have been dropped in
    // this excerpt — confirm against the original file.
    bool Fail;

  public:
    LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
      reset();
    }

    // Re-position each per-block cursor at the last non-debug,
    // non-terminator instruction of its block; sets Fail if any block has
    // no such instruction.
    void reset() {
      Fail = false;
      Insts.clear();
      for (auto *BB : Blocks) {
        Instruction *Inst = BB->getTerminator();
        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
          Inst = Inst->getPrevNode();
        if (!Inst) {
          // Block wasn't big enough.
          Fail = true;
          return;
        }
        Insts.push_back(Inst);
      }
    }

    bool isValid() const {
      return !Fail;
    }

    // Move every cursor one non-debug instruction backwards; sets Fail when
    // any block runs out of instructions.
    void operator--() {
      if (Fail)
        return;
      for (auto *&Inst : Insts) {
        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
          Inst = Inst->getPrevNode();
        // Already at beginning of block.
        if (!Inst) {
          Fail = true;
          return;
        }
      }
    }

    // Move every cursor one non-debug instruction forwards; sets Fail when
    // any block runs out of instructions.
    void operator++() {
      if (Fail)
        return;
      for (auto *&Inst : Insts) {
        for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
          Inst = Inst->getNextNode();
        // Already at end of block.
        if (!Inst) {
          Fail = true;
          return;
        }
      }
    }

    // NOTE(review): the dereference operator's signature (upstream:
    // `ArrayRef<Instruction *> operator*() const {`) appears to have been
    // dropped in this excerpt — confirm against the original file.
      return Insts;
    }
  };
2231
2232} // end anonymous namespace
2233
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
// NOTE(review): the opening of this function's signature (upstream:
// `static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,`) appears to
// have been dropped in this excerpt — confirm against the original file.
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \   /    |
  //   [sink.split] |
  //         \     /
  //         [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Nothing to common up with fewer than two unconditional predecessors.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB.
  // NOTE(review): the declarations of PHIOperands (upstream:
  // `DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;`) and, inside
  // the loop below, IncomingVals (upstream:
  // `SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;`) appear to
  // have been dropped in this excerpt — confirm against the original file.
  for (PHINode &PN : BB->phis()) {
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert({PN.getIncomingBlock(U), &U});
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(*IncomingVals[Pred]);
  }

  // Scan backwards in lockstep; ScanIdx counts how many trailing
  // instructions are legally sinkable.
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  LockstepReverseIterator LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(*LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(&U);
        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
              return InstructionsToSink.contains(V);
            })) {
          ++NumPHIInsts;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2476
2477namespace {
2478
/// Partitions the `invoke` instructions that share a common unwind
/// destination into sets whose members are mutually compatible for merging.
struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  // NOTE(review): the member declaration that belongs here (upstream:
  // `SmallVector<SetTy, 2> Sets;`) appears to have been dropped in this
  // excerpt — confirm against the original file.

  /// Decide whether the two invokes in \p Invokes may live in the same set.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  /// Find the existing set compatible with \p II, or create a new one.
  SetTy &getCompatibleSet(InvokeInst *II);

  /// Place \p II into its compatible set.
  void insert(InvokeInst *II);
};
2490
2491CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2492 // Perform a linear scan over all the existing sets, see if the new `invoke`
2493 // is compatible with any particular set. Since we know that all the `invokes`
2494 // within a set are compatible, only check the first `invoke` in each set.
2495 // WARNING: at worst, this has quadratic complexity.
2496 for (CompatibleSets::SetTy &Set : Sets) {
2497 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2498 return Set;
2499 }
2500
2501 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2502 return Sets.emplace_back();
2503}
2504
2505void CompatibleSets::insert(InvokeInst *II) {
2506 getCompatibleSet(II).emplace_back(II);
2507}
2508
/// Pairwise compatibility test for merging two `invoke`s: they must be
/// legally mergeable, call the same (or both indirect) callees, share normal
/// and unwind destinations with compatible incoming PHI values, be the same
/// operation, and have argument differences that PHIs can absorb.
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Invokes, IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination,
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Invokes, HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Invokes, HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible.
    SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
    // NOTE(review): the call opening this condition (upstream:
    // `if (!incomingValuesAreCompatible(`) appears to have been dropped in
    // this excerpt — confirm against the original file.
            NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
            &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  // NOTE(review): the call opening this condition (upstream:
  // `if (!incomingValuesAreCompatible(`) appears to have been dropped in
  // this excerpt — confirm against the original file.
          Invokes.front()->getUnwindDest(),
          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(II0))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    // Identical operands need no PHI; differing ones must be PHI-able and
    // must not be of token type.
    if (U0 == U1)
      return false;
    return U0->getType()->isTokenTy() ||
           !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
                                          U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
             IsIllegalToMergeArguments))
    return false;

  return true;
}
2616
2617} // namespace
2618
2619// Merge all invokes in the provided set, all of which are compatible
2620// as per the `CompatibleSets::shouldBelongToSameSet()`.
2622                                       DomTreeUpdater *DTU) {
2623  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2624
2626  if (DTU)
2627    Updates.reserve(2 + 3 * Invokes.size());
2628
2629  bool HasNormalDest =
2630      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2631
2632  // Clone one of the invokes into a new basic block.
2633  // Since they are all compatible, it doesn't matter which invoke is cloned.
2634  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2635    InvokeInst *II0 = Invokes.front();
2636    BasicBlock *II0BB = II0->getParent();
2637    BasicBlock *InsertBeforeBlock =
2638        II0->getParent()->getIterator()->getNextNode();
2639    Function *Func = II0BB->getParent();
2640    LLVMContext &Ctx = II0->getContext();
2641
2642    BasicBlock *MergedInvokeBB = BasicBlock::Create(
2643        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2644
2645    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2646    // NOTE: all invokes have the same attributes, so no handling needed.
2647    MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2648
2649    if (!HasNormalDest) {
2650      // This set does not have a normal destination,
2651      // so just form a new block with unreachable terminator.
2652      BasicBlock *MergedNormalDest = BasicBlock::Create(
2653          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2654      new UnreachableInst(Ctx, MergedNormalDest);
2655      MergedInvoke->setNormalDest(MergedNormalDest);
2656    }
2657
2658    // The unwind destination, however, remains identical for all invokes here.
2659
2660    return MergedInvoke;
2661  }();
2662
2663  if (DTU) {
2664    // Predecessor blocks that contained these invokes will now branch to
2665    // the new block that contains the merged invoke, ...
2666    for (InvokeInst *II : Invokes)
2667      Updates.push_back(
2668          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2669
2670    // ... which has the new `unreachable` block as normal destination,
2671    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2672    for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2673      Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2674                         SuccBBOfMergedInvoke});
2675
2676    // Since predecessor blocks now unconditionally branch to a new block,
2677    // they no longer branch to their original successors.
2678    for (InvokeInst *II : Invokes)
2679      for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2680        Updates.push_back(
2681            {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2682  }
2683
2684  bool IsIndirectCall = Invokes[0]->isIndirectCall();
2685
2686  // Form the merged operands for the merged invoke.
2687  for (Use &U : MergedInvoke->operands()) {
2688    // Only PHI together the indirect callees and data operands.
2689    if (MergedInvoke->isCallee(&U)) {
2690      if (!IsIndirectCall)
2691        continue;
2692    } else if (!MergedInvoke->isDataOperand(&U))
2693      continue;
2694
2695    // Don't create trivial PHI's with all-identical incoming values.
2696    bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2697      return II->getOperand(U.getOperandNo()) != U.get();
2698    });
2699    if (!NeedPHI)
2700      continue;
2701
2702    // Form a PHI out of all the data ops under this index.
2704        U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2705    for (InvokeInst *II : Invokes)
2706      PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2707
2708    U.set(PN);
2709  }
2710
2711  // We've ensured that each PHI node has compatible (identical) incoming values
2712  // when coming from each of the `invoke`s in the current merge set,
2713  // so update the PHI nodes accordingly.
2714  for (BasicBlock *Succ : successors(MergedInvoke))
2715    AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2716                          /*ExistPred=*/Invokes.front()->getParent());
2717
2718  // And finally, replace the original `invoke`s with an unconditional branch
2719  // to the block with the merged `invoke`. Also, give that merged `invoke`
2720  // the merged debugloc of all the original `invoke`s.
2721  DILocation *MergedDebugLoc = nullptr;
2722  for (InvokeInst *II : Invokes) {
2723    // Compute the debug location common to all the original `invoke`s.
2724    if (!MergedDebugLoc)
2725      MergedDebugLoc = II->getDebugLoc();
2726    else
2727      MergedDebugLoc =
2728          DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2729
2730    // And replace the old `invoke` with an unconditional branch
2731    // to the block with the merged `invoke`.
2732    for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2733      OrigSuccBB->removePredecessor(II->getParent());
2734    BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2735    II->replaceAllUsesWith(MergedInvoke);
2736    II->eraseFromParent();
2737    ++NumInvokesMerged;
2738  }
2739  MergedInvoke->setDebugLoc(MergedDebugLoc);
2740  ++NumInvokeSetsFormed;
2741
2742  if (DTU)
2743    DTU->applyUpdates(Updates);
2744}
2745
2746/// If this block is a `landingpad` exception handling block, categorize all
2747/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2748/// being "mergeable" together, and then merge invokes in each set together.
2749///
2750/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2751///           [...]        [...]
2752///             |            |
2753///        [invoke0]    [invoke1]
2754///           / \          / \
2755///     [cont0] [landingpad] [cont1]
2756/// to:
2757///       [...] [...]
2758///         \     /
2759///        [invoke]
2760///          / \
2761///     [cont] [landingpad]
2762///
2763/// But of course we can only do that if the invokes share the `landingpad`,
2764/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2765/// and the invoked functions are "compatible".
2768    return false;
2769
2770  bool Changed = false;
2771
2772  // FIXME: generalize to all exception handling blocks?
2773  if (!BB->isLandingPad())
2774    return Changed;
2775
2776  CompatibleSets Grouper;
2777
2778  // Record all the predecessors of this `landingpad`. As per verifier,
2779  // the only allowed predecessor is the unwind edge of an `invoke`.
2780  // We want to group "compatible" `invokes` into the same set to be merged.
2781  for (BasicBlock *PredBB : predecessors(BB))
2782    Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2783
2784  // And now, merge `invoke`s that were grouped together.
2785  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2786    if (Invokes.size() < 2)
2787      continue;
2788    Changed = true;
2789    MergeCompatibleInvokesImpl(Invokes, DTU);
2790  }
2791
2792  return Changed;
2793}
2794
2795namespace {
2796/// Track ephemeral values, which should be ignored for cost-modelling
2797/// purposes. Requires walking instructions in reverse order.
2798class EphemeralValueTracker {
2800
2801  /// An instruction is "ephemeral" if it is an llvm.assume, or if it has no
2802  /// side effects, is not a terminator, and is used only by instructions
2803  /// already recorded as ephemeral (which is why callers must walk the block
2804  /// in reverse order: users are visited before their operands' definitions).
2801  bool isEphemeral(const Instruction *I) {
2802    if (isa<AssumeInst>(I))
2803      return true;
2804    return !I->mayHaveSideEffects() && !I->isTerminator() &&
2805           all_of(I->users(), [&](const User *U) {
2806             return EphValues.count(cast<Instruction>(U));
2807           });
2808  }
2809
2810public:
2811  /// Record \p I if it is ephemeral; returns true iff it was recorded.
2811  bool track(const Instruction *I) {
2812    if (isEphemeral(I)) {
2813      EphValues.insert(I);
2814      return true;
2815    }
2816    return false;
2817  }
2818
2819  /// Whether \p I was previously recorded as ephemeral by track().
2819  bool contains(const Instruction *I) const { return EphValues.contains(I); }
2820};
2821} // namespace
2822
2823/// Determine if we can hoist or sink a sole store instruction out of a
2824/// conditional block.
2825///
2826/// We are looking for code like the following:
2827///   BrBB:
2828///     store i32 %add, i32* %arrayidx2
2829///     ... // No other stores or function calls (we could be calling a memory
2830///     ... // function).
2831///     %cmp = icmp ult %x, %y
2832///     br i1 %cmp, label %EndBB, label %ThenBB
2833///   ThenBB:
2834///     store i32 %add5, i32* %arrayidx2
2835///     br label EndBB
2836///   EndBB:
2837///     ...
2838///   We are going to transform this into:
2839///   BrBB:
2840///     store i32 %add, i32* %arrayidx2
2841///     ... //
2842///     %cmp = icmp ult %x, %y
2843///     %add.add5 = select i1 %cmp, i32 %add, %add5
2844///     store i32 %add.add5, i32* %arrayidx2
2845///     ...
2846///
2847/// \return The pointer to the value of the previous store if the store can be
2848/// hoisted into the predecessor block. nullptr otherwise.
2850                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
2851  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2852  if (!StoreToHoist)
2853    return nullptr;
2854
2855  // Volatile or atomic.
2856  if (!StoreToHoist->isSimple())
2857    return nullptr;
2858
2859  Value *StorePtr = StoreToHoist->getPointerOperand();
2860  Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2861
2862  // Look for a store to the same pointer in BrBB. Only scan backwards over a
2863  // small, fixed number of instructions.
2863  unsigned MaxNumInstToLookAt = 9;
2864  // Skip pseudo probe intrinsic calls which are not really killing any memory
2865  // accesses.
2866  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2867    if (!MaxNumInstToLookAt)
2868      break;
2869    --MaxNumInstToLookAt;
2870
2871    // Could be calling an instruction that affects memory like free().
2872    if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2873      return nullptr;
2874
2875    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2876      // Found the previous store to same location and type. Make sure it is
2877      // simple, to avoid introducing a spurious non-atomic write after an
2878      // atomic write.
2879      if (SI->getPointerOperand() == StorePtr &&
2880          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
2881          SI->getAlign() >= StoreToHoist->getAlign())
2882        // Found the previous store, return its value operand.
2883        return SI->getValueOperand();
2884      return nullptr; // Unknown store.
2885    }
2886
2887    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2888      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2889          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
2890        // Local objects (created by an `alloca` instruction) are always
2891        // writable, so once we are past a read from a location it is valid to
2892        // also write to that same location.
2893        // If the address of the local object never escapes the function, that
2894        // means it's never concurrently read or written, hence moving the store
2895        // from under the condition will not introduce a data race.
2896        auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2897        if (AI && !PointerMayBeCaptured(AI, false, true))
2898          // Found a previous load, return it.
2899          return LI;
2900      }
2901      // The load didn't work out, but we may still find a store.
2902    }
2903  }
2904
2905  return nullptr;
2906}
2907
2908/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2909/// converted to selects.
2910///
2911/// \returns true iff at least one PHI actually requires a select (i.e. its
2912/// incoming values from \p BB and \p ThenBB differ) and every such PHI can be
2913/// converted within the speculation budget; accumulates the select cost into
2914/// \p Cost and bumps \p SpeculatedInstructions for unfolded constant exprs.
2911                                            BasicBlock *EndBB,
2912                                            unsigned &SpeculatedInstructions,
2914                                            const TargetTransformInfo &TTI) {
2916      BB->getParent()->hasMinSize()
2919
2920  bool HaveRewritablePHIs = false;
2921  for (PHINode &PN : EndBB->phis()) {
2922    Value *OrigV = PN.getIncomingValueForBlock(BB);
2923    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2924
2925    // FIXME: Try to remove some of the duplication with
2926    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2927    if (ThenV == OrigV)
2928      continue;
2929
2930    Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2932
2933    // Don't convert to selects if we could remove undefined behavior instead.
2934    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2936      return false;
2937
2938    HaveRewritablePHIs = true;
2939    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2940    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2941    if (!OrigCE && !ThenCE)
2942      continue; // Known cheap (FIXME: Maybe not true for aggregates).
2943
2944    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2945    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2946    InstructionCost MaxCost =
2948    if (OrigCost + ThenCost > MaxCost)
2949      return false;
2950
2951    // Account for the cost of an unfolded ConstantExpr which could end up
2952    // getting expanded into Instructions.
2953    // FIXME: This doesn't account for how many operations are combined in the
2954    // constant expression.
2955    ++SpeculatedInstructions;
2956    if (SpeculatedInstructions > 1)
2957      return false;
2958  }
2959
2960  return HaveRewritablePHIs;
2961}
2962
2963/// Speculate a conditional basic block flattening the CFG.
2964///
2965/// Note that this is a very risky transform currently. Speculating
2966/// instructions like this is most often not desirable. Instead, there is an MI
2967/// pass which can do it with full awareness of the resource constraints.
2968/// However, some cases are "obvious" and we should do directly. An example of
2969/// this is speculating a single, reasonably cheap instruction.
2970///
2971/// There is only one distinct advantage to flattening the CFG at the IR level:
2972/// it makes very common but simplistic optimizations such as are common in
2973/// instcombine and the DAG combiner more powerful by removing CFG edges and
2974/// modeling their effects with easier to reason about SSA value graphs.
2975///
2976///
2977/// An illustration of this transform is turning this IR:
2978/// \code
2979///   BB:
2980///     %cmp = icmp ult %x, %y
2981///     br i1 %cmp, label %EndBB, label %ThenBB
2982///   ThenBB:
2983///     %sub = sub %x, %y
2984///     br label EndBB
2985///   EndBB:
2986///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
2987///     ...
2988/// \endcode
2989///
2990/// Into this IR:
2991/// \code
2992///   BB:
2993///     %cmp = icmp ult %x, %y
2994///     %sub = sub %x, %y
2995///     %cond = select i1 %cmp, 0, %sub
2996///     ...
2997/// \endcode
2998///
2999/// \returns true if the conditional block is removed.
3000bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
3001                                            BasicBlock *ThenBB) {
3002  if (!Options.SpeculateBlocks)
3003    return false;
3004
3005  // Be conservative for now. FP select instruction can often be expensive.
3006  Value *BrCond = BI->getCondition();
3007  if (isa<FCmpInst>(BrCond))
3008    return false;
3009
3010  BasicBlock *BB = BI->getParent();
3011  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3012  InstructionCost Budget =
3014
3015  // If ThenBB is actually on the false edge of the conditional branch, remember
3016  // to swap the select operands later.
3017  bool Invert = false;
3018  if (ThenBB != BI->getSuccessor(0)) {
3019    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3020    Invert = true;
3021  }
3022  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3023
3024  // If the branch is non-unpredictable, and is predicted to *not* branch to
3025  // the `then` block, then avoid speculating it.
3026  if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
3027    uint64_t TWeight, FWeight;
3028    if (extractBranchWeights(*BI, TWeight, FWeight) &&
3029        (TWeight + FWeight) != 0) {
3030      uint64_t EndWeight = Invert ? TWeight : FWeight;
3031      BranchProbability BIEndProb =
3032          BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3034      if (BIEndProb >= Likely)
3035        return false;
3036    }
3037  }
3038
3039  // Keep a count of how many times instructions are used within ThenBB when
3040  // they are candidates for sinking into ThenBB. Specifically:
3041  // - They are defined in BB, and
3042  // - They have no side effects, and
3043  // - All of their uses are in ThenBB.
3044  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3045
3046  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3047
3048  unsigned SpeculatedInstructions = 0;
3049  Value *SpeculatedStoreValue = nullptr;
3050  StoreInst *SpeculatedStore = nullptr;
3051  EphemeralValueTracker EphTracker;
3052  for (Instruction &I : reverse(drop_end(*ThenBB))) {
3053    // Skip debug info.
3054    if (isa<DbgInfoIntrinsic>(I)) {
3055      SpeculatedDbgIntrinsics.push_back(&I);
3056      continue;
3057    }
3058
3059    // Skip pseudo probes. The consequence is we lose track of the branch
3060    // probability for ThenBB, which is fine since the optimization here takes
3061    // place regardless of the branch probability.
3062    if (isa<PseudoProbeInst>(I)) {
3063      // The probe should be deleted so that it will not be over-counted when
3064      // the samples collected on the non-conditional path are counted towards
3065      // the conditional path. We leave it for the counts inference algorithm to
3066      // figure out a proper count for an unknown probe.
3067      SpeculatedDbgIntrinsics.push_back(&I);
3068      continue;
3069    }
3070
3071    // Ignore ephemeral values, they will be dropped by the transform.
3072    if (EphTracker.track(&I))
3073      continue;
3074
3075    // Only speculatively execute a single instruction (not counting the
3076    // terminator) for now.
3077    ++SpeculatedInstructions;
3078    if (SpeculatedInstructions > 1)
3079      return false;
3080
3081    // Don't hoist the instruction if it's unsafe or expensive.
3083        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3084                                  &I, BB, ThenBB, EndBB))))
3085      return false;
3086    if (!SpeculatedStoreValue &&
3089      return false;
3090
3091    // Store the store speculation candidate.
3092    if (SpeculatedStoreValue)
3093      SpeculatedStore = cast<StoreInst>(&I);
3094
3095    // Do not hoist the instruction if any of its operands are defined but not
3096    // used in BB. The transformation will prevent the operand from
3097    // being sunk into the use block.
3098    for (Use &Op : I.operands()) {
3099      Instruction *OpI = dyn_cast<Instruction>(Op);
3100      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3101        continue; // Not a candidate for sinking.
3102
3103      ++SinkCandidateUseCounts[OpI];
3104    }
3105  }
3106
3107  // Consider any sink candidates which are only used in ThenBB as costs for
3108  // speculation. Note, while we iterate over a DenseMap here, we are summing
3109  // and so iteration order isn't significant.
3110  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3111    if (Inst->hasNUses(Count)) {
3112      ++SpeculatedInstructions;
3113      if (SpeculatedInstructions > 1)
3114        return false;
3115    }
3116
3117  // Check that we can insert the selects and that it's not too expensive to do
3118  // so.
3119  bool Convert = SpeculatedStore != nullptr;
3121  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3122                                            SpeculatedInstructions,
3123                                            Cost, TTI);
3124  if (!Convert || Cost > Budget)
3125    return false;
3126
3127  // If we get here, we can hoist the instruction and if-convert.
3128  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3129
3130  // Insert a select of the value of the speculated store.
3131  if (SpeculatedStoreValue) {
3132    IRBuilder<NoFolder> Builder(BI);
3133    Value *OrigV = SpeculatedStore->getValueOperand();
3134    Value *TrueV = SpeculatedStore->getValueOperand();
3135    Value *FalseV = SpeculatedStoreValue;
3136    if (Invert)
3137      std::swap(TrueV, FalseV);
3138    Value *S = Builder.CreateSelect(
3139        BrCond, TrueV, FalseV, "spec.store.select", BI);
3140    SpeculatedStore->setOperand(0, S);
3141    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3142                                         SpeculatedStore->getDebugLoc());
3143    // The value stored is still conditional, but the store itself is now
3144    // unconditionally executed, so we must be sure that any linked dbg.assign
3145    // intrinsics are tracking the new stored value (the result of the
3146    // select). If we don't, and the store were to be removed by another pass
3147    // (e.g. DSE), then we'd eventually end up emitting a location describing
3148    // the conditional value, unconditionally.
3149    //
3150    // === Before this transformation ===
3151    // pred:
3152    //   store %one, %x.dest, !DIAssignID !1
3153    //   dbg.assign %one, "x", ..., !1, ...
3154    //   br %cond if.then
3155    //
3156    // if.then:
3157    //   store %two, %x.dest, !DIAssignID !2
3158    //   dbg.assign %two, "x", ..., !2, ...
3159    //
3160    // === After this transformation ===
3161    // pred:
3162    //   store %one, %x.dest, !DIAssignID !1
3163    //   dbg.assign %one, "x", ..., !1
3164    //   ...
3165    //   %merge = select %cond, %two, %one
3166    //   store %merge, %x.dest, !DIAssignID !2
3167    //   dbg.assign %merge, "x", ..., !2
3168    auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3169      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3170        DbgAssign->replaceVariableLocationOp(OrigV, S);
3171    };
3172    for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3173    for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
3174  }
3175
3176  // Metadata can be dependent on the condition we are hoisting above.
3177  // Strip all UB-implying metadata on the instruction. Drop the debug loc
3178  // to avoid making it appear as if the condition is a constant, which would
3179  // be misleading while debugging.
3180  // Similarly strip attributes that maybe dependent on condition we are
3181  // hoisting above.
3182  for (auto &I : make_early_inc_range(*ThenBB)) {
3183    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3184      // Don't update the DILocation of dbg.assign intrinsics.
3185      if (!isa<DbgAssignIntrinsic>(&I))
3186        I.setDebugLoc(DebugLoc());
3187    }
3188    I.dropUBImplyingAttrsAndMetadata();
3189
3190    // Drop ephemeral values.
3191    if (EphTracker.contains(&I)) {
3192      I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3193      I.eraseFromParent();
3194    }
3195  }
3196
3197  // Hoist the instructions.
3198  // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
3199  // to these instructions, in the same way that dbg.value intrinsics are
3200  // dropped at the end of this block.
3201  for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3202    for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3203      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3204      // equivalent).
3205      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3206          !DVR || !DVR->isDbgAssign())
3207        It.dropOneDbgRecord(&DR);
3208  BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3209             std::prev(ThenBB->end()));
3210
3211  // Insert selects and rewrite the PHI operands.
3212  IRBuilder<NoFolder> Builder(BI);
3213  for (PHINode &PN : EndBB->phis()) {
3214    unsigned OrigI = PN.getBasicBlockIndex(BB);
3215    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3216    Value *OrigV = PN.getIncomingValue(OrigI);
3217    Value *ThenV = PN.getIncomingValue(ThenI);
3218
3219    // Skip PHIs which are trivial.
3220    if (OrigV == ThenV)
3221      continue;
3222
3223    // Create a select whose true value is the speculatively executed value and
3224    // false value is the pre-existing value. Swap them if the branch
3225    // destinations were inverted.
3226    Value *TrueV = ThenV, *FalseV = OrigV;
3227    if (Invert)
3228      std::swap(TrueV, FalseV);
3229    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3230    PN.setIncomingValue(OrigI, V);
3231    PN.setIncomingValue(ThenI, V);
3232  }
3233
3234  // Remove speculated dbg intrinsics.
3235  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3236  // dbg value for the different flows and inserting it after the select.
3237  for (Instruction *I : SpeculatedDbgIntrinsics) {
3238    // We still want to know that an assignment took place so don't remove
3239    // dbg.assign intrinsics.
3240    if (!isa<DbgAssignIntrinsic>(I))
3241      I->eraseFromParent();
3242  }
3243
3244  ++NumSpeculations;
3245  return true;
3246}
3247
3248/// Return true if we can thread a branch across this block.
3250  int Size = 0;
3251  EphemeralValueTracker EphTracker;
3252
3253  // Walk the block in reverse so that we can identify ephemeral values
3254  // properly (values only feeding assumes).
3255  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3256    // Can't fold blocks that contain noduplicate or convergent calls.
3257    if (CallInst *CI = dyn_cast<CallInst>(&I))
3258      if (CI->cannotDuplicate() || CI->isConvergent())
3259        return false;
3260
3261    // Ignore ephemeral values which are deleted during codegen.
3262    // We will delete Phis while threading, so Phis should not be accounted in
3263    // block's size.
3264    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3265      if (Size++ > MaxSmallBlockSize)
3266        return false; // Don't clone large BB's.
3267    }
3268
3269    // We can only support instructions that do not define values that are
3270    // live outside of the current basic block.
3271    for (User *U : I.users()) {
3272      Instruction *UI = cast<Instruction>(U);
3273      if (UI->getParent() != BB || isa<PHINode>(UI))
3274        return false;
3275    }
3276
3277    // Looks ok, continue checking.
3278  }
3279
3280  return true;
3281}
3282
3284                                        BasicBlock *To) {
3285  // Don't look past the block defining the value, we might get the value from
3286  // a previous loop iteration.
3287  auto *I = dyn_cast<Instruction>(V);
3288  if (I && I->getParent() == To)
3289    return nullptr;
3290
3291  // We know the value if the From block branches on it.
3292  auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3293  if (BI && BI->isConditional() && BI->getCondition() == V &&
3294      BI->getSuccessor(0) != BI->getSuccessor(1))
3295    return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3297
3298  // The edge From->To tells us nothing about the value of V.
3298  return nullptr;
3299}
3300
3301/// If we have a conditional branch on something for which we know the constant
3302/// value in predecessors (e.g. a phi node in the current block), thread edges
3303/// from the predecessor to their ultimate destination.
3304///
3305/// \returns std::nullopt when an edge was threaded (the caller should re-run
3306/// to simplify any other known constants), true when the branch was folded in
3307/// a degenerate way without threading, and false when nothing changed.
3304static std::optional<bool>
3306                                           const DataLayout &DL,
3307                                           AssumptionCache *AC) {
3309  BasicBlock *BB = BI->getParent();
3310  Value *Cond = BI->getCondition();
3311  PHINode *PN = dyn_cast<PHINode>(Cond);
3312  if (PN && PN->getParent() == BB) {
3313    // Degenerate case of a single entry PHI.
3314    if (PN->getNumIncomingValues() == 1) {
3316      return true;
3317    }
3318
3319    for (Use &U : PN->incoming_values())
3320      if (auto *CB = dyn_cast<ConstantInt>(U))
3321        KnownValues[CB].insert(PN->getIncomingBlock(U));
3322  } else {
3323    for (BasicBlock *Pred : predecessors(BB)) {
3324      if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3325        KnownValues[CB].insert(Pred);
3326    }
3327  }
3328
3329  if (KnownValues.empty())
3330    return false;
3331
3332  // Now we know that this block has multiple preds and two succs.
3333  // Check that the block is small enough and values defined in the block are
3334  // not used outside of it.
3336    return false;
3337
3338  for (const auto &Pair : KnownValues) {
3339    // Okay, we now know that all edges from PredBB should be revectored to
3340    // branch to RealDest.
3341    ConstantInt *CB = Pair.first;
3342    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3343    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3344
3345    if (RealDest == BB)
3346      continue; // Skip self loops.
3347
3348    // Skip if the predecessor's terminator is an indirect branch.
3349    if (any_of(PredBBs, [](BasicBlock *PredBB) {
3350          return isa<IndirectBrInst>(PredBB->getTerminator());
3351        }))
3352      continue;
3353
3354    LLVM_DEBUG({
3355      dbgs() << "Condition " << *Cond << " in " << BB->getName()
3356             << " has value " << *Pair.first << " in predecessors:\n";
3357      for (const BasicBlock *PredBB : Pair.second)
3358        dbgs() << "  " << PredBB->getName() << "\n";
3359      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3360    });
3361
3362    // Split the predecessors we are threading into a new edge block. We'll
3363    // clone the instructions into this block, and then redirect it to RealDest.
3364    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3365
3366    // TODO: These just exist to reduce test diff, we can drop them if we like.
3367    EdgeBB->setName(RealDest->getName() + ".critedge");
3368    EdgeBB->moveBefore(RealDest);
3369
3370    // Update PHI nodes.
3371    AddPredecessorToBlock(RealDest, EdgeBB, BB);
3372
3373    // BB may have instructions that are being threaded over.  Clone these
3374    // instructions into EdgeBB.  We know that there will be no uses of the
3375    // cloned instructions outside of EdgeBB.
3376    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3377    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
3378    TranslateMap[Cond] = CB;
3379
3380    // RemoveDIs: track instructions that we optimise away while folding, so
3381    // that we can copy DbgVariableRecords from them later.
3382    BasicBlock::iterator SrcDbgCursor = BB->begin();
3383    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3384      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3385        TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3386        continue;
3387      }
3388      // Clone the instruction.
3389      Instruction *N = BBI->clone();
3390      // Insert the new instruction into its new home.
3391      N->insertInto(EdgeBB, InsertPt);
3392
3393      if (BBI->hasName())
3394        N->setName(BBI->getName() + ".c");
3395
3396      // Update operands due to translation.
3397      for (Use &Op : N->operands()) {
3398        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3399        if (PI != TranslateMap.end())
3400          Op = PI->second;
3401      }
3402
3403      // Check for trivial simplification.
3404      if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3405        if (!BBI->use_empty())
3406          TranslateMap[&*BBI] = V;
3407        if (!N->mayHaveSideEffects()) {
3408          N->eraseFromParent(); // Instruction folded away, don't need actual
3409                                // inst
3410          N = nullptr;
3411        }
3412      } else {
3413        if (!BBI->use_empty())
3414          TranslateMap[&*BBI] = N;
3415      }
3416      if (N) {
3417        // Copy all debug-info attached to instructions from the last we
3418        // successfully clone, up to this instruction (they might have been
3419        // folded away).
3420        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3421          N->cloneDebugInfoFrom(&*SrcDbgCursor);
3422        SrcDbgCursor = std::next(BBI);
3423        // Clone debug-info on this instruction too.
3424        N->cloneDebugInfoFrom(&*BBI);
3425
3426        // Register the new instruction with the assumption cache if necessary.
3427        if (auto *Assume = dyn_cast<AssumeInst>(N))
3428          if (AC)
3429            AC->registerAssumption(Assume);
3430      }
3431    }
3432
3433    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3434      InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3435    InsertPt->cloneDebugInfoFrom(BI);
3436
3437    BB->removePredecessor(EdgeBB);
3438    BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3439    EdgeBI->setSuccessor(0, RealDest);
3440    EdgeBI->setDebugLoc(BI->getDebugLoc());
3441
3442    if (DTU) {
3444      Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3445      Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3446      DTU->applyUpdates(Updates);
3447    }
3448
3449    // For simplicity, we created a separate basic block for the edge. Merge
3450    // it back into the predecessor if possible. This not only avoids
3451    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3452    // bypass the check for trivial cycles above.
3453    MergeBlockIntoPredecessor(EdgeBB, DTU);
3454
3455    // Signal repeat, simplifying any other constants.
3456    return std::nullopt;
3457  }
3458
3459  return false;
3460}
3461
3463                                                    DomTreeUpdater *DTU,
3464                                                    const DataLayout &DL,
3465                                                    AssumptionCache *AC) {
3466  std::optional<bool> Result;
3467  bool EverChanged = false;
3468  do {
3469    // Note that std::nullopt means "we changed things, but recurse further."
3470    Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3471    EverChanged |= Result == std::nullopt || *Result;
3472  } while (Result == std::nullopt);
3473  return EverChanged;
3474}
3475
3476/// Given a BB that starts with the specified two-entry PHI node,
3477/// see if we can eliminate it.
// NOTE(review): the signature line (original line 3478) was dropped by the
// renderer; per the parameters below it takes the PHI node plus DT-updater,
// data layout and speculation flag -- confirm against the checked-in source.
3479 DomTreeUpdater *DTU, const DataLayout &DL,
3480 bool SpeculateUnpredictables) {
3481 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3482 // statement", which has a very simple dominance structure. Basically, we
3483 // are trying to find the condition that is being branched on, which
3484 // subsequently causes this merge to happen. We really want control
3485 // dependence information for this check, but simplifycfg can't keep it up
3486 // to date, and this catches most of the cases we care about anyway.
3487 BasicBlock *BB = PN->getParent();
3488
3489 BasicBlock *IfTrue, *IfFalse;
3490 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3491 if (!DomBI)
3492 return false;
3493 Value *IfCond = DomBI->getCondition();
3494 // Don't bother if the branch will be constant folded trivially.
3495 if (isa<ConstantInt>(IfCond))
3496 return false;
3497
3498 BasicBlock *DomBlock = DomBI->getParent();
// Collect the side blocks that branch unconditionally into BB; their
// instructions are the candidates for speculation.
// NOTE(review): the declaration of IfBlocks and the head of the copy_if call
// (original lines 3499-3500) were dropped by the renderer.
3501 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3502 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3503 });
3504 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3505 "Will have either one or two blocks to speculate.");
3506
3507 // If the branch is non-unpredictable, see if we either predictably jump to
3508 // the merge bb (if we have only a single 'then' block), or if we predictably
3509 // jump to one specific 'then' block (if we have two of them).
3510 // It isn't beneficial to speculatively execute the code
3511 // from the block that we know is predictably not entered.
3512 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3513 if (!IsUnpredictable) {
3514 uint64_t TWeight, FWeight;
3515 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3516 (TWeight + FWeight) != 0) {
3517 BranchProbability BITrueProb =
3518 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
// NOTE(review): the definition of 'Likely' (original line 3519) was dropped
// by the renderer; presumably it is the target's predictable-branch
// threshold -- verify upstream.
3520 BranchProbability BIFalseProb = BITrueProb.getCompl();
3521 if (IfBlocks.size() == 1) {
3522 BranchProbability BIBBProb =
3523 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3524 if (BIBBProb >= Likely)
3525 return false;
3526 } else {
3527 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3528 return false;
3529 }
3530 }
3531 }
3532
3533 // Don't try to fold an unreachable block. For example, the phi node itself
3534 // can't be the candidate if-condition for a select that we want to form.
3535 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3536 if (IfCondPhiInst->getParent() == BB)
3537 return false;
3538
3539 // Okay, we found that we can merge this two-entry phi node into a select.
3540 // Doing so would require us to fold *all* two entry phi nodes in this block.
3541 // At some point this becomes non-profitable (particularly if the target
3542 // doesn't support cmov's). Only do this transformation if there are two or
3543 // fewer PHI nodes in this block.
3544 unsigned NumPhis = 0;
3545 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3546 if (NumPhis > 2)
3547 return false;
3548
3549 // Loop over the PHI's seeing if we can promote them all to select
3550 // instructions. While we are at it, keep track of the instructions
3551 // that need to be moved to the dominating block.
3552 SmallPtrSet<Instruction *, 4> AggressiveInsts;
// NOTE(review): the initialization of the running 'Cost' and the right-hand
// side of 'Budget' (original lines 3553 and 3555) were dropped by the
// renderer.
3554 InstructionCost Budget =
3556 if (SpeculateUnpredictables && IsUnpredictable)
3557 Budget += TTI.getBranchMispredictPenalty();
3558
3559 bool Changed = false;
3560 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3561 PHINode *PN = cast<PHINode>(II++);
// Trivially-simplifiable PHIs are folded away outright instead of being
// turned into selects.
3562 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3563 PN->replaceAllUsesWith(V);
3564 PN->eraseFromParent();
3565 Changed = true;
3566 continue;
3567 }
3568
// Both incoming values must be computable above the merge point within the
// speculation budget, otherwise the whole transform is abandoned.
3569 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
3570 Cost, Budget, TTI) ||
3571 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
3572 Cost, Budget, TTI))
3573 return Changed;
3574 }
3575
3576 // If we folded the first phi, PN dangles at this point. Refresh it. If
3577 // we ran out of PHIs then we simplified them all.
3578 PN = dyn_cast<PHINode>(BB->begin());
3579 if (!PN)
3580 return true;
3581
3582 // Return true if at least one of these is a 'not', and another is either
3583 // a 'not' too, or a constant.
3584 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3585 if (!match(V0, m_Not(m_Value())))
3586 std::swap(V0, V1);
3587 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3588 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3589 };
3590
3591 // Don't fold i1 branches on PHIs which contain binary operators or
3592 // (possibly inverted) select form of or/ands, unless one of
3593 // the incoming values is an 'not' and another one is freely invertible.
3594 // These can often be turned into switches and other things.
3595 auto IsBinOpOrAnd = [](Value *V) {
3596 return match(
3597 V, m_CombineOr(
3598 m_BinOp(),
// NOTE(review): the select-form or/and matcher arms (original lines
// 3599-3600) were dropped by the renderer.
3601 };
3602 if (PN->getType()->isIntegerTy(1) &&
3603 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3604 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3605 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3606 PN->getIncomingValue(1)))
3607 return Changed;
3608
3609 // If all PHI nodes are promotable, check to make sure that all instructions
3610 // in the predecessor blocks can be promoted as well. If not, we won't be able
3611 // to get rid of the control flow, so it's not worth promoting to select
3612 // instructions.
3613 for (BasicBlock *IfBlock : IfBlocks)
3614 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3615 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3616 // This is not an aggressive instruction that we can promote.
3617 // Because of this, we won't be able to get rid of the control flow, so
3618 // the xform is not worth it.
3619 return Changed;
3620 }
3621
3622 // If either of the blocks has its address taken, we can't do this fold.
3623 if (any_of(IfBlocks,
3624 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3625 return Changed;
3626
3627 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3628 if (IsUnpredictable) dbgs() << " (unpredictable)";
3629 dbgs() << " T: " << IfTrue->getName()
3630 << " F: " << IfFalse->getName() << "\n");
3631
3632 // If we can still promote the PHI nodes after this gauntlet of tests,
3633 // do all of the PHI's now.
3634
3635 // Move all 'aggressive' instructions, which are defined in the
3636 // conditional parts of the if's up to the dominating block.
3637 for (BasicBlock *IfBlock : IfBlocks)
3638 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3639
3640 IRBuilder<NoFolder> Builder(DomBI);
3641 // Propagate fast-math-flags from phi nodes to replacement selects.
3642 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3643 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3644 if (isa<FPMathOperator>(PN))
3645 Builder.setFastMathFlags(PN->getFastMathFlags());
3646
3647 // Change the PHI node into a select instruction.
3648 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3649 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3650
3651 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3652 PN->replaceAllUsesWith(Sel);
3653 Sel->takeName(PN);
3654 PN->eraseFromParent();
3655 }
3656
3657 // At this point, all IfBlocks are empty, so our if statement
3658 // has been flattened. Change DomBlock to jump directly to our new block to
3659 // avoid other simplifycfg's kicking in on the diamond.
3660 Builder.CreateBr(BB);
3661
// NOTE(review): the declaration of the 'Updates' vector (original line 3662)
// was dropped by the renderer.
3663 if (DTU) {
3664 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3665 for (auto *Successor : successors(DomBlock))
3666 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3667 }
3668
// The old conditional branch is erased before the recorded dominator-tree
// updates are applied.
3669 DomBI->eraseFromParent();
3670 if (DTU)
3671 DTU->applyUpdates(Updates);
3672
3673 return true;
3674}
3675
3677 Instruction::BinaryOps Opc, Value *LHS,
3678 Value *RHS, const Twine &Name = "") {
3679 // Try to relax logical op to binary op.
3680 if (impliesPoison(RHS, LHS))
3681 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3682 if (Opc == Instruction::And)
3683 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3684 if (Opc == Instruction::Or)
3685 return Builder.CreateLogicalOr(LHS, RHS, Name);
3686 llvm_unreachable("Invalid logical opcode");
3687}
3688
3689/// Return true if either PBI or BI has branch weight available, and store
3690/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3691/// not have branch weight, use 1:1 as its weight.
3693 uint64_t &PredTrueWeight,
3694 uint64_t &PredFalseWeight,
3695 uint64_t &SuccTrueWeight,
3696 uint64_t &SuccFalseWeight) {
3697 bool PredHasWeights =
3698 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3699 bool SuccHasWeights =
3700 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3701 if (PredHasWeights || SuccHasWeights) {
3702 if (!PredHasWeights)
3703 PredTrueWeight = PredFalseWeight = 1;
3704 if (!SuccHasWeights)
3705 SuccTrueWeight = SuccFalseWeight = 1;
3706 return true;
3707 } else {
3708 return false;
3709 }
3710}
3711
3712/// Determine if the two branches share a common destination and deduce a glue
3713/// that joins the branches' conditions to arrive at the common destination if
3714/// that would be profitable.
3715static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
// NOTE(review): the function-name/parameter line (original line 3716) was
// dropped by the renderer. The returned tuple is (common successor,
// combining opcode, whether the predecessor's condition must be inverted).
3717 const TargetTransformInfo *TTI) {
3718 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3719 "Both blocks must end with a conditional branches.");
// NOTE(review): the first half of this assert (original line 3720) was
// dropped by the renderer.
3721 "PredBB must be a predecessor of BB.");
3722
3723 // We have the potential to fold the conditions together, but if the
3724 // predecessor branch is predictable, we may not want to merge them.
3725 uint64_t PTWeight, PFWeight;
3726 BranchProbability PBITrueProb, Likely;
3727 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3728 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3729 (PTWeight + PFWeight) != 0) {
3730 PBITrueProb =
3731 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
// NOTE(review): the assignment to 'Likely' (original line 3732) was dropped
// by the renderer; presumably it queries TTI's predictable-branch
// threshold -- verify upstream.
3733 }
3734
// Four cases depending on which successor slot the two branches share; the
// chosen opcode joins the conditions and the bool flags whether PBI's
// condition needs inverting first.
3735 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3736 // Speculate the 2nd condition unless the 1st is probably true.
3737 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3738 return {{BI->getSuccessor(0), Instruction::Or, false}};
3739 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3740 // Speculate the 2nd condition unless the 1st is probably false.
3741 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3742 return {{BI->getSuccessor(1), Instruction::And, false}};
3743 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3744 // Speculate the 2nd condition unless the 1st is probably true.
3745 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3746 return {{BI->getSuccessor(1), Instruction::And, true}};
3747 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3748 // Speculate the 2nd condition unless the 1st is probably false.
3749 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3750 return {{BI->getSuccessor(0), Instruction::Or, true}};
3751 }
// No shared destination (or unprofitable): caller must not fold.
3752 return std::nullopt;
3753}
3754
// performBranchToCommonDestFolding: fold BI (the conditional branch ending
// BB) into its predecessor's conditional branch PBI by combining the two
// conditions; always returns true (the transform is unconditional once
// called).
// NOTE(review): the signature line (original line 3755) was dropped by the
// renderer.
3756 DomTreeUpdater *DTU,
3757 MemorySSAUpdater *MSSAU,
3758 const TargetTransformInfo *TTI) {
3759 BasicBlock *BB = BI->getParent();
3760 BasicBlock *PredBlock = PBI->getParent();
3761
3762 // Determine if the two branches share a common destination.
3763 BasicBlock *CommonSucc;
// NOTE(review): the declaration of 'Opc' (original line 3764) was dropped by
// the renderer.
3765 bool InvertPredCond;
3766 std::tie(CommonSucc, Opc, InvertPredCond) =
// NOTE(review): the right-hand side of this std::tie assignment (original
// line 3767) was dropped; presumably it dereferences the recipe returned by
// shouldFoldCondBranchesToCommonDestination -- verify upstream.
3768
3769 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3770
3771 IRBuilder<> Builder(PBI);
3772 // The builder is used to create instructions to eliminate the branch in BB.
3773 // If BB's terminator has !annotation metadata, add it to the new
3774 // instructions.
// NOTE(review): the head of the CollectMetadataToCopy call (original line
// 3775) was dropped by the renderer.
3776 {LLVMContext::MD_annotation});
3777
3778 // If we need to invert the condition in the pred block to match, do so now.
3779 if (InvertPredCond) {
3780 InvertBranch(PBI, Builder);
3781 }
3782
3783 BasicBlock *UniqueSucc =
3784 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3785
3786 // Before cloning instructions, notify the successor basic block that it
3787 // is about to have a new predecessor. This will update PHI nodes,
3788 // which will allow us to update live-out uses of bonus instructions.
3789 AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3790
3791 // Try to update branch weights.
3792 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3793 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3794 SuccTrueWeight, SuccFalseWeight)) {
3795 SmallVector<uint64_t, 8> NewWeights;
3796
3797 if (PBI->getSuccessor(0) == BB) {
3798 // PBI: br i1 %x, BB, FalseDest
3799 // BI: br i1 %y, UniqueSucc, FalseDest
3800 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3801 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3802 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3803 // TrueWeight for PBI * FalseWeight for BI.
3804 // We assume that total weights of a BranchInst can fit into 32 bits.
3805 // Therefore, we will not have overflow using 64-bit arithmetic.
3806 NewWeights.push_back(PredFalseWeight *
3807 (SuccFalseWeight + SuccTrueWeight) +
3808 PredTrueWeight * SuccFalseWeight);
3809 } else {
3810 // PBI: br i1 %x, TrueDest, BB
3811 // BI: br i1 %y, TrueDest, UniqueSucc
3812 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3813 // FalseWeight for PBI * TrueWeight for BI.
3814 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3815 PredFalseWeight * SuccTrueWeight);
3816 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3817 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3818 }
3819
3820 // Halve the weights if any of them cannot fit in an uint32_t
3821 FitWeights(NewWeights);
3822
3823 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3824 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
3825
3826 // TODO: If BB is reachable from all paths through PredBlock, then we
3827 // could replace PBI's branch probabilities with BI's.
3828 } else
// With no weights available, any stale profile metadata on PBI would now be
// wrong; drop it.
3829 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3830
3831 // Now, update the CFG.
3832 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3833
3834 if (DTU)
3835 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3836 {DominatorTree::Delete, PredBlock, BB}});
3837
3838 // If BI was a loop latch, it may have had associated loop metadata.
3839 // We need to copy it to the new latch, that is, PBI.
3840 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3841 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3842
3843 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): the call that clones BB's bonus instructions into PredBlock
// and populates VMap (original line 3844) was dropped by the renderer.
3845
3846 Module *M = BB->getModule();
3847
3848 if (PredBlock->IsNewDbgInfoFormat) {
3849 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
3850 for (DbgVariableRecord &DVR :
// NOTE(review): the debug-record range expression (original line 3851) was
// dropped by the renderer.
3852 RemapDbgRecord(M, &DVR, VMap,
// NOTE(review): the remap-flags argument closing this call (original line
// 3853) was dropped by the renderer.
3854 }
3855 }
3856
3857 // Now that the Cond was cloned into the predecessor basic block,
3858 // or/and the two conditions together.
3859 Value *BICond = VMap[BI->getCondition()];
3860 PBI->setCondition(
3861 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3862
3863 ++NumFoldBranchToCommonDest;
3864 return true;
3865}
3866
3867/// Return if an instruction's type or any of its operands' types are a vector
3868/// type.
3869static bool isVectorOp(Instruction &I) {
3870 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3871 return U->getType()->isVectorTy();
3872 });
3873}
3874
3875/// If this basic block is simple enough, and if a predecessor branches to us
3876/// and one of our successors, fold the block into the predecessor and use
3877/// logical operations to pick the right destination.
// NOTE(review): the signature line (original line 3878) was dropped by the
// renderer.
3879 MemorySSAUpdater *MSSAU,
3880 const TargetTransformInfo *TTI,
3881 unsigned BonusInstThreshold) {
3882 // If this block ends with an unconditional branch,
3883 // let SpeculativelyExecuteBB() deal with it.
3884 if (!BI->isConditional())
3885 return false;
3886
3887 BasicBlock *BB = BI->getParent();
// NOTE(review): original lines 3888-3890 were dropped by the renderer;
// presumably they set up the TTI cost kind used below -- verify upstream.
3891
3892 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3893
// Only fold when the condition is a cmp/binop/select defined in this block
// with a single use (the branch), so it can be cloned cheaply.
3894 if (!Cond ||
3895 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3896 !isa<SelectInst>(Cond)) ||
3897 Cond->getParent() != BB || !Cond->hasOneUse())
3898 return false;
3899
3900 // Finally, don't infinitely unroll conditional loops.
3901 if (is_contained(successors(BB), BB))
3902 return false;
3903
3904 // With which predecessors will we want to deal with?
// NOTE(review): the declaration of 'Preds' (original line 3905) was dropped
// by the renderer.
3906 for (BasicBlock *PredBlock : predecessors(BB)) {
3907 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3908
3909 // Check that we have two conditional branches. If there is a PHI node in
3910 // the common successor, verify that the same value flows in from both
3911 // blocks.
3912 if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3913 continue;
3914
3915 // Determine if the two branches share a common destination.
3916 BasicBlock *CommonSucc;
// NOTE(review): the declaration of 'Opc' (original line 3917) was dropped by
// the renderer.
3918 bool InvertPredCond;
3919 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3920 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3921 else
3922 continue;
3923
3924 // Check the cost of inserting the necessary logic before performing the
3925 // transformation.
3926 if (TTI) {
3927 Type *Ty = BI->getCondition()->getType();
// NOTE(review): the initialization of 'Cost' (original line 3928) was
// dropped by the renderer.
3929 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3930 !isa<CmpInst>(PBI->getCondition())))
3931 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3932
// NOTE(review): the threshold comparison guarding this 'continue' (original
// line 3933) was dropped by the renderer.
3934 continue;
3935 }
3936
3937 // Ok, we do want to deal with this predecessor. Record it.
3938 Preds.emplace_back(PredBlock);
3939 }
3940
3941 // If there aren't any predecessors into which we can fold,
3942 // don't bother checking the cost.
3943 if (Preds.empty())
3944 return false;
3945
3946 // Only allow this transformation if computing the condition doesn't involve
3947 // too many instructions and these involved instructions can be executed
3948 // unconditionally. We denote all involved instructions except the condition
3949 // as "bonus instructions", and only allow this transformation when the
3950 // number of the bonus instructions we'll need to create when cloning into
3951 // each predecessor does not exceed a certain threshold.
3952 unsigned NumBonusInsts = 0;
3953 bool SawVectorOp = false;
3954 const unsigned PredCount = Preds.size();
3955 for (Instruction &I : *BB) {
3956 // Don't check the branch condition comparison itself.
3957 if (&I == Cond)
3958 continue;
3959 // Ignore dbg intrinsics, and the terminator.
3960 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3961 continue;
3962 // I must be safe to execute unconditionally.
// NOTE(review): the speculation-safety check guarding this 'return false'
// (original line 3963) was dropped by the renderer.
3964 return false;
3965 SawVectorOp |= isVectorOp(I);
3966
3967 // Account for the cost of duplicating this instruction into each
3968 // predecessor. Ignore free instructions.
3969 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
// NOTE(review): the right-hand side of this comparison (original line 3970,
// presumably the "free" cost constant) was dropped by the renderer.
3971 NumBonusInsts += PredCount;
3972
3973 // Early exits once we reach the limit.
3974 if (NumBonusInsts >
3975 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3976 return false;
3977 }
3978
3979 auto IsBCSSAUse = [BB, &I](Use &U) {
3980 auto *UI = cast<Instruction>(U.getUser());
3981 if (auto *PN = dyn_cast<PHINode>(UI))
3982 return PN->getIncomingBlock(U) == BB;
3983 return UI->getParent() == BB && I.comesBefore(UI);
3984 };
3985
3986 // Does this instruction require rewriting of uses?
3987 if (!all_of(I.uses(), IsBCSSAUse))
3988 return false;
3989 }
3990 if (NumBonusInsts >
3991 BonusInstThreshold *
3992 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3993 return false;
3994
3995 // Ok, we have the budget. Perform the transformation.
// Note: only the first recorded predecessor is folded per invocation; the
// loop returns immediately and the caller is expected to re-run.
3996 for (BasicBlock *PredBlock : Preds) {
3997 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
3998 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
3999 }
4000 return false;
4001}
4002
4003// If there is only one store in BB1 and BB2, return it, otherwise return
4004// nullptr.
4006 StoreInst *S = nullptr;
4007 for (auto *BB : {BB1, BB2}) {
4008 if (!BB)
4009 continue;
4010 for (auto &I : *BB)
4011 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4012 if (S)
4013 // Multiple stores seen.
4014 return nullptr;
4015 else
4016 S = SI;
4017 }
4018 }
4019 return S;
4020}
4021
4023 Value *AlternativeV = nullptr) {
4024 // PHI is going to be a PHI node that allows the value V that is defined in
4025 // BB to be referenced in BB's only successor.
4026 //
4027 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4028 // doesn't matter to us what the other operand is (it'll never get used). We
4029 // could just create a new PHI with an undef incoming value, but that could
4030 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4031 // other PHI. So here we directly look for some PHI in BB's successor with V
4032 // as an incoming operand. If we find one, we use it, else we create a new
4033 // one.
4034 //
4035 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4036 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4037 // where OtherBB is the single other predecessor of BB's only successor.
4038 PHINode *PHI = nullptr;
4039 BasicBlock *Succ = BB->getSingleSuccessor();
4040
4041 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4042 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4043 PHI = cast<PHINode>(I);
4044 if (!AlternativeV)
4045 break;
4046
4047 assert(Succ->hasNPredecessors(2));
4048 auto PredI = pred_begin(Succ);
4049 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4050 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4051 break;
4052 PHI = nullptr;
4053 }
4054 if (PHI)
4055 return PHI;
4056
4057 // If V is not an instruction defined in BB, just return it.
4058 if (!AlternativeV &&
4059 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4060 return V;
4061
4062 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4063 PHI->insertBefore(Succ->begin());
4064 PHI->addIncoming(V, BB);
4065 for (BasicBlock *PredBB : predecessors(Succ))
4066 if (PredBB != BB)
4067 PHI->addIncoming(
4068 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4069 return PHI;
4070}
4071
// mergeConditionalStoreToAddress: given the P/Q diamond-or-triangle blocks
// and a common store Address, sink the matching conditional stores into
// PostBB as one store predicated on the combined condition.
// NOTE(review): the function-name line (original line 4072) was dropped by
// the renderer.
4073 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4074 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4075 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4076 // For every pointer, there must be exactly two stores, one coming from
4077 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4078 // store (to any address) in PTB,PFB or QTB,QFB.
4079 // FIXME: We could relax this restriction with a bit more work and performance
4080 // testing.
4081 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4082 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4083 if (!PStore || !QStore)
4084 return false;
4085
4086 // Now check the stores are compatible.
4087 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4088 PStore->getValueOperand()->getType() !=
4089 QStore->getValueOperand()->getType())
4090 return false;
4091
4092 // Check that sinking the store won't cause program behavior changes. Sinking
4093 // the store out of the Q blocks won't change any behavior as we're sinking
4094 // from a block to its unconditional successor. But we're moving a store from
4095 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4096 // So we need to check that there are no aliasing loads or stores in
4097 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4098 // operations between PStore and the end of its parent block.
4099 //
4100 // The ideal way to do this is to query AliasAnalysis, but we don't
4101 // preserve AA currently so that is dangerous. Be super safe and just
4102 // check there are no other memory operations at all.
4103 for (auto &I : *QFB->getSinglePredecessor())
4104 if (I.mayReadOrWriteMemory())
4105 return false;
4106 for (auto &I : *QFB)
4107 if (&I != QStore && I.mayReadOrWriteMemory())
4108 return false;
4109 if (QTB)
4110 for (auto &I : *QTB)
4111 if (&I != QStore && I.mayReadOrWriteMemory())
4112 return false;
4113 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4114 I != E; ++I)
4115 if (&*I != PStore && I->mayReadOrWriteMemory())
4116 return false;
4117
4118 // If we're not in aggressive mode, we only optimize if we have some
4119 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4120 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4121 if (!BB)
4122 return true;
4123 // Heuristic: if the block can be if-converted/phi-folded and the
4124 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4125 // thread this store.
// NOTE(review): the initialization of 'Cost' and the right-hand side of
// 'Budget' (original lines 4126 and 4128) were dropped by the renderer.
4127 InstructionCost Budget =
4129 for (auto &I : BB->instructionsWithoutDebug(false)) {
4130 // Consider terminator instruction to be free.
4131 if (I.isTerminator())
4132 continue;
4133 // If this is one of the stores that we want to speculate out of this BB,
4134 // then don't count its cost, consider it to be free.
4135 if (auto *S = dyn_cast<StoreInst>(&I))
// NOTE(review): this condition looks truncated by the renderer -- upstream
// presumably tests membership of S in FreeStores; verify.
4136 if (llvm::find(FreeStores, S))
4137 continue;
4138 // Else, we have a white-list of instructions that we are ok speculating.
4139 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4140 return false; // Not in white-list - not worthwhile folding.
4141 // And finally, if this is a non-free instruction that we are okay
4142 // speculating, ensure that we consider the speculation budget.
4143 Cost +=
// NOTE(review): the right-hand side of this cost accumulation (original line
// 4144) was dropped by the renderer.
4145 if (Cost > Budget)
4146 return false; // Eagerly refuse to fold as soon as we're out of budget.
4147 }
4148 assert(Cost <= Budget &&
4149 "When we run out of budget we will eagerly return from within the "
4150 "per-instruction loop.");
4151 return true;
4152 };
4153
4154 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): the aggressive-mode guard opening this condition (original
// line 4155) was dropped by the renderer.
4156 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4157 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4158 return false;
4159
4160 // If PostBB has more than two predecessors, we need to split it so we can
4161 // sink the store.
4162 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4163 // We know that QFB's only successor is PostBB. And QFB has a single
4164 // predecessor. If QTB exists, then its only successor is also PostBB.
4165 // If QTB does not exist, then QFB's only predecessor has a conditional
4166 // branch to QFB and PostBB.
4167 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4168 BasicBlock *NewBB =
4169 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4170 if (!NewBB)
4171 return false;
4172 PostBB = NewBB;
4173 }
4174
4175 // OK, we're going to sink the stores to PostBB. The store has to be
4176 // conditional though, so first create the predicate.
4177 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4178 ->getCondition();
4179 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4180 ->getCondition();
4181
// NOTE(review): the declarations of the PPHI/QPHI values (original lines
// 4182 and 4184) were dropped by the renderer; presumably they route the
// stored values into PostBB via ensureValueAvailableInSuccessor -- verify.
4183 PStore->getParent());
4185 QStore->getParent(), PPHI);
4186
4187 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4188 IRBuilder<> QB(PostBB, PostBBFirst);
4189 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4190
// Each branch's condition is normalized so "true" means its store executed,
// then the two are OR'd into the combined predicate.
4191 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4192 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4193
4194 if (InvertPCond)
4195 PPred = QB.CreateNot(PPred);
4196 if (InvertQCond)
4197 QPred = QB.CreateNot(QPred);
4198 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4199
4200 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4201 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4202 /*Unreachable=*/false,
4203 /*BranchWeights=*/nullptr, DTU);
4204
4205 QB.SetInsertPoint(T);
4206 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4207 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4208 // Choose the minimum alignment. If we could prove both stores execute, we
4209 // could use biggest one. In this case, though, we only know that one of the
4210 // stores executes. And we don't know it's safe to take the alignment from a
4211 // store that doesn't execute.
4212 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4213
4214 QStore->eraseFromParent();
4215 PStore->eraseFromParent();
4216
4217 return true;
4218}
4219
// mergeConditionalStores: recognize a two-level diamond/triangle (PBI then
// QBI) and merge conditional stores to common addresses via
// mergeConditionalStoreToAddress.
// NOTE(review): the signature line (original line 4220) was dropped by the
// renderer.
4221 DomTreeUpdater *DTU, const DataLayout &DL,
4222 const TargetTransformInfo &TTI) {
4223 // The intention here is to find diamonds or triangles (see below) where each
4224 // conditional block contains a store to the same address. Both of these
4225 // stores are conditional, so they can't be unconditionally sunk. But it may
4226 // be profitable to speculatively sink the stores into one merged store at the
4227 // end, and predicate the merged store on the union of the two conditions of
4228 // PBI and QBI.
4229 //
4230 // This can reduce the number of stores executed if both of the conditions are
4231 // true, and can allow the blocks to become small enough to be if-converted.
4232 // This optimization will also chain, so that ladders of test-and-set
4233 // sequences can be if-converted away.
4234 //
4235 // We only deal with simple diamonds or triangles:
4236 //
4237 // PBI or PBI or a combination of the two
4238 // / \ | \
4239 // PTB PFB | PFB
4240 // \ / | /
4241 // QBI QBI
4242 // / \ | \
4243 // QTB QFB | QFB
4244 // \ / | /
4245 // PostBB PostBB
4246 //
4247 // We model triangles as a type of diamond with a nullptr "true" block.
4248 // Triangles are canonicalized so that the fallthrough edge is represented by
4249 // a true condition, as in the diagram above.
4250 BasicBlock *PTB = PBI->getSuccessor(0);
4251 BasicBlock *PFB = PBI->getSuccessor(1);
4252 BasicBlock *QTB = QBI->getSuccessor(0);
4253 BasicBlock *QFB = QBI->getSuccessor(1);
4254 BasicBlock *PostBB = QFB->getSingleSuccessor();
4255
4256 // Make sure we have a good guess for PostBB. If QTB's only successor is
4257 // QFB, then QFB is a better PostBB.
4258 if (QTB->getSingleSuccessor() == QFB)
4259 PostBB = QFB;
4260
4261 // If we couldn't find a good PostBB, stop.
4262 if (!PostBB)
4263 return false;
4264
4265 bool InvertPCond = false, InvertQCond = false;
4266 // Canonicalize fallthroughs to the true branches.
4267 if (PFB == QBI->getParent()) {
4268 std::swap(PFB, PTB);
4269 InvertPCond = true;
4270 }
4271 if (QFB == PostBB) {
4272 std::swap(QFB, QTB);
4273 InvertQCond = true;
4274 }
4275
4276 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4277 // and QFB may not. Model fallthroughs as a nullptr block.
4278 if (PTB == QBI->getParent())
4279 PTB = nullptr;
4280 if (QTB == PostBB)
4281 QTB = nullptr;
4282
4283 // Legality bailouts. We must have at least the non-fallthrough blocks and
4284 // the post-dominating block, and the non-fallthroughs must only have one
4285 // predecessor.
4286 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4287 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4288 };
4289 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4290 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4291 return false;
4292 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4293 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4294 return false;
4295 if (!QBI->getParent()->hasNUses(2))
4296 return false;
4297
4298 // OK, this is a sequence of two diamonds or triangles.
4299 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4300 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4301 for (auto *BB : {PTB, PFB}) {
4302 if (!BB)
4303 continue;
4304 for (auto &I : *BB)
4305 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4306 PStoreAddresses.insert(SI->getPointerOperand());
4307 }
4308 for (auto *BB : {QTB, QFB}) {
4309 if (!BB)
4310 continue;
4311 for (auto &I : *BB)
4312 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4313 QStoreAddresses.insert(SI->getPointerOperand());
4314 }
4315
4316 set_intersect(PStoreAddresses, QStoreAddresses);
4317 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4318 // clear what it contains.
4319 auto &CommonAddresses = PStoreAddresses;
4320
4321 bool Changed = false;
4322 for (auto *Address : CommonAddresses)
4323 Changed |=
4324 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4325 InvertPCond, InvertQCond, DTU, DL, TTI);
4326 return Changed;
4327}
4328
4329/// If the previous block ended with a widenable branch, determine if reusing
4330/// the target block is profitable and legal. This will have the effect of
4331/// "widening" PBI, but doesn't require us to reason about hoisting safety.
///
/// On success, the successor edge of BI that leads to a deoptimize block is
/// redirected to PBI's false successor (the widenable branch's fallback
/// block) and the DomTree updater, if provided, is notified. Returns true
/// iff BI was rewritten.
 4333 DomTreeUpdater *DTU) {
 4334 // TODO: This can be generalized in two important ways:
 4335 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
 4336 // values from the PBI edge.
 4337 // 2) We can sink side effecting instructions into BI's fallthrough
 4338 // successor provided they don't contribute to computation of
 4339 // BI's condition.
 4340 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
 4341 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
 // PBI must be a widenable branch whose true edge is the only way into BI's
 // block; otherwise we cannot claim the widenable guard dominates BI.
 4342 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
 4343 !BI->getParent()->getSinglePredecessor())
 4344 return false;
 4345 if (!IfFalseBB->phis().empty())
 4346 return false; // TODO
 4347 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
 4348 // may undo the transform done here.
 4349 // TODO: There might be a more fine-grained solution to this.
 4350 if (!llvm::succ_empty(IfFalseBB))
 4351 return false;
 4352 // Use lambda to lazily compute expensive condition after cheap ones.
 4353 auto NoSideEffects = [](BasicBlock &BB) {
 4354 return llvm::none_of(BB, [](const Instruction &I) {
 4355 return I.mayWriteToMemory() || I.mayHaveSideEffects();
 4356 });
 4357 };
 // Case 1: BI's false successor ends in a deoptimize call — retarget that
 // edge to the widenable branch's fallback block. The `!= IfFalseBB` check
 // prevents re-applying the transform to an already-rewritten branch.
 4358 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
 4359 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
 4360 NoSideEffects(*BI->getParent())) {
 4361 auto *OldSuccessor = BI->getSuccessor(1);
 4362 OldSuccessor->removePredecessor(BI->getParent());
 4363 BI->setSuccessor(1, IfFalseBB);
 4364 if (DTU)
 4365 DTU->applyUpdates(
 4366 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
 4367 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
 4368 return true;
 4369 }
 // Case 2: symmetric handling when the deoptimizing block is BI's *true*
 // successor instead.
 4370 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
 4371 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
 4372 NoSideEffects(*BI->getParent())) {
 4373 auto *OldSuccessor = BI->getSuccessor(0);
 4374 OldSuccessor->removePredecessor(BI->getParent());
 4375 BI->setSuccessor(0, IfFalseBB);
 4376 if (DTU)
 4377 DTU->applyUpdates(
 4378 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
 4379 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
 4380 return true;
 4381 }
 4382 return false;
 4383}
4384
4385/// If we have a conditional branch as a predecessor of another block,
4386/// this function tries to simplify it. We know
4387/// that PBI and BI are both conditional branches, and BI is in one of the
4388/// successor blocks of PBI - PBI branches to BI.
/// Returns true if any simplification was performed.
 4390 DomTreeUpdater *DTU,
 4391 const DataLayout &DL,
 4392 const TargetTransformInfo &TTI) {
 4393 assert(PBI->isConditional() && BI->isConditional());
 4394 BasicBlock *BB = BI->getParent();
 4395
 4396 // If this block ends with a branch instruction, and if there is a
 4397 // predecessor that ends on a branch of the same condition, make
 4398 // this conditional branch redundant.
 4399 if (PBI->getCondition() == BI->getCondition() &&
 4400 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
 4401 // Okay, the outcome of this conditional branch is statically
 4402 // knowable. If this block had a single pred, handle specially, otherwise
 4403 // FoldCondBranchOnValueKnownInPredecessor() will handle it.
 4404 if (BB->getSinglePredecessor()) {
 4405 // Turn this into a branch on constant.
 4406 bool CondIsTrue = PBI->getSuccessor(0) == BB;
 4407 BI->setCondition(
 4408 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
 4409 return true; // Nuke the branch on constant.
 4410 }
 4411 }
 4412
 4413 // If the previous block ended with a widenable branch, determine if reusing
 4414 // the target block is profitable and legal. This will have the effect of
 4415 // "widening" PBI, but doesn't require us to reason about hoisting safety.
 4416 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
 4417 return true;
 4418
 4419 // If both branches are conditional and both contain stores to the same
 4420 // address, remove the stores from the conditionals and create a conditional
 4421 // merged store at the end.
 4422 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
 4423 return true;
 4424
 4425 // If this is a conditional branch in an empty block, and if any
 4426 // predecessors are a conditional branch to one of our destinations,
 4427 // fold the conditions into logical ops and one cond br.
 4428
 4429 // Ignore dbg intrinsics.
 4430 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
 4431 return false;
 4432
 // Determine which successor the two branches share. PBIOp/BIOp are the
 // operand indices of that common destination in PBI and BI respectively;
 // bail out if the branches have no successor in common.
 4433 int PBIOp, BIOp;
 4434 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
 4435 PBIOp = 0;
 4436 BIOp = 0;
 4437 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
 4438 PBIOp = 0;
 4439 BIOp = 1;
 4440 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
 4441 PBIOp = 1;
 4442 BIOp = 0;
 4443 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
 4444 PBIOp = 1;
 4445 BIOp = 1;
 4446 } else {
 4447 return false;
 4448 }
 4449
 4450 // Check to make sure that the other destination of this branch
 4451 // isn't BB itself. If so, this is an infinite loop that will
 4452 // keep getting unwound.
 4453 if (PBI->getSuccessor(PBIOp) == BB)
 4454 return false;
 4455
 4456 // If predecessor's branch probability to BB is too low don't merge branches.
 4457 SmallVector<uint32_t, 2> PredWeights;
 4458 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
 4459 extractBranchWeights(*PBI, PredWeights) &&
 4460 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
 4461
 4463 PredWeights[PBIOp],
 4464 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
 4465
 4467 if (CommonDestProb >= Likely)
 4468 return false;
 4469 }
 4470
 4471 // Do not perform this transformation if it would require
 4472 // insertion of a large number of select instructions. For targets
 4473 // without predication/cmovs, this is a big pessimization.
 4474
 4475 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
 4476 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
 // Count PHIs in the common destination; each disagreeing PHI may require a
 // select, so give up past three.
 4477 unsigned NumPhis = 0;
 4478 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
 4479 ++II, ++NumPhis) {
 4480 if (NumPhis > 2) // Disable this xform.
 4481 return false;
 4482 }
 4483
 4484 // Finally, if everything is ok, fold the branches to logical ops.
 4485 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
 4486
 4487 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
 4488 << "AND: " << *BI->getParent());
 4489
 4491
 4492 // If OtherDest *is* BB, then BB is a basic block with a single conditional
 4493 // branch in it, where one edge (OtherDest) goes back to itself but the other
 4494 // exits. We don't *know* that the program avoids the infinite loop
 4495 // (even though that seems likely). If we do this xform naively, we'll end up
 4496 // recursively unpeeling the loop. Since we know that (after the xform is
 4497 // done) that the block *is* infinite if reached, we just make it an obviously
 4498 // infinite loop with no cond branch.
 4499 if (OtherDest == BB) {
 4500 // Insert it at the end of the function, because it's either code,
 4501 // or it won't matter if it's hot. :)
 4502 BasicBlock *InfLoopBlock =
 4503 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
 4504 BranchInst::Create(InfLoopBlock, InfLoopBlock);
 4505 if (DTU)
 4506 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
 4507 OtherDest = InfLoopBlock;
 4508 }
 4509
 4510 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
 4511
 4512 // BI may have other predecessors. Because of this, we leave
 4513 // it alone, but modify PBI.
 4514
 4515 // Make sure we get to CommonDest on True&True directions.
 4516 Value *PBICond = PBI->getCondition();
 4517 IRBuilder<NoFolder> Builder(PBI);
 4518 if (PBIOp)
 4519 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
 4520
 4521 Value *BICond = BI->getCondition();
 4522 if (BIOp)
 4523 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
 4524
 4525 // Merge the conditions.
 4526 Value *Cond =
 4527 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
 4528
 4529 // Modify PBI to branch on the new condition to the new dests.
 4530 PBI->setCondition(Cond);
 4531 PBI->setSuccessor(0, CommonDest);
 4532 PBI->setSuccessor(1, OtherDest);
 4533
 4534 if (DTU) {
 4535 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
 4536 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
 4537
 4538 DTU->applyUpdates(Updates);
 4539 }
 4540
 4541 // Update branch weight for PBI.
 4542 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
 4543 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
 4544 bool HasWeights =
 4545 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
 4546 SuccTrueWeight, SuccFalseWeight);
 4547 if (HasWeights) {
 4548 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
 4549 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
 4550 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
 4551 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
 4552 // The weight to CommonDest should be PredCommon * SuccTotal +
 4553 // PredOther * SuccCommon.
 4554 // The weight to OtherDest should be PredOther * SuccOther.
 4555 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
 4556 PredOther * SuccCommon,
 4557 PredOther * SuccOther};
 4558 // Halve the weights if any of them cannot fit in an uint32_t
 4559 FitWeights(NewWeights);
 4560
 4561 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
 4562 }
 4563
 4564 // OtherDest may have phi nodes. If so, add an entry from PBI's
 4565 // block that are identical to the entries for BI's block.
 4566 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
 4567
 4568 // We know that the CommonDest already had an edge from PBI to
 4569 // it. If it has PHIs though, the PHIs may have different
 4570 // entries for BB and PBI's BB. If so, insert a select to make
 4571 // them agree.
 4572 for (PHINode &PN : CommonDest->phis()) {
 4573 Value *BIV = PN.getIncomingValueForBlock(BB);
 4574 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
 4575 Value *PBIV = PN.getIncomingValue(PBBIdx);
 4576 if (BIV != PBIV) {
 4577 // Insert a select in PBI to pick the right value.
 4578 SelectInst *NV = cast<SelectInst>(
 4579 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
 4580 PN.setIncomingValue(PBBIdx, NV);
 4581 // Although the select has the same condition as PBI, the original branch
 4582 // weights for PBI do not apply to the new select because the select's
 4583 // 'logical' edges are incoming edges of the phi that is eliminated, not
 4584 // the outgoing edges of PBI.
 4585 if (HasWeights) {
 4586 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
 4587 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
 4588 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
 4589 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
 4590 // The weight to PredCommonDest should be PredCommon * SuccTotal.
 4591 // The weight to PredOtherDest should be PredOther * SuccCommon.
 4592 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
 4593 PredOther * SuccCommon};
 4594
 4595 FitWeights(NewWeights);
 4596
 4597 setBranchWeights(NV, NewWeights[0], NewWeights[1],
 4598 /*IsExpected=*/false);
 4599 }
 4600 }
 4601 }
 4602
 4603 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
 4604 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
 4605
 4606 // This basic block is probably dead. We know it has at least
 4607 // one fewer predecessor.
 4608 return true;
 4609}
4610
4611// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4612// true or to FalseBB if Cond is false.
4613// Takes care of updating the successors and removing the old terminator.
4614// Also makes sure not to introduce new successors by assuming that edges to
4615// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight/FalseWeight become the branch weights of the new conditional
// branch; they are only attached when they differ. Always returns true
// (the terminator is always rewritten).
4616bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
 4617 Value *Cond, BasicBlock *TrueBB,
 4618 BasicBlock *FalseBB,
 4619 uint32_t TrueWeight,
 4620 uint32_t FalseWeight) {
 4621 auto *BB = OldTerm->getParent();
 4622 // Remove any superfluous successor edges from the CFG.
 4623 // First, figure out which successors to preserve.
 4624 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
 4625 // successor.
 4626 BasicBlock *KeepEdge1 = TrueBB;
 4627 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
 4628
 4629 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
 4630
 4631 // Then remove the rest.
 4632 for (BasicBlock *Succ : successors(OldTerm)) {
 4633 // Make sure only to keep exactly one copy of each edge.
 4634 if (Succ == KeepEdge1)
 4635 KeepEdge1 = nullptr;
 4636 else if (Succ == KeepEdge2)
 4637 KeepEdge2 = nullptr;
 4638 else {
 4639 Succ->removePredecessor(BB,
 4640 /*KeepOneInputPHIs=*/true);
 4641
 // Only record a DomTree deletion if *all* copies of the edge go away;
 // duplicate edges to TrueBB/FalseBB keep the CFG edge alive.
 4642 if (Succ != TrueBB && Succ != FalseBB)
 4643 RemovedSuccessors.insert(Succ);
 4644 }
 4645 }
 4646
 4647 IRBuilder<> Builder(OldTerm);
 4648 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
 4649
 // KeepEdge1/KeepEdge2 are now null exactly when the corresponding block was
 // found among the old successors; branch accordingly.
 4650 // Insert an appropriate new terminator.
 4651 if (!KeepEdge1 && !KeepEdge2) {
 4652 if (TrueBB == FalseBB) {
 4653 // We were only looking for one successor, and it was present.
 4654 // Create an unconditional branch to it.
 4655 Builder.CreateBr(TrueBB);
 4656 } else {
 4657 // We found both of the successors we were looking for.
 4658 // Create a conditional branch sharing the condition of the select.
 4659 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
 4660 if (TrueWeight != FalseWeight)
 4661 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
 4662 }
 4663 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
 4664 // Neither of the selected blocks were successors, so this
 4665 // terminator must be unreachable.
 4666 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
 4667 } else {
 4668 // One of the selected values was a successor, but the other wasn't.
 4669 // Insert an unconditional branch to the one that was found;
 4670 // the edge to the one that wasn't must be unreachable.
 4671 if (!KeepEdge1) {
 4672 // Only TrueBB was found.
 4673 Builder.CreateBr(TrueBB);
 4674 } else {
 4675 // Only FalseBB was found.
 4676 Builder.CreateBr(FalseBB);
 4677 }
 4678 }
 4679
 4681
 4682 if (DTU) {
 4684 Updates.reserve(RemovedSuccessors.size());
 4685 for (auto *RemovedSuccessor : RemovedSuccessors)
 4686 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
 4687 DTU->applyUpdates(Updates);
 4688 }
 4689
 4690 return true;
 4691}
4692
4693// Replaces
4694// (switch (select cond, X, Y)) on constant X, Y
4695// with a branch - conditional if X and Y lead to distinct BBs,
4696// unconditional otherwise.
// Returns false if either select arm is not a constant integer; otherwise
// delegates the rewrite to SimplifyTerminatorOnSelect.
4697bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
 4698 SelectInst *Select) {
 4699 // Check for constant integer values in the select.
 4700 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
 4701 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
 4702 if (!TrueVal || !FalseVal)
 4703 return false;
 4704
 4705 // Find the relevant condition and destinations.
 4706 Value *Condition = Select->getCondition();
 // findCaseValue falls back to the default case when the constant is not an
 // explicit case, so both lookups always yield a valid successor.
 4707 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
 4708 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
 4709
 4710 // Get weight for TrueBB and FalseBB.
 4711 uint32_t TrueWeight = 0, FalseWeight = 0;
 4713 bool HasWeights = hasBranchWeightMD(*SI);
 4714 if (HasWeights) {
 4715 GetBranchWeights(SI, Weights);
 // Only trust the profile if there is exactly one weight per successor
 // (the default destination plus each case).
 4716 if (Weights.size() == 1 + SI->getNumCases()) {
 4717 TrueWeight =
 4718 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
 4719 FalseWeight =
 4720 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
 4721 }
 4722 }
 4723
 4724 // Perform the actual simplification.
 4725 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
 4726 FalseWeight);
 4727}
4728
4729// Replaces
4730// (indirectbr (select cond, blockaddress(@fn, BlockA),
4731// blockaddress(@fn, BlockB)))
4732// with
4733// (br cond, BlockA, BlockB).
4734bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4735 SelectInst *SI) {
4736 // Check that both operands of the select are block addresses.
4737 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4738 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4739 if (!TBA || !FBA)
4740 return false;
4741
4742 // Extract the actual blocks.
4743 BasicBlock *TrueBB = TBA->getBasicBlock();
4744 BasicBlock *FalseBB = FBA->getBasicBlock();
4745
4746 // Perform the actual simplification.
4747 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4748 0);
4749}
4750
4751/// This is called when we find an icmp instruction
4752/// (a seteq/setne with a constant) as the only instruction in a
4753/// block that ends with an uncond branch. We are looking for a very specific
4754/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4755/// this case, we merge the first two "or's of icmp" into a switch, but then the
4756/// default value goes to an uncond block with a seteq in it, we get something
4757/// like:
4758///
4759/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4760/// DEFAULT:
4761/// %tmp = icmp eq i8 %A, 92
4762/// br label %end
4763/// end:
4764/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4765///
4766/// We prefer to split the edge to 'end' so that there is a true/false entry to
4767/// the PHI, merging the third icmp into the switch.
4768bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
 4769 ICmpInst *ICI, IRBuilder<> &Builder) {
 4770 BasicBlock *BB = ICI->getParent();
 4771
 4772 // If the block has any PHIs in it or the icmp has multiple uses, it is too
 4773 // complex.
 4774 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
 4775 return false;
 4776
 4777 Value *V = ICI->getOperand(0);
 4778 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
 4779
 4780 // The pattern we're looking for is where our only predecessor is a switch on
 4781 // 'V' and this block is the default case for the switch. In this case we can
 4782 // fold the compared value into the switch to simplify things.
 4783 BasicBlock *Pred = BB->getSinglePredecessor();
 4784 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
 4785 return false;
 4786
 4787 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
 4788 if (SI->getCondition() != V)
 4789 return false;
 4790
 4791 // If BB is reachable on a non-default case, then we simply know the value of
 4792 // V in this block. Substitute it and constant fold the icmp instruction
 4793 // away.
 4794 if (SI->getDefaultDest() != BB) {
 4795 ConstantInt *VVal = SI->findCaseDest(BB);
 4796 assert(VVal && "Should have a unique destination value");
 4797 ICI->setOperand(0, VVal);
 4798
 4799 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
 4800 ICI->replaceAllUsesWith(V);
 4801 ICI->eraseFromParent();
 4802 }
 4803 // BB is now empty, so it is likely to simplify away.
 4804 return requestResimplify();
 4805 }
 4806
 4807 // Ok, the block is reachable from the default dest. If the constant we're
 4808 // comparing exists in one of the other edges, then we can constant fold ICI
 4809 // and zap it.
 // On the default path V cannot equal an existing case constant, so an EQ
 // compare folds to false and an NE compare folds to true.
 4810 if (SI->findCaseValue(Cst) != SI->case_default()) {
 4811 Value *V;
 4812 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
 4814 else
 4816
 4817 ICI->replaceAllUsesWith(V);
 4818 ICI->eraseFromParent();
 4819 // BB is now empty, so it is likely to simplify away.
 4820 return requestResimplify();
 4821 }
 4822
 4823 // The use of the icmp has to be in the 'end' block, by the only PHI node in
 4824 // the block.
 4825 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
 4826 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
 4827 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
 4828 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
 4829 return false;
 4830
 4831 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
 4832 // true in the PHI.
 4833 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
 4834 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
 4835
 4836 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
 4837 std::swap(DefaultCst, NewCst);
 4838
 4839 // Replace ICI (which is used by the PHI for the default value) with true or
 4840 // false depending on if it is EQ or NE.
 4841 ICI->replaceAllUsesWith(DefaultCst);
 4842 ICI->eraseFromParent();
 4843
 4845
 4846 // Okay, the switch goes to this block on a default value. Add an edge from
 4847 // the switch to the merge point on the compared value.
 4848 BasicBlock *NewBB =
 4849 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
 4850 {
 4852 auto W0 = SIW.getSuccessorWeight(0);
 4854 if (W0) {
 // Split the old default-edge weight roughly in half (rounding up):
 // half stays on the default edge, half goes to the new case edge.
 4855 NewW = ((uint64_t(*W0) + 1) >> 1);
 4856 SIW.setSuccessorWeight(0, *NewW);
 4857 }
 4858 SIW.addCase(Cst, NewBB, NewW);
 4859 if (DTU)
 4860 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
 4861 }
 4862
 4863 // NewBB branches to the phi block, add the uncond branch and the phi entry.
 4864 Builder.SetInsertPoint(NewBB);
 4865 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
 4866 Builder.CreateBr(SuccBlock);
 4867 PHIUse->addIncoming(NewCst, NewBB);
 4868 if (DTU) {
 4869 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
 4870 DTU->applyUpdates(Updates);
 4871 }
 4872 return true;
 4873}
4874
4875/// The specified branch is a conditional branch.
4876/// Check to see if it is branching on an or/and chain of icmp instructions, and
4877/// fold it into a switch instruction if so.
/// Any residual comparison that could not be folded ("Extra") is tested by a
/// separate conditional branch inserted before the switch. Returns true if
/// the branch was rewritten.
4878bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
 4879 IRBuilder<> &Builder,
 4880 const DataLayout &DL) {
 4881 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
 4882 if (!Cond)
 4883 return false;
 4884
 4885 // Change br (X == 0 | X == 1), T, F into a switch instruction.
 4886 // If this is a bunch of seteq's or'd together, or if it's a bunch of
 4887 // 'setne's and'ed together, collect them.
 4888
 4889 // Try to gather values from a chain of and/or to be turned into a switch
 4890 ConstantComparesGatherer ConstantCompare(Cond, DL);
 4891 // Unpack the result
 4892 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
 4893 Value *CompVal = ConstantCompare.CompValue;
 4894 unsigned UsedICmps = ConstantCompare.UsedICmps;
 4895 Value *ExtraCase = ConstantCompare.Extra;
 4896
 4897 // If we didn't have a multiply compared value, fail.
 4898 if (!CompVal)
 4899 return false;
 4900
 4901 // Avoid turning single icmps into a switch.
 4902 if (UsedICmps <= 1)
 4903 return false;
 4904
 // An or-chain ("X == a || X == b") branches to the true successor on a
 // match; an and-chain of setne's branches to the false successor.
 4905 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
 4906
 4907 // There might be duplicate constants in the list, which the switch
 4908 // instruction can't handle, remove them now.
 4909 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
 4910 Values.erase(llvm::unique(Values), Values.end());
 4911
 4912 // If Extra was used, we require at least two switch values to do the
 4913 // transformation. A switch with one value is just a conditional branch.
 4914 if (ExtraCase && Values.size() < 2)
 4915 return false;
 4916
 4917 // TODO: Preserve branch weight metadata, similarly to how
 4918 // FoldValueComparisonIntoPredecessors preserves it.
 4919
 4920 // Figure out which block is which destination.
 4921 BasicBlock *DefaultBB = BI->getSuccessor(1);
 4922 BasicBlock *EdgeBB = BI->getSuccessor(0);
 4923 if (!TrueWhenEqual)
 4924 std::swap(DefaultBB, EdgeBB);
 4925
 4926 BasicBlock *BB = BI->getParent();
 4927
 4928 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
 4929 << " cases into SWITCH. BB is:\n"
 4930 << *BB);
 4931
 4933
 4934 // If there are any extra values that couldn't be folded into the switch
 4935 // then we evaluate them with an explicit branch first. Split the block
 4936 // right before the condbr to handle it.
 4937 if (ExtraCase) {
 4938 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
 4939 /*MSSAU=*/nullptr, "switch.early.test");
 4940
 4941 // Remove the uncond branch added to the old block.
 4942 Instruction *OldTI = BB->getTerminator();
 4943 Builder.SetInsertPoint(OldTI);
 4944
 4945 // There can be an unintended UB if extra values are Poison. Before the
 4946 // transformation, extra values may not be evaluated according to the
 4947 // condition, and it will not raise UB. But after transformation, we are
 4948 // evaluating extra values before checking the condition, and it will raise
 4949 // UB. It can be solved by adding freeze instruction to extra values.
 4950 AssumptionCache *AC = Options.AC;
 4951
 4952 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
 4953 ExtraCase = Builder.CreateFreeze(ExtraCase);
 4954
 4955 if (TrueWhenEqual)
 4956 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
 4957 else
 4958 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
 4959
 4960 OldTI->eraseFromParent();
 4961
 4962 if (DTU)
 4963 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
 4964
 4965 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
 4966 // for the edge we just added.
 4967 AddPredecessorToBlock(EdgeBB, BB, NewBB);
 4968
 4969 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
 4970 << "\nEXTRABB = " << *BB);
 // The switch itself is built in the split-off block from here on.
 4971 BB = NewBB;
 4972 }
 4973
 4974 Builder.SetInsertPoint(BI);
 4975 // Convert pointer to int before we switch.
 4976 if (CompVal->getType()->isPointerTy()) {
 4977 CompVal = Builder.CreatePtrToInt(
 4978 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
 4979 }
 4980
 4981 // Create the new switch instruction now.
 4982 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
 4983
 4984 // Add all of the 'cases' to the switch instruction.
 4985 for (unsigned i = 0, e = Values.size(); i != e; ++i)
 4986 New->addCase(Values[i], EdgeBB);
 4987
 4988 // We added edges from PI to the EdgeBB. As such, if there were any
 4989 // PHI nodes in EdgeBB, they need entries to be added corresponding to
 4990 // the number of edges added.
 4991 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
 4992 PHINode *PN = cast<PHINode>(BBI);
 4993 Value *InVal = PN->getIncomingValueForBlock(BB);
 // One entry for BB already exists; duplicate it for the remaining
 // Values.size() - 1 new case edges.
 4994 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
 4995 PN->addIncoming(InVal, BB);
 4996 }
 4997
 4998 // Erase the old branch instruction.
 5000 if (DTU)
 5001 DTU->applyUpdates(Updates);
 5002
 5003 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
 5004 return true;
 5005}
5006
5007bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5008 if (isa<PHINode>(RI->getValue()))
5009 return simplifyCommonResume(RI);
5010 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5011 RI->getValue() == RI->getParent()->getFirstNonPHI())
5012 // The resume must unwind the exception that caused control to branch here.
5013 return simplifySingleResume(RI);
5014
5015 return false;
5016}
5017
5018// Check if cleanup block is empty
// "Empty" means the given instruction range contains only intrinsics with no
// effect on the cleanup's observable behavior: debug-info intrinsics and
// lifetime.end markers. Any other instruction (including any non-intrinsic
// call) makes the range non-empty.
// NOTE(review): lifetime.start is deliberately absent from the allow-list —
// only the end marker is tolerated; confirm with callers before relaxing.
 5020 for (Instruction &I : R) {
 5021 auto *II = dyn_cast<IntrinsicInst>(&I);
 5022 if (!II)
 5023 return false;
 5024
 5025 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
 5026 switch (IntrinsicID) {
 5027 case Intrinsic::dbg_declare:
 5028 case Intrinsic::dbg_value:
 5029 case Intrinsic::dbg_label:
 5030 case Intrinsic::lifetime_end:
 5031 break;
 5032 default:
 5033 return false;
 5034 }
 5035 }
 5036 return true;
 5037}
5038
5039// Simplify resume that is shared by several landing pads (phi of landing pad).
// For each incoming block that is a "trivial" landing-pad block (its only
// successor is the resume block and it contains nothing but the landingpad
// and benign intrinsics), convert the invokes unwinding to it into calls and
// cut its edge to the resume block. Returns true if any block was simplified.
5040bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
 5041 BasicBlock *BB = RI->getParent();
 5042
 5043 // Check that there are no other instructions except for debug and lifetime
 5044 // intrinsics between the phi's and resume instruction.
 5046 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
 5047 return false;
 5048
 5049 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
 5050 auto *PhiLPInst = cast<PHINode>(RI->getValue());
 5051
 5052 // Check incoming blocks to see if any of them are trivial.
 5053 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
 5054 Idx++) {
 5055 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
 5056 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
 5057
 5058 // If the block has other successors, we can not delete it because
 5059 // it has other dependents.
 5060 if (IncomingBB->getUniqueSuccessor() != BB)
 5061 continue;
 5062
 5063 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
 5064 // Not the landing pad that caused the control to branch here.
 5065 if (IncomingValue != LandingPad)
 5066 continue;
 5067
 5069 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
 5070 TrivialUnwindBlocks.insert(IncomingBB);
 5071 }
 5072
 5073 // If no trivial unwind blocks, don't do any simplifications.
 5074 if (TrivialUnwindBlocks.empty())
 5075 return false;
 5076
 5077 // Turn all invokes that unwind here into calls.
 5078 for (auto *TrivialBB : TrivialUnwindBlocks) {
 5079 // Blocks that will be simplified should be removed from the phi node.
 5080 // Note there could be multiple edges to the resume block, and we need
 5081 // to remove them all.
 5082 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
 5083 BB->removePredecessor(TrivialBB, true);
 5084
 5085 for (BasicBlock *Pred :
 5087 removeUnwindEdge(Pred, DTU);
 5088 ++NumInvokes;
 5089 }
 5090
 5091 // In each SimplifyCFG run, only the current processed block can be erased.
 5092 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
 5093 // of erasing TrivialBB, we only remove the branch to the common resume
 5094 // block so that we can later erase the resume block since it has no
 5095 // predecessors.
 5096 TrivialBB->getTerminator()->eraseFromParent();
 5097 new UnreachableInst(RI->getContext(), TrivialBB);
 5098 if (DTU)
 5099 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
 5100 }
 5101
 5102 // Delete the resume block if all its predecessors have been removed.
 5103 if (pred_empty(BB))
 5104 DeleteDeadBlock(BB, DTU);
 5105
 // TrivialUnwindBlocks is known non-empty here (checked above), so this
 // always reports a change.
 5106 return !TrivialUnwindBlocks.empty();
 5107}
5108
5109// Simplify resume that is only used by a single (non-phi) landing pad.
// If the block contains nothing but the landingpad, benign intrinsics, and
// the resume, every invoke unwinding here is converted to a plain call and
// the now-unreachable landing-pad block is deleted. Returns true on change.
5110bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
 5111 BasicBlock *BB = RI->getParent();
 5112 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
 5113 assert(RI->getValue() == LPInst &&
 5114 "Resume must unwind the exception that caused control to here");
 5115
 5116 // Check that there are no other instructions except for debug intrinsics.
 5118 make_range<Instruction *>(LPInst->getNextNode(), RI)))
 5119 return false;
 5120
 5121 // Turn all invokes that unwind here into calls and delete the basic block.
 5123 removeUnwindEdge(Pred, DTU);
 5124 ++NumInvokes;
 5125 }
 5126
 5127 // The landingpad is now unreachable. Zap it.
 5128 DeleteDeadBlock(BB, DTU);
 5129 return true;
 5130}
5131
 5133 // If this is a trivial cleanup pad that executes no instructions, it can be
 5134 // eliminated. If the cleanup pad continues to the caller, any predecessor
 5135 // that is an EH pad will be updated to continue to the caller and any
 5136 // predecessor that terminates with an invoke instruction will have its invoke
 5137 // instruction converted to a call instruction. If the cleanup pad being
 5138 // simplified does not continue to the caller, each predecessor will be
 5139 // updated to continue to the unwind destination of the cleanup pad being
 5140 // simplified.
  // NOTE(review): RI is the cleanupret being simplified; DTU may be null (all
  // DominatorTree bookkeeping below is guarded by `if (DTU)`).
 5141 BasicBlock *BB = RI->getParent();
 5142 CleanupPadInst *CPInst = RI->getCleanupPad();
 5143 if (CPInst->getParent() != BB)
 5144 // This isn't an empty cleanup.
 5145 return false;
 5146
 5147 // We cannot kill the pad if it has multiple uses. This typically arises
 5148 // from unreachable basic blocks.
 5149 if (!CPInst->hasOneUse())
 5150 return false;
 5151
 5152 // Check that there are no other instructions except for benign intrinsics.
 5154 make_range<Instruction *>(CPInst->getNextNode(), RI)))
 5155 return false;
 5156
 5157 // If the cleanup return we are simplifying unwinds to the caller, this will
 5158 // set UnwindDest to nullptr.
 5159 BasicBlock *UnwindDest = RI->getUnwindDest();
 5160 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
 5161
 5162 // We're about to remove BB from the control flow. Before we do, sink any
 5163 // PHINodes into the unwind destination. Doing this before changing the
 5164 // control flow avoids some potentially slow checks, since we can currently
 5165 // be certain that UnwindDest and BB have no common predecessors (since they
 5166 // are both EH pads).
 5167 if (UnwindDest) {
 5168 // First, go through the PHI nodes in UnwindDest and update any nodes that
 5169 // reference the block we are removing
 5170 for (PHINode &DestPN : UnwindDest->phis()) {
 5171 int Idx = DestPN.getBasicBlockIndex(BB);
 5172 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
 5173 assert(Idx != -1);
 5174 // This PHI node has an incoming value that corresponds to a control
 5175 // path through the cleanup pad we are removing. If the incoming
 5176 // value is in the cleanup pad, it must be a PHINode (because we
 5177 // verified above that the block is otherwise empty). Otherwise, the
 5178 // value is either a constant or a value that dominates the cleanup
 5179 // pad being removed.
 5180 //
 5181 // Because BB and UnwindDest are both EH pads, all of their
 5182 // predecessors must unwind to these blocks, and since no instruction
 5183 // can have multiple unwind destinations, there will be no overlap in
 5184 // incoming blocks between SrcPN and DestPN.
 5185 Value *SrcVal = DestPN.getIncomingValue(Idx);
 5186 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
 5187
 5188 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
 5189 for (auto *Pred : predecessors(BB)) {
 5190 Value *Incoming =
 5191 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
 5192 DestPN.addIncoming(Incoming, Pred);
 5193 }
 5194 }
 5195
 5196 // Sink any remaining PHI nodes directly into UnwindDest.
 5197 Instruction *InsertPt = DestEHPad;
 5198 for (PHINode &PN : make_early_inc_range(BB->phis())) {
 5199 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
 5200 // If the PHI node has no uses or all of its uses are in this basic
 5201 // block (meaning they are debug or lifetime intrinsics), just leave
 5202 // it. It will be erased when we erase BB below.
 5203 continue;
 5204
 5205 // Otherwise, sink this PHI node into UnwindDest.
 5206 // Any predecessors to UnwindDest which are not already represented
 5207 // must be back edges which inherit the value from the path through
 5208 // BB. In this case, the PHI value must reference itself.
 5209 for (auto *pred : predecessors(UnwindDest))
 5210 if (pred != BB)
 5211 PN.addIncoming(&PN, pred)
 5212 PN.moveBefore(InsertPt);
 5213 // Also, add a dummy incoming value for the original BB itself,
 5214 // so that the PHI is well-formed until we drop said predecessor.
 5215 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
 5216 }
 5217 }
 5218
  // Deferred DominatorTree updates; flushed before each removeUnwindEdge call
  // (which mutates the CFG itself) and once more at the end.
 5219 std::vector<DominatorTree::UpdateType> Updates;
 5220
 5221 // We use make_early_inc_range here because we will remove all predecessors.
 5223 if (UnwindDest == nullptr) {
 5224 if (DTU) {
 5225 DTU->applyUpdates(Updates);
 5226 Updates.clear();
 5227 }
 5228 removeUnwindEdge(PredBB, DTU);
 5229 ++NumInvokes;
 5230 } else {
 5231 BB->removePredecessor(PredBB);
 5232 Instruction *TI = PredBB->getTerminator();
 5233 TI->replaceUsesOfWith(BB, UnwindDest);
 5234 if (DTU) {
 5235 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
 5236 Updates.push_back({DominatorTree::Delete, PredBB, BB});
 5237 }
 5238 }
 5239 }
 5240
 5241 if (DTU)
 5242 DTU->applyUpdates(Updates);
 5243
 5244 DeleteDeadBlock(BB, DTU);
 5245
 5246 return true;
 5247}
5248
5249// Try to merge two cleanuppads together.
// NOTE(review): RI is the cleanupret terminating the predecessor pad; returns
// true when the successor cleanuppad was folded into RI's pad.
 5251 // Skip any cleanuprets which unwind to caller, there is nothing to merge
 5252 // with.
 5253 BasicBlock *UnwindDest = RI->getUnwindDest();
 5254 if (!UnwindDest)
 5255 return false;
 5256
 5257 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
 5258 // be safe to merge without code duplication.
 5259 if (UnwindDest->getSinglePredecessor() != RI->getParent())
 5260 return false;
 5261
 5262 // Verify that our cleanuppad's unwind destination is another cleanuppad.
 5263 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
 5264 if (!SuccessorCleanupPad)
 5265 return false;
 5266
 5267 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
 5268 // Replace any uses of the successor cleanupad with the predecessor pad
 5269 // The only cleanuppad uses should be this cleanupret, its cleanupret and
 5270 // funclet bundle operands.
 5271 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
 5272 // Remove the old cleanuppad.
 5273 SuccessorCleanupPad->eraseFromParent();
 5274 // Now, we simply replace the cleanupret with a branch to the unwind
 5275 // destination.
 5276 BranchInst::Create(UnwindDest, RI->getParent());
 5277 RI->eraseFromParent();
 5278
 5279 return true;
 5280}
5281
5282bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5283 // It is possible to transiantly have an undef cleanuppad operand because we
5284 // have deleted some, but not all, dead blocks.
5285 // Eventually, this block will be deleted.
5286 if (isa<UndefValue>(RI->getOperand(0)))
5287 return false;
5288
5289 if (mergeCleanupPad(RI))
5290 return true;
5291
5292 if (removeEmptyCleanup(RI, DTU))
5293 return true;
5294
5295 return false;
5296}
5297
5298// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5299bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
 5300 BasicBlock *BB = UI->getParent();
 5301
 5302 bool Changed = false;
 5303
 5304 // Ensure that any debug-info records that used to occur after the Unreachable
 5305 // are moved to in front of it -- otherwise they'll "dangle" at the end of
 5306 // the block.
 5308
 5309 // Debug-info records on the unreachable inst itself should be deleted, as
 5310 // below we delete everything past the final executable instruction.
 5311 UI->dropDbgRecords();
 5312
 5313 // If there are any instructions immediately before the unreachable that can
 5314 // be removed, do so.
 5315 while (UI->getIterator() != BB->begin()) {
 5317 --BBI;
 5318
 5320 break; // Can not drop any more instructions. We're done here.
 5321 // Otherwise, this instruction can be freely erased,
 5322 // even if it is not side-effect free.
 5323
 5324 // Note that deleting EH's here is in fact okay, although it involves a bit
 5325 // of subtle reasoning. If this inst is an EH, all the predecessors of this
 5326 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
 5327 // and we can therefore guarantee this block will be erased.
 5328
 5329 // If we're deleting this, we're deleting any subsequent debug info, so
 5330 // delete DbgRecords.
 5331 BBI->dropDbgRecords();
 5332
 5333 // Delete this instruction (any uses are guaranteed to be dead)
 5334 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
 5335 BBI->eraseFromParent();
 5336 Changed = true;
 5337 }
 5338
 5339 // If the unreachable instruction is the first in the block, take a gander
 5340 // at all of the predecessors of this instruction, and simplify them.
 5341 if (&BB->front() != UI)
 5342 return Changed;
 5343
 5344 std::vector<DominatorTree::UpdateType> Updates;
 5345
 5347 for (BasicBlock *Predecessor : Preds) {
 5348 Instruction *TI = Predecessor->getTerminator();
 5349 IRBuilder<> Builder(TI);
 5350 if (auto *BI = dyn_cast<BranchInst>(TI)) {
 5351 // We could either have a proper unconditional branch,
 5352 // or a degenerate conditional branch with matching destinations.
 5353 if (all_of(BI->successors(),
 5354 [BB](auto *Successor) { return Successor == BB; })) {
 5355 new UnreachableInst(TI->getContext(), TI->getIterator());
 5356 TI->eraseFromParent();
 5357 Changed = true;
 5358 } else {
 5359 assert(BI->isConditional() && "Can't get here with an uncond branch.");
 5360 Value* Cond = BI->getCondition();
 5361 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
 5362 "The destinations are guaranteed to be different here.");
 5363 CallInst *Assumption;
 5364 if (BI->getSuccessor(0) == BB) {
 5365 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
 5366 Builder.CreateBr(BI->getSuccessor(1));
 5367 } else {
 5368 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
 5369 Assumption = Builder.CreateAssumption(Cond);
 5370 Builder.CreateBr(BI->getSuccessor(0));
 5371 }
 5372 if (Options.AC)
 5373 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
 5374
 5376 Changed = true;
 5377 }
 5378 if (DTU)
 5379 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
  // For switches, delete every case edge that targets BB. The default edge
  // cannot be deleted here (see note below).
 5380 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
  // NOTE(review): SU appears to wrap SI so that branch-weight (profile)
  // metadata stays consistent while cases are removed — confirm against
  // SwitchInstProfUpdateWrapper.
 5382 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
 5383 if (i->getCaseSuccessor() != BB) {
 5384 ++i;
 5385 continue;
 5386 }
 5387 BB->removePredecessor(SU->getParent());
 5388 i = SU.removeCase(i);
 5389 e = SU->case_end();
 5390 Changed = true;
 5391 }
 5392 // Note that the default destination can't be removed!
 5393 if (DTU && SI->getDefaultDest() != BB)
 5394 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
  // An invoke that unwinds to this unreachable block can never actually
  // unwind; convert it to a call and mark it nounwind.
 5395 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
 5396 if (II->getUnwindDest() == BB) {
 5397 if (DTU) {
 5398 DTU->applyUpdates(Updates);
 5399 Updates.clear();
 5400 }
 5401 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
 5402 if (!CI->doesNotThrow())
 5403 CI->setDoesNotThrow();
 5404 Changed = true;
 5405 }
 5406 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
 5407 if (CSI->getUnwindDest() == BB) {
 5408 if (DTU) {
 5409 DTU->applyUpdates(Updates);
 5410 Updates.clear();
 5411 }
 5412 removeUnwindEdge(TI->getParent(), DTU);
 5413 Changed = true;
 5414 continue;
 5415 }
 5416
 5417 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
 5418 E = CSI->handler_end();
 5419 I != E; ++I) {
 5420 if (*I == BB) {
 5421 CSI->removeHandler(I);
 5422 --I;
 5423 --E;
 5424 Changed = true;
 5425 }
 5426 }
 5427 if (DTU)
 5428 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
 5429 if (CSI->getNumHandlers() == 0) {
 5430 if (CSI->hasUnwindDest()) {
 5431 // Redirect all predecessors of the block containing CatchSwitchInst
 5432 // to instead branch to the CatchSwitchInst's unwind destination.
 5433 if (DTU) {
 5434 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
 5435 Updates.push_back({DominatorTree::Insert,
 5436 PredecessorOfPredecessor,
 5437 CSI->getUnwindDest()});
 5438 Updates.push_back({DominatorTree::Delete,
 5439 PredecessorOfPredecessor, Predecessor});
 5440 }
 5441 }
 5442 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
 5443 } else {
 5444 // Rewrite all preds to unwind to caller (or from invoke to call).
 5445 if (DTU) {
 5446 DTU->applyUpdates(Updates);
 5447 Updates.clear();
 5448 }
 5449 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
 5450 for (BasicBlock *EHPred : EHPreds)
 5451 removeUnwindEdge(EHPred, DTU);
 5452 }
 5453 // The catchswitch is no longer reachable.
 5454 new UnreachableInst(CSI->getContext(), CSI->getIterator());
 5455 CSI->eraseFromParent();
 5456 Changed = true;
 5457 }
 5458 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
 5459 (void)CRI;
 5460 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
 5461 "Expected to always have an unwind to BB.");
 5462 if (DTU)
 5463 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
 5464 new UnreachableInst(TI->getContext(), TI->getIterator());
 5465 TI->eraseFromParent();
 5466 Changed = true;
 5467 }
 5468 }
 5469
 5470 if (DTU)
 5471 DTU->applyUpdates(Updates);
 5472
 5473 // If this block is now dead, remove it.
 5474 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
 5475 DeleteDeadBlock(BB, DTU);
 5476 return true;
 5477 }
 5478
 5479 return Changed;
 5480}
5481
 5483 assert(Cases.size() >= 1);
 5484
  // Contiguity here means each value is exactly one greater than the value
  // that follows it, i.e. the list is a descending run of consecutive values.
 5486 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
 5487 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
 5488 return false;
 5489 }
 5490 return true;
 5491}
5492
 5494 DomTreeUpdater *DTU,
 5495 bool RemoveOrigDefaultBlock = true) {
 5496 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
 5497 auto *BB = Switch->getParent();
 5498 auto *OrigDefaultBlock = Switch->getDefaultDest();
 5499 if (RemoveOrigDefaultBlock)
 5500 OrigDefaultBlock->removePredecessor(BB);
  // Create a fresh block containing only 'unreachable' and make it the
  // switch's new default destination.
 5501 BasicBlock *NewDefaultBlock = BasicBlock::Create(
 5502 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
 5503 OrigDefaultBlock);
 5504 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
 5505 Switch->setDefaultDest(&*NewDefaultBlock);
 5506 if (DTU) {
 5508 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
  // Only record the deletion of the old default edge if no remaining case
  // still targets the old default block.
 5509 if (RemoveOrigDefaultBlock &&
 5510 !is_contained(successors(BB), OrigDefaultBlock))
 5511 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
 5512 DTU->applyUpdates(Updates);
 5513 }
 5514}
5515
5516/// Turn a switch into an integer range comparison and branch.
5517/// Switches with more than 2 destinations are ignored.
5518/// Switches with 1 destination are also ignored.
5519bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
 5520 IRBuilder<> &Builder) {
 5521 assert(SI->getNumCases() > 1 && "Degenerate switch?");
 5522
 5523 bool HasDefault =
 5524 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
 5525
 5526 auto *BB = SI->getParent();
 5527
 5528 // Partition the cases into two sets with different destinations.
 5529 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
 5530 BasicBlock *DestB = nullptr;
 5533
 5534 for (auto Case : SI->cases()) {
 5535 BasicBlock *Dest = Case.getCaseSuccessor();
 5536 if (!DestA)
 5537 DestA = Dest;
 5538 if (Dest == DestA) {
 5539 CasesA.push_back(Case.getCaseValue());
 5540 continue;
 5541 }
 5542 if (!DestB)
 5543 DestB = Dest;
 5544 if (Dest == DestB) {
 5545 CasesB.push_back(Case.getCaseValue());
 5546 continue;
 5547 }
 5548 return false; // More than two destinations.
 5549 }
 5550 if (!DestB)
 5551 return false; // All destinations are the same and the default is unreachable
 5552
 5553 assert(DestA && DestB &&
 5554 "Single-destination switch should have been folded.");
 5555 assert(DestA != DestB);
 5556 assert(DestB != SI->getDefaultDest());
 5557 assert(!CasesB.empty() && "There must be non-default cases.");
 5558 assert(!CasesA.empty() || HasDefault);
 5559
 5560 // Figure out if one of the sets of cases form a contiguous range.
 5561 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
 5562 BasicBlock *ContiguousDest = nullptr;
 5563 BasicBlock *OtherDest = nullptr;
 5564 if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
 5565 ContiguousCases = &CasesA;
 5566 ContiguousDest = DestA;
 5567 OtherDest = DestB;
 5568 } else if (CasesAreContiguous(CasesB)) {
 5569 ContiguousCases = &CasesB;
 5570 ContiguousDest = DestB;
 5571 OtherDest = DestA;
 5572 } else
 5573 return false;
 5574
 5575 // Start building the compare and branch.
 5576
  // NOTE(review): back() is presumably the smallest value of the contiguous
  // run (CasesAreContiguous accepts descending runs), so adding its negation
  // rebases the range at zero and a single unsigned < covers it — confirm
  // the sort order established by CasesAreContiguous's caller.
 5577 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
 5578 Constant *NumCases =
 5579 ConstantInt::get(Offset->getType(), ContiguousCases->size());
 5580
 5581 Value *Sub = SI->getCondition();
 5582 if (!Offset->isNullValue())
 5583 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
 5584
 5585 Value *Cmp;
 5586 // If NumCases overflowed, then all possible values jump to the successor.
 5587 if (NumCases->isNullValue() && !ContiguousCases->empty())
 5588 Cmp = ConstantInt::getTrue(SI->getContext());
 5589 else
 5590 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
 5591 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
 5592
 5593 // Update weight for the newly-created conditional branch.
 5594 if (hasBranchWeightMD(*SI)) {
 5596 GetBranchWeights(SI, Weights);
 5597 if (Weights.size() == 1 + SI->getNumCases()) {
 5598 uint64_t TrueWeight = 0;
 5599 uint64_t FalseWeight = 0;
 5600 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
 5601 if (SI->getSuccessor(I) == ContiguousDest)
 5602 TrueWeight += Weights[I];
 5603 else
 5604 FalseWeight += Weights[I];
 5605 }
  // Scale both weights down together until each fits in 32 bits,
  // preserving their ratio.
 5606 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
 5607 TrueWeight /= 2;
 5608 FalseWeight /= 2;
 5609 }
 5610 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
 5611 }
 5612 }
 5613
 5614 // Prune obsolete incoming values off the successors' PHI nodes.
 5615 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
 5616 unsigned PreviousEdges = ContiguousCases->size();
 5617 if (ContiguousDest == SI->getDefaultDest())
 5618 ++PreviousEdges;
 5619 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
 5620 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
 5621 }
 5622 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
 5623 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
 5624 if (OtherDest == SI->getDefaultDest())
 5625 ++PreviousEdges;
 5626 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
 5627 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
 5628 }
 5629
 5630 // Clean up the default block - it may have phis or other instructions before
 5631 // the unreachable terminator.
 5632 if (!HasDefault)
 5634
 5635 auto *UnreachableDefault = SI->getDefaultDest();
 5636
 5637 // Drop the switch.
 5638 SI->eraseFromParent();
 5639
 5640 if (!HasDefault && DTU)
 5641 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
 5642
 5643 return true;
 5644}
5645
5646/// Compute masked bits for the condition of a switch
5647/// and use it to remove dead cases.
 5649 AssumptionCache *AC,
 5650 const DataLayout &DL) {
 5651 Value *Cond = SI->getCondition();
 5652 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
 5653
 5654 // We can also eliminate cases by determining that their values are outside of
 5655 // the limited range of the condition based on how many significant (non-sign)
 5656 // bits are in the condition value.
 5657 unsigned MaxSignificantBitsInCond =
 5658 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
 5659
 5660 // Gather dead cases.
  // Per-successor case counts are only needed for DominatorTree edge
  // bookkeeping, so they are maintained only when DTU is non-null.
 5662 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
 5663 SmallVector<BasicBlock *, 8> UniqueSuccessors;
 5664 for (const auto &Case : SI->cases()) {
 5665 auto *Successor = Case.getCaseSuccessor();
 5666 if (DTU) {
 5667 if (!NumPerSuccessorCases.count(Successor))
 5668 UniqueSuccessors.push_back(Successor);
 5669 ++NumPerSuccessorCases[Successor];
 5670 }
 5671 const APInt &CaseVal = Case.getCaseValue()->getValue();
 5672 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
 5673 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
 5674 DeadCases.push_back(Case.getCaseValue());
 5675 if (DTU)
 5676 --NumPerSuccessorCases[Successor];
 5677 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
 5678 << " is dead.\n");
 5679 }
 5680 }
 5681
 5682 // If we can prove that the cases must cover all possible values, the
 5683 // default destination becomes dead and we can remove it. If we know some
 5684 // of the bits in the value, we can use that to more precisely compute the
 5685 // number of possible unique case values.
 5686 bool HasDefault =
 5687 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
 5688 const unsigned NumUnknownBits =
 5689 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
 5690 assert(NumUnknownBits <= Known.getBitWidth());
 5691 if (HasDefault && DeadCases.empty() &&
 5692 NumUnknownBits < 64 /* avoid overflow */) {
 5693 uint64_t AllNumCases = 1ULL << NumUnknownBits;
 5694 if (SI->getNumCases() == AllNumCases) {
 5696 return true;
 5697 }
 5698 // When only one case value is missing, replace default with that case.
 5699 // Eliminating the default branch will provide more opportunities for
 5700 // optimization, such as lookup tables.
 5701 if (SI->getNumCases() == AllNumCases - 1) {
 5702 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
 5703 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
 5704 if (CondTy->getIntegerBitWidth() > 64 ||
 5705 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
 5706 return false;
 5707
  // NOTE(review): XOR-folding all present case values yields the single
  // missing one — this relies on the cases covering all but one of the
  // 2^NumUnknownBits possible values; confirm against the checks above.
 5708 uint64_t MissingCaseVal = 0;
 5709 for (const auto &Case : SI->cases())
 5710 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
 5711 auto *MissingCase =
 5712 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
 5714 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
 5715 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
 5716 SIW.setSuccessorWeight(0, 0);
 5717 return true;
 5718 }
 5719 }
 5720
 5721 if (DeadCases.empty())
 5722 return false;
 5723
 5725 for (ConstantInt *DeadCase : DeadCases) {
 5726 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
 5727 assert(CaseI != SI->case_default() &&
 5728 "Case was not found. Probably mistake in DeadCases forming.");
 5729 // Prune unused values from PHI nodes.
 5730 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
 5731 SIW.removeCase(CaseI);
 5732 }
 5733
 5734 if (DTU) {
 5735 std::vector<DominatorTree::UpdateType> Updates;
 5736 for (auto *Successor : UniqueSuccessors)
 5737 if (NumPerSuccessorCases[Successor] == 0)
 5738 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
 5739 DTU->applyUpdates(Updates);
 5740 }
 5741
 5742 return true;
 5743}
5744
5745/// If BB would be eligible for simplification by
5746/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5747/// by an unconditional branch), look at the phi node for BB in the successor
5748/// block and see if the incoming value is equal to CaseValue. If so, return
5749/// the phi node, and set PhiIndex to BB's index in the phi node.
 5751 BasicBlock *BB, int *PhiIndex) {
 5752 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
 5753 return nullptr; // BB must be empty to be a candidate for simplification.
 5754 if (!BB->getSinglePredecessor())
 5755 return nullptr; // BB must be dominated by the switch.
 5756
 5757 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
 5758 if (!Branch || !Branch->isUnconditional())
 5759 return nullptr; // Terminator must be unconditional branch.
 5760
 5761 BasicBlock *Succ = Branch->getSuccessor(0);
 5762
  // Return the first phi in Succ whose incoming value for BB is exactly
  // CaseValue; later matching phis (if any) are not considered.
 5763 for (PHINode &PHI : Succ->phis()) {
 5764 int Idx = PHI.getBasicBlockIndex(BB);
 5765 assert(Idx >= 0 && "PHI has no entry for predecessor?");
 5766
 5767 Value *InValue = PHI.getIncomingValue(Idx);
 5768 if (InValue != CaseValue)
 5769 continue;
 5770
 5771 *PhiIndex = Idx;
 5772 return &PHI;
 5773 }
 5774
 5775 return nullptr;
 5776}
5777
5778/// Try to forward the condition of a switch instruction to a phi node
5779/// dominated by the switch, if that would mean that some of the destination
5780/// blocks of the switch can be folded away. Return true if a change is made.
 5782 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
 5783
 5784 ForwardingNodesMap ForwardingNodes;
 5785 BasicBlock *SwitchBlock = SI->getParent();
 5786 bool Changed = false;
 5787 for (const auto &Case : SI->cases()) {
 5788 ConstantInt *CaseValue = Case.getCaseValue();
 5789 BasicBlock *CaseDest = Case.getCaseSuccessor();
 5790
 5791 // Replace phi operands in successor blocks that are using the constant case
 5792 // value rather than the switch condition variable:
 5793 // switchbb:
 5794 // switch i32 %x, label %default [
 5795 // i32 17, label %succ
 5796 // ...
 5797 // succ:
 5798 // %r = phi i32 ... [ 17, %switchbb ] ...
 5799 // -->
 5800 // %r = phi i32 ... [ %x, %switchbb ] ...
 5801
 5802 for (PHINode &Phi : CaseDest->phis()) {
 5803 // This only works if there is exactly 1 incoming edge from the switch to
 5804 // a phi. If there is >1, that means multiple cases of the switch map to 1
 5805 // value in the phi, and that phi value is not the switch condition. Thus,
 5806 // this transform would not make sense (the phi would be invalid because
 5807 // a phi can't have different incoming values from the same block).
 5808 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
 5809 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
 5810 count(Phi.blocks(), SwitchBlock) == 1) {
 5811 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
 5812 Changed = true;
 5813 }
 5814 }
 5815
 5816 // Collect phi nodes that are indirectly using this switch's case constants.
 5817 int PhiIdx;
 5818 if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
 5819 ForwardingNodes[Phi].push_back(PhiIdx);
 5820 }
 5821
 5822 for (auto &ForwardingNode : ForwardingNodes) {
 5823 PHINode *Phi = ForwardingNode.first;
 5824 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
 5825 // Check if it helps to fold PHI.
  // Forwarding pays off only when at least two operands become the condition,
  // or the phi already uses the condition for some other incoming edge.
 5826 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
 5827 continue;
 5828
 5829 for (int Index : Indexes)
 5830 Phi->setIncomingValue(Index, SI->getCondition());
 5831 Changed = true;
 5832 }
 5833
 5834 return Changed;
 5835}
5836
5837/// Return true if the backend will be able to handle
5838/// initializing an array of constants like C.
 5840 if (C->isThreadDependent())
 5841 return false;
 5842 if (C->isDLLImportDependent())
 5843 return false;
 5844
  // Only simple scalar/pointer constants (and constant expressions over them)
  // can be emitted into a lookup-table array initializer.
 5845 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
 5846 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
 5847 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
 5848 return false;
 5849
 5850 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
 5851 // Pointer casts and in-bounds GEPs will not prohibit the backend from
 5852 // materializing the array of constants.
 5853 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
 5854 if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
 5855 return false;
 5856 }
 5857
 5859 return false;
 5860
 5861 return true;
 5862}
5863
5864/// If V is a Constant, return it. Otherwise, try to look up
5865/// its constant value in ConstantPool, returning 0 if it's not there.
5866static Constant *
 5869 if (Constant *C = dyn_cast<Constant>(V))
 5870 return C;
  // Fall back to any constant previously recorded for V during folding.
 5871 return ConstantPool.lookup(V);
 5872}
5873
5874/// Try to fold instruction I into a constant. This works for
5875/// simple instructions such as binary operations where both operands are
5876/// constant or can be replaced by constants from the ConstantPool. Returns the
5877/// resulting constant on success, 0 otherwise.
5878static Constant *
  // Selects fold to one arm when the condition is a known all-ones / zero
  // constant; anything else is not foldable here.
 5881 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
 5882 Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
 5883 if (!A)
 5884 return nullptr;
 5885 if (A->isAllOnesValue())
 5886 return LookupConstant(Select->getTrueValue(), ConstantPool);
 5887 if (A->isNullValue())
 5888 return LookupConstant(Select->getFalseValue(), ConstantPool);
 5889 return nullptr;
 5890 }
 5891
  // Every operand must resolve to a constant for the fold to proceed.
 5893 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
 5894 if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
 5895 COps.push_back(A);
 5896 else
 5897 return nullptr;
 5898 }
 5899
 5900 return ConstantFoldInstOperands(I, COps, DL);
 5901}
5902
5903/// Try to determine the resulting constant values in phi nodes
5904/// at the common destination basic block, *CommonDest, for one of the case
5905/// destinations CaseDest corresponding to value CaseVal (0 for the default
5906/// case), of a switch instruction SI.
5907static bool
 5909 BasicBlock **CommonDest,
 5910 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
 5911 const DataLayout &DL, const TargetTransformInfo &TTI) {
 5912 // The block from which we enter the common destination.
 5913 BasicBlock *Pred = SI->getParent();
 5914
 5915 // If CaseDest is empty except for some side-effect free instructions through
 5916 // which we can constant-propagate the CaseVal, continue to its successor.
 5918 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
 5919 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
 5920 if (I.isTerminator()) {
 5921 // If the terminator is a simple branch, continue to the next block.
 5922 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
 5923 return false;
 5924 Pred = CaseDest;
 5925 CaseDest = I.getSuccessor(0);
 5926 } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
 5927 // Instruction is side-effect free and constant.
 5928
 5929 // If the instruction has uses outside this block or a phi node slot for
 5930 // the block, it is not safe to bypass the instruction since it would then
 5931 // no longer dominate all its uses.
 5932 for (auto &Use : I.uses()) {
 5933 User *User = Use.getUser();
 5934 if (Instruction *I = dyn_cast<Instruction>(User))
 5935 if (I->getParent() == CaseDest)
 5936 continue;
 5937 if (PHINode *Phi = dyn_cast<PHINode>(User))
 5938 if (Phi->getIncomingBlock(Use) == CaseDest)
 5939 continue;
 5940 return false;
 5941 }
 5942
 5943 ConstantPool.insert(std::make_pair(&I, C));
 5944 } else {
  // A non-terminator we cannot constant-fold ends the scan; whatever
  // block we reached so far is the candidate destination.
 5945 break;
 5946 }
 5947 }
 5948
 5949 // If we did not have a CommonDest before, use the current one.
 5950 if (!*CommonDest)
 5951 *CommonDest = CaseDest;
 5952 // If the destination isn't the common one, abort.
 5953 if (CaseDest != *CommonDest)
 5954 return false;
 5955
 5956 // Get the values for this case from phi nodes in the destination block.
 5957 for (PHINode &PHI : (*CommonDest)->phis()) {
 5958 int Idx = PHI.getBasicBlockIndex(Pred);
 5959 if (Idx == -1)
 5960 continue;
 5961
 5962 Constant *ConstVal =
 5963 LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
 5964 if (!ConstVal)
 5965 return false;
 5966
 5967 // Be conservative about which kinds of constants we support.
 5968 if (!ValidLookupTableConstant(ConstVal, TTI))
 5969 return false;
 5970
 5971 Res.push_back(std::make_pair(&PHI, ConstVal));
 5972 }
 5973
 5974 return Res.size() > 0;
 5975}
5976
5977// Helper function used to add CaseVal to the list of cases that generate
5978// Result. Returns the updated number of cases that generate this result.
5979static size_t mapCaseToResult(ConstantInt *CaseVal,
5980 SwitchCaseResultVectorTy &UniqueResults,
5981 Constant *Result) {
5982 for (auto &I : UniqueResults) {
5983 if (I.first == Result) {
5984 I.second.push_back(CaseVal);
5985 return I.second.size();
5986 }
5987 }
5988 UniqueResults.push_back(
5989 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5990 return 1;
5991}
5992
5993// Helper function that initializes a map containing
5994// results for the PHI node of the common destination block for a switch
5995// instruction. Returns false if multiple PHI nodes have been found or if
5996// there is not a common destination block for the switch.
 5998 BasicBlock *&CommonDest,
 5999 SwitchCaseResultVectorTy &UniqueResults,
 6000 Constant *&DefaultResult,
 6001 const DataLayout &DL,
 6002 const TargetTransformInfo &TTI,
 6003 uintptr_t MaxUniqueResults) {
  // Derive, for every case, the constant reaching CommonDest; bail out on any
  // inconsistency (multiple phis, too many results, too many cases per result).
 6004 for (const auto &I : SI->cases()) {
 6005 ConstantInt *CaseVal = I.getCaseValue();
 6006
 6007 // Resulting value at phi nodes for this case value.
 6008 SwitchCaseResultsTy Results;
 6009 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
 6010 DL, TTI))
 6011 return false;
 6012
 6013 // Only one value per case is permitted.
 6014 if (Results.size() > 1)
 6015 return false;
 6016
 6017 // Add the case->result mapping to UniqueResults.
 6018 const size_t NumCasesForResult =
 6019 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
 6020
 6021 // Early out if there are too many cases for this result.
 6022 if (NumCasesForResult > MaxSwitchCasesPerResult)
 6023 return false;
 6024
 6025 // Early out if there are too many unique results.
 6026 if (UniqueResults.size() > MaxUniqueResults)
 6027 return false;
 6028
 6029 // Check the PHI consistency.
 6030 if (!PHI)
 6031 PHI = Results[0].first;
 6032 else if (PHI != Results[0].first)
 6033 return false;
 6034 }
 6035 // Find the default result value.
 6037 BasicBlock *DefaultDest = SI->getDefaultDest();
 6038 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
 6039 DL, TTI);
 6040 // If the default value is not found abort unless the default destination
 6041 // is unreachable.
 6042 DefaultResult =
 6043 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
 6044 if ((!DefaultResult &&
 6045 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
 6046 return false;
 6047
 6048 return true;
 6049}
6050
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// \p ResultVector maps each distinct result constant to the list of case
// values that produce it; \p DefaultResult is the constant produced by the
// default destination (may be null). Returns the select (chain) replacing the
// switch, or nullptr if no transformation applies.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    // With a reachable default, the second case needs its own compare; the
    // default result becomes the fallthrough value of the inner select.
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where all cases map to the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // A power-of-2 group of cases that map to the same result can sometimes be
    // tested with a single mask-and-compare:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find minimal case value (signed comparison).
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Collect the bits in which any case value differs from the minimum.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // The trick is only valid when the cases cover every combination of the
      // touched bits, i.e. there are exactly 2^popcount(BitMask) cases.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where exactly two case values map to the
    // same result: compare against both and OR the results.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6127
6128// Helper function to cleanup a switch instruction that has been converted into
6129// a select, fixing up PHI nodes and basic blocks.
6131 Value *SelectValue,
6132 IRBuilder<> &Builder,
6133 DomTreeUpdater *DTU) {
6134 std::vector<DominatorTree::UpdateType> Updates;
6135
6136 BasicBlock *SelectBB = SI->getParent();
6137 BasicBlock *DestBB = PHI->getParent();
6138
6139 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6140 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6141 Builder.CreateBr(DestBB);
6142
6143 // Remove the switch.
6144
6145 PHI->removeIncomingValueIf(
6146 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6147 PHI->addIncoming(SelectValue, SelectBB);
6148
6149 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6150 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6151 BasicBlock *Succ = SI->getSuccessor(i);
6152
6153 if (Succ == DestBB)
6154 continue;
6155 Succ->removePredecessor(SelectBB);
6156 if (DTU && RemovedSuccessors.insert(Succ).second)
6157 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6158 }
6159 SI->eraseFromParent();
6160 if (DTU)
6161 DTU->applyUpdates(Updates);
6162}
6163
6164/// If a switch is only used to initialize one or more phi nodes in a common
6165/// successor block with only two different constant values, try to replace the
6166/// switch with a select. Returns true if the fold was made.
6167static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6168 DomTreeUpdater *DTU, const DataLayout &DL,
6169 const TargetTransformInfo &TTI) {
6170 Value *const Cond = SI->getCondition();
6171 PHINode *PHI = nullptr;
6172 BasicBlock *CommonDest = nullptr;
6173 Constant *DefaultResult;
6174 SwitchCaseResultVectorTy UniqueResults;
6175 // Collect all the cases that will deliver the same value from the switch.
6176 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6177 DL, TTI, /*MaxUniqueResults*/ 2))
6178 return false;
6179
6180 assert(PHI != nullptr && "PHI for value select not found");
6181 Builder.SetInsertPoint(SI);
6182 Value *SelectValue =
6183 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6184 if (!SelectValue)
6185 return false;
6186
6187 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6188 return true;
6189}
6190
namespace {

/// This class represents a lookup table that can be used to replace a switch.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // For LinearMapKind, true when the mapping may wrap in the signed sense;
  // in that case the generated mul/add must not be marked nsw.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6252
6253SwitchLookupTable::SwitchLookupTable(
6254 Module &M, uint64_t TableSize, ConstantInt *Offset,
6255 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6256 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6257 assert(Values.size() && "Can't build lookup table without values!");
6258 assert(TableSize >= Values.size() && "Can't fit values in table!");
6259
6260 // If all values in the table are equal, this is that value.
6261 SingleValue = Values.begin()->second;
6262
6263 Type *ValueType = Values.begin()->second->getType();
6264
6265 // Build up the table contents.
6266 SmallVector<Constant *, 64> TableContents(TableSize);
6267 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6268 ConstantInt *CaseVal = Values[I].first;
6269 Constant *CaseRes = Values[I].second;
6270 assert(CaseRes->getType() == ValueType);
6271
6272 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6273 TableContents[Idx] = CaseRes;
6274
6275 if (CaseRes != SingleValue)
6276 SingleValue = nullptr;
6277 }
6278
6279 // Fill in any holes in the table with the default result.
6280 if (Values.size() < TableSize) {
6281 assert(DefaultValue &&
6282 "Need a default value to fill the lookup table holes.");
6283 assert(DefaultValue->getType() == ValueType);
6284 for (uint64_t I = 0; I < TableSize; ++I) {
6285 if (!TableContents[I])
6286 TableContents[I] = DefaultValue;
6287 }
6288
6289 if (DefaultValue != SingleValue)
6290 SingleValue = nullptr;
6291 }
6292
6293 // If each element in the table contains the same value, we only need to store
6294 // that single value.
6295 if (SingleValue) {
6296 Kind = SingleValueKind;
6297 return;
6298 }
6299
6300 // Check if we can derive the value with a linear transformation from the
6301 // table index.
6302 if (isa<IntegerType>(ValueType)) {
6303 bool LinearMappingPossible = true;
6304 APInt PrevVal;
6305 APInt DistToPrev;
6306 // When linear map is monotonic and signed overflow doesn't happen on
6307 // maximum index, we can attach nsw on Add and Mul.
6308 bool NonMonotonic = false;
6309 assert(TableSize >= 2 && "Should be a SingleValue table.");
6310 // Check if there is the same distance between two consecutive values.
6311 for (uint64_t I = 0; I < TableSize; ++I) {
6312 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6313 if (!ConstVal) {
6314 // This is an undef. We could deal with it, but undefs in lookup tables
6315 // are very seldom. It's probably not worth the additional complexity.
6316 LinearMappingPossible = false;
6317 break;
6318 }
6319 const APInt &Val = ConstVal->getValue();
6320 if (I != 0) {
6321 APInt Dist = Val - PrevVal;
6322 if (I == 1) {
6323 DistToPrev = Dist;
6324 } else if (Dist != DistToPrev) {
6325 LinearMappingPossible = false;
6326 break;
6327 }
6328 NonMonotonic |=
6329 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6330 }
6331 PrevVal = Val;
6332 }
6333 if (LinearMappingPossible) {
6334 LinearOffset = cast<ConstantInt>(TableContents[0]);
6335 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6336 bool MayWrap = false;
6337 APInt M = LinearMultiplier->getValue();
6338 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6339 LinearMapValWrapped = NonMonotonic || MayWrap;
6340 Kind = LinearMapKind;
6341 ++NumLinearMaps;
6342 return;
6343 }
6344 }
6345
6346 // If the type is integer and the table fits in a register, build a bitmap.
6347 if (WouldFitInRegister(DL, TableSize, ValueType)) {
6348 IntegerType *IT = cast<IntegerType>(ValueType);
6349 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6350 for (uint64_t I = TableSize; I > 0; --I) {
6351 TableInt <<= IT->getBitWidth();
6352 // Insert values into the bitmap. Undef values are set to zero.
6353 if (!isa<UndefValue>(TableContents[I - 1])) {
6354 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6355 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6356 }
6357 }
6358 BitMap = ConstantInt::get(M.getContext(), TableInt);
6359 BitMapElementTy = IT;
6360 Kind = BitMapKind;
6361 ++NumBitMaps;
6362 return;
6363 }
6364
6365 // Store the table in an array.
6366 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6367 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6368
6369 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6370 GlobalVariable::PrivateLinkage, Initializer,
6371 "switch.table." + FuncName);
6372 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6373 // Set the alignment to that of an array items. We will be only loading one
6374 // value out of it.
6375 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6376 Kind = ArrayKind;
6377}
6378
// Emit the instructions that fetch the table entry for Index, according to
// the representation chosen by the constructor. For SingleValueKind no
// instructions are emitted at all.
Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value:
    // result = LinearOffset + Index * LinearMultiplier.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    // Skip the multiply/add when they would be no-ops; nsw is only legal when
    // the constructor proved the mapping cannot wrap.
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed.
    IntegerType *IT = cast<IntegerType>(Index->getType());
    uint64_t TableSize =
        Array->getInitializer()->getType()->getArrayNumElements();
    if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
      Index = Builder.CreateZExt(
          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
          "switch.tableidx.zext");

    Value *GEPIndices[] = {Builder.getInt32(0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                           GEPIndices, "switch.gep");
    return Builder.CreateLoad(
        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
        "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}
6440
6441bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6442 uint64_t TableSize,
6443 Type *ElementType) {
6444 auto *IT = dyn_cast<IntegerType>(ElementType);
6445 if (!IT)
6446 return false;
6447 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6448 // are <= 15, we could try to narrow the type.
6449
6450 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6451 if (TableSize >= UINT_MAX / IT->getBitWidth())
6452 return false;
6453 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6454}
6455
6457 const DataLayout &DL) {
6458 // Allow any legal type.
6459 if (TTI.isTypeLegal(Ty))
6460 return true;
6461
6462 auto *IT = dyn_cast<IntegerType>(Ty);
6463 if (!IT)
6464 return false;
6465
6466 // Also allow power of 2 integer types that have at least 8 bits and fit in
6467 // a register. These types are common in frontend languages and targets
6468 // usually support loads of these types.
6469 // TODO: We could relax this to any integer that fits in a register and rely
6470 // on ABI alignment and padding in the table to allow the load to be widened.
6471 // Or we could widen the constants and truncate the load.
6472 unsigned BitWidth = IT->getBitWidth();
6473 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6474 DL.fitsInLegalInteger(IT->getBitWidth());
6475}
6476
/// Return true when NumCases case values spread over a span of CaseRange
/// consecutive values are dense enough (>= 40%) to justify a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Reject huge ranges up front so the scaled comparison below cannot
  // overflow on the CaseRange side.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  const uint64_t CasesScaled = NumCases * 100;
  const uint64_t ThresholdScaled = CaseRange * MinDensity;
  return CasesScaled >= ThresholdScaled;
}
6488
6490 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6491 uint64_t Range = Diff + 1;
6492 if (Range < Diff)
6493 return false; // Overflow.
6494
6495 return isSwitchDense(Values.size(), Range);
6496}
6497
6498/// Determine whether a lookup table should be built for this switch, based on
6499/// the number of cases, size of the table, and the types of the results.
6500// TODO: We could support larger than legal types by limiting based on the
6501// number of loads required and/or table size. If the constants are small we
6502// could use smaller table entries and extend after the load.
6503static bool
6505 const TargetTransformInfo &TTI, const DataLayout &DL,
6506 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6507 if (SI->getNumCases() > TableSize)
6508 return false; // TableSize overflowed.
6509
6510 bool AllTablesFitInRegister = true;
6511 bool HasIllegalType = false;
6512 for (const auto &I : ResultTypes) {
6513 Type *Ty = I.second;
6514
6515 // Saturate this flag to true.
6516 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6517
6518 // Saturate this flag to false.
6519 AllTablesFitInRegister =
6520 AllTablesFitInRegister &&
6521 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
6522
6523 // If both flags saturate, we're done. NOTE: This *only* works with
6524 // saturating flags, and all flags have to saturate first due to the
6525 // non-deterministic behavior of iterating over a dense map.
6526 if (HasIllegalType && !AllTablesFitInRegister)
6527 break;
6528 }
6529
6530 // If each table would fit in a register, we should build it anyway.
6531 if (AllTablesFitInRegister)
6532 return true;
6533
6534 // Don't build a table that doesn't fit in-register if it has illegal types.
6535 if (HasIllegalType)
6536 return false;
6537
6538 return isSwitchDense(SI->getNumCases(), TableSize);
6539}
6540
6542 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6543 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6544 const DataLayout &DL, const TargetTransformInfo &TTI) {
6545 if (MinCaseVal.isNullValue())
6546 return true;
6547 if (MinCaseVal.isNegative() ||
6548 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6549 !HasDefaultResults)
6550 return false;
6551 return all_of(ResultTypes, [&](const auto &KV) {
6552 return SwitchLookupTable::WouldFitInRegister(
6553 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6554 KV.second /* ResultType */);
6555 });
6556}
6557
6558/// Try to reuse the switch table index compare. Following pattern:
6559/// \code
6560/// if (idx < tablesize)
6561/// r = table[idx]; // table does not contain default_value
6562/// else
6563/// r = default_value;
6564/// if (r != default_value)
6565/// ...
6566/// \endcode
6567/// Is optimized to:
6568/// \code
6569/// cond = idx < tablesize;
6570/// if (cond)
6571/// r = table[idx];
6572/// else
6573/// r = default_value;
6574/// if (cond)
6575/// ...
6576/// \endcode
6577/// Jump threading will then eliminate the second if(cond).
6579 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6580 Constant *DefaultValue,
6581 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6582 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6583 if (!CmpInst)
6584 return;
6585
6586 // We require that the compare is in the same block as the phi so that jump
6587 // threading can do its work afterwards.
6588 if (CmpInst->getParent() != PhiBlock)
6589 return;
6590
6591 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6592 if (!CmpOp1)
6593 return;
6594
6595 Value *RangeCmp = RangeCheckBranch->getCondition();
6596 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6597 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6598
6599 // Check if the compare with the default value is constant true or false.
6600 const DataLayout &DL = PhiBlock->getDataLayout();
6602 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6603 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6604 return;
6605
6606 // Check if the compare with the case values is distinct from the default
6607 // compare result.
6608 for (auto ValuePair : Values) {
6610 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6611 if (!CaseConst || CaseConst == DefaultConst ||
6612 (CaseConst != TrueConst && CaseConst != FalseConst))
6613 return;
6614 }
6615
6616 // Check if the branch instruction dominates the phi node. It's a simple
6617 // dominance check, but sufficient for our needs.
6618 // Although this check is invariant in the calling loops, it's better to do it
6619 // at this late stage. Practically we do it at most once for a switch.
6620 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6621 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6622 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6623 return;
6624 }
6625
6626 if (DefaultConst == FalseConst) {
6627 // The compare yields the same result. We can replace it.
6628 CmpInst->replaceAllUsesWith(RangeCmp);
6629 ++NumTableCmpReuses;
6630 } else {
6631 // The compare yields the same result, just inverted. We can replace it.
6632 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6633 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6634 RangeCheckBranch->getIterator());
6635 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6636 ++NumTableCmpReuses;
6637 }
6638}
6639
6640/// If the switch is only used to initialize one or more phi nodes in a common
6641/// successor block with different constant values, replace the switch with
6642/// lookup tables.
6644 DomTreeUpdater *DTU, const DataLayout &DL,
6645 const TargetTransformInfo &TTI) {
6646 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6647
6648 BasicBlock *BB = SI->getParent();
6649 Function *Fn = BB->getParent();
6650 // Only build lookup table when we have a target that supports it or the
6651 // attribute is not set.
6653 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6654 return false;
6655
6656 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6657 // split off a dense part and build a lookup table for that.
6658
6659 // FIXME: This creates arrays of GEPs to constant strings, which means each
6660 // GEP needs a runtime relocation in PIC code. We should just build one big
6661 // string and lookup indices into that.
6662
6663 // Ignore switches with less than three cases. Lookup tables will not make
6664 // them faster, so we don't analyze them.
6665 if (SI->getNumCases() < 3)
6666 return false;
6667
6668 // Figure out the corresponding result for each case value and phi node in the
6669 // common destination, as well as the min and max case values.
6670 assert(!SI->cases().empty());
6671 SwitchInst::CaseIt CI = SI->case_begin();
6672 ConstantInt *MinCaseVal = CI->getCaseValue();
6673 ConstantInt *MaxCaseVal = CI->getCaseValue();
6674
6675 BasicBlock *CommonDest = nullptr;
6676
6677 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6679
6683
6684 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6685 ConstantInt *CaseVal = CI->getCaseValue();
6686 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6687 MinCaseVal = CaseVal;
6688 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6689 MaxCaseVal = CaseVal;
6690
6691 // Resulting value at phi nodes for this case value.
6693 ResultsTy Results;
6694 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6695 Results, DL, TTI))
6696 return false;
6697
6698 // Append the result from this case to the list for each phi.
6699 for (const auto &I : Results) {
6700 PHINode *PHI = I.first;
6701 Constant *Value = I.second;
6702 if (!ResultLists.count(PHI))
6703 PHIs.push_back(PHI);
6704 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6705 }
6706 }
6707
6708 // Keep track of the result types.
6709 for (PHINode *PHI : PHIs) {
6710 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6711 }
6712
6713 uint64_t NumResults = ResultLists[PHIs[0]].size();
6714
6715 // If the table has holes, we need a constant result for the default case
6716 // or a bitmask that fits in a register.
6717 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6718 bool HasDefaultResults =
6719 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6720 DefaultResultsList, DL, TTI);
6721
6722 for (const auto &I : DefaultResultsList) {
6723 PHINode *PHI = I.first;
6724 Constant *Result = I.second;
6725 DefaultResults[PHI] = Result;
6726 }
6727
6728 bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
6729 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6730 uint64_t TableSize;
6731 if (UseSwitchConditionAsTableIndex)
6732 TableSize = MaxCaseVal->getLimitedValue() + 1;
6733 else
6734 TableSize =
6735 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6736
6737 // If the default destination is unreachable, or if the lookup table covers
6738 // all values of the conditional variable, branch directly to the lookup table
6739 // BB. Otherwise, check that the condition is within the case range.
6740 bool DefaultIsReachable = !SI->defaultDestUndefined();
6741
6742 bool TableHasHoles = (NumResults < TableSize);
6743
6744 // If the table has holes but the default destination doesn't produce any
6745 // constant results, the lookup table entries corresponding to the holes will
6746 // contain undefined values.
6747 bool AllHolesAreUndefined = TableHasHoles && !HasDefaultResults;
6748
6749 // If the default destination doesn't produce a constant result but is still
6750 // reachable, and the lookup table has holes, we need to use a mask to
6751 // determine if the current index should load from the lookup table or jump
6752 // to the default case.
6753 // The mask is unnecessary if the table has holes but the default destination
6754 // is unreachable, as in that case the holes must also be unreachable.
6755 bool NeedMask = AllHolesAreUndefined && DefaultIsReachable;
6756 if (NeedMask) {
6757 // As an extra penalty for the validity test we require more cases.
6758 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6759 return false;
6760 if (!DL.fitsInLegalInteger(TableSize))
6761 return false;
6762 }
6763
6764 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6765 return false;
6766
6767 std::vector<DominatorTree::UpdateType> Updates;
6768
6769 // Compute the maximum table size representable by the integer type we are
6770 // switching upon.
6771 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6772 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6773 assert(MaxTableSize >= TableSize &&
6774 "It is impossible for a switch to have more entries than the max "
6775 "representable value of its input integer type's size.");
6776
6777 // Create the BB that does the lookups.
6778 Module &Mod = *CommonDest->getParent()->getParent();
6779 BasicBlock *LookupBB = BasicBlock::Create(
6780 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6781
6782 // Compute the table index value.
6783 Builder.SetInsertPoint(SI);
6784 Value *TableIndex;
6785 ConstantInt *TableIndexOffset;
6786 if (UseSwitchConditionAsTableIndex) {
6787 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6788 TableIndex = SI->getCondition();
6789 } else {
6790 TableIndexOffset = MinCaseVal;
6791 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6792 // we can try to attach nsw.
6793 bool MayWrap = true;
6794 if (!DefaultIsReachable) {
6795 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6796 (void)Res;
6797 }
6798
6799 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6800 "switch.tableidx", /*HasNUW =*/false,
6801 /*HasNSW =*/!MayWrap);
6802 }
6803
6804 BranchInst *RangeCheckBranch = nullptr;
6805
6806 // Grow the table to cover all possible index values to avoid the range check.
6807 // It will use the default result to fill in the table hole later, so make
6808 // sure it exist.
6809 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6810 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6811 // Grow the table shouldn't have any size impact by checking
6812 // WouldFitInRegister.
6813 // TODO: Consider growing the table also when it doesn't fit in a register
6814 // if no optsize is specified.
6815 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6816 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6817 return SwitchLookupTable::WouldFitInRegister(
6818 DL, UpperBound, KV.second /* ResultType */);
6819 })) {
6820 // There may be some case index larger than the UpperBound (unreachable
6821 // case), so make sure the table size does not get smaller.
6822 TableSize = std::max(UpperBound, TableSize);
6823 // The default branch is unreachable after we enlarge the lookup table.
6824 // Adjust DefaultIsReachable to reuse code path.
6825 DefaultIsReachable = false;
6826 }
6827 }
6828
6829 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6830 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6831 Builder.CreateBr(LookupBB);
6832 if (DTU)
6833 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6834 // Note: We call removeProdecessor later since we need to be able to get the
6835 // PHI value for the default case in case we're using a bit mask.
6836 } else {
6837 Value *Cmp = Builder.CreateICmpULT(
6838 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6839 RangeCheckBranch =
6840 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6841 if (DTU)
6842 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6843 }
6844
6845 // Populate the BB that does the lookups.
6846 Builder.SetInsertPoint(LookupBB);
6847
6848 if (NeedMask) {
6849 // Before doing the lookup, we do the hole check. The LookupBB is therefore
6850 // re-purposed to do the hole check, and we create a new LookupBB.
6851 BasicBlock *MaskBB = LookupBB;
6852 MaskBB->setName("switch.hole_check");
6853 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6854 CommonDest->getParent(), CommonDest);
6855
6856 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6857 // unnecessary illegal types.
6858 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6859 APInt MaskInt(TableSizePowOf2, 0);
6860 APInt One(TableSizePowOf2, 1);
6861 // Build bitmask; fill in a 1 bit for every case.
6862 const ResultListTy &ResultList = ResultLists[PHIs[0]];
6863 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6864 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6865 .getLimitedValue();
6866 MaskInt |= One << Idx;
6867 }
6868 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6869
6870 // Get the TableIndex'th bit of the bitmask.
6871 // If this bit is 0 (meaning hole) jump to the default destination,
6872 // else continue with table lookup.
6873 IntegerType *MapTy = TableMask->getIntegerType();
6874 Value *MaskIndex =
6875 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6876 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6877 Value *LoBit = Builder.CreateTrunc(
6878 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6879 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6880 if (DTU) {
6881 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6882 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6883 }
6884 Builder.SetInsertPoint(LookupBB);
6885 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6886 }
6887
6888 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6889 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6890 // do not delete PHINodes here.
6891 SI->getDefaultDest()->removePredecessor(BB,
6892 /*KeepOneInputPHIs=*/true);
6893 if (DTU)
6894 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6895 }
6896
6897 for (PHINode *PHI : PHIs) {
6898 const ResultListTy &ResultList = ResultLists[PHI];
6899
6900 // Use any value to fill the lookup table holes.
6901 Constant *DV =
6902 AllHolesAreUndefined ? ResultLists[PHI][0].second : DefaultResults[PHI];
6903 StringRef FuncName = Fn->getName();
6904 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6905 DL, FuncName);
6906
6907 Value *Result = Table.BuildLookup(TableIndex, Builder);
6908
6909 // Do a small peephole optimization: re-use the switch table compare if
6910 // possible.
6911 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6912 BasicBlock *PhiBlock = PHI->getParent();
6913 // Search for compare instructions which use the phi.
6914 for (auto *User : PHI->users()) {
6915 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6916 }
6917 }
6918
6919 PHI->addIncoming(Result, LookupBB);
6920 }
6921
6922 Builder.CreateBr(CommonDest);
6923 if (DTU)
6924 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6925
6926 // Remove the switch.
6927 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6928 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6929 BasicBlock *Succ = SI->getSuccessor(i);
6930
6931 if (Succ == SI->getDefaultDest())
6932 continue;
6933 Succ->removePredecessor(BB);
6934 if (DTU && RemovedSuccessors.insert(Succ).second)
6935 Updates.push_back({DominatorTree::Delete, BB, Succ});
6936 }
6937 SI->eraseFromParent();
6938
6939 if (DTU)
6940 DTU->applyUpdates(Updates);
6941
6942 ++NumLookupTables;
6943 if (NeedMask)
6944 ++NumLookupTablesHoles;
6945 return true;
6946}
6947
6948/// Try to transform a switch that has "holes" in it to a contiguous sequence
6949/// of cases.
6950///
6951/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6952/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6953///
6954/// This converts a sparse switch into a dense switch which allows better
6955/// lowering and could also allow transforming into a lookup table.
6956static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6957 const DataLayout &DL,
6958 const TargetTransformInfo &TTI) {
6959 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6960 if (CondTy->getIntegerBitWidth() > 64 ||
6961 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6962 return false;
6963 // Only bother with this optimization if there are more than 3 switch cases;
6964 // SDAG will only bother creating jump tables for 4 or more cases.
6965 if (SI->getNumCases() < 4)
6966 return false;
6967
6968 // This transform is agnostic to the signedness of the input or case values. We
6969 // can treat the case values as signed or unsigned. We can optimize more common
6970 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6971 // as signed.
6973 for (const auto &C : SI->cases())
6974 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6975 llvm::sort(Values);
6976
6977 // If the switch is already dense, there's nothing useful to do here.
6978 if (isSwitchDense(Values))
6979 return false;
6980
6981 // First, transform the values such that they start at zero and ascend.
6982 int64_t Base = Values[0];
6983 for (auto &V : Values)
6984 V -= (uint64_t)(Base);
6985
6986 // Now we have signed numbers that have been shifted so that, given enough
6987 // precision, there are no negative values. Since the rest of the transform
6988 // is bitwise only, we switch now to an unsigned representation.
6989
6990 // This transform can be done speculatively because it is so cheap - it
6991 // results in a single rotate operation being inserted.
6992
6993 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6994 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6995 // less than 64.
6996 unsigned Shift = 64;
6997 for (auto &V : Values)
6998 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6999 assert(Shift < 64);
7000 if (Shift > 0)
7001 for (auto &V : Values)
7002 V = (int64_t)((uint64_t)V >> Shift);
7003
7004 if (!isSwitchDense(Values))
7005 // Transform didn't create a dense switch.
7006 return false;
7007
7008 // The obvious transform is to shift the switch condition right and emit a
7009 // check that the condition actually cleanly divided by GCD, i.e.
7010 // C & (1 << Shift - 1) == 0
7011 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7012 //
7013 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7014 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7015 // are nonzero then the switch condition will be very large and will hit the
7016 // default case.
7017
7018 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7019 Builder.SetInsertPoint(SI);
7020 Value *Sub =
7021 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7022 Value *Rot = Builder.CreateIntrinsic(
7023 Ty, Intrinsic::fshl,
7024 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7025 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7026
7027 for (auto Case : SI->cases()) {
7028 auto *Orig = Case.getCaseValue();
7029 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
7030 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7031 }
7032 return true;
7033}
7034
7035/// Tries to transform switch of powers of two to reduce switch range.
7036/// For example, switch like:
7037/// switch (C) { case 1: case 2: case 64: case 128: }
7038/// will be transformed to:
7039/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7040///
7041/// This transformation allows better lowering and could allow transforming into
7042/// a lookup table.
7044 const DataLayout &DL,
7045 const TargetTransformInfo &TTI) {
7046 Value *Condition = SI->getCondition();
7047 LLVMContext &Context = SI->getContext();
7048 auto *CondTy = cast<IntegerType>(Condition->getType());
7049
7050 if (CondTy->getIntegerBitWidth() > 64 ||
7051 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7052 return false;
7053
7054 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7055 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7056 {Condition, ConstantInt::getTrue(Context)}),
7058
7059 if (CttzIntrinsicCost > TTI::TCC_Basic)
7060 // Inserting intrinsic is too expensive.
7061 return false;
7062
7063 // Only bother with this optimization if there are more than 3 switch cases.
7064 // SDAG will only bother creating jump tables for 4 or more cases.
7065 if (SI->getNumCases() < 4)
7066 return false;
7067
7068 // We perform this optimization only for switches with
7069 // unreachable default case.
7070 // This assumtion will save us from checking if `Condition` is a power of two.
7071 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7072 return false;
7073
7074 // Check that switch cases are powers of two.
7076 for (const auto &Case : SI->cases()) {
7077 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7078 if (llvm::has_single_bit(CaseValue))
7079 Values.push_back(CaseValue);
7080 else
7081 return false;
7082 }
7083
7084 // isSwichDense requires case values to be sorted.
7085 llvm::sort(Values);
7086 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7087 llvm::countr_zero(Values.front()) + 1))
7088 // Transform is unable to generate dense switch.
7089 return false;
7090
7091 Builder.SetInsertPoint(SI);
7092
7093 // Replace each case with its trailing zeros number.
7094 for (auto &Case : SI->cases()) {
7095 auto *OrigValue = Case.getCaseValue();
7096 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7097 OrigValue->getValue().countr_zero()));
7098 }
7099
7100 // Replace condition with its trailing zeros number.
7101 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7102 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7103
7104 SI->setCondition(ConditionTrailingZeros);
7105
7106 return true;
7107}
7108
/// Run the switch-specific simplification pipeline on \p SI. Each
/// transformation that fires returns via requestResimplify() so the whole
/// block is re-examined; the ordering of attempts below is deliberate (cheap,
/// information-preserving rewrites first, lossy/late rewrites such as lookup
/// tables last).
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    // Switching on a select can be split into switching on each select arm.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (SimplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (FoldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  // A two-destination switch may be expressible as one or two selects.
  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  // Range-compression rewrites: power-of-two cases via cttz, then general
  // sparse-to-dense reduction.
  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (ReduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Finally, hoist any code common to all switch successors into this block.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
    return requestResimplify();

  return false;
}
7168
7169bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7170 BasicBlock *BB = IBI->getParent();
7171 bool Changed = false;
7172
7173 // Eliminate redundant destinations.
7176 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7177 BasicBlock *Dest = IBI->getDestination(i);
7178 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7179 if (!Dest->hasAddressTaken())
7180 RemovedSuccs.insert(Dest);
7181 Dest->removePredecessor(BB);
7182 IBI->removeDestination(i);
7183 --i;
7184 --e;
7185 Changed = true;
7186 }
7187 }
7188
7189 if (DTU) {
7190 std::vector<DominatorTree::UpdateType> Updates;
7191 Updates.reserve(RemovedSuccs.size());
7192 for (auto *RemovedSucc : RemovedSuccs)
7193 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7194 DTU->applyUpdates(Updates);
7195 }
7196
7197 if (IBI->getNumDestinations() == 0) {
7198 // If the indirectbr has no successors, change it to unreachable.
7199 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7201 return true;
7202 }
7203
7204 if (IBI->getNumDestinations() == 1) {
7205 // If the indirectbr has one successor, change it to a direct branch.
7208 return true;
7209 }
7210
7211 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7212 if (SimplifyIndirectBrOnSelect(IBI, SI))
7213 return requestResimplify();
7214 }
7215 return Changed;
7216}
7217
7218/// Given an block with only a single landing pad and a unconditional branch
7219/// try to find another basic block which this one can be merged with. This
7220/// handles cases where we have multiple invokes with unique landing pads, but
7221/// a shared handler.
7222///
7223/// We specifically choose to not worry about merging non-empty blocks
7224/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7225/// practice, the optimizer produces empty landing pad blocks quite frequently
7226/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7227/// sinking in this file)
7228///
7229/// This is primarily a code size optimization. We need to avoid performing
7230/// any transform which might inhibit optimization (such as our ability to
7231/// specialize a particular handler via tail commoning). We do this by not
7232/// merging any blocks which require us to introduce a phi. Since the same
7233/// values are flowing through both blocks, we don't lose any ability to
7234/// specialize. If anything, we make such specialization more likely.
7235///
7236/// TODO - This transformation could remove entries from a phi in the target
7237/// block when the inputs in the phi are the same for the two blocks being
7238/// merged. In some cases, this could result in removal of the PHI entirely.
7240 BasicBlock *BB, DomTreeUpdater *DTU) {
7241 auto Succ = BB->getUniqueSuccessor();
7242 assert(Succ);
7243 // If there's a phi in the successor block, we'd likely have to introduce
7244 // a phi into the merged landing pad block.
7245 if (isa<PHINode>(*Succ->begin()))
7246 return false;
7247
7248 for (BasicBlock *OtherPred : predecessors(Succ)) {
7249 if (BB == OtherPred)
7250 continue;
7251 BasicBlock::iterator I = OtherPred->begin();
7252 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7253 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7254 continue;
7255 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7256 ;
7257 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7258 if (!BI2 || !BI2->isIdenticalTo(BI))
7259 continue;
7260
7261 std::vector<DominatorTree::UpdateType> Updates;
7262
7263 // We've found an identical block. Update our predecessors to take that
7264 // path instead and make ourselves dead.
7266 for (BasicBlock *Pred : UniquePreds) {
7267 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7268 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7269 "unexpected successor");
7270 II->setUnwindDest(OtherPred);
7271 if (DTU) {
7272 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7273 Updates.push_back({DominatorTree::Delete, Pred, BB});
7274 }
7275 }
7276
7277 // The debug info in OtherPred doesn't cover the merged control flow that
7278 // used to go through BB. We need to delete it or update it.
7279 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7280 if (isa<DbgInfoIntrinsic>(Inst))
7281 Inst.eraseFromParent();
7282
7284 for (BasicBlock *Succ : UniqueSuccs) {
7285 Succ->removePredecessor(BB);
7286 if (DTU)
7287 Updates.push_back({DominatorTree::Delete, BB, Succ});
7288 }
7289
7290 IRBuilder<> Builder(BI);
7291 Builder.CreateUnreachable();
7292 BI->eraseFromParent();
7293 if (DTU)
7294 DTU->applyUpdates(Updates);
7295 return true;
7296 }
7297 return false;
7298}
7299
7300bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7301 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7302 : simplifyCondBranch(Branch, Builder);
7303}
7304
7305bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7306 IRBuilder<> &Builder) {
7307 BasicBlock *BB = BI->getParent();
7308 BasicBlock *Succ = BI->getSuccessor(0);
7309
7310 // If the Terminator is the only non-phi instruction, simplify the block.
7311 // If LoopHeader is provided, check if the block or its successor is a loop
7312 // header. (This is for early invocations before loop simplify and
7313 // vectorization to keep canonical loop forms for nested loops. These blocks
7314 // can be eliminated when the pass is invoked later in the back-end.)
7315 // Note that if BB has only one predecessor then we do not introduce new
7316 // backedge, so we can eliminate BB.
7317 bool NeedCanonicalLoop =
7318 Options.NeedCanonicalLoop &&
7319 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7320 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7322 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7323 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7324 return true;
7325
7326 // If the only instruction in the block is a seteq/setne comparison against a
7327 // constant, try to simplify the block.
7328 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7329 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7330 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7331 ;
7332 if (I->isTerminator() &&
7333 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7334 return true;
7335 }
7336
7337 // See if we can merge an empty landing pad block with another which is
7338 // equivalent.
7339 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7340 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7341 ;
7342 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7343 return true;
7344 }
7345
7346 // If this basic block is ONLY a compare and a branch, and if a predecessor
7347 // branches to us and our successor, fold the comparison into the
7348 // predecessor and use logical operations to update the incoming value
7349 // for PHI nodes in common successor.
7350 if (Options.SpeculateBlocks &&
7351 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7352 Options.BonusInstThreshold))
7353 return requestResimplify();
7354 return false;
7355}
7356
7358 BasicBlock *PredPred = nullptr;
7359 for (auto *P : predecessors(BB)) {
7360 BasicBlock *PPred = P->getSinglePredecessor();
7361 if (!PPred || (PredPred && PredPred != PPred))
7362 return nullptr;
7363 PredPred = PPred;
7364 }
7365 return PredPred;
7366}
7367
7368/// Fold the following pattern:
7369/// bb0:
7370/// br i1 %cond1, label %bb1, label %bb2
7371/// bb1:
7372/// br i1 %cond2, label %bb3, label %bb4
7373/// bb2:
7374/// br i1 %cond2, label %bb4, label %bb3
7375/// bb3:
7376/// ...
7377/// bb4:
7378/// ...
7379/// into
7380/// bb0:
7381/// %cond = xor i1 %cond1, %cond2
7382/// br i1 %cond, label %bb4, label %bb3
7383/// bb3:
7384/// ...
7385/// bb4:
7386/// ...
7387/// NOTE: %cond2 always dominates the terminator of bb0.
7389 BasicBlock *BB = BI->getParent();
7390 BasicBlock *BB1 = BI->getSuccessor(0);
7391 BasicBlock *BB2 = BI->getSuccessor(1);
7392 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7393 if (Succ == BB)
7394 return false;
7395 if (&Succ->front() != Succ->getTerminator())
7396 return false;
7397 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7398 if (!SuccBI || !SuccBI->isConditional())
7399 return false;
7400 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7401 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7402 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7403 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7404 };
7405 BranchInst *BB1BI, *BB2BI;
7406 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7407 return false;
7408
7409 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7410 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7411 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7412 return false;
7413
7414 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7415 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7416 IRBuilder<> Builder(BI);
7417 BI->setCondition(
7418 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7419 BB1->removePredecessor(BB);
7420 BI->setSuccessor(0, BB4);
7421 BB2->removePredecessor(BB);
7422 BI->setSuccessor(1, BB3);
7423 if (DTU) {
7425 Updates.push_back({DominatorTree::Delete, BB, BB1});
7426 Updates.push_back({DominatorTree::Insert, BB, BB4});
7427 Updates.push_back({DominatorTree::Delete, BB, BB2});
7428 Updates.push_back({DominatorTree::Insert, BB, BB3});
7429
7430 DTU->applyUpdates(Updates);
7431 }
7432 bool HasWeight = false;
7433 uint64_t BBTWeight, BBFWeight;
7434 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
7435 HasWeight = true;
7436 else
7437 BBTWeight = BBFWeight = 1;
7438 uint64_t BB1TWeight, BB1FWeight;
7439 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
7440 HasWeight = true;
7441 else
7442 BB1TWeight = BB1FWeight = 1;
7443 uint64_t BB2TWeight, BB2FWeight;
7444 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
7445 HasWeight = true;
7446 else
7447 BB2TWeight = BB2FWeight = 1;
7448 if (HasWeight) {
7449 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
7450 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
7451 FitWeights(Weights);
7452 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
7453 }
7454 return true;
7455}
7456
7457bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7458 assert(
7459 !isa<ConstantInt>(BI->getCondition()) &&
7460 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7461 "Tautological conditional branch should have been eliminated already.");
7462
7463 BasicBlock *BB = BI->getParent();
7464 if (!Options.SimplifyCondBranch ||
7465 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7466 return false;
7467
7468 // Conditional branch
7469 if (isValueEqualityComparison(BI)) {
7470 // If we only have one predecessor, and if it is a branch on this value,
7471 // see if that predecessor totally determines the outcome of this
7472 // switch.
7473 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7474 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7475 return requestResimplify();
7476
7477 // This block must be empty, except for the setcond inst, if it exists.
7478 // Ignore dbg and pseudo intrinsics.
7479 auto I = BB->instructionsWithoutDebug(true).begin();
7480 if (&*I == BI) {
7481 if (FoldValueComparisonIntoPredecessors(BI, Builder))
7482 return requestResimplify();
7483 } else if (&*I == cast<Instruction>(BI->getCondition())) {
7484 ++I;
7485 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7486 return requestResimplify();
7487 }
7488 }
7489
7490 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7491 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7492 return true;
7493
7494 // If this basic block has dominating predecessor blocks and the dominating
7495 // blocks' conditions imply BI's condition, we know the direction of BI.
7496 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7497 if (Imp) {
7498 // Turn this into a branch on constant.
7499 auto *OldCond = BI->getCondition();
7500 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7501 : ConstantInt::getFalse(BB->getContext());
7502 BI->setCondition(TorF);
7504 return requestResimplify();
7505 }
7506
7507 // If this basic block is ONLY a compare and a branch, and if a predecessor
7508 // branches to us and one of our successors, fold the comparison into the
7509 // predecessor and use logical operations to pick the right destination.
7510 if (Options.SpeculateBlocks &&
7511 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7512 Options.BonusInstThreshold))
7513 return requestResimplify();
7514
7515 // We have a conditional branch to two blocks that are only reachable
7516 // from BI. We know that the condbr dominates the two blocks, so see if
7517 // there is any identical code in the "then" and "else" blocks. If so, we
7518 // can hoist it up to the branching block.
7519 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7520 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7521 if (HoistCommon && hoistCommonCodeFromSuccessors(
7522 BI->getParent(), !Options.HoistCommonInsts))
7523 return requestResimplify();
7524 } else {
7525 // If Successor #1 has multiple preds, we may be able to conditionally
7526 // execute Successor #0 if it branches to Successor #1.
7527 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7528 if (Succ0TI->getNumSuccessors() == 1 &&
7529 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7530 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7531 return requestResimplify();
7532 }
7533 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7534 // If Successor #0 has multiple preds, we may be able to conditionally
7535 // execute Successor #1 if it branches to Successor #0.
7536 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7537 if (Succ1TI->getNumSuccessors() == 1 &&
7538 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7539 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7540 return requestResimplify();
7541 }
7542
7543 // If this is a branch on something for which we know the constant value in
7544 // predecessors (e.g. a phi node in the current block), thread control
7545 // through this block.
7547 return requestResimplify();
7548
7549 // Scan predecessor blocks for conditional branches.
7550 for (BasicBlock *Pred : predecessors(BB))
7551 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7552 if (PBI != BI && PBI->isConditional())
7553 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7554 return requestResimplify();
7555
7556 // Look for diamond patterns.
7557 if (MergeCondStores)
7559 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7560 if (PBI != BI && PBI->isConditional())
7561 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7562 return requestResimplify();
7563
7564 // Look for nested conditional branches.
7565 if (mergeNestedCondBranch(BI, DTU))
7566 return requestResimplify();
7567
7568 return false;
7569}
7570
7571/// Check if passing a value to an instruction will cause undefined behavior.
7572static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7573 Constant *C = dyn_cast<Constant>(V);
7574 if (!C)
7575 return false;
7576
7577 if (I->use_empty())
7578 return false;
7579
7580 if (C->isNullValue() || isa<UndefValue>(C)) {
7581 // Only look at the first use we can handle, avoid hurting compile time with
7582 // long uselists
7583 auto FindUse = llvm::find_if(I->users(), [](auto *U) {
7584 auto *Use = cast<Instruction>(U);
7585 // Change this list when we want to add new instructions.
7586 switch (Use->getOpcode()) {
7587 default:
7588 return false;
7589 case Instruction::GetElementPtr:
7590 case Instruction::Ret:
7591 case Instruction::BitCast:
7592 case Instruction::Load:
7593 case Instruction::Store:
7594 case Instruction::Call:
7595 case Instruction::CallBr:
7596 case Instruction::Invoke:
7597 return true;
7598 }
7599 });
7600 if (FindUse == I->user_end())
7601 return false;
7602 auto *Use = cast<Instruction>(*FindUse);
7603 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7604 // before I in the block. The latter two can be the case if Use is a
7605 // PHI node.
7606 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7607 return false;
7608
7609 // Now make sure that there are no instructions in between that can alter
7610 // control flow (eg. calls)
7611 auto InstrRange =
7612 make_range(std::next(I->getIterator()), Use->getIterator());
7613 if (any_of(InstrRange, [](Instruction &I) {
7615 }))
7616 return false;
7617
7618 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7619 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7620 if (GEP->getPointerOperand() == I) {
7621 // The current base address is null, there are four cases to consider:
7622 // getelementptr (TY, null, 0) -> null
7623 // getelementptr (TY, null, not zero) -> may be modified
7624 // getelementptr inbounds (TY, null, 0) -> null
7625 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7626 // undefined?
7627 if (!GEP->hasAllZeroIndices() &&
7628 (!GEP->isInBounds() ||
7629 NullPointerIsDefined(GEP->getFunction(),
7630 GEP->getPointerAddressSpace())))
7631 PtrValueMayBeModified = true;
7632 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7633 }
7634
7635 // Look through return.
7636 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7637 bool HasNoUndefAttr =
7638 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7639 // Return undefined to a noundef return value is undefined.
7640 if (isa<UndefValue>(C) && HasNoUndefAttr)
7641 return true;
7642 // Return null to a nonnull+noundef return value is undefined.
7643 if (C->isNullValue() && HasNoUndefAttr &&
7644 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7645 return !PtrValueMayBeModified;
7646 }
7647 }
7648
7649 // Look through bitcasts.
7650 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7651 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7652
7653 // Load from null is undefined.
7654 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7655 if (!LI->isVolatile())
7656 return !NullPointerIsDefined(LI->getFunction(),
7657 LI->getPointerAddressSpace());
7658
7659 // Store to null is undefined.
7660 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7661 if (!SI->isVolatile())
7662 return (!NullPointerIsDefined(SI->getFunction(),
7663 SI->getPointerAddressSpace())) &&
7664 SI->getPointerOperand() == I;
7665
7666 // llvm.assume(false/undef) always triggers immediate UB.
7667 if (auto *Assume = dyn_cast<AssumeInst>(Use)) {
7668 // Ignore assume operand bundles.
7669 if (I == Assume->getArgOperand(0))
7670 return true;
7671 }
7672
7673 if (auto *CB = dyn_cast<CallBase>(Use)) {
7674 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7675 return false;
7676 // A call to null is undefined.
7677 if (CB->getCalledOperand() == I)
7678 return true;
7679
7680 if (C->isNullValue()) {
7681 for (const llvm::Use &Arg : CB->args())
7682 if (Arg == I) {
7683 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7684 if (CB->isPassingUndefUB(ArgIdx) &&
7685 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7686 // Passing null to a nonnnull+noundef argument is undefined.
7687 return !PtrValueMayBeModified;
7688 }
7689 }
7690 } else if (isa<UndefValue>(C)) {
7691 // Passing undef to a noundef argument is undefined.
7692 for (const llvm::Use &Arg : CB->args())
7693 if (Arg == I) {
7694 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7695 if (CB->isPassingUndefUB(ArgIdx)) {
7696 // Passing undef to a noundef argument is undefined.
7697 return true;
7698 }
7699 }
7700 }
7701 }
7702 }
7703 return false;
7704}
7705
7706/// If BB has an incoming value that will always trigger undefined behavior
7707/// (eg. null pointer dereference), remove the branch leading here.
7709 DomTreeUpdater *DTU,
7710 AssumptionCache *AC) {
7711 for (PHINode &PHI : BB->phis())
7712 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7713 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7714 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7715 Instruction *T = Predecessor->getTerminator();
7716 IRBuilder<> Builder(T);
7717 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7718 BB->removePredecessor(Predecessor);
7719 // Turn unconditional branches into unreachables and remove the dead
7720 // destination from conditional branches.
7721 if (BI->isUnconditional())
7722 Builder.CreateUnreachable();
7723 else {
7724 // Preserve guarding condition in assume, because it might not be
7725 // inferrable from any dominating condition.
7726 Value *Cond = BI->getCondition();
7727 CallInst *Assumption;
7728 if (BI->getSuccessor(0) == BB)
7729 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7730 else
7731 Assumption = Builder.CreateAssumption(Cond);
7732 if (AC)
7733 AC->registerAssumption(cast<AssumeInst>(Assumption));
7734 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7735 : BI->getSuccessor(0));
7736 }
7737 BI->eraseFromParent();
7738 if (DTU)
7739 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7740 return true;
7741 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7742 // Redirect all branches leading to UB into
7743 // a newly created unreachable block.
7744 BasicBlock *Unreachable = BasicBlock::Create(
7745 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7746 Builder.SetInsertPoint(Unreachable);
7747 // The new block contains only one instruction: Unreachable
7748 Builder.CreateUnreachable();
7749 for (const auto &Case : SI->cases())
7750 if (Case.getCaseSuccessor() == BB) {
7751 BB->removePredecessor(Predecessor);
7752 Case.setSuccessor(Unreachable);
7753 }
7754 if (SI->getDefaultDest() == BB) {
7755 BB->removePredecessor(Predecessor);
7756 SI->setDefaultDest(Unreachable);
7757 }
7758
7759 if (DTU)
7760 DTU->applyUpdates(
7761 { { DominatorTree::Insert, Predecessor, Unreachable },
7762 { DominatorTree::Delete, Predecessor, BB } });
7763 return true;
7764 }
7765 }
7766
7767 return false;
7768}
7769
7770bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7771 bool Changed = false;
7772
7773 assert(BB && BB->getParent() && "Block not embedded in function!");
7774 assert(BB->getTerminator() && "Degenerate basic block encountered!");
7775
7776 // Remove basic blocks that have no predecessors (except the entry block)...
7777 // or that just have themself as a predecessor. These are unreachable.
7778 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7779 BB->getSinglePredecessor() == BB) {
7780 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7781 DeleteDeadBlock(BB, DTU);
7782 return true;
7783 }
7784
7785 // Check to see if we can constant propagate this terminator instruction
7786 // away...
7787 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7788 /*TLI=*/nullptr, DTU);
7789
7790 // Check for and eliminate duplicate PHI nodes in this block.
7791 Changed |= EliminateDuplicatePHINodes(BB);
7792
7793 // Check for and remove branches that will always cause undefined behavior.
7795 return requestResimplify();
7796
7797 // Merge basic blocks into their predecessor if there is only one distinct
7798 // pred, and if there is only one distinct successor of the predecessor, and
7799 // if there are no PHI nodes.
7800 if (MergeBlockIntoPredecessor(BB, DTU))
7801 return true;
7802
7803 if (SinkCommon && Options.SinkCommonInsts)
7804 if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7805 MergeCompatibleInvokes(BB, DTU)) {
7806 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7807 // so we may now how duplicate PHI's.
7808 // Let's rerun EliminateDuplicatePHINodes() first,
7809 // before FoldTwoEntryPHINode() potentially converts them into select's,
7810 // after which we'd need a whole EarlyCSE pass run to cleanup them.
7811 return true;
7812 }
7813
7814 IRBuilder<> Builder(BB);
7815
7816 if (Options.SpeculateBlocks &&
7817 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7818 // If there is a trivial two-entry PHI node in this basic block, and we can
7819 // eliminate it, do so now.
7820 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7821 if (PN->getNumIncomingValues() == 2)
7822 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL,
7823 Options.SpeculateUnpredictables))
7824 return true;
7825 }
7826
7828 Builder.SetInsertPoint(Terminator);
7829 switch (Terminator->getOpcode()) {
7830 case Instruction::Br:
7831 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7832 break;
7833 case Instruction::Resume:
7834 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7835 break;
7836 case Instruction::CleanupRet:
7837 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7838 break;
7839 case Instruction::Switch:
7840 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7841 break;
7842 case Instruction::Unreachable:
7843 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7844 break;
7845 case Instruction::IndirectBr:
7846 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7847 break;
7848 }
7849
7850 return Changed;
7851}
7852
7853bool SimplifyCFGOpt::run(BasicBlock *BB) {
7854 bool Changed = false;
7855
7856 // Repeated simplify BB as long as resimplification is requested.
7857 do {
7858 Resimplify = false;
7859
7860 // Perform one round of simplifcation. Resimplify flag will be set if
7861 // another iteration is requested.
7862 Changed |= simplifyOnce(BB);
7863 } while (Resimplify);
7864
7865 return Changed;
7866}
7867
7870 ArrayRef<WeakVH> LoopHeaders) {
7871 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
7872 Options)
7873 .run(BB);
7874}
#define Fail
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1294
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static Constant * LookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static void GetBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static void EliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static std::optional< bool > FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static PHINode * FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool IncomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool ForwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static void FitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static unsigned skippedInstrFlags(Instruction *I)
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static bool ValuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void MergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool ShouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool CasesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1181
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1229
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1146
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1491
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:336
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:455
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1110
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:377
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:507
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:248
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:648
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:365
const Instruction & front() const
Definition: BasicBlock.h:461
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:202
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:479
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:495
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:329
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:465
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:487
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:717
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:294
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:384
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:677
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:483
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:621
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:290
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:514
This class represents a no-op cast from one type to another.
The address of a basic block.
Definition: Constants.h:890
BasicBlock * getBasicBlock() const
Definition: Constants.h:919
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1084
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2587
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isNegative() const
Definition: Constants.h:201
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:256
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:185
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:149
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:745
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
iterator begin()
Definition: Function.h:816
size_t size() const
Definition: Function.h:821
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2262
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2044
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1268
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2540
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1879
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:230
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1148
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2246
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1125
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1795
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2026
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:552
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1808
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1332
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1119
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1681
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2201
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1524
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1687
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1366
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:104
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:381
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:277
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1706
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:932
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:174
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:301
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:361
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:418
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
Align getAlign() const
Definition: Instructions.h:329
bool isSimple() const
Definition: Instructions.h:366
Value * getValueOperand()
Definition: Instructions.h:374
bool isUnordered() const
Definition: Instructions.h:368
Value * getPointerOperand()
Definition: Instructions.h:377
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1808
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:238
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1715
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:130
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:272
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2184
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2013
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1768
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:698
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1118
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2102
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1422
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3164
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3345
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3611
@ And
Bitwise or logical AND of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4113
auto max_element(R &&Range)
Definition: STLExtras.h:1986
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1607
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2025
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1485
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:281
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254